1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/tun.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/rawip_impl.h> 99 #include <inet/rts_impl.h> 100 #include <sys/squeue_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 106 #include 
<rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern int ip_squeue_flag; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t, cred_t *); 196 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static void ip_bind_post_handling_v6(conn_t *, mblk_t *, boolean_t, 199 boolean_t, ip_stack_t *); 200 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 201 const in6_addr_t *, uint16_t, boolean_t); 202 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 205 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 206 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 207 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 208 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 209 uint8_t *, uint_t, uint8_t, ip_stack_t *); 210 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 211 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 212 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 213 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 214 conn_t *, int, int, zoneid_t); 215 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 216 ipif_t **); 217 218 /* 219 * A template for an IPv6 AR_ENTRY_QUERY 220 */ 221 static areq_t ipv6_areq_template = { 222 AR_ENTRY_QUERY, /* cmd */ 223 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 224 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 225 IP6_DL_SAP, /* protocol, from arps perspective */ 226 sizeof (areq_t), /* target addr offset */ 227 IPV6_ADDR_LEN, /* target addr_length 
*/ 228 0, /* flags */ 229 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 230 IPV6_ADDR_LEN, /* sender addr length */ 231 6, /* xmit_count */ 232 1000, /* (re)xmit_interval in milliseconds */ 233 4 /* max # of requests to buffer */ 234 /* anything else filled in by the code */ 235 }; 236 237 /* 238 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 239 * The message has already been checksummed and if needed, 240 * a copy has been made to be sent any interested ICMP client (conn) 241 * Note that this is different than icmp_inbound() which does the fanout 242 * to conn's as well as local processing of the ICMP packets. 243 * 244 * All error messages are passed to the matching transport stream. 245 * 246 * Zones notes: 247 * The packet is only processed in the context of the specified zone: typically 248 * only this zone will reply to an echo request. This means that the caller must 249 * call icmp_inbound_v6() for each relevant zone. 250 */ 251 static void 252 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 253 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 254 mblk_t *dl_mp) 255 { 256 icmp6_t *icmp6; 257 ip6_t *ip6h; 258 boolean_t interested; 259 in6_addr_t origsrc; 260 mblk_t *first_mp; 261 ipsec_in_t *ii; 262 ip_stack_t *ipst = ill->ill_ipst; 263 264 ASSERT(ill != NULL); 265 first_mp = mp; 266 if (mctl_present) { 267 mp = first_mp->b_cont; 268 ASSERT(mp != NULL); 269 270 ii = (ipsec_in_t *)first_mp->b_rptr; 271 ASSERT(ii->ipsec_in_type == IPSEC_IN); 272 } 273 274 ip6h = (ip6_t *)mp->b_rptr; 275 276 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 277 278 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 279 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 280 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 281 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 282 freemsg(first_mp); 283 return; 284 } 285 ip6h = (ip6_t *)mp->b_rptr; 286 } 287 if (ipst->ips_icmp_accept_clear_messages 
== 0) { 288 first_mp = ipsec_check_global_policy(first_mp, NULL, 289 NULL, ip6h, mctl_present, ipst->ips_netstack); 290 if (first_mp == NULL) 291 return; 292 } 293 294 /* 295 * On a labeled system, we have to check whether the zone itself is 296 * permitted to receive raw traffic. 297 */ 298 if (is_system_labeled()) { 299 if (zoneid == ALL_ZONES) 300 zoneid = tsol_packet_to_zoneid(mp); 301 if (!tsol_can_accept_raw(mp, B_FALSE)) { 302 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 303 zoneid)); 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 305 freemsg(first_mp); 306 return; 307 } 308 } 309 310 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 311 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 312 icmp6->icmp6_code)); 313 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 314 315 /* Initiate IPPF processing here */ 316 if (IP6_IN_IPP(flags, ipst)) { 317 318 /* 319 * If the ifindex changes due to SIOCSLIFINDEX 320 * packet may return to IP on the wrong ill. 
321 */ 322 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 323 if (mp == NULL) { 324 if (mctl_present) { 325 freeb(first_mp); 326 } 327 return; 328 } 329 } 330 331 switch (icmp6->icmp6_type) { 332 case ICMP6_DST_UNREACH: 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 334 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 336 break; 337 338 case ICMP6_TIME_EXCEEDED: 339 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 340 break; 341 342 case ICMP6_PARAM_PROB: 343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 344 break; 345 346 case ICMP6_PACKET_TOO_BIG: 347 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 348 zoneid); 349 return; 350 case ICMP6_ECHO_REQUEST: 351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 352 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 353 !ipst->ips_ipv6_resp_echo_mcast) 354 break; 355 356 /* 357 * We must have exclusive use of the mblk to convert it to 358 * a response. 359 * If not, we copy it. 360 */ 361 if (mp->b_datap->db_ref > 1) { 362 mblk_t *mp1; 363 364 mp1 = copymsg(mp); 365 freemsg(mp); 366 if (mp1 == NULL) { 367 BUMP_MIB(ill->ill_icmp6_mib, 368 ipv6IfIcmpInErrors); 369 if (mctl_present) 370 freeb(first_mp); 371 return; 372 } 373 mp = mp1; 374 ip6h = (ip6_t *)mp->b_rptr; 375 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 376 if (mctl_present) 377 first_mp->b_cont = mp; 378 else 379 first_mp = mp; 380 } 381 382 /* 383 * Turn the echo into an echo reply. 384 * Remove any extension headers (do not reverse a source route) 385 * and clear the flow id (keep traffic class for now). 
386 */ 387 if (hdr_length != IPV6_HDR_LEN) { 388 int i; 389 390 for (i = 0; i < IPV6_HDR_LEN; i++) 391 mp->b_rptr[hdr_length - i - 1] = 392 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 393 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 394 ip6h = (ip6_t *)mp->b_rptr; 395 ip6h->ip6_nxt = IPPROTO_ICMPV6; 396 hdr_length = IPV6_HDR_LEN; 397 } 398 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 399 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 400 401 ip6h->ip6_plen = 402 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 403 origsrc = ip6h->ip6_src; 404 /* 405 * Reverse the source and destination addresses. 406 * If the return address is a multicast, zero out the source 407 * (ip_wput_v6 will set an address). 408 */ 409 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 410 ip6h->ip6_src = ipv6_all_zeros; 411 ip6h->ip6_dst = origsrc; 412 } else { 413 ip6h->ip6_src = ip6h->ip6_dst; 414 ip6h->ip6_dst = origsrc; 415 } 416 417 /* set the hop limit */ 418 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 419 420 /* 421 * Prepare for checksum by putting icmp length in the icmp 422 * checksum field. The checksum is calculated in ip_wput_v6. 423 */ 424 icmp6->icmp6_cksum = ip6h->ip6_plen; 425 426 if (!mctl_present) { 427 /* 428 * This packet should go out the same way as it 429 * came in i.e in clear. To make sure that global 430 * policy will not be applied to this in ip_wput, 431 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 
432 */ 433 ASSERT(first_mp == mp); 434 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 435 if (first_mp == NULL) { 436 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 437 freemsg(mp); 438 return; 439 } 440 ii = (ipsec_in_t *)first_mp->b_rptr; 441 442 /* This is not a secure packet */ 443 ii->ipsec_in_secure = B_FALSE; 444 first_mp->b_cont = mp; 445 } 446 ii->ipsec_in_zoneid = zoneid; 447 ASSERT(zoneid != ALL_ZONES); 448 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 449 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 450 return; 451 } 452 put(WR(q), first_mp); 453 return; 454 455 case ICMP6_ECHO_REPLY: 456 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 457 break; 458 459 case ND_ROUTER_SOLICIT: 460 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 461 break; 462 463 case ND_ROUTER_ADVERT: 464 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 465 break; 466 467 case ND_NEIGHBOR_SOLICIT: 468 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 469 if (mctl_present) 470 freeb(first_mp); 471 /* XXX may wish to pass first_mp up to ndp_input someday. */ 472 ndp_input(inill, mp, dl_mp); 473 return; 474 475 case ND_NEIGHBOR_ADVERT: 476 BUMP_MIB(ill->ill_icmp6_mib, 477 ipv6IfIcmpInNeighborAdvertisements); 478 if (mctl_present) 479 freeb(first_mp); 480 /* XXX may wish to pass first_mp up to ndp_input someday. */ 481 ndp_input(inill, mp, dl_mp); 482 return; 483 484 case ND_REDIRECT: { 485 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 486 487 if (ipst->ips_ipv6_ignore_redirect) 488 break; 489 490 /* 491 * As there is no upper client to deliver, we don't 492 * need the first_mp any more. 493 */ 494 if (mctl_present) 495 freeb(first_mp); 496 if (!pullupmsg(mp, -1)) { 497 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 498 break; 499 } 500 icmp_redirect_v6(q, mp, ill); 501 return; 502 } 503 504 /* 505 * The next three icmp messages will be handled by MLD. 
506 * Pass all valid MLD packets up to any process(es) 507 * listening on a raw ICMP socket. MLD messages are 508 * freed by mld_input function. 509 */ 510 case MLD_LISTENER_QUERY: 511 case MLD_LISTENER_REPORT: 512 case MLD_LISTENER_REDUCTION: 513 if (mctl_present) 514 freeb(first_mp); 515 mld_input(q, mp, ill); 516 return; 517 default: 518 break; 519 } 520 if (interested) { 521 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 522 inill, mctl_present, zoneid); 523 } else { 524 freemsg(first_mp); 525 } 526 } 527 528 /* 529 * Process received IPv6 ICMP Packet too big. 530 * After updating any IRE it does the fanout to any matching transport streams. 531 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 532 */ 533 /* ARGSUSED */ 534 static void 535 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 536 boolean_t mctl_present, zoneid_t zoneid) 537 { 538 ip6_t *ip6h; 539 ip6_t *inner_ip6h; 540 icmp6_t *icmp6; 541 uint16_t hdr_length; 542 uint32_t mtu; 543 ire_t *ire, *first_ire; 544 mblk_t *first_mp; 545 ip_stack_t *ipst = ill->ill_ipst; 546 547 first_mp = mp; 548 if (mctl_present) 549 mp = first_mp->b_cont; 550 /* 551 * We must have exclusive use of the mblk to update the MTU 552 * in the packet. 553 * If not, we copy it. 554 * 555 * If there's an M_CTL present, we know that allocated first_mp 556 * earlier in this function, so we know first_mp has refcnt of one. 
557 */ 558 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 559 if (mp->b_datap->db_ref > 1) { 560 mblk_t *mp1; 561 562 mp1 = copymsg(mp); 563 freemsg(mp); 564 if (mp1 == NULL) { 565 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 566 if (mctl_present) 567 freeb(first_mp); 568 return; 569 } 570 mp = mp1; 571 if (mctl_present) 572 first_mp->b_cont = mp; 573 else 574 first_mp = mp; 575 } 576 ip6h = (ip6_t *)mp->b_rptr; 577 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 578 hdr_length = ip_hdr_length_v6(mp, ip6h); 579 else 580 hdr_length = IPV6_HDR_LEN; 581 582 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 583 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 584 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 585 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 586 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 587 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 588 freemsg(first_mp); 589 return; 590 } 591 ip6h = (ip6_t *)mp->b_rptr; 592 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 593 inner_ip6h = (ip6_t *)&icmp6[1]; 594 } 595 596 /* 597 * For link local destinations matching simply on IRE type is not 598 * sufficient. Same link local addresses for different ILL's is 599 * possible. 
600 */ 601 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 602 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 603 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 604 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 605 606 if (first_ire == NULL) { 607 if (ip_debug > 2) { 608 /* ip1dbg */ 609 pr_addr_dbg("icmp_inbound_too_big_v6:" 610 "no ire for dst %s\n", AF_INET6, 611 &inner_ip6h->ip6_dst); 612 } 613 freemsg(first_mp); 614 return; 615 } 616 617 mtu = ntohl(icmp6->icmp6_mtu); 618 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 619 for (ire = first_ire; ire != NULL && 620 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 621 ire = ire->ire_next) { 622 mutex_enter(&ire->ire_lock); 623 if (mtu < IPV6_MIN_MTU) { 624 ip1dbg(("Received mtu less than IPv6 " 625 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 626 mtu = IPV6_MIN_MTU; 627 /* 628 * If an mtu less than IPv6 min mtu is received, 629 * we must include a fragment header in 630 * subsequent packets. 631 */ 632 ire->ire_frag_flag |= IPH_FRAG_HDR; 633 } 634 ip1dbg(("Received mtu from router: %d\n", mtu)); 635 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 636 /* Record the new max frag size for the ULP. */ 637 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 638 /* 639 * If we need a fragment header in every packet 640 * (above case or multirouting), make sure the 641 * ULP takes it into account when computing the 642 * payload size. 
643 */ 644 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 645 sizeof (ip6_frag_t)); 646 } else { 647 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 648 } 649 mutex_exit(&ire->ire_lock); 650 } 651 rw_exit(&first_ire->ire_bucket->irb_lock); 652 ire_refrele(first_ire); 653 } else { 654 irb_t *irb = NULL; 655 /* 656 * for non-link local destinations we match only on the IRE type 657 */ 658 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 659 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 660 ipst); 661 if (ire == NULL) { 662 if (ip_debug > 2) { 663 /* ip1dbg */ 664 pr_addr_dbg("icmp_inbound_too_big_v6:" 665 "no ire for dst %s\n", 666 AF_INET6, &inner_ip6h->ip6_dst); 667 } 668 freemsg(first_mp); 669 return; 670 } 671 irb = ire->ire_bucket; 672 ire_refrele(ire); 673 rw_enter(&irb->irb_lock, RW_READER); 674 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 675 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 676 &inner_ip6h->ip6_dst)) { 677 mtu = ntohl(icmp6->icmp6_mtu); 678 mutex_enter(&ire->ire_lock); 679 if (mtu < IPV6_MIN_MTU) { 680 ip1dbg(("Received mtu less than IPv6" 681 "min mtu %d: %d\n", 682 IPV6_MIN_MTU, mtu)); 683 mtu = IPV6_MIN_MTU; 684 /* 685 * If an mtu less than IPv6 min mtu is 686 * received, we must include a fragment 687 * header in subsequent packets. 688 */ 689 ire->ire_frag_flag |= IPH_FRAG_HDR; 690 } 691 692 ip1dbg(("Received mtu from router: %d\n", mtu)); 693 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 694 /* Record the new max frag size for the ULP. */ 695 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 696 /* 697 * If we need a fragment header in 698 * every packet (above case or 699 * multirouting), make sure the ULP 700 * takes it into account when computing 701 * the payload size. 
702 */ 703 icmp6->icmp6_mtu = 704 htonl(ire->ire_max_frag - 705 sizeof (ip6_frag_t)); 706 } else { 707 icmp6->icmp6_mtu = 708 htonl(ire->ire_max_frag); 709 } 710 mutex_exit(&ire->ire_lock); 711 } 712 } 713 rw_exit(&irb->irb_lock); 714 } 715 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 716 mctl_present, zoneid); 717 } 718 719 /* 720 * Fanout received ICMPv6 error packets to the transports. 721 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 722 */ 723 void 724 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 725 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 726 zoneid_t zoneid) 727 { 728 uint16_t *up; /* Pointer to ports in ULP header */ 729 uint32_t ports; /* reversed ports for fanout */ 730 ip6_t rip6h; /* With reversed addresses */ 731 uint16_t hdr_length; 732 uint8_t *nexthdrp; 733 uint8_t nexthdr; 734 mblk_t *first_mp; 735 ipsec_in_t *ii; 736 tcpha_t *tcpha; 737 conn_t *connp; 738 ip_stack_t *ipst = ill->ill_ipst; 739 740 first_mp = mp; 741 if (mctl_present) { 742 mp = first_mp->b_cont; 743 ASSERT(mp != NULL); 744 745 ii = (ipsec_in_t *)first_mp->b_rptr; 746 ASSERT(ii->ipsec_in_type == IPSEC_IN); 747 } else { 748 ii = NULL; 749 } 750 751 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 752 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 753 754 /* 755 * Need to pullup everything in order to use 756 * ip_hdr_length_nexthdr_v6() 757 */ 758 if (mp->b_cont != NULL) { 759 if (!pullupmsg(mp, -1)) { 760 ip1dbg(("icmp_inbound_error_fanout_v6: " 761 "pullupmsg failed\n")); 762 goto drop_pkt; 763 } 764 ip6h = (ip6_t *)mp->b_rptr; 765 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 766 } 767 768 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 769 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 770 goto drop_pkt; 771 772 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 773 goto drop_pkt; 774 nexthdr = *nexthdrp; 775 776 /* Set message type, must be done 
after pullups */ 777 mp->b_datap->db_type = M_CTL; 778 779 /* Try to pass the ICMP message to clients who need it */ 780 switch (nexthdr) { 781 case IPPROTO_UDP: { 782 /* 783 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 784 * UDP header to get the port information. 785 */ 786 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 787 mp->b_wptr) { 788 break; 789 } 790 /* 791 * Attempt to find a client stream based on port. 792 * Note that we do a reverse lookup since the header is 793 * in the form we sent it out. 794 * The rip6h header is only used for the IPCL_UDP_MATCH_V6 795 * and we only set the src and dst addresses and nexthdr. 796 */ 797 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 798 rip6h.ip6_src = ip6h->ip6_dst; 799 rip6h.ip6_dst = ip6h->ip6_src; 800 rip6h.ip6_nxt = nexthdr; 801 ((uint16_t *)&ports)[0] = up[1]; 802 ((uint16_t *)&ports)[1] = up[0]; 803 804 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 805 IP6_NO_IPPOLICY, mctl_present, zoneid); 806 return; 807 } 808 case IPPROTO_TCP: { 809 /* 810 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 811 * the TCP header to get the port information. 812 */ 813 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 814 mp->b_wptr) { 815 break; 816 } 817 818 /* 819 * Attempt to find a client stream based on port. 820 * Note that we do a reverse lookup since the header is 821 * in the form we sent it out. 822 * The rip6h header is only used for the IP_TCP_*MATCH_V6 and 823 * we only set the src and dst addresses and nexthdr. 
824 */ 825 826 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 827 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 828 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 829 if (connp == NULL) { 830 goto drop_pkt; 831 } 832 833 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 834 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 835 return; 836 837 } 838 case IPPROTO_SCTP: 839 /* 840 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 841 * the SCTP header to get the port information. 842 */ 843 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 844 mp->b_wptr) { 845 break; 846 } 847 848 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 849 ((uint16_t *)&ports)[0] = up[1]; 850 ((uint16_t *)&ports)[1] = up[0]; 851 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 852 mctl_present, IP6_NO_IPPOLICY, zoneid); 853 return; 854 case IPPROTO_ESP: 855 case IPPROTO_AH: { 856 int ipsec_rc; 857 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 858 859 /* 860 * We need a IPSEC_IN in the front to fanout to AH/ESP. 861 * We will re-use the IPSEC_IN if it is already present as 862 * AH/ESP will not affect any fields in the IPSEC_IN for 863 * ICMP errors. If there is no IPSEC_IN, allocate a new 864 * one and attach it in the front. 865 */ 866 if (ii != NULL) { 867 /* 868 * ip_fanout_proto_again converts the ICMP errors 869 * that come back from AH/ESP to M_DATA so that 870 * if it is non-AH/ESP and we do a pullupmsg in 871 * this function, it would work. Convert it back 872 * to M_CTL before we send up as this is a ICMP 873 * error. This could have been generated locally or 874 * by some router. Validate the inner IPSEC 875 * headers. 876 * 877 * NOTE : ill_index is used by ip_fanout_proto_again 878 * to locate the ill. 
879 */ 880 ASSERT(ill != NULL); 881 ii->ipsec_in_ill_index = 882 ill->ill_phyint->phyint_ifindex; 883 ii->ipsec_in_rill_index = 884 inill->ill_phyint->phyint_ifindex; 885 first_mp->b_cont->b_datap->db_type = M_CTL; 886 } else { 887 /* 888 * IPSEC_IN is not present. We attach a ipsec_in 889 * message and send up to IPSEC for validating 890 * and removing the IPSEC headers. Clear 891 * ipsec_in_secure so that when we return 892 * from IPSEC, we don't mistakenly think that this 893 * is a secure packet came from the network. 894 * 895 * NOTE : ill_index is used by ip_fanout_proto_again 896 * to locate the ill. 897 */ 898 ASSERT(first_mp == mp); 899 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 900 ASSERT(ill != NULL); 901 if (first_mp == NULL) { 902 freemsg(mp); 903 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 904 return; 905 } 906 ii = (ipsec_in_t *)first_mp->b_rptr; 907 908 /* This is not a secure packet */ 909 ii->ipsec_in_secure = B_FALSE; 910 first_mp->b_cont = mp; 911 mp->b_datap->db_type = M_CTL; 912 ii->ipsec_in_ill_index = 913 ill->ill_phyint->phyint_ifindex; 914 ii->ipsec_in_rill_index = 915 inill->ill_phyint->phyint_ifindex; 916 } 917 918 if (!ipsec_loaded(ipss)) { 919 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 920 return; 921 } 922 923 if (nexthdr == IPPROTO_ESP) 924 ipsec_rc = ipsecesp_icmp_error(first_mp); 925 else 926 ipsec_rc = ipsecah_icmp_error(first_mp); 927 if (ipsec_rc == IPSEC_STATUS_FAILED) 928 return; 929 930 ip_fanout_proto_again(first_mp, ill, inill, NULL); 931 return; 932 } 933 case IPPROTO_ENCAP: 934 case IPPROTO_IPV6: 935 if ((uint8_t *)ip6h + hdr_length + 936 (nexthdr == IPPROTO_ENCAP ? 
sizeof (ipha_t) : 937 sizeof (ip6_t)) > mp->b_wptr) { 938 goto drop_pkt; 939 } 940 941 if (nexthdr == IPPROTO_ENCAP || 942 !IN6_ARE_ADDR_EQUAL( 943 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 944 &ip6h->ip6_src) || 945 !IN6_ARE_ADDR_EQUAL( 946 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 947 &ip6h->ip6_dst)) { 948 /* 949 * For tunnels that have used IPsec protection, 950 * we need to adjust the MTU to take into account 951 * the IPsec overhead. 952 */ 953 if (ii != NULL) 954 icmp6->icmp6_mtu = htonl( 955 ntohl(icmp6->icmp6_mtu) - 956 ipsec_in_extra_length(first_mp)); 957 } else { 958 /* 959 * Self-encapsulated case. As in the ipv4 case, 960 * we need to strip the 2nd IP header. Since mp 961 * is already pulled-up, we can simply bcopy 962 * the 3rd header + data over the 2nd header. 963 */ 964 uint16_t unused_len; 965 ip6_t *inner_ip6h = (ip6_t *) 966 ((uchar_t *)ip6h + hdr_length); 967 968 /* 969 * Make sure we don't do recursion more than once. 970 */ 971 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 972 &unused_len, &nexthdrp) || 973 *nexthdrp == IPPROTO_IPV6) { 974 goto drop_pkt; 975 } 976 977 /* 978 * We are about to modify the packet. Make a copy if 979 * someone else has a reference to it. 980 */ 981 if (DB_REF(mp) > 1) { 982 mblk_t *mp1; 983 uint16_t icmp6_offset; 984 985 mp1 = copymsg(mp); 986 if (mp1 == NULL) { 987 goto drop_pkt; 988 } 989 icmp6_offset = (uint16_t) 990 ((uchar_t *)icmp6 - mp->b_rptr); 991 freemsg(mp); 992 mp = mp1; 993 994 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 995 ip6h = (ip6_t *)&icmp6[1]; 996 inner_ip6h = (ip6_t *) 997 ((uchar_t *)ip6h + hdr_length); 998 999 if (mctl_present) 1000 first_mp->b_cont = mp; 1001 else 1002 first_mp = mp; 1003 } 1004 1005 /* 1006 * Need to set db_type back to M_DATA before 1007 * refeeding mp into this function. 1008 */ 1009 DB_TYPE(mp) = M_DATA; 1010 1011 /* 1012 * Copy the 3rd header + remaining data on top 1013 * of the 2nd header. 
1014 */ 1015 bcopy(inner_ip6h, ip6h, 1016 mp->b_wptr - (uchar_t *)inner_ip6h); 1017 1018 /* 1019 * Subtract length of the 2nd header. 1020 */ 1021 mp->b_wptr -= hdr_length; 1022 1023 /* 1024 * Now recurse, and see what I _really_ should be 1025 * doing here. 1026 */ 1027 icmp_inbound_error_fanout_v6(q, first_mp, 1028 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1029 mctl_present, zoneid); 1030 return; 1031 } 1032 /* FALLTHRU */ 1033 default: 1034 /* 1035 * The rip6h header is only used for the lookup and we 1036 * only set the src and dst addresses and nexthdr. 1037 */ 1038 rip6h.ip6_src = ip6h->ip6_dst; 1039 rip6h.ip6_dst = ip6h->ip6_src; 1040 rip6h.ip6_nxt = nexthdr; 1041 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, 1042 IP6_NO_IPPOLICY, mctl_present, zoneid); 1043 return; 1044 } 1045 /* NOTREACHED */ 1046 drop_pkt: 1047 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1048 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1049 freemsg(first_mp); 1050 } 1051 1052 /* 1053 * Process received IPv6 ICMP Redirect messages. 
1054 */ 1055 /* ARGSUSED */ 1056 static void 1057 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1058 { 1059 ip6_t *ip6h; 1060 uint16_t hdr_length; 1061 nd_redirect_t *rd; 1062 ire_t *ire; 1063 ire_t *prev_ire; 1064 ire_t *redir_ire; 1065 in6_addr_t *src, *dst, *gateway; 1066 nd_opt_hdr_t *opt; 1067 nce_t *nce; 1068 int nce_flags = 0; 1069 int err = 0; 1070 boolean_t redirect_to_router = B_FALSE; 1071 int len; 1072 int optlen; 1073 iulp_t ulp_info = { 0 }; 1074 ill_t *prev_ire_ill; 1075 ipif_t *ipif; 1076 ip_stack_t *ipst = ill->ill_ipst; 1077 1078 ip6h = (ip6_t *)mp->b_rptr; 1079 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1080 hdr_length = ip_hdr_length_v6(mp, ip6h); 1081 else 1082 hdr_length = IPV6_HDR_LEN; 1083 1084 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1085 len = mp->b_wptr - mp->b_rptr - hdr_length; 1086 src = &ip6h->ip6_src; 1087 dst = &rd->nd_rd_dst; 1088 gateway = &rd->nd_rd_target; 1089 1090 /* Verify if it is a valid redirect */ 1091 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1092 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1093 (rd->nd_rd_code != 0) || 1094 (len < sizeof (nd_redirect_t)) || 1095 (IN6_IS_ADDR_V4MAPPED(dst)) || 1096 (IN6_IS_ADDR_MULTICAST(dst))) { 1097 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1098 freemsg(mp); 1099 return; 1100 } 1101 1102 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1103 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1104 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1105 freemsg(mp); 1106 return; 1107 } 1108 1109 if (len > sizeof (nd_redirect_t)) { 1110 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 1111 len - sizeof (nd_redirect_t))) { 1112 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1113 freemsg(mp); 1114 return; 1115 } 1116 } 1117 1118 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1119 redirect_to_router = B_TRUE; 1120 nce_flags |= NCE_F_ISROUTER; 1121 } 1122 1123 /* ipif will be refreleased afterwards */ 1124 ipif = ipif_get_next_ipif(NULL, ill); 1125 if (ipif == NULL) { 1126 freemsg(mp); 1127 return; 1128 
} 1129 1130 /* 1131 * Verify that the IP source address of the redirect is 1132 * the same as the current first-hop router for the specified 1133 * ICMP destination address. 1134 * Also, Make sure we had a route for the dest in question and 1135 * that route was pointing to the old gateway (the source of the 1136 * redirect packet.) 1137 */ 1138 1139 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, 1140 NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst); 1141 1142 /* 1143 * Check that 1144 * the redirect was not from ourselves 1145 * old gateway is still directly reachable 1146 */ 1147 if (prev_ire == NULL || 1148 prev_ire->ire_type == IRE_LOCAL) { 1149 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1150 ipif_refrele(ipif); 1151 goto fail_redirect; 1152 } 1153 prev_ire_ill = ire_to_ill(prev_ire); 1154 ASSERT(prev_ire_ill != NULL); 1155 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1156 nce_flags |= NCE_F_NONUD; 1157 1158 /* 1159 * Should we use the old ULP info to create the new gateway? From 1160 * a user's perspective, we should inherit the info so that it 1161 * is a "smooth" transition. If we do not do that, then new 1162 * connections going thru the new gateway will have no route metrics, 1163 * which is counter-intuitive to user. From a network point of 1164 * view, this may or may not make sense even though the new gateway 1165 * is still directly connected to us so the route metrics should not 1166 * change much. 1167 * 1168 * But if the old ire_uinfo is not initialized, we do another 1169 * recursive lookup on the dest using the new gateway. There may 1170 * be a route to that. If so, use it to initialize the redirect 1171 * route. 1172 */ 1173 if (prev_ire->ire_uinfo.iulp_set) { 1174 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1175 } else if (redirect_to_router) { 1176 /* 1177 * Only do the following if the redirection is really to 1178 * a router. 
1179 */ 1180 ire_t *tmp_ire; 1181 ire_t *sire; 1182 1183 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1184 ALL_ZONES, 0, NULL, 1185 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1186 ipst); 1187 if (sire != NULL) { 1188 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1189 ASSERT(tmp_ire != NULL); 1190 ire_refrele(tmp_ire); 1191 ire_refrele(sire); 1192 } else if (tmp_ire != NULL) { 1193 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1194 sizeof (iulp_t)); 1195 ire_refrele(tmp_ire); 1196 } 1197 } 1198 1199 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1200 opt = (nd_opt_hdr_t *)&rd[1]; 1201 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1202 if (opt != NULL) { 1203 err = ndp_lookup_then_add_v6(ill, 1204 B_FALSE, /* don't match across illgrp */ 1205 (uchar_t *)&opt[1], /* Link layer address */ 1206 gateway, 1207 &ipv6_all_ones, /* prefix mask */ 1208 &ipv6_all_zeros, /* Mapping mask */ 1209 0, 1210 nce_flags, 1211 ND_STALE, 1212 &nce); 1213 switch (err) { 1214 case 0: 1215 NCE_REFRELE(nce); 1216 break; 1217 case EEXIST: 1218 /* 1219 * Check to see if link layer address has changed and 1220 * process the nce_state accordingly. 1221 */ 1222 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1223 NCE_REFRELE(nce); 1224 break; 1225 default: 1226 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1227 err)); 1228 ipif_refrele(ipif); 1229 goto fail_redirect; 1230 } 1231 } 1232 if (redirect_to_router) { 1233 /* icmp_redirect_ok_v6() must have already verified this */ 1234 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1235 1236 /* 1237 * Create a Route Association. This will allow us to remember 1238 * a router told us to use the particular gateway. 
1239 */ 1240 ire = ire_create_v6( 1241 dst, 1242 &ipv6_all_ones, /* mask */ 1243 &prev_ire->ire_src_addr_v6, /* source addr */ 1244 gateway, /* gateway addr */ 1245 &prev_ire->ire_max_frag, /* max frag */ 1246 NULL, /* no src nce */ 1247 NULL, /* no rfq */ 1248 NULL, /* no stq */ 1249 IRE_HOST, 1250 prev_ire->ire_ipif, 1251 NULL, 1252 0, 1253 0, 1254 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1255 &ulp_info, 1256 NULL, 1257 NULL, 1258 ipst); 1259 } else { 1260 queue_t *stq; 1261 1262 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1263 ? ipif->ipif_rq : ipif->ipif_wq; 1264 1265 /* 1266 * Just create an on link entry, i.e. interface route. 1267 */ 1268 ire = ire_create_v6( 1269 dst, /* gateway == dst */ 1270 &ipv6_all_ones, /* mask */ 1271 &prev_ire->ire_src_addr_v6, /* source addr */ 1272 &ipv6_all_zeros, /* gateway addr */ 1273 &prev_ire->ire_max_frag, /* max frag */ 1274 NULL, /* no src nce */ 1275 NULL, /* ire rfq */ 1276 stq, /* ire stq */ 1277 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1278 prev_ire->ire_ipif, 1279 &ipv6_all_ones, 1280 0, 1281 0, 1282 (RTF_DYNAMIC | RTF_HOST), 1283 &ulp_info, 1284 NULL, 1285 NULL, 1286 ipst); 1287 } 1288 1289 /* Release reference from earlier ipif_get_next_ipif() */ 1290 ipif_refrele(ipif); 1291 1292 if (ire == NULL) 1293 goto fail_redirect; 1294 1295 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1296 1297 /* tell routing sockets that we received a redirect */ 1298 ip_rts_change_v6(RTM_REDIRECT, 1299 &rd->nd_rd_dst, 1300 &rd->nd_rd_target, 1301 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1302 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1303 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1304 1305 /* 1306 * Delete any existing IRE_HOST type ires for this destination. 1307 * This together with the added IRE has the effect of 1308 * modifying an existing redirect. 
1309 */ 1310 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1311 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1312 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); 1313 1314 ire_refrele(ire); /* Held in ire_add_v6 */ 1315 1316 if (redir_ire != NULL) { 1317 if (redir_ire->ire_flags & RTF_DYNAMIC) 1318 ire_delete(redir_ire); 1319 ire_refrele(redir_ire); 1320 } 1321 } 1322 1323 if (prev_ire->ire_type == IRE_CACHE) 1324 ire_delete(prev_ire); 1325 ire_refrele(prev_ire); 1326 prev_ire = NULL; 1327 1328 fail_redirect: 1329 if (prev_ire != NULL) 1330 ire_refrele(prev_ire); 1331 freemsg(mp); 1332 } 1333 1334 static ill_t * 1335 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1336 { 1337 ill_t *ill; 1338 1339 ASSERT(WR(q) == q); 1340 1341 if (q->q_next != NULL) { 1342 ill = (ill_t *)q->q_ptr; 1343 if (ILL_CAN_LOOKUP(ill)) 1344 ill_refhold(ill); 1345 else 1346 ill = NULL; 1347 } else { 1348 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1349 NULL, NULL, NULL, NULL, NULL, ipst); 1350 } 1351 if (ill == NULL) 1352 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1353 return (ill); 1354 } 1355 1356 /* 1357 * Assigns an appropriate source address to the packet. 1358 * If origdst is one of our IP addresses that use it as the source. 1359 * If the queue is an ill queue then select a source from that ill. 1360 * Otherwise pick a source based on a route lookup back to the origsrc. 1361 * 1362 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1363 */ 1364 static in6_addr_t * 1365 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1366 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1367 { 1368 ill_t *ill; 1369 ire_t *ire; 1370 ipif_t *ipif; 1371 1372 ASSERT(!(wq->q_flag & QREADR)); 1373 if (wq->q_next != NULL) { 1374 ill = (ill_t *)wq->q_ptr; 1375 } else { 1376 ill = NULL; 1377 } 1378 1379 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1380 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1381 ipst); 1382 if (ire != NULL) { 1383 /* Destined to one of our addresses */ 1384 *src = *origdst; 1385 ire_refrele(ire); 1386 return (src); 1387 } 1388 if (ire != NULL) { 1389 ire_refrele(ire); 1390 ire = NULL; 1391 } 1392 if (ill == NULL) { 1393 /* What is the route back to the original source? */ 1394 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1395 NULL, NULL, zoneid, NULL, 1396 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1397 if (ire == NULL) { 1398 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1399 return (NULL); 1400 } 1401 ASSERT(ire->ire_ipif != NULL); 1402 ill = ire->ire_ipif->ipif_ill; 1403 ire_refrele(ire); 1404 } 1405 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1406 IPV6_PREFER_SRC_DEFAULT, zoneid); 1407 if (ipif != NULL) { 1408 *src = ipif->ipif_v6src_addr; 1409 ipif_refrele(ipif); 1410 return (src); 1411 } 1412 /* 1413 * Unusual case - can't find a usable source address to reach the 1414 * original source. Use what in the route to the source. 
1415 */ 1416 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1417 NULL, NULL, zoneid, NULL, 1418 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1419 if (ire == NULL) { 1420 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1421 return (NULL); 1422 } 1423 ASSERT(ire != NULL); 1424 *src = ire->ire_src_addr_v6; 1425 ire_refrele(ire); 1426 return (src); 1427 } 1428 1429 /* 1430 * Build and ship an IPv6 ICMP message using the packet data in mp, 1431 * and the ICMP header pointed to by "stuff". (May be called as 1432 * writer.) 1433 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1434 * verify that an icmp error packet can be sent. 1435 * 1436 * If q is an ill write side queue (which is the case when packets 1437 * arrive from ip_rput) then ip_wput code will ensure that packets to 1438 * link-local destinations are sent out that ill. 1439 * 1440 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1441 * source address (see above function). 1442 */ 1443 static void 1444 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1445 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1446 ip_stack_t *ipst) 1447 { 1448 ip6_t *ip6h; 1449 in6_addr_t v6dst; 1450 size_t len_needed; 1451 size_t msg_len; 1452 mblk_t *mp1; 1453 icmp6_t *icmp6; 1454 ill_t *ill; 1455 in6_addr_t v6src; 1456 mblk_t *ipsec_mp; 1457 ipsec_out_t *io; 1458 1459 ill = ip_queue_to_ill_v6(q, ipst); 1460 if (ill == NULL) { 1461 freemsg(mp); 1462 return; 1463 } 1464 1465 if (mctl_present) { 1466 /* 1467 * If it is : 1468 * 1469 * 1) a IPSEC_OUT, then this is caused by outbound 1470 * datagram originating on this host. IPSEC processing 1471 * may or may not have been done. Refer to comments above 1472 * icmp_inbound_error_fanout for details. 1473 * 1474 * 2) a IPSEC_IN if we are generating a icmp_message 1475 * for an incoming datagram destined for us i.e called 1476 * from ip_fanout_send_icmp. 
1477 */ 1478 ipsec_info_t *in; 1479 1480 ipsec_mp = mp; 1481 mp = ipsec_mp->b_cont; 1482 1483 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1484 ip6h = (ip6_t *)mp->b_rptr; 1485 1486 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1487 in->ipsec_info_type == IPSEC_IN); 1488 1489 if (in->ipsec_info_type == IPSEC_IN) { 1490 /* 1491 * Convert the IPSEC_IN to IPSEC_OUT. 1492 */ 1493 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1494 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1495 ill_refrele(ill); 1496 return; 1497 } 1498 } else { 1499 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1500 io = (ipsec_out_t *)in; 1501 /* 1502 * Clear out ipsec_out_proc_begin, so we do a fresh 1503 * ire lookup. 1504 */ 1505 io->ipsec_out_proc_begin = B_FALSE; 1506 } 1507 } else { 1508 /* 1509 * This is in clear. The icmp message we are building 1510 * here should go out in clear. 1511 */ 1512 ipsec_in_t *ii; 1513 ASSERT(mp->b_datap->db_type == M_DATA); 1514 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1515 if (ipsec_mp == NULL) { 1516 freemsg(mp); 1517 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1518 ill_refrele(ill); 1519 return; 1520 } 1521 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1522 1523 /* This is not a secure packet */ 1524 ii->ipsec_in_secure = B_FALSE; 1525 /* 1526 * For trusted extensions using a shared IP address we can 1527 * send using any zoneid. 1528 */ 1529 if (zoneid == ALL_ZONES) 1530 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1531 else 1532 ii->ipsec_in_zoneid = zoneid; 1533 ipsec_mp->b_cont = mp; 1534 ip6h = (ip6_t *)mp->b_rptr; 1535 /* 1536 * Convert the IPSEC_IN to IPSEC_OUT. 
1537 */ 1538 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1539 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1540 ill_refrele(ill); 1541 return; 1542 } 1543 } 1544 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1545 1546 if (v6src_ptr != NULL) { 1547 v6src = *v6src_ptr; 1548 } else { 1549 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1550 &v6src, zoneid, ipst) == NULL) { 1551 freemsg(ipsec_mp); 1552 ill_refrele(ill); 1553 return; 1554 } 1555 } 1556 v6dst = ip6h->ip6_src; 1557 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1558 msg_len = msgdsize(mp); 1559 if (msg_len > len_needed) { 1560 if (!adjmsg(mp, len_needed - msg_len)) { 1561 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1562 freemsg(ipsec_mp); 1563 ill_refrele(ill); 1564 return; 1565 } 1566 msg_len = len_needed; 1567 } 1568 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1569 if (mp1 == NULL) { 1570 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1571 freemsg(ipsec_mp); 1572 ill_refrele(ill); 1573 return; 1574 } 1575 ill_refrele(ill); 1576 mp1->b_cont = mp; 1577 mp = mp1; 1578 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1579 io->ipsec_out_type == IPSEC_OUT); 1580 ipsec_mp->b_cont = mp; 1581 1582 /* 1583 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1584 * node generates be accepted in peace by all on-host destinations. 1585 * If we do NOT assume that all on-host destinations trust 1586 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1587 * (Look for ipsec_out_icmp_loopback). 
1588 */ 1589 io->ipsec_out_icmp_loopback = B_TRUE; 1590 1591 ip6h = (ip6_t *)mp->b_rptr; 1592 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1593 1594 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1595 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1596 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1597 ip6h->ip6_dst = v6dst; 1598 ip6h->ip6_src = v6src; 1599 msg_len += IPV6_HDR_LEN + len; 1600 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1601 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1602 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1603 } 1604 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1605 icmp6 = (icmp6_t *)&ip6h[1]; 1606 bcopy(stuff, (char *)icmp6, len); 1607 /* 1608 * Prepare for checksum by putting icmp length in the icmp 1609 * checksum field. The checksum is calculated in ip_wput_v6. 1610 */ 1611 icmp6->icmp6_cksum = ip6h->ip6_plen; 1612 if (icmp6->icmp6_type == ND_REDIRECT) { 1613 ip6h->ip6_hops = IPV6_MAX_HOPS; 1614 } 1615 /* Send to V6 writeside put routine */ 1616 put(q, ipsec_mp); 1617 } 1618 1619 /* 1620 * Update the output mib when ICMPv6 packets are sent. 
1621 */ 1622 static void 1623 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1624 { 1625 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1626 1627 switch (icmp6->icmp6_type) { 1628 case ICMP6_DST_UNREACH: 1629 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1630 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1631 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1632 break; 1633 1634 case ICMP6_TIME_EXCEEDED: 1635 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1636 break; 1637 1638 case ICMP6_PARAM_PROB: 1639 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1640 break; 1641 1642 case ICMP6_PACKET_TOO_BIG: 1643 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1644 break; 1645 1646 case ICMP6_ECHO_REQUEST: 1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1648 break; 1649 1650 case ICMP6_ECHO_REPLY: 1651 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1652 break; 1653 1654 case ND_ROUTER_SOLICIT: 1655 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1656 break; 1657 1658 case ND_ROUTER_ADVERT: 1659 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1660 break; 1661 1662 case ND_NEIGHBOR_SOLICIT: 1663 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1664 break; 1665 1666 case ND_NEIGHBOR_ADVERT: 1667 BUMP_MIB(ill->ill_icmp6_mib, 1668 ipv6IfIcmpOutNeighborAdvertisements); 1669 break; 1670 1671 case ND_REDIRECT: 1672 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1673 break; 1674 1675 case MLD_LISTENER_QUERY: 1676 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1677 break; 1678 1679 case MLD_LISTENER_REPORT: 1680 case MLD_V2_LISTENER_REPORT: 1681 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1682 break; 1683 1684 case MLD_LISTENER_REDUCTION: 1685 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1686 break; 1687 } 1688 } 1689 1690 /* 1691 * Check if it is ok to send an ICMPv6 error packet in 1692 * response to the IP packet in 
mp. 1693 * Free the message and return null if no 1694 * ICMP error packet should be sent. 1695 */ 1696 static mblk_t * 1697 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1698 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1699 { 1700 ip6_t *ip6h; 1701 1702 if (!mp) 1703 return (NULL); 1704 1705 ip6h = (ip6_t *)mp->b_rptr; 1706 1707 /* Check if source address uniquely identifies the host */ 1708 1709 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1710 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1711 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1712 freemsg(mp); 1713 return (NULL); 1714 } 1715 1716 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1717 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1718 icmp6_t *icmp6; 1719 1720 if (mp->b_wptr - mp->b_rptr < len_needed) { 1721 if (!pullupmsg(mp, len_needed)) { 1722 ill_t *ill; 1723 1724 ill = ip_queue_to_ill_v6(q, ipst); 1725 if (ill == NULL) { 1726 BUMP_MIB(&ipst->ips_icmp6_mib, 1727 ipv6IfIcmpInErrors); 1728 } else { 1729 BUMP_MIB(ill->ill_icmp6_mib, 1730 ipv6IfIcmpInErrors); 1731 ill_refrele(ill); 1732 } 1733 freemsg(mp); 1734 return (NULL); 1735 } 1736 ip6h = (ip6_t *)mp->b_rptr; 1737 } 1738 icmp6 = (icmp6_t *)&ip6h[1]; 1739 /* Explicitly do not generate errors in response to redirects */ 1740 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1741 icmp6->icmp6_type == ND_REDIRECT) { 1742 freemsg(mp); 1743 return (NULL); 1744 } 1745 } 1746 /* 1747 * Check that the destination is not multicast and that the packet 1748 * was not sent on link layer broadcast or multicast. (Exception 1749 * is Packet too big message as per the draft - when mcast_ok is set.) 1750 */ 1751 if (!mcast_ok && 1752 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1753 freemsg(mp); 1754 return (NULL); 1755 } 1756 if (icmp_err_rate_limit(ipst)) { 1757 /* 1758 * Only send ICMP error packets every so often. 1759 * This should be done on a per port/source basis, 1760 * but for now this will suffice. 
1761 */ 1762 freemsg(mp); 1763 return (NULL); 1764 } 1765 return (mp); 1766 } 1767 1768 /* 1769 * Generate an ICMPv6 redirect message. 1770 * Include target link layer address option if it exits. 1771 * Always include redirect header. 1772 */ 1773 static void 1774 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1775 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1776 { 1777 nd_redirect_t *rd; 1778 nd_opt_rd_hdr_t *rdh; 1779 uchar_t *buf; 1780 nce_t *nce = NULL; 1781 nd_opt_hdr_t *opt; 1782 int len; 1783 int ll_opt_len = 0; 1784 int max_redir_hdr_data_len; 1785 int pkt_len; 1786 in6_addr_t *srcp; 1787 ip_stack_t *ipst = ill->ill_ipst; 1788 1789 /* 1790 * We are called from ip_rput where we could 1791 * not have attached an IPSEC_IN. 1792 */ 1793 ASSERT(mp->b_datap->db_type == M_DATA); 1794 1795 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1796 if (mp == NULL) 1797 return; 1798 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1799 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1800 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1801 ill->ill_phys_addr_length + 7)/8 * 8; 1802 } 1803 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1804 ASSERT(len % 4 == 0); 1805 buf = kmem_alloc(len, KM_NOSLEEP); 1806 if (buf == NULL) { 1807 if (nce != NULL) 1808 NCE_REFRELE(nce); 1809 freemsg(mp); 1810 return; 1811 } 1812 1813 rd = (nd_redirect_t *)buf; 1814 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1815 rd->nd_rd_code = 0; 1816 rd->nd_rd_reserved = 0; 1817 rd->nd_rd_target = *targetp; 1818 rd->nd_rd_dst = *dest; 1819 1820 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1821 if (nce != NULL && ll_opt_len != 0) { 1822 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1823 opt->nd_opt_len = ll_opt_len/8; 1824 bcopy((char *)nce->nce_res_mp->b_rptr + 1825 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1826 ill->ill_phys_addr_length); 1827 } 1828 if (nce != NULL) 1829 NCE_REFRELE(nce); 1830 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1831 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1832 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1833 max_redir_hdr_data_len = 1834 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1835 pkt_len = msgdsize(mp); 1836 /* Make sure mp is 8 byte aligned */ 1837 if (pkt_len > max_redir_hdr_data_len) { 1838 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1839 sizeof (nd_opt_rd_hdr_t))/8; 1840 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1841 } else { 1842 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1843 (void) adjmsg(mp, -(pkt_len % 8)); 1844 } 1845 rdh->nd_opt_rh_reserved1 = 0; 1846 rdh->nd_opt_rh_reserved2 = 0; 1847 /* ipif_v6src_addr contains the link-local source address */ 1848 srcp = &ill->ill_ipif->ipif_v6src_addr; 1849 1850 /* Redirects sent by router, and router is global zone */ 1851 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1852 kmem_free(buf, len); 1853 } 1854 1855 1856 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1857 void 1858 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1859 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1860 ip_stack_t *ipst) 1861 { 1862 icmp6_t icmp6; 1863 boolean_t mctl_present; 1864 mblk_t *first_mp; 1865 1866 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1867 1868 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1869 if (mp == NULL) { 1870 if (mctl_present) 1871 freeb(first_mp); 1872 return; 1873 } 1874 bzero(&icmp6, sizeof (icmp6_t)); 1875 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1876 icmp6.icmp6_code = code; 1877 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1878 zoneid, ipst); 1879 } 1880 1881 /* 1882 * Generate an ICMP unreachable message. 
1883 */ 1884 void 1885 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1886 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1887 ip_stack_t *ipst) 1888 { 1889 icmp6_t icmp6; 1890 boolean_t mctl_present; 1891 mblk_t *first_mp; 1892 1893 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1894 1895 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1896 if (mp == NULL) { 1897 if (mctl_present) 1898 freeb(first_mp); 1899 return; 1900 } 1901 bzero(&icmp6, sizeof (icmp6_t)); 1902 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1903 icmp6.icmp6_code = code; 1904 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1905 zoneid, ipst); 1906 } 1907 1908 /* 1909 * Generate an ICMP pkt too big message. 1910 */ 1911 static void 1912 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1913 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1914 { 1915 icmp6_t icmp6; 1916 mblk_t *first_mp; 1917 boolean_t mctl_present; 1918 1919 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1920 1921 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1922 if (mp == NULL) { 1923 if (mctl_present) 1924 freeb(first_mp); 1925 return; 1926 } 1927 bzero(&icmp6, sizeof (icmp6_t)); 1928 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1929 icmp6.icmp6_code = 0; 1930 icmp6.icmp6_mtu = htonl(mtu); 1931 1932 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1933 zoneid, ipst); 1934 } 1935 1936 /* 1937 * Generate an ICMP parameter problem message. (May be called as writer.) 1938 * 'offset' is the offset from the beginning of the packet in error. 
1939 */ 1940 static void 1941 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1942 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1943 ip_stack_t *ipst) 1944 { 1945 icmp6_t icmp6; 1946 boolean_t mctl_present; 1947 mblk_t *first_mp; 1948 1949 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1950 1951 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1952 if (mp == NULL) { 1953 if (mctl_present) 1954 freeb(first_mp); 1955 return; 1956 } 1957 bzero((char *)&icmp6, sizeof (icmp6_t)); 1958 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1959 icmp6.icmp6_code = code; 1960 icmp6.icmp6_pptr = htonl(offset); 1961 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1962 zoneid, ipst); 1963 } 1964 1965 /* 1966 * This code will need to take into account the possibility of binding 1967 * to a link local address on a multi-homed host, in which case the 1968 * outgoing interface (from the conn) will need to be used when getting 1969 * an ire for the dst. Going through proper outgoing interface and 1970 * choosing the source address corresponding to the outgoing interface 1971 * is necessary when the destination address is a link-local address and 1972 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1973 * This can happen when active connection is setup; thus ipp pointer 1974 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1975 * pointer is passed as ipp pointer. 
1976 */ 1977 mblk_t * 1978 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 1979 { 1980 ssize_t len; 1981 int protocol; 1982 struct T_bind_req *tbr; 1983 sin6_t *sin6; 1984 ipa6_conn_t *ac6; 1985 in6_addr_t *v6srcp; 1986 in6_addr_t *v6dstp; 1987 uint16_t lport; 1988 uint16_t fport; 1989 uchar_t *ucp; 1990 int error = 0; 1991 boolean_t local_bind; 1992 ipa6_conn_x_t *acx6; 1993 boolean_t verify_dst; 1994 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1995 cred_t *cr; 1996 1997 /* 1998 * All Solaris components should pass a db_credp 1999 * for this TPI message, hence we ASSERT. 2000 * But in case there is some other M_PROTO that looks 2001 * like a TPI message sent by some other kernel 2002 * component, we check and return an error. 2003 */ 2004 cr = msg_getcred(mp, NULL); 2005 ASSERT(cr != NULL); 2006 if (cr == NULL) { 2007 error = EINVAL; 2008 goto bad_addr; 2009 } 2010 2011 ASSERT(connp->conn_af_isv6); 2012 len = mp->b_wptr - mp->b_rptr; 2013 if (len < (sizeof (*tbr) + 1)) { 2014 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2015 "ip_bind_v6: bogus msg, len %ld", len); 2016 goto bad_addr; 2017 } 2018 /* Back up and extract the protocol identifier. */ 2019 mp->b_wptr--; 2020 tbr = (struct T_bind_req *)mp->b_rptr; 2021 /* Reset the message type in preparation for shipping it back. */ 2022 mp->b_datap->db_type = M_PCPROTO; 2023 2024 protocol = *mp->b_wptr & 0xFF; 2025 connp->conn_ulp = (uint8_t)protocol; 2026 2027 /* 2028 * Check for a zero length address. This is from a protocol that 2029 * wants to register to receive all packets of its type. 2030 */ 2031 if (tbr->ADDR_length == 0) { 2032 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2033 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2034 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2035 NULL) { 2036 /* 2037 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2038 * Do not allow others to bind to these. 
2039 */ 2040 goto bad_addr; 2041 } 2042 2043 /* 2044 * 2045 * The udp module never sends down a zero-length address, 2046 * and allowing this on a labeled system will break MLP 2047 * functionality. 2048 */ 2049 if (is_system_labeled() && protocol == IPPROTO_UDP) 2050 goto bad_addr; 2051 2052 /* Allow ipsec plumbing */ 2053 if (connp->conn_mac_exempt && protocol != IPPROTO_AH && 2054 protocol != IPPROTO_ESP) 2055 goto bad_addr; 2056 2057 connp->conn_srcv6 = ipv6_all_zeros; 2058 ipcl_proto_insert_v6(connp, protocol); 2059 2060 tbr->PRIM_type = T_BIND_ACK; 2061 return (mp); 2062 } 2063 2064 /* Extract the address pointer from the message. */ 2065 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2066 tbr->ADDR_length); 2067 if (ucp == NULL) { 2068 ip1dbg(("ip_bind_v6: no address\n")); 2069 goto bad_addr; 2070 } 2071 if (!OK_32PTR(ucp)) { 2072 ip1dbg(("ip_bind_v6: unaligned address\n")); 2073 goto bad_addr; 2074 } 2075 2076 switch (tbr->ADDR_length) { 2077 default: 2078 ip1dbg(("ip_bind_v6: bad address length %d\n", 2079 (int)tbr->ADDR_length)); 2080 goto bad_addr; 2081 2082 case IPV6_ADDR_LEN: 2083 /* Verification of local address only */ 2084 v6srcp = (in6_addr_t *)ucp; 2085 lport = 0; 2086 local_bind = B_TRUE; 2087 break; 2088 2089 case sizeof (sin6_t): 2090 sin6 = (sin6_t *)ucp; 2091 v6srcp = &sin6->sin6_addr; 2092 lport = sin6->sin6_port; 2093 local_bind = B_TRUE; 2094 break; 2095 2096 case sizeof (ipa6_conn_t): 2097 /* 2098 * Verify that both the source and destination addresses 2099 * are valid. 2100 */ 2101 ac6 = (ipa6_conn_t *)ucp; 2102 v6srcp = &ac6->ac6_laddr; 2103 v6dstp = &ac6->ac6_faddr; 2104 fport = ac6->ac6_fport; 2105 /* For raw socket, the local port is not set. */ 2106 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2107 connp->conn_lport; 2108 local_bind = B_FALSE; 2109 /* Always verify destination reachability. 
*/ 2110 verify_dst = B_TRUE; 2111 break; 2112 2113 case sizeof (ipa6_conn_x_t): 2114 /* 2115 * Verify that the source address is valid. 2116 */ 2117 acx6 = (ipa6_conn_x_t *)ucp; 2118 ac6 = &acx6->ac6x_conn; 2119 v6srcp = &ac6->ac6_laddr; 2120 v6dstp = &ac6->ac6_faddr; 2121 fport = ac6->ac6_fport; 2122 lport = ac6->ac6_lport; 2123 local_bind = B_FALSE; 2124 /* 2125 * Client that passed ipa6_conn_x_t to us specifies whether to 2126 * verify destination reachability. 2127 */ 2128 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2129 break; 2130 } 2131 if (local_bind) { 2132 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2133 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2134 } else { 2135 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2136 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2137 } 2138 2139 if (error == 0) { 2140 /* Send it home. */ 2141 mp->b_datap->db_type = M_PCPROTO; 2142 tbr->PRIM_type = T_BIND_ACK; 2143 return (mp); 2144 } 2145 2146 bad_addr: 2147 ASSERT(error != EINPROGRESS); 2148 if (error > 0) 2149 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2150 else 2151 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2152 return (mp); 2153 } 2154 2155 static void 2156 ip_bind_post_handling_v6(conn_t *connp, mblk_t *mp, 2157 boolean_t version_changed, boolean_t ire_requested, ip_stack_t *ipst) 2158 { 2159 /* Update conn_send and pktversion if v4/v6 changed */ 2160 if (version_changed) { 2161 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2162 } 2163 2164 /* 2165 * Pass the IPSEC headers size in ire_ipsec_overhead. 2166 * We can't do this in ip_bind_insert_ire because the policy 2167 * may not have been inherited at that point in time and hence 2168 * conn_out_enforce_policy may not be set. 
2169 */ 2170 if (ire_requested && connp->conn_out_enforce_policy && 2171 mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE) { 2172 ire_t *ire = (ire_t *)mp->b_rptr; 2173 ASSERT(MBLKL(mp) >= sizeof (ire_t)); 2174 ire->ire_ipsec_overhead = (conn_ipsec_length(connp)); 2175 } 2176 } 2177 2178 /* 2179 * Here address is verified to be a valid local address. 2180 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2181 * address is also considered a valid local address. 2182 * In the case of a multicast address, however, the 2183 * upper protocol is expected to reset the src address 2184 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2185 * no packets are emitted with multicast address as 2186 * source address. 2187 * The addresses valid for bind are: 2188 * (1) - in6addr_any 2189 * (2) - IP address of an UP interface 2190 * (3) - IP address of a DOWN interface 2191 * (4) - a multicast address. In this case 2192 * the conn will only receive packets destined to 2193 * the specified multicast address. Note: the 2194 * application still has to issue an 2195 * IPV6_JOIN_GROUP socket option. 2196 * 2197 * In all the above cases, the bound address must be valid in the current zone. 2198 * When the address is loopback or multicast, there might be many matching IREs 2199 * so bind has to look up based on the zone. 2200 */ 2201 /* 2202 * Verify the local IP address. Does not change the conn_t except 2203 * conn_fully_bound and conn_policy_cached. 
2204 */ 2205 static int 2206 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2207 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2208 { 2209 int error = 0; 2210 ire_t *src_ire = NULL; 2211 zoneid_t zoneid; 2212 mblk_t *mp = NULL; 2213 boolean_t ire_requested; 2214 boolean_t ipsec_policy_set; 2215 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2216 2217 if (mpp) 2218 mp = *mpp; 2219 2220 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2221 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2222 2223 /* 2224 * If it was previously connected, conn_fully_bound would have 2225 * been set. 2226 */ 2227 connp->conn_fully_bound = B_FALSE; 2228 2229 zoneid = connp->conn_zoneid; 2230 2231 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2232 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2233 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2234 /* 2235 * If an address other than in6addr_any is requested, 2236 * we verify that it is a valid address for bind 2237 * Note: Following code is in if-else-if form for 2238 * readability compared to a condition check. 2239 */ 2240 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2241 /* LINTED - statement has no consequent */ 2242 if (IRE_IS_LOCAL(src_ire)) { 2243 /* 2244 * (2) Bind to address of local UP interface 2245 */ 2246 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2247 ipif_t *multi_ipif = NULL; 2248 ire_t *save_ire; 2249 /* 2250 * (4) bind to multicast address. 2251 * Fake out the IRE returned to upper 2252 * layer to be a broadcast IRE in 2253 * ip_bind_insert_ire_v6(). 2254 * Pass other information that matches 2255 * the ipif (e.g. the source address). 
2256 * conn_multicast_ill is only used for 2257 * IPv6 packets 2258 */ 2259 mutex_enter(&connp->conn_lock); 2260 if (connp->conn_multicast_ill != NULL) { 2261 (void) ipif_lookup_zoneid( 2262 connp->conn_multicast_ill, zoneid, 0, 2263 &multi_ipif); 2264 } else { 2265 /* 2266 * Look for default like 2267 * ip_wput_v6 2268 */ 2269 multi_ipif = ipif_lookup_group_v6( 2270 &ipv6_unspecified_group, zoneid, ipst); 2271 } 2272 mutex_exit(&connp->conn_lock); 2273 save_ire = src_ire; 2274 src_ire = NULL; 2275 if (multi_ipif == NULL || !ire_requested || 2276 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2277 src_ire = save_ire; 2278 error = EADDRNOTAVAIL; 2279 } else { 2280 ASSERT(src_ire != NULL); 2281 if (save_ire != NULL) 2282 ire_refrele(save_ire); 2283 } 2284 if (multi_ipif != NULL) 2285 ipif_refrele(multi_ipif); 2286 } else { 2287 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2288 /* 2289 * Not a valid address for bind 2290 */ 2291 error = EADDRNOTAVAIL; 2292 } 2293 } 2294 2295 if (error != 0) { 2296 /* Red Alert! Attempting to be a bogon! */ 2297 if (ip_debug > 2) { 2298 /* ip1dbg */ 2299 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2300 " address %s\n", AF_INET6, v6src); 2301 } 2302 goto bad_addr; 2303 } 2304 } 2305 2306 /* 2307 * Allow setting new policies. For example, disconnects come 2308 * down as ipa_t bind. As we would have set conn_policy_cached 2309 * to B_TRUE before, we should set it to B_FALSE, so that policy 2310 * can change after the disconnect. 2311 */ 2312 connp->conn_policy_cached = B_FALSE; 2313 2314 /* If not fanout_insert this was just an address verification */ 2315 if (fanout_insert) { 2316 /* 2317 * The addresses have been verified. Time to insert in 2318 * the correct fanout list. 
2319 */ 2320 connp->conn_srcv6 = *v6src; 2321 connp->conn_remv6 = ipv6_all_zeros; 2322 connp->conn_lport = lport; 2323 connp->conn_fport = 0; 2324 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2325 } 2326 if (error == 0) { 2327 if (ire_requested) { 2328 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2329 ipst)) { 2330 error = -1; 2331 goto bad_addr; 2332 } 2333 mp = *mpp; 2334 } else if (ipsec_policy_set) { 2335 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2336 error = -1; 2337 goto bad_addr; 2338 } 2339 } 2340 } 2341 bad_addr: 2342 if (error != 0) { 2343 if (connp->conn_anon_port) { 2344 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2345 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2346 B_FALSE); 2347 } 2348 connp->conn_mlp_type = mlptSingle; 2349 } 2350 2351 if (src_ire != NULL) 2352 ire_refrele(src_ire); 2353 2354 if (ipsec_policy_set) { 2355 ASSERT(mp != NULL); 2356 freeb(mp); 2357 /* 2358 * As of now assume that nothing else accompanies 2359 * IPSEC_POLICY_SET. 2360 */ 2361 *mpp = NULL; 2362 } 2363 2364 return (error); 2365 } 2366 int 2367 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2368 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2369 { 2370 int error; 2371 boolean_t ire_requested; 2372 mblk_t *mp = NULL; 2373 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2374 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2375 2376 /* 2377 * Note that we allow connect to broadcast and multicast 2378 * address when ire_requested is set. Thus the ULP 2379 * has to check for IRE_BROADCAST and multicast. 
2380 */ 2381 if (mpp) 2382 mp = *mpp; 2383 ire_requested = (mp && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2384 2385 ASSERT(connp->conn_af_isv6); 2386 connp->conn_ulp = protocol; 2387 2388 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2389 /* Bind to IPv4 address */ 2390 ipaddr_t v4src; 2391 2392 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2393 2394 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2395 fanout_insert); 2396 if (error != 0) 2397 goto bad_addr; 2398 connp->conn_pkt_isv6 = B_FALSE; 2399 } else { 2400 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2401 error = 0; 2402 goto bad_addr; 2403 } 2404 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2405 lport, fanout_insert); 2406 if (error != 0) 2407 goto bad_addr; 2408 connp->conn_pkt_isv6 = B_TRUE; 2409 } 2410 2411 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2412 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2413 return (0); 2414 2415 bad_addr: 2416 if (error < 0) 2417 error = -TBADADDR; 2418 return (error); 2419 } 2420 2421 /* 2422 * Verify that both the source and destination addresses 2423 * are valid. If verify_dst, then destination address must also be reachable, 2424 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2425 * It takes ip6_pkt_t * as one of the arguments to determine correct 2426 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2427 * destination address. Note that parameter ipp is only useful for TCP connect 2428 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2429 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2430 * 2431 */ 2432 int 2433 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2434 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2435 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2436 boolean_t verify_dst, cred_t *cr) 2437 { 2438 ire_t *src_ire; 2439 ire_t *dst_ire; 2440 int error = 0; 2441 ire_t *sire = NULL; 2442 ire_t *md_dst_ire = NULL; 2443 ill_t *md_ill = NULL; 2444 ill_t *dst_ill = NULL; 2445 ipif_t *src_ipif = NULL; 2446 zoneid_t zoneid; 2447 boolean_t ill_held = B_FALSE; 2448 mblk_t *mp = NULL; 2449 boolean_t ire_requested = B_FALSE; 2450 boolean_t ipsec_policy_set = B_FALSE; 2451 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2452 ts_label_t *tsl = NULL; 2453 2454 if (mpp) 2455 mp = *mpp; 2456 2457 if (mp != NULL) { 2458 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2459 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2460 } 2461 if (cr != NULL) 2462 tsl = crgetlabel(cr); 2463 2464 src_ire = dst_ire = NULL; 2465 /* 2466 * If we never got a disconnect before, clear it now. 2467 */ 2468 connp->conn_fully_bound = B_FALSE; 2469 2470 zoneid = connp->conn_zoneid; 2471 2472 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2473 ipif_t *ipif; 2474 2475 /* 2476 * Use an "emulated" IRE_BROADCAST to tell the transport it 2477 * is a multicast. 2478 * Pass other information that matches 2479 * the ipif (e.g. the source address). 
2480 * 2481 * conn_multicast_ill is only used for IPv6 packets 2482 */ 2483 mutex_enter(&connp->conn_lock); 2484 if (connp->conn_multicast_ill != NULL) { 2485 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2486 zoneid, 0, &ipif); 2487 } else { 2488 /* Look for default like ip_wput_v6 */ 2489 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2490 } 2491 mutex_exit(&connp->conn_lock); 2492 if (ipif == NULL || ire_requested || 2493 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2494 if (ipif != NULL) 2495 ipif_refrele(ipif); 2496 if (ip_debug > 2) { 2497 /* ip1dbg */ 2498 pr_addr_dbg("ip_bind_connected_v6: bad " 2499 "connected multicast %s\n", AF_INET6, 2500 v6dst); 2501 } 2502 error = ENETUNREACH; 2503 goto bad_addr; 2504 } 2505 if (ipif != NULL) 2506 ipif_refrele(ipif); 2507 } else { 2508 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2509 NULL, &sire, zoneid, tsl, 2510 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2511 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2512 ipst); 2513 /* 2514 * We also prevent ire's with src address INADDR_ANY to 2515 * be used, which are created temporarily for 2516 * sending out packets from endpoints that have 2517 * conn_unspec_src set. 2518 */ 2519 if (dst_ire == NULL || 2520 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2521 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2522 /* 2523 * When verifying destination reachability, we always 2524 * complain. 2525 * 2526 * When not verifying destination reachability but we 2527 * found an IRE, i.e. the destination is reachable, 2528 * then the other tests still apply and we complain. 
2529 */ 2530 if (verify_dst || (dst_ire != NULL)) { 2531 if (ip_debug > 2) { 2532 /* ip1dbg */ 2533 pr_addr_dbg("ip_bind_connected_v6: bad" 2534 " connected dst %s\n", AF_INET6, 2535 v6dst); 2536 } 2537 if (dst_ire == NULL || 2538 !(dst_ire->ire_type & IRE_HOST)) { 2539 error = ENETUNREACH; 2540 } else { 2541 error = EHOSTUNREACH; 2542 } 2543 goto bad_addr; 2544 } 2545 } 2546 } 2547 2548 /* 2549 * We now know that routing will allow us to reach the destination. 2550 * Check whether Trusted Solaris policy allows communication with this 2551 * host, and pretend that the destination is unreachable if not. 2552 * 2553 * This is never a problem for TCP, since that transport is known to 2554 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2555 * handling. If the remote is unreachable, it will be detected at that 2556 * point, so there's no reason to check it here. 2557 * 2558 * Note that for sendto (and other datagram-oriented friends), this 2559 * check is done as part of the data path label computation instead. 2560 * The check here is just to make non-TCP connect() report the right 2561 * error. 2562 */ 2563 if (dst_ire != NULL && is_system_labeled() && 2564 !IPCL_IS_TCP(connp) && 2565 tsol_compute_label_v6(cr, v6dst, NULL, 2566 connp->conn_mac_exempt, ipst) != 0) { 2567 error = EHOSTUNREACH; 2568 if (ip_debug > 2) { 2569 pr_addr_dbg("ip_bind_connected: no label for dst %s\n", 2570 AF_INET6, v6dst); 2571 } 2572 goto bad_addr; 2573 } 2574 2575 /* 2576 * If the app does a connect(), it means that it will most likely 2577 * send more than 1 packet to the destination. It makes sense 2578 * to clear the temporary flag. 2579 */ 2580 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2581 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2582 irb_t *irb = dst_ire->ire_bucket; 2583 2584 rw_enter(&irb->irb_lock, RW_WRITER); 2585 /* 2586 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2587 * the lock in order to guarantee irb_tmp_ire_cnt. 
2588 */ 2589 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2590 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2591 irb->irb_tmp_ire_cnt--; 2592 } 2593 rw_exit(&irb->irb_lock); 2594 } 2595 2596 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2597 2598 /* 2599 * See if we should notify ULP about MDT; we do this whether or not 2600 * ire_requested is TRUE, in order to handle active connects; MDT 2601 * eligibility tests for passive connects are handled separately 2602 * through tcp_adapt_ire(). We do this before the source address 2603 * selection, because dst_ire may change after a call to 2604 * ipif_select_source_v6(). This is a best-effort check, as the 2605 * packet for this connection may not actually go through 2606 * dst_ire->ire_stq, and the exact IRE can only be known after 2607 * calling ip_newroute_v6(). This is why we further check on the 2608 * IRE during Multidata packet transmission in tcp_multisend(). 2609 */ 2610 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2611 dst_ire != NULL && 2612 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2613 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2614 ILL_MDT_CAPABLE(md_ill)) { 2615 md_dst_ire = dst_ire; 2616 IRE_REFHOLD(md_dst_ire); 2617 } 2618 2619 if (dst_ire != NULL && 2620 dst_ire->ire_type == IRE_LOCAL && 2621 dst_ire->ire_zoneid != zoneid && 2622 dst_ire->ire_zoneid != ALL_ZONES) { 2623 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2624 zoneid, 0, NULL, 2625 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2626 MATCH_IRE_RJ_BHOLE, ipst); 2627 if (src_ire == NULL) { 2628 error = EHOSTUNREACH; 2629 goto bad_addr; 2630 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2631 if (!(src_ire->ire_type & IRE_HOST)) 2632 error = ENETUNREACH; 2633 else 2634 error = EHOSTUNREACH; 2635 goto bad_addr; 2636 } 2637 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2638 src_ipif = src_ire->ire_ipif; 2639 ipif_refhold(src_ipif); 2640 *v6src = 
src_ipif->ipif_v6lcl_addr; 2641 } 2642 ire_refrele(src_ire); 2643 src_ire = NULL; 2644 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2645 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2646 *v6src = sire->ire_src_addr_v6; 2647 ire_refrele(dst_ire); 2648 dst_ire = sire; 2649 sire = NULL; 2650 } else if (dst_ire->ire_type == IRE_CACHE && 2651 (dst_ire->ire_flags & RTF_SETSRC)) { 2652 ASSERT(dst_ire->ire_zoneid == zoneid || 2653 dst_ire->ire_zoneid == ALL_ZONES); 2654 *v6src = dst_ire->ire_src_addr_v6; 2655 } else { 2656 /* 2657 * Pick a source address so that a proper inbound load 2658 * spreading would happen. Use dst_ill specified by the 2659 * app. when socket option or scopeid is set. 2660 */ 2661 int err; 2662 2663 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2664 uint_t if_index; 2665 2666 /* 2667 * Scope id or IPV6_PKTINFO 2668 */ 2669 2670 if_index = ipp->ipp_ifindex; 2671 dst_ill = ill_lookup_on_ifindex( 2672 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2673 ipst); 2674 if (dst_ill == NULL) { 2675 ip1dbg(("ip_bind_connected_v6:" 2676 " bad ifindex %d\n", if_index)); 2677 error = EADDRNOTAVAIL; 2678 goto bad_addr; 2679 } 2680 ill_held = B_TRUE; 2681 } else if (connp->conn_outgoing_ill != NULL) { 2682 /* 2683 * For IPV6_BOUND_IF socket option, 2684 * conn_outgoing_ill should be set 2685 * already in TCP or UDP/ICMP. 
2686 */ 2687 dst_ill = conn_get_held_ill(connp, 2688 &connp->conn_outgoing_ill, &err); 2689 if (err == ILL_LOOKUP_FAILED) { 2690 ip1dbg(("ip_bind_connected_v6:" 2691 "no ill for bound_if\n")); 2692 error = EADDRNOTAVAIL; 2693 goto bad_addr; 2694 } 2695 ill_held = B_TRUE; 2696 } else if (dst_ire->ire_stq != NULL) { 2697 /* No need to hold ill here */ 2698 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2699 } else { 2700 /* No need to hold ill here */ 2701 dst_ill = dst_ire->ire_ipif->ipif_ill; 2702 } 2703 if (ip6_asp_can_lookup(ipst)) { 2704 src_ipif = ipif_select_source_v6(dst_ill, 2705 v6dst, B_FALSE, connp->conn_src_preferences, 2706 zoneid); 2707 ip6_asp_table_refrele(ipst); 2708 if (src_ipif == NULL) { 2709 pr_addr_dbg("ip_bind_connected_v6: " 2710 "no usable source address for " 2711 "connection to %s\n", 2712 AF_INET6, v6dst); 2713 error = EADDRNOTAVAIL; 2714 goto bad_addr; 2715 } 2716 *v6src = src_ipif->ipif_v6lcl_addr; 2717 } else { 2718 error = EADDRNOTAVAIL; 2719 goto bad_addr; 2720 } 2721 } 2722 } 2723 2724 /* 2725 * We do ire_route_lookup_v6() here (and not an interface lookup) 2726 * as we assert that v6src should only come from an 2727 * UP interface for hard binding. 2728 */ 2729 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2730 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2731 2732 /* src_ire must be a local|loopback */ 2733 if (!IRE_IS_LOCAL(src_ire)) { 2734 if (ip_debug > 2) { 2735 /* ip1dbg */ 2736 pr_addr_dbg("ip_bind_connected_v6: bad " 2737 "connected src %s\n", AF_INET6, v6src); 2738 } 2739 error = EADDRNOTAVAIL; 2740 goto bad_addr; 2741 } 2742 2743 /* 2744 * If the source address is a loopback address, the 2745 * destination had best be local or multicast. 2746 * The transports that can't handle multicast will reject 2747 * those addresses. 
2748 */ 2749 if (src_ire->ire_type == IRE_LOOPBACK && 2750 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2751 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2752 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2753 error = -1; 2754 goto bad_addr; 2755 } 2756 /* 2757 * Allow setting new policies. For example, disconnects come 2758 * down as ipa_t bind. As we would have set conn_policy_cached 2759 * to B_TRUE before, we should set it to B_FALSE, so that policy 2760 * can change after the disconnect. 2761 */ 2762 connp->conn_policy_cached = B_FALSE; 2763 2764 /* 2765 * The addresses have been verified. Initialize the conn 2766 * before calling the policy as they expect the conns 2767 * initialized. 2768 */ 2769 connp->conn_srcv6 = *v6src; 2770 connp->conn_remv6 = *v6dst; 2771 connp->conn_lport = lport; 2772 connp->conn_fport = fport; 2773 2774 ASSERT(!(ipsec_policy_set && ire_requested)); 2775 if (ire_requested) { 2776 iulp_t *ulp_info = NULL; 2777 2778 /* 2779 * Note that sire will not be NULL if this is an off-link 2780 * connection and there is not cache for that dest yet. 2781 * 2782 * XXX Because of an existing bug, if there are multiple 2783 * default routes, the IRE returned now may not be the actual 2784 * default route used (default routes are chosen in a 2785 * round robin fashion). So if the metrics for different 2786 * default routes are different, we may return the wrong 2787 * metrics. This will not be a problem if the existing 2788 * bug is fixed. 2789 */ 2790 if (sire != NULL) 2791 ulp_info = &(sire->ire_uinfo); 2792 2793 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2794 ipst)) { 2795 error = -1; 2796 goto bad_addr; 2797 } 2798 } else if (ipsec_policy_set) { 2799 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2800 error = -1; 2801 goto bad_addr; 2802 } 2803 } 2804 2805 /* 2806 * Cache IPsec policy in this conn. If we have per-socket policy, 2807 * we'll cache that. If we don't, we'll inherit global policy. 
2808 * 2809 * We can't insert until the conn reflects the policy. Note that 2810 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2811 * connections where we don't have a policy. This is to prevent 2812 * global policy lookups in the inbound path. 2813 * 2814 * If we insert before we set conn_policy_cached, 2815 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2816 * because global policy cound be non-empty. We normally call 2817 * ipsec_check_policy() for conn_policy_cached connections only if 2818 * conn_in_enforce_policy is set. But in this case, 2819 * conn_policy_cached can get set anytime since we made the 2820 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2821 * is called, which will make the above assumption false. Thus, we 2822 * need to insert after we set conn_policy_cached. 2823 */ 2824 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2825 goto bad_addr; 2826 2827 /* If not fanout_insert this was just an address verification */ 2828 if (fanout_insert) { 2829 /* 2830 * The addresses have been verified. Time to insert in 2831 * the correct fanout list. 2832 */ 2833 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2834 connp->conn_ports, 2835 IPCL_IS_TCP(connp) ? connp->conn_tcp->tcp_bound_if : 0); 2836 } 2837 if (error == 0) { 2838 connp->conn_fully_bound = B_TRUE; 2839 /* 2840 * Our initial checks for MDT have passed; the IRE is not 2841 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2842 * be supporting MDT. Pass the IRE, IPC and ILL into 2843 * ip_mdinfo_return(), which performs further checks 2844 * against them and upon success, returns the MDT info 2845 * mblk which we will attach to the bind acknowledgment. 
2846 */ 2847 if (md_dst_ire != NULL) { 2848 mblk_t *mdinfo_mp; 2849 2850 ASSERT(md_ill != NULL); 2851 ASSERT(md_ill->ill_mdt_capab != NULL); 2852 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2853 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2854 if (mp == NULL) { 2855 *mpp = mdinfo_mp; 2856 } else { 2857 linkb(mp, mdinfo_mp); 2858 } 2859 } 2860 } 2861 } 2862 bad_addr: 2863 if (ipsec_policy_set) { 2864 ASSERT(mp != NULL); 2865 freeb(mp); 2866 /* 2867 * As of now assume that nothing else accompanies 2868 * IPSEC_POLICY_SET. 2869 */ 2870 *mpp = NULL; 2871 } 2872 refrele_and_quit: 2873 if (src_ire != NULL) 2874 IRE_REFRELE(src_ire); 2875 if (dst_ire != NULL) 2876 IRE_REFRELE(dst_ire); 2877 if (sire != NULL) 2878 IRE_REFRELE(sire); 2879 if (src_ipif != NULL) 2880 ipif_refrele(src_ipif); 2881 if (md_dst_ire != NULL) 2882 IRE_REFRELE(md_dst_ire); 2883 if (ill_held && dst_ill != NULL) 2884 ill_refrele(dst_ill); 2885 return (error); 2886 } 2887 2888 /* ARGSUSED */ 2889 int 2890 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2891 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2892 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2893 boolean_t verify_dst, cred_t *cr) 2894 { 2895 int error = 0; 2896 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2897 boolean_t ire_requested; 2898 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2899 2900 /* 2901 * Note that we allow connect to broadcast and multicast 2902 * address when ire_requested is set. Thus the ULP 2903 * has to check for IRE_BROADCAST and multicast. 2904 */ 2905 ASSERT(mpp != NULL); 2906 ire_requested = (*mpp != NULL && DB_TYPE(*mpp) == IRE_DB_REQ_TYPE); 2907 2908 ASSERT(connp->conn_af_isv6); 2909 connp->conn_ulp = protocol; 2910 2911 /* For raw socket, the local port is not set. */ 2912 lport = lport != 0 ? lport : connp->conn_lport; 2913 2914 /* 2915 * Bind to local and remote address. 
Local might be 2916 * unspecified in which case it will be extracted from 2917 * ire_src_addr_v6 2918 */ 2919 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2920 /* Connect to IPv4 address */ 2921 ipaddr_t v4src; 2922 ipaddr_t v4dst; 2923 2924 /* Is the source unspecified or mapped? */ 2925 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2926 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2927 ip1dbg(("ip_proto_bind_connected_v6: " 2928 "dst is mapped, but not the src\n")); 2929 goto bad_addr; 2930 } 2931 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2932 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2933 2934 /* Always verify destination reachability. */ 2935 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2936 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2937 if (error != 0) 2938 goto bad_addr; 2939 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2940 connp->conn_pkt_isv6 = B_FALSE; 2941 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2942 ip1dbg(("ip_proto_bind_connected_v6: " 2943 "src is mapped, but not the dst\n")); 2944 goto bad_addr; 2945 } else { 2946 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2947 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2948 if (error != 0) 2949 goto bad_addr; 2950 connp->conn_pkt_isv6 = B_TRUE; 2951 } 2952 2953 ip_bind_post_handling_v6(connp, mpp ? *mpp : NULL, 2954 orig_pkt_isv6 != connp->conn_pkt_isv6, ire_requested, ipst); 2955 2956 /* Send it home. */ 2957 return (0); 2958 2959 bad_addr: 2960 if (error == 0) 2961 error = -TBADADDR; 2962 return (error); 2963 } 2964 2965 /* 2966 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2967 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 
2968 */ 2969 /* ARGSUSED4 */ 2970 static boolean_t 2971 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2972 iulp_t *ulp_info, ip_stack_t *ipst) 2973 { 2974 mblk_t *mp = *mpp; 2975 ire_t *ret_ire; 2976 2977 ASSERT(mp != NULL); 2978 2979 if (ire != NULL) { 2980 /* 2981 * mp initialized above to IRE_DB_REQ_TYPE 2982 * appended mblk. Its <upper protocol>'s 2983 * job to make sure there is room. 2984 */ 2985 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2986 return (B_FALSE); 2987 2988 mp->b_datap->db_type = IRE_DB_TYPE; 2989 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2990 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2991 ret_ire = (ire_t *)mp->b_rptr; 2992 if (IN6_IS_ADDR_MULTICAST(dst) || 2993 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 2994 ret_ire->ire_type = IRE_BROADCAST; 2995 ret_ire->ire_addr_v6 = *dst; 2996 } 2997 if (ulp_info != NULL) { 2998 bcopy(ulp_info, &(ret_ire->ire_uinfo), 2999 sizeof (iulp_t)); 3000 } 3001 ret_ire->ire_mp = mp; 3002 } else { 3003 /* 3004 * No IRE was found. Remove IRE mblk. 3005 */ 3006 *mpp = mp->b_cont; 3007 freeb(mp); 3008 } 3009 return (B_TRUE); 3010 } 3011 3012 /* 3013 * Add an ip6i_t header to the front of the mblk. 3014 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3015 * Returns NULL if allocation fails (and frees original message). 3016 * Used in outgoing path when going through ip_newroute_*v6(). 3017 * Used in incoming path to pass ifindex to transports. 
3018 */ 3019 mblk_t * 3020 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3021 { 3022 mblk_t *mp1; 3023 ip6i_t *ip6i; 3024 ip6_t *ip6h; 3025 3026 ip6h = (ip6_t *)mp->b_rptr; 3027 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3028 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3029 mp->b_datap->db_ref > 1) { 3030 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3031 if (mp1 == NULL) { 3032 freemsg(mp); 3033 return (NULL); 3034 } 3035 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3036 mp1->b_cont = mp; 3037 mp = mp1; 3038 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3039 } 3040 mp->b_rptr = (uchar_t *)ip6i; 3041 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3042 ip6i->ip6i_nxt = IPPROTO_RAW; 3043 if (ill != NULL) { 3044 ip6i->ip6i_flags = IP6I_IFINDEX; 3045 /* 3046 * If `ill' is in an IPMP group, make sure we use the IPMP 3047 * interface index so that e.g. IPV6_RECVPKTINFO will get the 3048 * IPMP interface index and not an underlying interface index. 3049 */ 3050 if (IS_UNDER_IPMP(ill)) 3051 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3052 else 3053 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3054 } else { 3055 ip6i->ip6i_flags = 0; 3056 } 3057 ip6i->ip6i_nexthop = *dst; 3058 return (mp); 3059 } 3060 3061 /* 3062 * Handle protocols with which IP is less intimate. There 3063 * can be more than one stream bound to a particular 3064 * protocol. When this is the case, normally each one gets a copy 3065 * of any incoming packets. 3066 * However, if the packet was tunneled and not multicast we only send to it 3067 * the first match. 3068 * 3069 * Zones notes: 3070 * Packets will be distributed to streams in all zones. This is really only 3071 * useful for ICMPv6 as only applications in the global zone can create raw 3072 * sockets for other protocols. 
*/
static void
ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill,
    ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags,
    boolean_t mctl_present, zoneid_t zoneid)
{
	queue_t		*rq;
	mblk_t		*mp1, *first_mp1;
	in6_addr_t	dst = ip6h->ip6_dst;
	in6_addr_t	src = ip6h->ip6_src;
	boolean_t	one_only;
	mblk_t		*first_mp = mp;
	boolean_t	secure, shared_addr;
	conn_t		*connp, *first_connp, *next_connp;
	connf_t		*connfp;
	ip_stack_t	*ipst = inill->ill_ipst;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;

	/* With an M_CTL in front, the data is in the second mblk. */
	if (mctl_present) {
		mp = first_mp->b_cont;
		secure = ipsec_in_is_secure(first_mp);
		ASSERT(mp != NULL);
	} else {
		secure = B_FALSE;
	}

	/*
	 * If the packet was tunneled and not multicast we only send to it
	 * the first match.
	 */
	one_only = ((nexthdr == IPPROTO_ENCAP || nexthdr == IPPROTO_IPV6) &&
	    !IN6_IS_ADDR_MULTICAST(&dst));

	shared_addr = (zoneid == ALL_ZONES);
	if (shared_addr) {
		/*
		 * We don't allow multilevel ports for raw IP, so no need to
		 * check for that here.
		 */
		zoneid = tsol_packet_to_zoneid(mp);
	}

	/* Find the first conn in this protocol's fanout bucket that matches. */
	connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr];
	mutex_enter(&connfp->connf_lock);
	connp = connfp->connf_head;
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags,
		    zoneid) &&
		    (!is_system_labeled() ||
		    tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr,
		    connp)))
			break;
	}

	if (connp == NULL) {
		/*
		 * No one bound to this port.  Is
		 * there a client that wants all
		 * unclaimed datagrams?
		 */
		mutex_exit(&connfp->connf_lock);
		if (ip_fanout_send_icmp_v6(q, first_mp, flags,
		    ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER,
		    nexthdr_offset, mctl_present, zoneid, ipst)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
		}

		return;
	}

	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL);

	/*
	 * The first match is held until the end: it receives the original
	 * message after every other match has been given a copy.
	 */
	CONN_INC_REF(connp);
	first_connp = connp;

	/*
	 * XXX: Fix the multiple protocol listeners case. We should not
	 * be walking the conn->next list here.
	 */
	if (one_only) {
		/*
		 * Only send message to one tunnel driver by immediately
		 * terminating the loop.
		 */
		connp = NULL;
	} else {
		connp = connp->conn_next;

	}
	/* Entered and left with connf_lock held. */
	for (;;) {
		while (connp != NULL) {
			if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill,
			    flags, zoneid) &&
			    (!is_system_labeled() ||
			    tsol_receive_local(mp, &dst, IPV6_VERSION,
			    shared_addr, connp)))
				break;
			connp = connp->conn_next;
		}

		/*
		 * Just copy the data part alone. The mctl part is
		 * needed just for verifying policy and it is never
		 * sent up.
		 */
		if (connp == NULL ||
		    (((first_mp1 = dupmsg(first_mp)) == NULL) &&
		    ((first_mp1 = ip_copymsg(first_mp)) == NULL))) {
			/*
			 * No more interested clients or memory
			 * allocation failed
			 */
			connp = first_connp;
			break;
		}
		ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
		mp1 = mctl_present ? first_mp1->b_cont : first_mp1;
		/* Hold the conn so the bucket lock can be dropped for delivery. */
		CONN_INC_REF(connp);
		mutex_exit(&connfp->connf_lock);
		rq = connp->conn_rq;
		/*
		 * For link-local always add ifindex so that transport can set
		 * sin6_scope_id. Avoid it for ICMP error fanout.
		 */
		if ((connp->conn_ip_recvpktinfo ||
		    IN6_IS_ADDR_LINKLOCAL(&src)) &&
		    (flags & IP_FF_IPINFO)) {
			/* Add header */
			mp1 = ip_add_info_v6(mp1, inill, &dst);
		}
		if (mp1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		} else if (
		    (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
		    (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {
			/* Receiver is flow controlled: count and drop the copy. */
			if (flags & IP_FF_RAWIP) {
				BUMP_MIB(ill->ill_ip_mib,
				    rawipIfStatsInOverflows);
			} else {
				BUMP_MIB(ill->ill_icmp6_mib,
				    ipv6IfIcmpInOverflows);
			}

			freemsg(mp1);
		} else {
			/*
			 * Don't enforce here if we're a tunnel - let "tun" do
			 * it instead.
			 */
			if (!IPCL_IS_IPTUN(connp) &&
			    (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
			    secure)) {
				first_mp1 = ipsec_check_inbound_policy(
				    first_mp1, connp, NULL, ip6h, mctl_present);
			}
			if (first_mp1 != NULL) {
				if (mctl_present)
					freeb(first_mp1);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsHCInDelivers);
				(connp->conn_recv)(connp, mp1, NULL);
			}
		}
		mutex_enter(&connfp->connf_lock);
		/* Follow the next pointer before releasing the conn. */
		next_connp = connp->conn_next;
		CONN_DEC_REF(connp);
		connp = next_connp;
	}

	/* Last one.  Send it upstream. */
	mutex_exit(&connfp->connf_lock);

	/* Initiate IPPF processing */
	if (IP6_IN_IPP(flags, ipst)) {
		uint_t ifindex;

		mutex_enter(&ill->ill_lock);
		ifindex = ill->ill_phyint->phyint_ifindex;
		mutex_exit(&ill->ill_lock);
		ip_process(IPP_LOCAL_IN, &mp, ifindex);
		if (mp == NULL) {
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		}
	}

	/*
	 * For link-local always add ifindex so that transport can set
	 * sin6_scope_id. Avoid it for ICMP error fanout.
	 */
	if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) &&
	    (flags & IP_FF_IPINFO)) {
		/* Add header */
		mp = ip_add_info_v6(mp, inill, &dst);
		if (mp == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			CONN_DEC_REF(connp);
			if (mctl_present)
				freeb(first_mp);
			return;
		} else if (mctl_present) {
			first_mp->b_cont = mp;
		} else {
			first_mp = mp;
		}
	}

	rq = connp->conn_rq;
	if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) ||
	    (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) {

		if (flags & IP_FF_RAWIP) {
			BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows);
		} else {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows);
		}

		freemsg(first_mp);
	} else {
		if (IPCL_IS_IPTUN(connp)) {
			/*
			 * Tunneled packet.  We enforce policy in the tunnel
			 * module itself.
			 *
			 * Send the WHOLE packet up (incl. IPSEC_IN) without
			 * a policy check.
			 */
			putnext(rq, first_mp);
			CONN_DEC_REF(connp);
			return;
		}
		/*
		 * Don't enforce here if we're a tunnel - let "tun" do
		 * it instead.
		 *
		 * NOTE(review): the copy loop above tests
		 * CONN_INBOUND_POLICY_PRESENT_V6(); confirm that the
		 * macro without the _V6 suffix is intended here.
		 */
		if (nexthdr != IPPROTO_ENCAP && nexthdr != IPPROTO_IPV6 &&
		    (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure)) {
			first_mp = ipsec_check_inbound_policy(first_mp, connp,
			    NULL, ip6h, mctl_present);
			if (first_mp == NULL) {
				CONN_DEC_REF(connp);
				return;
			}
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
		(connp->conn_recv)(connp, mp, NULL);
		/* Only the data went up; the M_CTL in front is freed here. */
		if (mctl_present)
			freeb(first_mp);
	}
	CONN_DEC_REF(connp);
}

/*
 * Send an ICMP error after patching up the packet appropriately.  Returns
 * non-zero if the appropriate MIB should be bumped; zero otherwise.
3332 */ 3333 int 3334 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3335 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3336 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3337 { 3338 ip6_t *ip6h; 3339 mblk_t *first_mp; 3340 boolean_t secure; 3341 unsigned char db_type; 3342 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3343 3344 first_mp = mp; 3345 if (mctl_present) { 3346 mp = mp->b_cont; 3347 secure = ipsec_in_is_secure(first_mp); 3348 ASSERT(mp != NULL); 3349 } else { 3350 /* 3351 * If this is an ICMP error being reported - which goes 3352 * up as M_CTLs, we need to convert them to M_DATA till 3353 * we finish checking with global policy because 3354 * ipsec_check_global_policy() assumes M_DATA as clear 3355 * and M_CTL as secure. 3356 */ 3357 db_type = mp->b_datap->db_type; 3358 mp->b_datap->db_type = M_DATA; 3359 secure = B_FALSE; 3360 } 3361 /* 3362 * We are generating an icmp error for some inbound packet. 3363 * Called from all ip_fanout_(udp, tcp, proto) functions. 3364 * Before we generate an error, check with global policy 3365 * to see whether this is allowed to enter the system. As 3366 * there is no "conn", we are checking with global policy. 
3367 */ 3368 ip6h = (ip6_t *)mp->b_rptr; 3369 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3370 first_mp = ipsec_check_global_policy(first_mp, NULL, 3371 NULL, ip6h, mctl_present, ipst->ips_netstack); 3372 if (first_mp == NULL) 3373 return (0); 3374 } 3375 3376 if (!mctl_present) 3377 mp->b_datap->db_type = db_type; 3378 3379 if (flags & IP_FF_SEND_ICMP) { 3380 if (flags & IP_FF_HDR_COMPLETE) { 3381 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3382 freemsg(first_mp); 3383 return (1); 3384 } 3385 } 3386 switch (icmp_type) { 3387 case ICMP6_DST_UNREACH: 3388 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3389 B_FALSE, B_FALSE, zoneid, ipst); 3390 break; 3391 case ICMP6_PARAM_PROB: 3392 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3393 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3394 break; 3395 default: 3396 #ifdef DEBUG 3397 panic("ip_fanout_send_icmp_v6: wrong type"); 3398 /*NOTREACHED*/ 3399 #else 3400 freemsg(first_mp); 3401 break; 3402 #endif 3403 } 3404 } else { 3405 freemsg(first_mp); 3406 return (0); 3407 } 3408 3409 return (1); 3410 } 3411 3412 3413 /* 3414 * Fanout for TCP packets 3415 * The caller puts <fport, lport> in the ports parameter. 
3416 */ 3417 static void 3418 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3419 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3420 { 3421 mblk_t *first_mp; 3422 boolean_t secure; 3423 conn_t *connp; 3424 tcph_t *tcph; 3425 boolean_t syn_present = B_FALSE; 3426 ip_stack_t *ipst = inill->ill_ipst; 3427 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3428 3429 first_mp = mp; 3430 if (mctl_present) { 3431 mp = first_mp->b_cont; 3432 secure = ipsec_in_is_secure(first_mp); 3433 ASSERT(mp != NULL); 3434 } else { 3435 secure = B_FALSE; 3436 } 3437 3438 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3439 3440 if (connp == NULL || 3441 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3442 /* 3443 * No hard-bound match. Send Reset. 3444 */ 3445 dblk_t *dp = mp->b_datap; 3446 uint32_t ill_index; 3447 3448 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3449 3450 /* Initiate IPPf processing, if needed. */ 3451 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3452 (flags & IP6_NO_IPPOLICY)) { 3453 ill_index = ill->ill_phyint->phyint_ifindex; 3454 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3455 if (first_mp == NULL) { 3456 if (connp != NULL) 3457 CONN_DEC_REF(connp); 3458 return; 3459 } 3460 } 3461 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3462 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3463 ipst->ips_netstack->netstack_tcp, connp); 3464 if (connp != NULL) 3465 CONN_DEC_REF(connp); 3466 return; 3467 } 3468 3469 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3470 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3471 if (connp->conn_flags & IPCL_TCP) { 3472 squeue_t *sqp; 3473 3474 /* 3475 * For fused tcp loopback, assign the eager's 3476 * squeue to be that of the active connect's. 
3477 */ 3478 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3479 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3480 !secure && 3481 !IP6_IN_IPP(flags, ipst)) { 3482 ASSERT(Q_TO_CONN(q) != NULL); 3483 sqp = Q_TO_CONN(q)->conn_sqp; 3484 } else { 3485 sqp = IP_SQUEUE_GET(lbolt); 3486 } 3487 3488 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3489 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3490 3491 /* 3492 * db_cksumstuff is unused in the incoming 3493 * path; Thus store the ifindex here. It will 3494 * be cleared in tcp_conn_create_v6(). 3495 */ 3496 DB_CKSUMSTUFF(mp) = 3497 (intptr_t)ill->ill_phyint->phyint_ifindex; 3498 syn_present = B_TRUE; 3499 } 3500 } 3501 3502 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3503 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3504 if ((flags & TH_RST) || (flags & TH_URG)) { 3505 CONN_DEC_REF(connp); 3506 freemsg(first_mp); 3507 return; 3508 } 3509 if (flags & TH_ACK) { 3510 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3511 ipst->ips_netstack->netstack_tcp, connp); 3512 CONN_DEC_REF(connp); 3513 return; 3514 } 3515 3516 CONN_DEC_REF(connp); 3517 freemsg(first_mp); 3518 return; 3519 } 3520 3521 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3522 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3523 NULL, ip6h, mctl_present); 3524 if (first_mp == NULL) { 3525 CONN_DEC_REF(connp); 3526 return; 3527 } 3528 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3529 ASSERT(syn_present); 3530 if (mctl_present) { 3531 ASSERT(first_mp != mp); 3532 first_mp->b_datap->db_struioflag |= 3533 STRUIO_POLICY; 3534 } else { 3535 ASSERT(first_mp == mp); 3536 mp->b_datap->db_struioflag &= 3537 ~STRUIO_EAGER; 3538 mp->b_datap->db_struioflag |= 3539 STRUIO_POLICY; 3540 } 3541 } else { 3542 /* 3543 * Discard first_mp early since we're dealing with a 3544 * fully-connected conn_t and tcp doesn't do policy in 3545 * this case. 
Also, if someone is bound to IPPROTO_TCP 3546 * over raw IP, they don't expect to see a M_CTL. 3547 */ 3548 if (mctl_present) { 3549 freeb(first_mp); 3550 mctl_present = B_FALSE; 3551 } 3552 first_mp = mp; 3553 } 3554 } 3555 3556 /* Initiate IPPF processing */ 3557 if (IP6_IN_IPP(flags, ipst)) { 3558 uint_t ifindex; 3559 3560 mutex_enter(&ill->ill_lock); 3561 ifindex = ill->ill_phyint->phyint_ifindex; 3562 mutex_exit(&ill->ill_lock); 3563 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3564 if (mp == NULL) { 3565 CONN_DEC_REF(connp); 3566 if (mctl_present) { 3567 freeb(first_mp); 3568 } 3569 return; 3570 } else if (mctl_present) { 3571 /* 3572 * ip_add_info_v6 might return a new mp. 3573 */ 3574 ASSERT(first_mp != mp); 3575 first_mp->b_cont = mp; 3576 } else { 3577 first_mp = mp; 3578 } 3579 } 3580 3581 /* 3582 * For link-local always add ifindex so that TCP can bind to that 3583 * interface. Avoid it for ICMP error fanout. 3584 */ 3585 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3586 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3587 (flags & IP_FF_IPINFO))) { 3588 /* Add header */ 3589 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3590 if (mp == NULL) { 3591 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3592 CONN_DEC_REF(connp); 3593 if (mctl_present) 3594 freeb(first_mp); 3595 return; 3596 } else if (mctl_present) { 3597 ASSERT(first_mp != mp); 3598 first_mp->b_cont = mp; 3599 } else { 3600 first_mp = mp; 3601 } 3602 } 3603 3604 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3605 if (IPCL_IS_TCP(connp)) { 3606 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3607 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3608 } else { 3609 /* SOCK_RAW, IPPROTO_TCP case */ 3610 (connp->conn_recv)(connp, first_mp, NULL); 3611 CONN_DEC_REF(connp); 3612 } 3613 } 3614 3615 /* 3616 * Fanout for UDP packets. 3617 * The caller puts <fport, lport> in the ports parameter. 3618 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3619 * 3620 * If SO_REUSEADDR is set all multicast and broadcast packets 3621 * will be delivered to all streams bound to the same port. 3622 * 3623 * Zones notes: 3624 * Multicast packets will be distributed to streams in all zones. 3625 */ 3626 static void 3627 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3628 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3629 zoneid_t zoneid) 3630 { 3631 uint32_t dstport, srcport; 3632 in6_addr_t dst; 3633 mblk_t *first_mp; 3634 boolean_t secure; 3635 conn_t *connp; 3636 connf_t *connfp; 3637 conn_t *first_conn; 3638 conn_t *next_conn; 3639 mblk_t *mp1, *first_mp1; 3640 in6_addr_t src; 3641 boolean_t shared_addr; 3642 ip_stack_t *ipst = inill->ill_ipst; 3643 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3644 3645 first_mp = mp; 3646 if (mctl_present) { 3647 mp = first_mp->b_cont; 3648 secure = ipsec_in_is_secure(first_mp); 3649 ASSERT(mp != NULL); 3650 } else { 3651 secure = B_FALSE; 3652 } 3653 3654 /* Extract ports in net byte order */ 3655 dstport = htons(ntohl(ports) & 0xFFFF); 3656 srcport = htons(ntohl(ports) >> 16); 3657 dst = ip6h->ip6_dst; 3658 src = ip6h->ip6_src; 3659 3660 shared_addr = (zoneid == ALL_ZONES); 3661 if (shared_addr) { 3662 /* 3663 * No need to handle exclusive-stack zones since ALL_ZONES 3664 * only applies to the shared stack. 3665 */ 3666 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3667 /* 3668 * If no shared MLP is found, tsol_mlp_findzone returns 3669 * ALL_ZONES. In that case, we assume it's SLP, and 3670 * search for the zone based on the packet label. 3671 * That will also return ALL_ZONES on failure, but 3672 * we never allow conn_zoneid to be set to ALL_ZONES. 3673 */ 3674 if (zoneid == ALL_ZONES) 3675 zoneid = tsol_packet_to_zoneid(mp); 3676 } 3677 3678 /* Attempt to find a client stream based on destination port. 
*/ 3679 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3680 mutex_enter(&connfp->connf_lock); 3681 connp = connfp->connf_head; 3682 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3683 /* 3684 * Not multicast. Send to the one (first) client we find. 3685 */ 3686 while (connp != NULL) { 3687 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3688 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3689 conn_wantpacket_v6(connp, ill, ip6h, 3690 flags, zoneid)) { 3691 break; 3692 } 3693 connp = connp->conn_next; 3694 } 3695 if (connp == NULL || connp->conn_upq == NULL) 3696 goto notfound; 3697 3698 if (is_system_labeled() && 3699 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3700 connp)) 3701 goto notfound; 3702 3703 /* Found a client */ 3704 CONN_INC_REF(connp); 3705 mutex_exit(&connfp->connf_lock); 3706 3707 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3708 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3709 freemsg(first_mp); 3710 CONN_DEC_REF(connp); 3711 return; 3712 } 3713 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3714 first_mp = ipsec_check_inbound_policy(first_mp, 3715 connp, NULL, ip6h, mctl_present); 3716 if (first_mp == NULL) { 3717 CONN_DEC_REF(connp); 3718 return; 3719 } 3720 } 3721 /* Initiate IPPF processing */ 3722 if (IP6_IN_IPP(flags, ipst)) { 3723 uint_t ifindex; 3724 3725 mutex_enter(&ill->ill_lock); 3726 ifindex = ill->ill_phyint->phyint_ifindex; 3727 mutex_exit(&ill->ill_lock); 3728 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3729 if (mp == NULL) { 3730 CONN_DEC_REF(connp); 3731 if (mctl_present) 3732 freeb(first_mp); 3733 return; 3734 } 3735 } 3736 /* 3737 * For link-local always add ifindex so that 3738 * transport can set sin6_scope_id. Avoid it for 3739 * ICMP error fanout. 
3740 */ 3741 if ((connp->conn_ip_recvpktinfo || 3742 IN6_IS_ADDR_LINKLOCAL(&src)) && 3743 (flags & IP_FF_IPINFO)) { 3744 /* Add header */ 3745 mp = ip_add_info_v6(mp, inill, &dst); 3746 if (mp == NULL) { 3747 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3748 CONN_DEC_REF(connp); 3749 if (mctl_present) 3750 freeb(first_mp); 3751 return; 3752 } else if (mctl_present) { 3753 first_mp->b_cont = mp; 3754 } else { 3755 first_mp = mp; 3756 } 3757 } 3758 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3759 3760 /* Send it upstream */ 3761 (connp->conn_recv)(connp, mp, NULL); 3762 3763 IP6_STAT(ipst, ip6_udp_fannorm); 3764 CONN_DEC_REF(connp); 3765 if (mctl_present) 3766 freeb(first_mp); 3767 return; 3768 } 3769 3770 while (connp != NULL) { 3771 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3772 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3773 (!is_system_labeled() || 3774 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3775 connp))) 3776 break; 3777 connp = connp->conn_next; 3778 } 3779 3780 if (connp == NULL || connp->conn_upq == NULL) 3781 goto notfound; 3782 3783 first_conn = connp; 3784 3785 CONN_INC_REF(connp); 3786 connp = connp->conn_next; 3787 for (;;) { 3788 while (connp != NULL) { 3789 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3790 src) && conn_wantpacket_v6(connp, ill, ip6h, 3791 flags, zoneid) && 3792 (!is_system_labeled() || 3793 tsol_receive_local(mp, &dst, IPV6_VERSION, 3794 shared_addr, connp))) 3795 break; 3796 connp = connp->conn_next; 3797 } 3798 /* 3799 * Just copy the data part alone. The mctl part is 3800 * needed just for verifying policy and it is never 3801 * sent up. 3802 */ 3803 if (connp == NULL || 3804 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3805 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3806 /* 3807 * No more interested clients or memory 3808 * allocation failed 3809 */ 3810 connp = first_conn; 3811 break; 3812 } 3813 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3814 CONN_INC_REF(connp); 3815 mutex_exit(&connfp->connf_lock); 3816 /* 3817 * For link-local always add ifindex so that transport 3818 * can set sin6_scope_id. Avoid it for ICMP error 3819 * fanout. 3820 */ 3821 if ((connp->conn_ip_recvpktinfo || 3822 IN6_IS_ADDR_LINKLOCAL(&src)) && 3823 (flags & IP_FF_IPINFO)) { 3824 /* Add header */ 3825 mp1 = ip_add_info_v6(mp1, inill, &dst); 3826 } 3827 /* mp1 could have changed */ 3828 if (mctl_present) 3829 first_mp1->b_cont = mp1; 3830 else 3831 first_mp1 = mp1; 3832 if (mp1 == NULL) { 3833 if (mctl_present) 3834 freeb(first_mp1); 3835 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3836 goto next_one; 3837 } 3838 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3839 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3840 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3841 freemsg(first_mp1); 3842 goto next_one; 3843 } 3844 3845 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3846 first_mp1 = ipsec_check_inbound_policy 3847 (first_mp1, connp, NULL, ip6h, 3848 mctl_present); 3849 } 3850 if (first_mp1 != NULL) { 3851 if (mctl_present) 3852 freeb(first_mp1); 3853 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3854 3855 /* Send it upstream */ 3856 (connp->conn_recv)(connp, mp1, NULL); 3857 } 3858 next_one: 3859 mutex_enter(&connfp->connf_lock); 3860 /* Follow the next pointer before releasing the conn. */ 3861 next_conn = connp->conn_next; 3862 IP6_STAT(ipst, ip6_udp_fanmb); 3863 CONN_DEC_REF(connp); 3864 connp = next_conn; 3865 } 3866 3867 /* Last one. Send it upstream. 
*/ 3868 mutex_exit(&connfp->connf_lock); 3869 3870 /* Initiate IPPF processing */ 3871 if (IP6_IN_IPP(flags, ipst)) { 3872 uint_t ifindex; 3873 3874 mutex_enter(&ill->ill_lock); 3875 ifindex = ill->ill_phyint->phyint_ifindex; 3876 mutex_exit(&ill->ill_lock); 3877 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3878 if (mp == NULL) { 3879 CONN_DEC_REF(connp); 3880 if (mctl_present) { 3881 freeb(first_mp); 3882 } 3883 return; 3884 } 3885 } 3886 3887 /* 3888 * For link-local always add ifindex so that transport can set 3889 * sin6_scope_id. Avoid it for ICMP error fanout. 3890 */ 3891 if ((connp->conn_ip_recvpktinfo || 3892 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3893 /* Add header */ 3894 mp = ip_add_info_v6(mp, inill, &dst); 3895 if (mp == NULL) { 3896 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3897 CONN_DEC_REF(connp); 3898 if (mctl_present) 3899 freeb(first_mp); 3900 return; 3901 } else if (mctl_present) { 3902 first_mp->b_cont = mp; 3903 } else { 3904 first_mp = mp; 3905 } 3906 } 3907 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3908 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3909 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3910 freemsg(mp); 3911 } else { 3912 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3913 first_mp = ipsec_check_inbound_policy(first_mp, 3914 connp, NULL, ip6h, mctl_present); 3915 if (first_mp == NULL) { 3916 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3917 CONN_DEC_REF(connp); 3918 return; 3919 } 3920 } 3921 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3922 3923 /* Send it upstream */ 3924 (connp->conn_recv)(connp, mp, NULL); 3925 } 3926 IP6_STAT(ipst, ip6_udp_fanmb); 3927 CONN_DEC_REF(connp); 3928 if (mctl_present) 3929 freeb(first_mp); 3930 return; 3931 3932 notfound: 3933 mutex_exit(&connfp->connf_lock); 3934 /* 3935 * No one bound to this port. Is 3936 * there a client that wants all 3937 * unclaimed datagrams? 
3938 */ 3939 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3940 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3941 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3942 zoneid); 3943 } else { 3944 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3945 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3946 mctl_present, zoneid, ipst)) { 3947 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3948 } 3949 } 3950 } 3951 3952 /* 3953 * int ip_find_hdr_v6() 3954 * 3955 * This routine is used by the upper layer protocols and the IP tunnel 3956 * module to: 3957 * - Set extension header pointers to appropriate locations 3958 * - Determine IPv6 header length and return it 3959 * - Return a pointer to the last nexthdr value 3960 * 3961 * The caller must initialize ipp_fields. 3962 * 3963 * NOTE: If multiple extension headers of the same type are present, 3964 * ip_find_hdr_v6() will set the respective extension header pointers 3965 * to the first one that it encounters in the IPv6 header. It also 3966 * skips fragment headers. This routine deals with malformed packets 3967 * of various sorts in which case the returned length is up to the 3968 * malformed part. 3969 */ 3970 int 3971 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3972 { 3973 uint_t length, ehdrlen; 3974 uint8_t nexthdr; 3975 uint8_t *whereptr, *endptr; 3976 ip6_dest_t *tmpdstopts; 3977 ip6_rthdr_t *tmprthdr; 3978 ip6_hbh_t *tmphopopts; 3979 ip6_frag_t *tmpfraghdr; 3980 3981 length = IPV6_HDR_LEN; 3982 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3983 endptr = mp->b_wptr; 3984 3985 nexthdr = ip6h->ip6_nxt; 3986 while (whereptr < endptr) { 3987 /* Is there enough left for len + nexthdr? 
*/ 3988 if (whereptr + MIN_EHDR_LEN > endptr) 3989 goto done; 3990 3991 switch (nexthdr) { 3992 case IPPROTO_HOPOPTS: 3993 tmphopopts = (ip6_hbh_t *)whereptr; 3994 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3995 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3996 goto done; 3997 nexthdr = tmphopopts->ip6h_nxt; 3998 /* return only 1st hbh */ 3999 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 4000 ipp->ipp_fields |= IPPF_HOPOPTS; 4001 ipp->ipp_hopopts = tmphopopts; 4002 ipp->ipp_hopoptslen = ehdrlen; 4003 } 4004 break; 4005 case IPPROTO_DSTOPTS: 4006 tmpdstopts = (ip6_dest_t *)whereptr; 4007 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 4008 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 4009 goto done; 4010 nexthdr = tmpdstopts->ip6d_nxt; 4011 /* 4012 * ipp_dstopts is set to the destination header after a 4013 * routing header. 4014 * Assume it is a post-rthdr destination header 4015 * and adjust when we find an rthdr. 4016 */ 4017 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 4018 ipp->ipp_fields |= IPPF_DSTOPTS; 4019 ipp->ipp_dstopts = tmpdstopts; 4020 ipp->ipp_dstoptslen = ehdrlen; 4021 } 4022 break; 4023 case IPPROTO_ROUTING: 4024 tmprthdr = (ip6_rthdr_t *)whereptr; 4025 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 4026 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 4027 goto done; 4028 nexthdr = tmprthdr->ip6r_nxt; 4029 /* return only 1st rthdr */ 4030 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 4031 ipp->ipp_fields |= IPPF_RTHDR; 4032 ipp->ipp_rthdr = tmprthdr; 4033 ipp->ipp_rthdrlen = ehdrlen; 4034 } 4035 /* 4036 * Make any destination header we've seen be a 4037 * pre-rthdr destination header. 
4038 */ 4039 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4040 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4041 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4042 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4043 ipp->ipp_dstopts = NULL; 4044 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4045 ipp->ipp_dstoptslen = 0; 4046 } 4047 break; 4048 case IPPROTO_FRAGMENT: 4049 tmpfraghdr = (ip6_frag_t *)whereptr; 4050 ehdrlen = sizeof (ip6_frag_t); 4051 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4052 goto done; 4053 nexthdr = tmpfraghdr->ip6f_nxt; 4054 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4055 ipp->ipp_fields |= IPPF_FRAGHDR; 4056 ipp->ipp_fraghdr = tmpfraghdr; 4057 ipp->ipp_fraghdrlen = ehdrlen; 4058 } 4059 break; 4060 case IPPROTO_NONE: 4061 default: 4062 goto done; 4063 } 4064 length += ehdrlen; 4065 whereptr += ehdrlen; 4066 } 4067 done: 4068 if (nexthdrp != NULL) 4069 *nexthdrp = nexthdr; 4070 return (length); 4071 } 4072 4073 int 4074 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4075 { 4076 ire_t *ire; 4077 4078 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4079 ire = ire_lookup_local_v6(zoneid, ipst); 4080 if (ire == NULL) { 4081 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4082 return (1); 4083 } 4084 ip6h->ip6_src = ire->ire_addr_v6; 4085 ire_refrele(ire); 4086 } 4087 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4088 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4089 return (0); 4090 } 4091 4092 /* 4093 * Try to determine where and what are the IPv6 header length and 4094 * pointer to nexthdr value for the upper layer protocol (or an 4095 * unknown next hdr). 4096 * 4097 * Parameters returns a pointer to the nexthdr value; 4098 * Must handle malformed packets of various sorts. 4099 * Function returns failure for malformed cases. 
4100 */ 4101 boolean_t 4102 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4103 uint8_t **nexthdrpp) 4104 { 4105 uint16_t length; 4106 uint_t ehdrlen; 4107 uint8_t *nexthdrp; 4108 uint8_t *whereptr; 4109 uint8_t *endptr; 4110 ip6_dest_t *desthdr; 4111 ip6_rthdr_t *rthdr; 4112 ip6_frag_t *fraghdr; 4113 4114 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4115 length = IPV6_HDR_LEN; 4116 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4117 endptr = mp->b_wptr; 4118 4119 nexthdrp = &ip6h->ip6_nxt; 4120 while (whereptr < endptr) { 4121 /* Is there enough left for len + nexthdr? */ 4122 if (whereptr + MIN_EHDR_LEN > endptr) 4123 break; 4124 4125 switch (*nexthdrp) { 4126 case IPPROTO_HOPOPTS: 4127 case IPPROTO_DSTOPTS: 4128 /* Assumes the headers are identical for hbh and dst */ 4129 desthdr = (ip6_dest_t *)whereptr; 4130 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4131 if ((uchar_t *)desthdr + ehdrlen > endptr) 4132 return (B_FALSE); 4133 nexthdrp = &desthdr->ip6d_nxt; 4134 break; 4135 case IPPROTO_ROUTING: 4136 rthdr = (ip6_rthdr_t *)whereptr; 4137 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4138 if ((uchar_t *)rthdr + ehdrlen > endptr) 4139 return (B_FALSE); 4140 nexthdrp = &rthdr->ip6r_nxt; 4141 break; 4142 case IPPROTO_FRAGMENT: 4143 fraghdr = (ip6_frag_t *)whereptr; 4144 ehdrlen = sizeof (ip6_frag_t); 4145 if ((uchar_t *)&fraghdr[1] > endptr) 4146 return (B_FALSE); 4147 nexthdrp = &fraghdr->ip6f_nxt; 4148 break; 4149 case IPPROTO_NONE: 4150 /* No next header means we're finished */ 4151 default: 4152 *hdr_length_ptr = length; 4153 *nexthdrpp = nexthdrp; 4154 return (B_TRUE); 4155 } 4156 length += ehdrlen; 4157 whereptr += ehdrlen; 4158 *hdr_length_ptr = length; 4159 *nexthdrpp = nexthdrp; 4160 } 4161 switch (*nexthdrp) { 4162 case IPPROTO_HOPOPTS: 4163 case IPPROTO_DSTOPTS: 4164 case IPPROTO_ROUTING: 4165 case IPPROTO_FRAGMENT: 4166 /* 4167 * If any know extension headers are still to be processed, 4168 * the 
packet's malformed (or at least all the IP header(s) are 4169 * not in the same mblk - and that should never happen. 4170 */ 4171 return (B_FALSE); 4172 4173 default: 4174 /* 4175 * If we get here, we know that all of the IP headers were in 4176 * the same mblk, even if the ULP header is in the next mblk. 4177 */ 4178 *hdr_length_ptr = length; 4179 *nexthdrpp = nexthdrp; 4180 return (B_TRUE); 4181 } 4182 } 4183 4184 /* 4185 * Return the length of the IPv6 related headers (including extension headers) 4186 * Returns a length even if the packet is malformed. 4187 */ 4188 int 4189 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4190 { 4191 uint16_t hdr_len; 4192 uint8_t *nexthdrp; 4193 4194 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4195 return (hdr_len); 4196 } 4197 4198 /* 4199 * IPv6 - 4200 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4201 * to send out a packet to a destination address for which we do not have 4202 * specific routing information. 4203 * 4204 * Handle non-multicast packets. If ill is non-NULL the match is done 4205 * for that ill. 4206 * 4207 * When a specific ill is specified (using IPV6_PKTINFO, 4208 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4209 * on routing entries (ftable and ctable) that have a matching 4210 * ire->ire_ipif->ipif_ill. Thus this can only be used 4211 * for destinations that are on-link for the specific ill 4212 * and that can appear on multiple links. Thus it is useful 4213 * for multicast destinations, link-local destinations, and 4214 * at some point perhaps for site-local destinations (if the 4215 * node sits at a site boundary). 4216 * We create the cache entries in the regular ctable since 4217 * it can not "confuse" things for other destinations. 4218 * 4219 * NOTE : These are the scopes of some of the variables that point at IRE, 4220 * which needs to be followed while making any future modifications 4221 * to avoid memory leaks. 
4222 * 4223 * - ire and sire are the entries looked up initially by 4224 * ire_ftable_lookup_v6. 4225 * - ipif_ire is used to hold the interface ire associated with 4226 * the new cache ire. But it's scope is limited, so we always REFRELE 4227 * it before branching out to error paths. 4228 * - save_ire is initialized before ire_create, so that ire returned 4229 * by ire_create will not over-write the ire. We REFRELE save_ire 4230 * before breaking out of the switch. 4231 * 4232 * Thus on failures, we have to REFRELE only ire and sire, if they 4233 * are not NULL. 4234 */ 4235 /* ARGSUSED */ 4236 void 4237 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4238 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4239 { 4240 in6_addr_t v6gw; 4241 in6_addr_t dst; 4242 ire_t *ire = NULL; 4243 ipif_t *src_ipif = NULL; 4244 ill_t *dst_ill = NULL; 4245 ire_t *sire = NULL; 4246 ire_t *save_ire; 4247 ip6_t *ip6h; 4248 int err = 0; 4249 mblk_t *first_mp; 4250 ipsec_out_t *io; 4251 ushort_t ire_marks = 0; 4252 int match_flags; 4253 ire_t *first_sire = NULL; 4254 mblk_t *copy_mp = NULL; 4255 mblk_t *xmit_mp = NULL; 4256 in6_addr_t save_dst; 4257 uint32_t multirt_flags = 4258 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4259 boolean_t multirt_is_resolvable; 4260 boolean_t multirt_resolve_next; 4261 boolean_t need_rele = B_FALSE; 4262 boolean_t ip6_asp_table_held = B_FALSE; 4263 tsol_ire_gw_secattr_t *attrp = NULL; 4264 tsol_gcgrp_t *gcgrp = NULL; 4265 tsol_gcgrp_addr_t ga; 4266 4267 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4268 4269 first_mp = mp; 4270 if (mp->b_datap->db_type == M_CTL) { 4271 mp = mp->b_cont; 4272 io = (ipsec_out_t *)first_mp->b_rptr; 4273 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4274 } else { 4275 io = NULL; 4276 } 4277 4278 ip6h = (ip6_t *)mp->b_rptr; 4279 4280 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4281 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4282 goto icmp_err_ret; 4283 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4284 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4285 goto icmp_err_ret; 4286 } 4287 4288 /* 4289 * If this IRE is created for forwarding or it is not for 4290 * TCP traffic, mark it as temporary. 4291 * 4292 * Is it sufficient just to check the next header?? 4293 */ 4294 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4295 ire_marks |= IRE_MARK_TEMPORARY; 4296 4297 /* 4298 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4299 * chain until it gets the most specific information available. 4300 * For example, we know that there is no IRE_CACHE for this dest, 4301 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4302 * ire_ftable_lookup_v6 will look up the gateway, etc. 4303 */ 4304 4305 if (ill == NULL) { 4306 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4307 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4308 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4309 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4310 match_flags, ipst); 4311 } else { 4312 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4313 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4314 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4315 4316 /* 4317 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4318 * tied to an underlying interface, IS_UNDER_IPMP() may be 4319 * true even when building IREs that will be used for data 4320 * traffic. As such, use the packet's source address to 4321 * determine whether the traffic is test traffic, and set 4322 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4323 */ 4324 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4325 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4326 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4327 } 4328 4329 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4330 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4331 } 4332 4333 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4334 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4335 4336 /* 4337 * We enter a loop that will be run only once in most cases. 4338 * The loop is re-entered in the case where the destination 4339 * can be reached through multiple RTF_MULTIRT-flagged routes. 4340 * The intention is to compute multiple routes to a single 4341 * destination in a single ip_newroute_v6 call. 4342 * The information is contained in sire->ire_flags. 4343 */ 4344 do { 4345 multirt_resolve_next = B_FALSE; 4346 4347 if (dst_ill != NULL) { 4348 ill_refrele(dst_ill); 4349 dst_ill = NULL; 4350 } 4351 if (src_ipif != NULL) { 4352 ipif_refrele(src_ipif); 4353 src_ipif = NULL; 4354 } 4355 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4356 ip3dbg(("ip_newroute_v6: starting new resolution " 4357 "with first_mp %p, tag %d\n", 4358 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4359 4360 /* 4361 * We check if there are trailing unresolved routes for 4362 * the destination contained in sire. 4363 */ 4364 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4365 &sire, multirt_flags, msg_getlabel(mp), ipst); 4366 4367 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4368 "ire %p, sire %p\n", 4369 multirt_is_resolvable, (void *)ire, (void *)sire)); 4370 4371 if (!multirt_is_resolvable) { 4372 /* 4373 * No more multirt routes to resolve; give up 4374 * (all routes resolved or no more resolvable 4375 * routes). 
4376 */ 4377 if (ire != NULL) { 4378 ire_refrele(ire); 4379 ire = NULL; 4380 } 4381 } else { 4382 ASSERT(sire != NULL); 4383 ASSERT(ire != NULL); 4384 /* 4385 * We simply use first_sire as a flag that 4386 * indicates if a resolvable multirt route has 4387 * already been found during the preceding 4388 * loops. If it is not the case, we may have 4389 * to send an ICMP error to report that the 4390 * destination is unreachable. We do not 4391 * IRE_REFHOLD first_sire. 4392 */ 4393 if (first_sire == NULL) { 4394 first_sire = sire; 4395 } 4396 } 4397 } 4398 if ((ire == NULL) || (ire == sire)) { 4399 /* 4400 * either ire == NULL (the destination cannot be 4401 * resolved) or ire == sire (the gateway cannot be 4402 * resolved). At this point, there are no more routes 4403 * to resolve for the destination, thus we exit. 4404 */ 4405 if (ip_debug > 3) { 4406 /* ip2dbg */ 4407 pr_addr_dbg("ip_newroute_v6: " 4408 "can't resolve %s\n", AF_INET6, v6dstp); 4409 } 4410 ip3dbg(("ip_newroute_v6: " 4411 "ire %p, sire %p, first_sire %p\n", 4412 (void *)ire, (void *)sire, (void *)first_sire)); 4413 4414 if (sire != NULL) { 4415 ire_refrele(sire); 4416 sire = NULL; 4417 } 4418 4419 if (first_sire != NULL) { 4420 /* 4421 * At least one multirt route has been found 4422 * in the same ip_newroute() call; there is no 4423 * need to report an ICMP error. 4424 * first_sire was not IRE_REFHOLDed. 4425 */ 4426 MULTIRT_DEBUG_UNTAG(first_mp); 4427 freemsg(first_mp); 4428 return; 4429 } 4430 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4431 RTA_DST, ipst); 4432 goto icmp_err_ret; 4433 } 4434 4435 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4436 4437 /* 4438 * Verify that the returned IRE does not have either the 4439 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4440 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4441 */ 4442 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4443 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4444 goto icmp_err_ret; 4445 4446 /* 4447 * Increment the ire_ob_pkt_count field for ire if it is an 4448 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4449 * increment the same for the parent IRE, sire, if it is some 4450 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4451 */ 4452 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4453 UPDATE_OB_PKT_COUNT(ire); 4454 ire->ire_last_used_time = lbolt; 4455 } 4456 4457 if (sire != NULL) { 4458 mutex_enter(&sire->ire_lock); 4459 v6gw = sire->ire_gateway_addr_v6; 4460 mutex_exit(&sire->ire_lock); 4461 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4462 IRE_INTERFACE)) == 0); 4463 UPDATE_OB_PKT_COUNT(sire); 4464 sire->ire_last_used_time = lbolt; 4465 } else { 4466 v6gw = ipv6_all_zeros; 4467 } 4468 4469 /* 4470 * We have a route to reach the destination. Find the 4471 * appropriate ill, then get a source address that matches the 4472 * right scope via ipif_select_source_v6(). 4473 * 4474 * If we are here trying to create an IRE_CACHE for an offlink 4475 * destination and have an IRE_CACHE entry for VNI, then use 4476 * ire_stq instead since VNI's queue is a black hole. 4477 * 4478 * Note: While we pick a dst_ill we are really only interested 4479 * in the ill for load spreading. The source ipif is 4480 * determined by source address selection below. 
4481 */ 4482 if ((ire->ire_type == IRE_CACHE) && 4483 IS_VNI(ire->ire_ipif->ipif_ill)) { 4484 dst_ill = ire->ire_stq->q_ptr; 4485 ill_refhold(dst_ill); 4486 } else { 4487 ill_t *ill = ire->ire_ipif->ipif_ill; 4488 4489 if (IS_IPMP(ill)) { 4490 dst_ill = 4491 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4492 } else { 4493 dst_ill = ill; 4494 ill_refhold(dst_ill); 4495 } 4496 } 4497 4498 if (dst_ill == NULL) { 4499 if (ip_debug > 2) { 4500 pr_addr_dbg("ip_newroute_v6 : no dst " 4501 "ill for dst %s\n", AF_INET6, v6dstp); 4502 } 4503 goto icmp_err_ret; 4504 } 4505 4506 if (ill != NULL && dst_ill != ill && 4507 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4508 /* 4509 * We should have found a route matching "ill" 4510 * as we called ire_ftable_lookup_v6 with 4511 * MATCH_IRE_ILL. Rather than asserting when 4512 * there is a mismatch, we just drop the packet. 4513 */ 4514 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4515 "dst_ill %s ill %s\n", dst_ill->ill_name, 4516 ill->ill_name)); 4517 goto icmp_err_ret; 4518 } 4519 4520 /* 4521 * Pick a source address which matches the scope of the 4522 * destination address. 4523 * For RTF_SETSRC routes, the source address is imposed by the 4524 * parent ire (sire). 4525 */ 4526 ASSERT(src_ipif == NULL); 4527 4528 /* 4529 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4530 * tied to the underlying interface, IS_UNDER_IPMP() may be 4531 * true even when building IREs that will be used for data 4532 * traffic. As such, see if the packet's source address is a 4533 * test address, and if so use that test address's ipif for 4534 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4535 * ire_add_v6() can work properly. 
4536 */ 4537 if (ill != NULL && IS_UNDER_IPMP(ill)) 4538 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4539 4540 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4541 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4542 ip6_asp_can_lookup(ipst)) { 4543 /* 4544 * The ire cache entry we're adding is for the 4545 * gateway itself. The source address in this case 4546 * is relative to the gateway's address. 4547 */ 4548 ip6_asp_table_held = B_TRUE; 4549 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4550 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4551 if (src_ipif != NULL) 4552 ire_marks |= IRE_MARK_USESRC_CHECK; 4553 } else if (src_ipif == NULL) { 4554 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4555 /* 4556 * Check that the ipif matching the requested 4557 * source address still exists. 4558 */ 4559 src_ipif = ipif_lookup_addr_v6( 4560 &sire->ire_src_addr_v6, NULL, zoneid, 4561 NULL, NULL, NULL, NULL, ipst); 4562 } 4563 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4564 ip6_asp_table_held = B_TRUE; 4565 src_ipif = ipif_select_source_v6(dst_ill, 4566 v6dstp, B_FALSE, 4567 IPV6_PREFER_SRC_DEFAULT, zoneid); 4568 if (src_ipif != NULL) 4569 ire_marks |= IRE_MARK_USESRC_CHECK; 4570 } 4571 } 4572 4573 if (src_ipif == NULL) { 4574 if (ip_debug > 2) { 4575 /* ip1dbg */ 4576 pr_addr_dbg("ip_newroute_v6: no src for " 4577 "dst %s\n", AF_INET6, v6dstp); 4578 printf("ip_newroute_v6: interface name %s\n", 4579 dst_ill->ill_name); 4580 } 4581 goto icmp_err_ret; 4582 } 4583 4584 if (ip_debug > 3) { 4585 /* ip2dbg */ 4586 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4587 AF_INET6, &v6gw); 4588 } 4589 ip2dbg(("\tire type %s (%d)\n", 4590 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4591 4592 /* 4593 * At this point in ip_newroute_v6(), ire is either the 4594 * IRE_CACHE of the next-hop gateway for an off-subnet 4595 * destination or an IRE_INTERFACE type that should be used 4596 * to resolve an on-subnet destination or an on-subnet 4597 * 
next-hop gateway. 4598 * 4599 * In the IRE_CACHE case, we have the following : 4600 * 4601 * 1) src_ipif - used for getting a source address. 4602 * 4603 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4604 * means packets using this IRE_CACHE will go out on dst_ill. 4605 * 4606 * 3) The IRE sire will point to the prefix that is the longest 4607 * matching route for the destination. These prefix types 4608 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4609 * 4610 * The newly created IRE_CACHE entry for the off-subnet 4611 * destination is tied to both the prefix route and the 4612 * interface route used to resolve the next-hop gateway 4613 * via the ire_phandle and ire_ihandle fields, respectively. 4614 * 4615 * In the IRE_INTERFACE case, we have the following : 4616 * 4617 * 1) src_ipif - used for getting a source address. 4618 * 4619 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4620 * means packets using the IRE_CACHE that we will build 4621 * here will go out on dst_ill. 4622 * 4623 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4624 * to be created will only be tied to the IRE_INTERFACE that 4625 * was derived from the ire_ihandle field. 4626 * 4627 * If sire is non-NULL, it means the destination is off-link 4628 * and we will first create the IRE_CACHE for the gateway. 4629 * Next time through ip_newroute_v6, we will create the 4630 * IRE_CACHE for the final destination as described above. 4631 */ 4632 save_ire = ire; 4633 switch (ire->ire_type) { 4634 case IRE_CACHE: { 4635 ire_t *ipif_ire; 4636 4637 ASSERT(sire != NULL); 4638 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4639 mutex_enter(&ire->ire_lock); 4640 v6gw = ire->ire_gateway_addr_v6; 4641 mutex_exit(&ire->ire_lock); 4642 } 4643 /* 4644 * We need 3 ire's to create a new cache ire for an 4645 * off-link destination from the cache ire of the 4646 * gateway. 4647 * 4648 * 1. The prefix ire 'sire' 4649 * 2. The cache ire of the gateway 'ire' 4650 * 3. 
The interface ire 'ipif_ire' 4651 * 4652 * We have (1) and (2). We lookup (3) below. 4653 * 4654 * If there is no interface route to the gateway, 4655 * it is a race condition, where we found the cache 4656 * but the inteface route has been deleted. 4657 */ 4658 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4659 if (ipif_ire == NULL) { 4660 ip1dbg(("ip_newroute_v6:" 4661 "ire_ihandle_lookup_offlink_v6 failed\n")); 4662 goto icmp_err_ret; 4663 } 4664 4665 /* 4666 * Note: the new ire inherits RTF_SETSRC 4667 * and RTF_MULTIRT to propagate these flags from prefix 4668 * to cache. 4669 */ 4670 4671 /* 4672 * Check cached gateway IRE for any security 4673 * attributes; if found, associate the gateway 4674 * credentials group to the destination IRE. 4675 */ 4676 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4677 mutex_enter(&attrp->igsa_lock); 4678 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4679 GCGRP_REFHOLD(gcgrp); 4680 mutex_exit(&attrp->igsa_lock); 4681 } 4682 4683 ire = ire_create_v6( 4684 v6dstp, /* dest address */ 4685 &ipv6_all_ones, /* mask */ 4686 &src_ipif->ipif_v6src_addr, /* source address */ 4687 &v6gw, /* gateway address */ 4688 &save_ire->ire_max_frag, 4689 NULL, /* src nce */ 4690 dst_ill->ill_rq, /* recv-from queue */ 4691 dst_ill->ill_wq, /* send-to queue */ 4692 IRE_CACHE, 4693 src_ipif, 4694 &sire->ire_mask_v6, /* Parent mask */ 4695 sire->ire_phandle, /* Parent handle */ 4696 ipif_ire->ire_ihandle, /* Interface handle */ 4697 sire->ire_flags & /* flags if any */ 4698 (RTF_SETSRC | RTF_MULTIRT), 4699 &(sire->ire_uinfo), 4700 NULL, 4701 gcgrp, 4702 ipst); 4703 4704 if (ire == NULL) { 4705 if (gcgrp != NULL) { 4706 GCGRP_REFRELE(gcgrp); 4707 gcgrp = NULL; 4708 } 4709 ire_refrele(save_ire); 4710 ire_refrele(ipif_ire); 4711 break; 4712 } 4713 4714 /* reference now held by IRE */ 4715 gcgrp = NULL; 4716 4717 ire->ire_marks |= ire_marks; 4718 4719 /* 4720 * Prevent sire and ipif_ire from getting deleted. 
The 4721 * newly created ire is tied to both of them via the 4722 * phandle and ihandle respectively. 4723 */ 4724 IRB_REFHOLD(sire->ire_bucket); 4725 /* Has it been removed already ? */ 4726 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4727 IRB_REFRELE(sire->ire_bucket); 4728 ire_refrele(ipif_ire); 4729 ire_refrele(save_ire); 4730 break; 4731 } 4732 4733 IRB_REFHOLD(ipif_ire->ire_bucket); 4734 /* Has it been removed already ? */ 4735 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4736 IRB_REFRELE(ipif_ire->ire_bucket); 4737 IRB_REFRELE(sire->ire_bucket); 4738 ire_refrele(ipif_ire); 4739 ire_refrele(save_ire); 4740 break; 4741 } 4742 4743 xmit_mp = first_mp; 4744 if (ire->ire_flags & RTF_MULTIRT) { 4745 copy_mp = copymsg(first_mp); 4746 if (copy_mp != NULL) { 4747 xmit_mp = copy_mp; 4748 MULTIRT_DEBUG_TAG(first_mp); 4749 } 4750 } 4751 ire_add_then_send(q, ire, xmit_mp); 4752 if (ip6_asp_table_held) { 4753 ip6_asp_table_refrele(ipst); 4754 ip6_asp_table_held = B_FALSE; 4755 } 4756 ire_refrele(save_ire); 4757 4758 /* Assert that sire is not deleted yet. */ 4759 ASSERT(sire->ire_ptpn != NULL); 4760 IRB_REFRELE(sire->ire_bucket); 4761 4762 /* Assert that ipif_ire is not deleted yet. */ 4763 ASSERT(ipif_ire->ire_ptpn != NULL); 4764 IRB_REFRELE(ipif_ire->ire_bucket); 4765 ire_refrele(ipif_ire); 4766 4767 if (copy_mp != NULL) { 4768 /* 4769 * Search for the next unresolved 4770 * multirt route. 4771 */ 4772 copy_mp = NULL; 4773 ipif_ire = NULL; 4774 ire = NULL; 4775 /* re-enter the loop */ 4776 multirt_resolve_next = B_TRUE; 4777 continue; 4778 } 4779 ire_refrele(sire); 4780 ill_refrele(dst_ill); 4781 ipif_refrele(src_ipif); 4782 return; 4783 } 4784 case IRE_IF_NORESOLVER: 4785 /* 4786 * We have what we need to build an IRE_CACHE. 4787 * 4788 * handle the Gated case, where we create 4789 * a NORESOLVER route for loopback. 
4790 */ 4791 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4792 break; 4793 /* 4794 * TSol note: We are creating the ire cache for the 4795 * destination 'dst'. If 'dst' is offlink, going 4796 * through the first hop 'gw', the security attributes 4797 * of 'dst' must be set to point to the gateway 4798 * credentials of gateway 'gw'. If 'dst' is onlink, it 4799 * is possible that 'dst' is a potential gateway that is 4800 * referenced by some route that has some security 4801 * attributes. Thus in the former case, we need to do a 4802 * gcgrp_lookup of 'gw' while in the latter case we 4803 * need to do gcgrp_lookup of 'dst' itself. 4804 */ 4805 ga.ga_af = AF_INET6; 4806 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4807 ga.ga_addr = v6gw; 4808 else 4809 ga.ga_addr = *v6dstp; 4810 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4811 4812 /* 4813 * Note: the new ire inherits sire flags RTF_SETSRC 4814 * and RTF_MULTIRT to propagate those rules from prefix 4815 * to cache. 4816 */ 4817 ire = ire_create_v6( 4818 v6dstp, /* dest address */ 4819 &ipv6_all_ones, /* mask */ 4820 &src_ipif->ipif_v6src_addr, /* source address */ 4821 &v6gw, /* gateway address */ 4822 &save_ire->ire_max_frag, 4823 NULL, /* no src nce */ 4824 dst_ill->ill_rq, /* recv-from queue */ 4825 dst_ill->ill_wq, /* send-to queue */ 4826 IRE_CACHE, 4827 src_ipif, 4828 &save_ire->ire_mask_v6, /* Parent mask */ 4829 (sire != NULL) ? /* Parent handle */ 4830 sire->ire_phandle : 0, 4831 save_ire->ire_ihandle, /* Interface handle */ 4832 (sire != NULL) ? 
/* flags if any */ 4833 sire->ire_flags & 4834 (RTF_SETSRC | RTF_MULTIRT) : 0, 4835 &(save_ire->ire_uinfo), 4836 NULL, 4837 gcgrp, 4838 ipst); 4839 4840 if (ire == NULL) { 4841 if (gcgrp != NULL) { 4842 GCGRP_REFRELE(gcgrp); 4843 gcgrp = NULL; 4844 } 4845 ire_refrele(save_ire); 4846 break; 4847 } 4848 4849 /* reference now held by IRE */ 4850 gcgrp = NULL; 4851 4852 ire->ire_marks |= ire_marks; 4853 4854 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4855 dst = v6gw; 4856 else 4857 dst = *v6dstp; 4858 err = ndp_noresolver(dst_ill, &dst); 4859 if (err != 0) { 4860 ire_refrele(save_ire); 4861 break; 4862 } 4863 4864 /* Prevent save_ire from getting deleted */ 4865 IRB_REFHOLD(save_ire->ire_bucket); 4866 /* Has it been removed already ? */ 4867 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4868 IRB_REFRELE(save_ire->ire_bucket); 4869 ire_refrele(save_ire); 4870 break; 4871 } 4872 4873 xmit_mp = first_mp; 4874 /* 4875 * In case of MULTIRT, a copy of the current packet 4876 * to send is made to further re-enter the 4877 * loop and attempt another route resolution 4878 */ 4879 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4880 copy_mp = copymsg(first_mp); 4881 if (copy_mp != NULL) { 4882 xmit_mp = copy_mp; 4883 MULTIRT_DEBUG_TAG(first_mp); 4884 } 4885 } 4886 ire_add_then_send(q, ire, xmit_mp); 4887 if (ip6_asp_table_held) { 4888 ip6_asp_table_refrele(ipst); 4889 ip6_asp_table_held = B_FALSE; 4890 } 4891 4892 /* Assert that it is not deleted yet. */ 4893 ASSERT(save_ire->ire_ptpn != NULL); 4894 IRB_REFRELE(save_ire->ire_bucket); 4895 ire_refrele(save_ire); 4896 4897 if (copy_mp != NULL) { 4898 /* 4899 * If we found a (no)resolver, we ignore any 4900 * trailing top priority IRE_CACHE in 4901 * further loops. This ensures that we do not 4902 * omit any (no)resolver despite the priority 4903 * in this call. 4904 * IRE_CACHE, if any, will be processed 4905 * by another thread entering ip_newroute(), 4906 * (on resolver response, for example). 
4907 * We use this to force multiple parallel 4908 * resolution as soon as a packet needs to be 4909 * sent. The result is, after one packet 4910 * emission all reachable routes are generally 4911 * resolved. 4912 * Otherwise, complete resolution of MULTIRT 4913 * routes would require several emissions as 4914 * side effect. 4915 */ 4916 multirt_flags &= ~MULTIRT_CACHEGW; 4917 4918 /* 4919 * Search for the next unresolved multirt 4920 * route. 4921 */ 4922 copy_mp = NULL; 4923 save_ire = NULL; 4924 ire = NULL; 4925 /* re-enter the loop */ 4926 multirt_resolve_next = B_TRUE; 4927 continue; 4928 } 4929 4930 /* Don't need sire anymore */ 4931 if (sire != NULL) 4932 ire_refrele(sire); 4933 ill_refrele(dst_ill); 4934 ipif_refrele(src_ipif); 4935 return; 4936 4937 case IRE_IF_RESOLVER: 4938 /* 4939 * We can't build an IRE_CACHE yet, but at least we 4940 * found a resolver that can help. 4941 */ 4942 dst = *v6dstp; 4943 4944 /* 4945 * To be at this point in the code with a non-zero gw 4946 * means that dst is reachable through a gateway that 4947 * we have never resolved. By changing dst to the gw 4948 * addr we resolve the gateway first. When 4949 * ire_add_then_send() tries to put the IP dg to dst, 4950 * it will reenter ip_newroute() at which time we will 4951 * find the IRE_CACHE for the gw and create another 4952 * IRE_CACHE above (for dst itself). 4953 */ 4954 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4955 save_dst = dst; 4956 dst = v6gw; 4957 v6gw = ipv6_all_zeros; 4958 } 4959 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4960 /* 4961 * Ask the external resolver to do its thing. 
4962 * Make an mblk chain in the following form: 4963 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4964 */ 4965 mblk_t *ire_mp; 4966 mblk_t *areq_mp; 4967 areq_t *areq; 4968 in6_addr_t *addrp; 4969 4970 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4971 if (ip6_asp_table_held) { 4972 ip6_asp_table_refrele(ipst); 4973 ip6_asp_table_held = B_FALSE; 4974 } 4975 ire = ire_create_mp_v6( 4976 &dst, /* dest address */ 4977 &ipv6_all_ones, /* mask */ 4978 &src_ipif->ipif_v6src_addr, 4979 /* source address */ 4980 &v6gw, /* gateway address */ 4981 NULL, /* no src nce */ 4982 dst_ill->ill_rq, /* recv-from queue */ 4983 dst_ill->ill_wq, /* send-to queue */ 4984 IRE_CACHE, 4985 src_ipif, 4986 &save_ire->ire_mask_v6, /* Parent mask */ 4987 0, 4988 save_ire->ire_ihandle, 4989 /* Interface handle */ 4990 0, /* flags if any */ 4991 &(save_ire->ire_uinfo), 4992 NULL, 4993 NULL, 4994 ipst); 4995 4996 ire_refrele(save_ire); 4997 if (ire == NULL) { 4998 ip1dbg(("ip_newroute_v6:" 4999 "ire is NULL\n")); 5000 break; 5001 } 5002 5003 if ((sire != NULL) && 5004 (sire->ire_flags & RTF_MULTIRT)) { 5005 /* 5006 * processing a copy of the packet to 5007 * send for further resolution loops 5008 */ 5009 copy_mp = copymsg(first_mp); 5010 if (copy_mp != NULL) 5011 MULTIRT_DEBUG_TAG(copy_mp); 5012 } 5013 ire->ire_marks |= ire_marks; 5014 ire_mp = ire->ire_mp; 5015 /* 5016 * Now create or find an nce for this interface. 5017 * The hw addr will need to to be set from 5018 * the reply to the AR_ENTRY_QUERY that 5019 * we're about to send. This will be done in 5020 * ire_add_v6(). 5021 */ 5022 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 5023 switch (err) { 5024 case 0: 5025 /* 5026 * New cache entry created. 5027 * Break, then ask the external 5028 * resolver. 5029 */ 5030 break; 5031 case EINPROGRESS: 5032 /* 5033 * Resolution in progress; 5034 * packet has been queued by 5035 * ndp_resolver(). 5036 */ 5037 ire_delete(ire); 5038 ire = NULL; 5039 /* 5040 * Check if another multirt 5041 * route must be resolved. 
5042 */ 5043 if (copy_mp != NULL) { 5044 /* 5045 * If we found a resolver, we 5046 * ignore any trailing top 5047 * priority IRE_CACHE in 5048 * further loops. The reason is 5049 * the same as for noresolver. 5050 */ 5051 multirt_flags &= 5052 ~MULTIRT_CACHEGW; 5053 /* 5054 * Search for the next 5055 * unresolved multirt route. 5056 */ 5057 first_mp = copy_mp; 5058 copy_mp = NULL; 5059 mp = first_mp; 5060 if (mp->b_datap->db_type == 5061 M_CTL) { 5062 mp = mp->b_cont; 5063 } 5064 ASSERT(sire != NULL); 5065 dst = save_dst; 5066 /* 5067 * re-enter the loop 5068 */ 5069 multirt_resolve_next = 5070 B_TRUE; 5071 continue; 5072 } 5073 5074 if (sire != NULL) 5075 ire_refrele(sire); 5076 ill_refrele(dst_ill); 5077 ipif_refrele(src_ipif); 5078 return; 5079 default: 5080 /* 5081 * Transient error; packet will be 5082 * freed. 5083 */ 5084 ire_delete(ire); 5085 ire = NULL; 5086 break; 5087 } 5088 if (err != 0) 5089 break; 5090 /* 5091 * Now set up the AR_ENTRY_QUERY and send it. 5092 */ 5093 areq_mp = ill_arp_alloc(dst_ill, 5094 (uchar_t *)&ipv6_areq_template, 5095 (caddr_t)&dst); 5096 if (areq_mp == NULL) { 5097 ip1dbg(("ip_newroute_v6:" 5098 "areq_mp is NULL\n")); 5099 freemsg(ire_mp); 5100 break; 5101 } 5102 areq = (areq_t *)areq_mp->b_rptr; 5103 addrp = (in6_addr_t *)((char *)areq + 5104 areq->areq_target_addr_offset); 5105 *addrp = dst; 5106 addrp = (in6_addr_t *)((char *)areq + 5107 areq->areq_sender_addr_offset); 5108 *addrp = src_ipif->ipif_v6src_addr; 5109 /* 5110 * link the chain, then send up to the resolver. 5111 */ 5112 linkb(areq_mp, ire_mp); 5113 linkb(areq_mp, mp); 5114 ip1dbg(("ip_newroute_v6:" 5115 "putnext to resolver\n")); 5116 putnext(dst_ill->ill_rq, areq_mp); 5117 /* 5118 * Check if another multirt route 5119 * must be resolved. 5120 */ 5121 ire = NULL; 5122 if (copy_mp != NULL) { 5123 /* 5124 * If we find a resolver, we ignore any 5125 * trailing top priority IRE_CACHE in 5126 * further loops. The reason is the 5127 * same as for noresolver. 
5128 */ 5129 multirt_flags &= ~MULTIRT_CACHEGW; 5130 /* 5131 * Search for the next unresolved 5132 * multirt route. 5133 */ 5134 first_mp = copy_mp; 5135 copy_mp = NULL; 5136 mp = first_mp; 5137 if (mp->b_datap->db_type == M_CTL) { 5138 mp = mp->b_cont; 5139 } 5140 ASSERT(sire != NULL); 5141 dst = save_dst; 5142 /* 5143 * re-enter the loop 5144 */ 5145 multirt_resolve_next = B_TRUE; 5146 continue; 5147 } 5148 5149 if (sire != NULL) 5150 ire_refrele(sire); 5151 ill_refrele(dst_ill); 5152 ipif_refrele(src_ipif); 5153 return; 5154 } 5155 /* 5156 * Non-external resolver case. 5157 * 5158 * TSol note: Please see the note above the 5159 * IRE_IF_NORESOLVER case. 5160 */ 5161 ga.ga_af = AF_INET6; 5162 ga.ga_addr = dst; 5163 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5164 5165 ire = ire_create_v6( 5166 &dst, /* dest address */ 5167 &ipv6_all_ones, /* mask */ 5168 &src_ipif->ipif_v6src_addr, /* source address */ 5169 &v6gw, /* gateway address */ 5170 &save_ire->ire_max_frag, 5171 NULL, /* no src nce */ 5172 dst_ill->ill_rq, /* recv-from queue */ 5173 dst_ill->ill_wq, /* send-to queue */ 5174 IRE_CACHE, 5175 src_ipif, 5176 &save_ire->ire_mask_v6, /* Parent mask */ 5177 0, 5178 save_ire->ire_ihandle, /* Interface handle */ 5179 0, /* flags if any */ 5180 &(save_ire->ire_uinfo), 5181 NULL, 5182 gcgrp, 5183 ipst); 5184 5185 if (ire == NULL) { 5186 if (gcgrp != NULL) { 5187 GCGRP_REFRELE(gcgrp); 5188 gcgrp = NULL; 5189 } 5190 ire_refrele(save_ire); 5191 break; 5192 } 5193 5194 /* reference now held by IRE */ 5195 gcgrp = NULL; 5196 5197 if ((sire != NULL) && 5198 (sire->ire_flags & RTF_MULTIRT)) { 5199 copy_mp = copymsg(first_mp); 5200 if (copy_mp != NULL) 5201 MULTIRT_DEBUG_TAG(copy_mp); 5202 } 5203 5204 ire->ire_marks |= ire_marks; 5205 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5206 switch (err) { 5207 case 0: 5208 /* Prevent save_ire from getting deleted */ 5209 IRB_REFHOLD(save_ire->ire_bucket); 5210 /* Has it been removed already ? 
*/ 5211 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5212 IRB_REFRELE(save_ire->ire_bucket); 5213 ire_refrele(save_ire); 5214 break; 5215 } 5216 5217 /* 5218 * We have a resolved cache entry, 5219 * add in the IRE. 5220 */ 5221 ire_add_then_send(q, ire, first_mp); 5222 if (ip6_asp_table_held) { 5223 ip6_asp_table_refrele(ipst); 5224 ip6_asp_table_held = B_FALSE; 5225 } 5226 5227 /* Assert that it is not deleted yet. */ 5228 ASSERT(save_ire->ire_ptpn != NULL); 5229 IRB_REFRELE(save_ire->ire_bucket); 5230 ire_refrele(save_ire); 5231 /* 5232 * Check if another multirt route 5233 * must be resolved. 5234 */ 5235 ire = NULL; 5236 if (copy_mp != NULL) { 5237 /* 5238 * If we find a resolver, we ignore any 5239 * trailing top priority IRE_CACHE in 5240 * further loops. The reason is the 5241 * same as for noresolver. 5242 */ 5243 multirt_flags &= ~MULTIRT_CACHEGW; 5244 /* 5245 * Search for the next unresolved 5246 * multirt route. 5247 */ 5248 first_mp = copy_mp; 5249 copy_mp = NULL; 5250 mp = first_mp; 5251 if (mp->b_datap->db_type == M_CTL) { 5252 mp = mp->b_cont; 5253 } 5254 ASSERT(sire != NULL); 5255 dst = save_dst; 5256 /* 5257 * re-enter the loop 5258 */ 5259 multirt_resolve_next = B_TRUE; 5260 continue; 5261 } 5262 5263 if (sire != NULL) 5264 ire_refrele(sire); 5265 ill_refrele(dst_ill); 5266 ipif_refrele(src_ipif); 5267 return; 5268 5269 case EINPROGRESS: 5270 /* 5271 * mp was consumed - presumably queued. 5272 * No need for ire, presumably resolution is 5273 * in progress, and ire will be added when the 5274 * address is resolved. 5275 */ 5276 if (ip6_asp_table_held) { 5277 ip6_asp_table_refrele(ipst); 5278 ip6_asp_table_held = B_FALSE; 5279 } 5280 ASSERT(ire->ire_nce == NULL); 5281 ire_delete(ire); 5282 ire_refrele(save_ire); 5283 /* 5284 * Check if another multirt route 5285 * must be resolved. 
5286 */ 5287 ire = NULL; 5288 if (copy_mp != NULL) { 5289 /* 5290 * If we find a resolver, we ignore any 5291 * trailing top priority IRE_CACHE in 5292 * further loops. The reason is the 5293 * same as for noresolver. 5294 */ 5295 multirt_flags &= ~MULTIRT_CACHEGW; 5296 /* 5297 * Search for the next unresolved 5298 * multirt route. 5299 */ 5300 first_mp = copy_mp; 5301 copy_mp = NULL; 5302 mp = first_mp; 5303 if (mp->b_datap->db_type == M_CTL) { 5304 mp = mp->b_cont; 5305 } 5306 ASSERT(sire != NULL); 5307 dst = save_dst; 5308 /* 5309 * re-enter the loop 5310 */ 5311 multirt_resolve_next = B_TRUE; 5312 continue; 5313 } 5314 if (sire != NULL) 5315 ire_refrele(sire); 5316 ill_refrele(dst_ill); 5317 ipif_refrele(src_ipif); 5318 return; 5319 default: 5320 /* Some transient error */ 5321 ASSERT(ire->ire_nce == NULL); 5322 ire_refrele(save_ire); 5323 break; 5324 } 5325 break; 5326 default: 5327 break; 5328 } 5329 if (ip6_asp_table_held) { 5330 ip6_asp_table_refrele(ipst); 5331 ip6_asp_table_held = B_FALSE; 5332 } 5333 } while (multirt_resolve_next); 5334 5335 err_ret: 5336 ip1dbg(("ip_newroute_v6: dropped\n")); 5337 if (src_ipif != NULL) 5338 ipif_refrele(src_ipif); 5339 if (dst_ill != NULL) { 5340 need_rele = B_TRUE; 5341 ill = dst_ill; 5342 } 5343 if (ill != NULL) { 5344 if (mp->b_prev != NULL) { 5345 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5346 } else { 5347 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5348 } 5349 5350 if (need_rele) 5351 ill_refrele(ill); 5352 } else { 5353 if (mp->b_prev != NULL) { 5354 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5355 } else { 5356 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5357 } 5358 } 5359 /* Did this packet originate externally? 
*/ 5360 if (mp->b_prev) { 5361 mp->b_next = NULL; 5362 mp->b_prev = NULL; 5363 } 5364 if (copy_mp != NULL) { 5365 MULTIRT_DEBUG_UNTAG(copy_mp); 5366 freemsg(copy_mp); 5367 } 5368 MULTIRT_DEBUG_UNTAG(first_mp); 5369 freemsg(first_mp); 5370 if (ire != NULL) 5371 ire_refrele(ire); 5372 if (sire != NULL) 5373 ire_refrele(sire); 5374 return; 5375 5376 icmp_err_ret: 5377 if (ip6_asp_table_held) 5378 ip6_asp_table_refrele(ipst); 5379 if (src_ipif != NULL) 5380 ipif_refrele(src_ipif); 5381 if (dst_ill != NULL) { 5382 need_rele = B_TRUE; 5383 ill = dst_ill; 5384 } 5385 ip1dbg(("ip_newroute_v6: no route\n")); 5386 if (sire != NULL) 5387 ire_refrele(sire); 5388 /* 5389 * We need to set sire to NULL to avoid double freeing if we 5390 * ever goto err_ret from below. 5391 */ 5392 sire = NULL; 5393 ip6h = (ip6_t *)mp->b_rptr; 5394 /* Skip ip6i_t header if present */ 5395 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5396 /* Make sure the IPv6 header is present */ 5397 if ((mp->b_wptr - (uchar_t *)ip6h) < 5398 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5399 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5400 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5401 goto err_ret; 5402 } 5403 } 5404 mp->b_rptr += sizeof (ip6i_t); 5405 ip6h = (ip6_t *)mp->b_rptr; 5406 } 5407 /* Did this packet originate externally? 
*/ 5408 if (mp->b_prev) { 5409 if (ill != NULL) { 5410 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5411 } else { 5412 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5413 } 5414 mp->b_next = NULL; 5415 mp->b_prev = NULL; 5416 q = WR(q); 5417 } else { 5418 if (ill != NULL) { 5419 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5420 } else { 5421 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5422 } 5423 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5424 /* Failed */ 5425 if (copy_mp != NULL) { 5426 MULTIRT_DEBUG_UNTAG(copy_mp); 5427 freemsg(copy_mp); 5428 } 5429 MULTIRT_DEBUG_UNTAG(first_mp); 5430 freemsg(first_mp); 5431 if (ire != NULL) 5432 ire_refrele(ire); 5433 if (need_rele) 5434 ill_refrele(ill); 5435 return; 5436 } 5437 } 5438 5439 if (need_rele) 5440 ill_refrele(ill); 5441 5442 /* 5443 * At this point we will have ire only if RTF_BLACKHOLE 5444 * or RTF_REJECT flags are set on the IRE. It will not 5445 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5446 */ 5447 if (ire != NULL) { 5448 if (ire->ire_flags & RTF_BLACKHOLE) { 5449 ire_refrele(ire); 5450 if (copy_mp != NULL) { 5451 MULTIRT_DEBUG_UNTAG(copy_mp); 5452 freemsg(copy_mp); 5453 } 5454 MULTIRT_DEBUG_UNTAG(first_mp); 5455 freemsg(first_mp); 5456 return; 5457 } 5458 ire_refrele(ire); 5459 } 5460 if (ip_debug > 3) { 5461 /* ip2dbg */ 5462 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5463 AF_INET6, v6dstp); 5464 } 5465 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5466 B_FALSE, B_FALSE, zoneid, ipst); 5467 } 5468 5469 /* 5470 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5471 * we need to send out a packet to a destination address for which we do not 5472 * have specific routing information. It is only used for multicast packets. 5473 * 5474 * If unspec_src we allow creating an IRE with source address zero. 5475 * ire_send_v6() will delete it after the packet is sent. 
5476 */ 5477 void 5478 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5479 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5480 zoneid_t zoneid) 5481 { 5482 ire_t *ire = NULL; 5483 ipif_t *src_ipif = NULL; 5484 int err = 0; 5485 ill_t *dst_ill = NULL; 5486 ire_t *save_ire; 5487 ipsec_out_t *io; 5488 ill_t *ill; 5489 mblk_t *first_mp; 5490 ire_t *fire = NULL; 5491 mblk_t *copy_mp = NULL; 5492 const in6_addr_t *ire_v6srcp; 5493 boolean_t probe = B_FALSE; 5494 boolean_t multirt_resolve_next; 5495 boolean_t ipif_held = B_FALSE; 5496 boolean_t ill_held = B_FALSE; 5497 boolean_t ip6_asp_table_held = B_FALSE; 5498 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5499 5500 /* 5501 * This loop is run only once in most cases. 5502 * We loop to resolve further routes only when the destination 5503 * can be reached through multiple RTF_MULTIRT-flagged ires. 5504 */ 5505 do { 5506 multirt_resolve_next = B_FALSE; 5507 if (dst_ill != NULL) { 5508 ill_refrele(dst_ill); 5509 dst_ill = NULL; 5510 } 5511 5512 if (src_ipif != NULL) { 5513 ipif_refrele(src_ipif); 5514 src_ipif = NULL; 5515 } 5516 ASSERT(ipif != NULL); 5517 ill = ipif->ipif_ill; 5518 5519 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5520 if (ip_debug > 2) { 5521 /* ip1dbg */ 5522 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5523 AF_INET6, v6dstp); 5524 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5525 ill->ill_name, ipif->ipif_isv6); 5526 } 5527 5528 first_mp = mp; 5529 if (mp->b_datap->db_type == M_CTL) { 5530 mp = mp->b_cont; 5531 io = (ipsec_out_t *)first_mp->b_rptr; 5532 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5533 } else { 5534 io = NULL; 5535 } 5536 5537 /* 5538 * If the interface is a pt-pt interface we look for an 5539 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5540 * local_address and the pt-pt destination address. 5541 * Otherwise we just match the local address. 
5542 */ 5543 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5544 goto err_ret; 5545 } 5546 5547 /* 5548 * We check if an IRE_OFFSUBNET for the addr that goes through 5549 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5550 * RTF_MULTIRT flags must be honored. 5551 */ 5552 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5553 ip2dbg(("ip_newroute_ipif_v6: " 5554 "ipif_lookup_multi_ire_v6(" 5555 "ipif %p, dst %08x) = fire %p\n", 5556 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5557 (void *)fire)); 5558 5559 ASSERT(src_ipif == NULL); 5560 5561 /* 5562 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5563 * tied to the underlying interface, IS_UNDER_IPMP() may be 5564 * true even when building IREs that will be used for data 5565 * traffic. As such, see if the packet's source address is a 5566 * test address, and if so use that test address's ipif for 5567 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5568 * ire_add_v6() can work properly. 5569 */ 5570 if (IS_UNDER_IPMP(ill)) 5571 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5572 5573 /* 5574 * Determine the outbound (destination) ill for this route. 5575 * If IPMP is not in use, that's the same as our ill. If IPMP 5576 * is in-use and we're on the IPMP interface, or we're on an 5577 * underlying ill but sending data traffic, use a suitable 5578 * destination ill from the group. The latter case covers a 5579 * subtle edge condition with multicast: when we bring up an 5580 * IPv6 data address, we will create an NCE on an underlying 5581 * interface, and send solitications to ff02::1, which would 5582 * take us through here, and cause us to create an IRE for 5583 * ff02::1. To meet our defined semantics for multicast (and 5584 * ensure there aren't unexpected echoes), that IRE needs to 5585 * use the IPMP group's nominated multicast interface. 5586 * 5587 * Note: the source ipif is determined by source address 5588 * selection later. 
5589 */ 5590 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5591 ill_t *ipmp_ill; 5592 ipmp_illgrp_t *illg; 5593 5594 if (IS_UNDER_IPMP(ill)) { 5595 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5596 } else { 5597 ipmp_ill = ill; 5598 ill_refhold(ipmp_ill); /* for symmetry */ 5599 } 5600 5601 if (ipmp_ill == NULL) 5602 goto err_ret; 5603 5604 illg = ipmp_ill->ill_grp; 5605 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5606 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5607 else 5608 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5609 5610 ill_refrele(ipmp_ill); 5611 } else { 5612 dst_ill = ill; 5613 ill_refhold(dst_ill); /* for symmetry */ 5614 } 5615 5616 if (dst_ill == NULL) { 5617 if (ip_debug > 2) { 5618 pr_addr_dbg("ip_newroute_ipif_v6: " 5619 "no dst ill for dst %s\n", 5620 AF_INET6, v6dstp); 5621 } 5622 goto err_ret; 5623 } 5624 5625 /* 5626 * Pick a source address which matches the scope of the 5627 * destination address. 5628 * For RTF_SETSRC routes, the source address is imposed by the 5629 * parent ire (fire). 5630 */ 5631 5632 if (src_ipif == NULL && fire != NULL && 5633 (fire->ire_flags & RTF_SETSRC)) { 5634 /* 5635 * Check that the ipif matching the requested source 5636 * address still exists. 
5637 */ 5638 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5639 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5640 } 5641 5642 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5643 ip6_asp_table_held = B_TRUE; 5644 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5645 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5646 } 5647 5648 if (src_ipif == NULL) { 5649 if (!unspec_src) { 5650 if (ip_debug > 2) { 5651 /* ip1dbg */ 5652 pr_addr_dbg("ip_newroute_ipif_v6: " 5653 "no src for dst %s\n", 5654 AF_INET6, v6dstp); 5655 printf(" through interface %s\n", 5656 dst_ill->ill_name); 5657 } 5658 goto err_ret; 5659 } 5660 ire_v6srcp = &ipv6_all_zeros; 5661 src_ipif = ipif; 5662 ipif_refhold(src_ipif); 5663 } else { 5664 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5665 } 5666 5667 ire = ipif_to_ire_v6(ipif); 5668 if (ire == NULL) { 5669 if (ip_debug > 2) { 5670 /* ip1dbg */ 5671 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5672 AF_INET6, &ipif->ipif_v6lcl_addr); 5673 printf("ip_newroute_ipif_v6: " 5674 "if %s\n", dst_ill->ill_name); 5675 } 5676 goto err_ret; 5677 } 5678 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5679 goto err_ret; 5680 5681 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5682 5683 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5684 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5685 if (ip_debug > 2) { 5686 /* ip1dbg */ 5687 pr_addr_dbg(" address %s\n", 5688 AF_INET6, &ire->ire_src_addr_v6); 5689 } 5690 save_ire = ire; 5691 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5692 (void *)ire, (void *)ipif)); 5693 5694 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5695 /* 5696 * an IRE_OFFSUBET was looked up 5697 * on that interface. 5698 * this ire has RTF_MULTIRT flag, 5699 * so the resolution loop 5700 * will be re-entered to resolve 5701 * additional routes on other 5702 * interfaces. For that purpose, 5703 * a copy of the packet is 5704 * made at this point. 
5705 */ 5706 fire->ire_last_used_time = lbolt; 5707 copy_mp = copymsg(first_mp); 5708 if (copy_mp) { 5709 MULTIRT_DEBUG_TAG(copy_mp); 5710 } 5711 } 5712 5713 switch (ire->ire_type) { 5714 case IRE_IF_NORESOLVER: { 5715 /* 5716 * We have what we need to build an IRE_CACHE. 5717 * 5718 * handle the Gated case, where we create 5719 * a NORESOLVER route for loopback. 5720 */ 5721 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5722 break; 5723 /* 5724 * The newly created ire will inherit the flags of the 5725 * parent ire, if any. 5726 */ 5727 ire = ire_create_v6( 5728 v6dstp, /* dest address */ 5729 &ipv6_all_ones, /* mask */ 5730 ire_v6srcp, /* source address */ 5731 NULL, /* gateway address */ 5732 &save_ire->ire_max_frag, 5733 NULL, /* no src nce */ 5734 dst_ill->ill_rq, /* recv-from queue */ 5735 dst_ill->ill_wq, /* send-to queue */ 5736 IRE_CACHE, 5737 src_ipif, 5738 NULL, 5739 (fire != NULL) ? /* Parent handle */ 5740 fire->ire_phandle : 0, 5741 save_ire->ire_ihandle, /* Interface handle */ 5742 (fire != NULL) ? 5743 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5744 0, 5745 &ire_uinfo_null, 5746 NULL, 5747 NULL, 5748 ipst); 5749 5750 if (ire == NULL) { 5751 ire_refrele(save_ire); 5752 break; 5753 } 5754 5755 err = ndp_noresolver(dst_ill, v6dstp); 5756 if (err != 0) { 5757 ire_refrele(save_ire); 5758 break; 5759 } 5760 5761 /* Prevent save_ire from getting deleted */ 5762 IRB_REFHOLD(save_ire->ire_bucket); 5763 /* Has it been removed already ? */ 5764 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5765 IRB_REFRELE(save_ire->ire_bucket); 5766 ire_refrele(save_ire); 5767 break; 5768 } 5769 5770 ire_add_then_send(q, ire, first_mp); 5771 if (ip6_asp_table_held) { 5772 ip6_asp_table_refrele(ipst); 5773 ip6_asp_table_held = B_FALSE; 5774 } 5775 5776 /* Assert that it is not deleted yet. 
*/ 5777 ASSERT(save_ire->ire_ptpn != NULL); 5778 IRB_REFRELE(save_ire->ire_bucket); 5779 ire_refrele(save_ire); 5780 if (fire != NULL) { 5781 ire_refrele(fire); 5782 fire = NULL; 5783 } 5784 5785 /* 5786 * The resolution loop is re-entered if we 5787 * actually are in a multirouting case. 5788 */ 5789 if (copy_mp != NULL) { 5790 boolean_t need_resolve = 5791 ire_multirt_need_resolve_v6(v6dstp, 5792 msg_getlabel(copy_mp), ipst); 5793 if (!need_resolve) { 5794 MULTIRT_DEBUG_UNTAG(copy_mp); 5795 freemsg(copy_mp); 5796 copy_mp = NULL; 5797 } else { 5798 /* 5799 * ipif_lookup_group_v6() calls 5800 * ire_lookup_multi_v6() that uses 5801 * ire_ftable_lookup_v6() to find 5802 * an IRE_INTERFACE for the group. 5803 * In the multirt case, 5804 * ire_lookup_multi_v6() then invokes 5805 * ire_multirt_lookup_v6() to find 5806 * the next resolvable ire. 5807 * As a result, we obtain a new 5808 * interface, derived from the 5809 * next ire. 5810 */ 5811 if (ipif_held) { 5812 ipif_refrele(ipif); 5813 ipif_held = B_FALSE; 5814 } 5815 ipif = ipif_lookup_group_v6(v6dstp, 5816 zoneid, ipst); 5817 ip2dbg(("ip_newroute_ipif: " 5818 "multirt dst %08x, ipif %p\n", 5819 ntohl(V4_PART_OF_V6((*v6dstp))), 5820 (void *)ipif)); 5821 if (ipif != NULL) { 5822 ipif_held = B_TRUE; 5823 mp = copy_mp; 5824 copy_mp = NULL; 5825 multirt_resolve_next = 5826 B_TRUE; 5827 continue; 5828 } else { 5829 freemsg(copy_mp); 5830 } 5831 } 5832 } 5833 ill_refrele(dst_ill); 5834 if (ipif_held) { 5835 ipif_refrele(ipif); 5836 ipif_held = B_FALSE; 5837 } 5838 if (src_ipif != NULL) 5839 ipif_refrele(src_ipif); 5840 return; 5841 } 5842 case IRE_IF_RESOLVER: { 5843 5844 ASSERT(dst_ill->ill_isv6); 5845 5846 /* 5847 * We obtain a partial IRE_CACHE which we will pass 5848 * along with the resolver query. When the response 5849 * comes back it will be there ready for us to add. 5850 */ 5851 /* 5852 * the newly created ire will inherit the flags of the 5853 * parent ire, if any. 
5854 */ 5855 ire = ire_create_v6( 5856 v6dstp, /* dest address */ 5857 &ipv6_all_ones, /* mask */ 5858 ire_v6srcp, /* source address */ 5859 NULL, /* gateway address */ 5860 &save_ire->ire_max_frag, 5861 NULL, /* src nce */ 5862 dst_ill->ill_rq, /* recv-from queue */ 5863 dst_ill->ill_wq, /* send-to queue */ 5864 IRE_CACHE, 5865 src_ipif, 5866 NULL, 5867 (fire != NULL) ? /* Parent handle */ 5868 fire->ire_phandle : 0, 5869 save_ire->ire_ihandle, /* Interface handle */ 5870 (fire != NULL) ? 5871 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5872 0, 5873 &ire_uinfo_null, 5874 NULL, 5875 NULL, 5876 ipst); 5877 5878 if (ire == NULL) { 5879 ire_refrele(save_ire); 5880 break; 5881 } 5882 5883 /* Resolve and add ire to the ctable */ 5884 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5885 switch (err) { 5886 case 0: 5887 /* Prevent save_ire from getting deleted */ 5888 IRB_REFHOLD(save_ire->ire_bucket); 5889 /* Has it been removed already ? */ 5890 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5891 IRB_REFRELE(save_ire->ire_bucket); 5892 ire_refrele(save_ire); 5893 break; 5894 } 5895 /* 5896 * We have a resolved cache entry, 5897 * add in the IRE. 5898 */ 5899 ire_add_then_send(q, ire, first_mp); 5900 if (ip6_asp_table_held) { 5901 ip6_asp_table_refrele(ipst); 5902 ip6_asp_table_held = B_FALSE; 5903 } 5904 5905 /* Assert that it is not deleted yet. */ 5906 ASSERT(save_ire->ire_ptpn != NULL); 5907 IRB_REFRELE(save_ire->ire_bucket); 5908 ire_refrele(save_ire); 5909 if (fire != NULL) { 5910 ire_refrele(fire); 5911 fire = NULL; 5912 } 5913 5914 /* 5915 * The resolution loop is re-entered if we 5916 * actually are in a multirouting case. 
5917 */ 5918 if (copy_mp != NULL) { 5919 boolean_t need_resolve = 5920 ire_multirt_need_resolve_v6(v6dstp, 5921 msg_getlabel(copy_mp), ipst); 5922 if (!need_resolve) { 5923 MULTIRT_DEBUG_UNTAG(copy_mp); 5924 freemsg(copy_mp); 5925 copy_mp = NULL; 5926 } else { 5927 /* 5928 * ipif_lookup_group_v6() calls 5929 * ire_lookup_multi_v6() that 5930 * uses ire_ftable_lookup_v6() 5931 * to find an IRE_INTERFACE for 5932 * the group. In the multirt 5933 * case, ire_lookup_multi_v6() 5934 * then invokes 5935 * ire_multirt_lookup_v6() to 5936 * find the next resolvable ire. 5937 * As a result, we obtain a new 5938 * interface, derived from the 5939 * next ire. 5940 */ 5941 if (ipif_held) { 5942 ipif_refrele(ipif); 5943 ipif_held = B_FALSE; 5944 } 5945 ipif = ipif_lookup_group_v6( 5946 v6dstp, zoneid, ipst); 5947 ip2dbg(("ip_newroute_ipif: " 5948 "multirt dst %08x, " 5949 "ipif %p\n", 5950 ntohl(V4_PART_OF_V6( 5951 (*v6dstp))), 5952 (void *)ipif)); 5953 if (ipif != NULL) { 5954 ipif_held = B_TRUE; 5955 mp = copy_mp; 5956 copy_mp = NULL; 5957 multirt_resolve_next = 5958 B_TRUE; 5959 continue; 5960 } else { 5961 freemsg(copy_mp); 5962 } 5963 } 5964 } 5965 ill_refrele(dst_ill); 5966 if (ipif_held) { 5967 ipif_refrele(ipif); 5968 ipif_held = B_FALSE; 5969 } 5970 if (src_ipif != NULL) 5971 ipif_refrele(src_ipif); 5972 return; 5973 5974 case EINPROGRESS: 5975 /* 5976 * mp was consumed - presumably queued. 5977 * No need for ire, presumably resolution is 5978 * in progress, and ire will be added when the 5979 * address is resolved. 5980 */ 5981 if (ip6_asp_table_held) { 5982 ip6_asp_table_refrele(ipst); 5983 ip6_asp_table_held = B_FALSE; 5984 } 5985 ire_delete(ire); 5986 ire_refrele(save_ire); 5987 if (fire != NULL) { 5988 ire_refrele(fire); 5989 fire = NULL; 5990 } 5991 5992 /* 5993 * The resolution loop is re-entered if we 5994 * actually are in a multirouting case. 
5995 */ 5996 if (copy_mp != NULL) { 5997 boolean_t need_resolve = 5998 ire_multirt_need_resolve_v6(v6dstp, 5999 msg_getlabel(copy_mp), ipst); 6000 if (!need_resolve) { 6001 MULTIRT_DEBUG_UNTAG(copy_mp); 6002 freemsg(copy_mp); 6003 copy_mp = NULL; 6004 } else { 6005 /* 6006 * ipif_lookup_group_v6() calls 6007 * ire_lookup_multi_v6() that 6008 * uses ire_ftable_lookup_v6() 6009 * to find an IRE_INTERFACE for 6010 * the group. In the multirt 6011 * case, ire_lookup_multi_v6() 6012 * then invokes 6013 * ire_multirt_lookup_v6() to 6014 * find the next resolvable ire. 6015 * As a result, we obtain a new 6016 * interface, derived from the 6017 * next ire. 6018 */ 6019 if (ipif_held) { 6020 ipif_refrele(ipif); 6021 ipif_held = B_FALSE; 6022 } 6023 ipif = ipif_lookup_group_v6( 6024 v6dstp, zoneid, ipst); 6025 ip2dbg(("ip_newroute_ipif: " 6026 "multirt dst %08x, " 6027 "ipif %p\n", 6028 ntohl(V4_PART_OF_V6( 6029 (*v6dstp))), 6030 (void *)ipif)); 6031 if (ipif != NULL) { 6032 ipif_held = B_TRUE; 6033 mp = copy_mp; 6034 copy_mp = NULL; 6035 multirt_resolve_next = 6036 B_TRUE; 6037 continue; 6038 } else { 6039 freemsg(copy_mp); 6040 } 6041 } 6042 } 6043 ill_refrele(dst_ill); 6044 if (ipif_held) { 6045 ipif_refrele(ipif); 6046 ipif_held = B_FALSE; 6047 } 6048 if (src_ipif != NULL) 6049 ipif_refrele(src_ipif); 6050 return; 6051 default: 6052 /* Some transient error */ 6053 ire_refrele(save_ire); 6054 break; 6055 } 6056 break; 6057 } 6058 default: 6059 break; 6060 } 6061 if (ip6_asp_table_held) { 6062 ip6_asp_table_refrele(ipst); 6063 ip6_asp_table_held = B_FALSE; 6064 } 6065 } while (multirt_resolve_next); 6066 6067 err_ret: 6068 if (ip6_asp_table_held) 6069 ip6_asp_table_refrele(ipst); 6070 if (ire != NULL) 6071 ire_refrele(ire); 6072 if (fire != NULL) 6073 ire_refrele(fire); 6074 if (ipif != NULL && ipif_held) 6075 ipif_refrele(ipif); 6076 if (src_ipif != NULL) 6077 ipif_refrele(src_ipif); 6078 6079 /* Multicast - no point in trying to generate ICMP error */ 6080 if (dst_ill != 
NULL) { 6081 ill = dst_ill; 6082 ill_held = B_TRUE; 6083 } 6084 if (mp->b_prev || mp->b_next) { 6085 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6086 } else { 6087 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6088 } 6089 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6090 mp->b_next = NULL; 6091 mp->b_prev = NULL; 6092 freemsg(first_mp); 6093 if (ill_held) 6094 ill_refrele(ill); 6095 } 6096 6097 /* 6098 * Parse and process any hop-by-hop or destination options. 6099 * 6100 * Assumes that q is an ill read queue so that ICMP errors for link-local 6101 * destinations are sent out the correct interface. 6102 * 6103 * Returns -1 if there was an error and mp has been consumed. 6104 * Returns 0 if no special action is needed. 6105 * Returns 1 if the packet contained a router alert option for this node 6106 * which is verified to be "interesting/known" for our implementation. 6107 * 6108 * XXX Note: In future as more hbh or dest options are defined, 6109 * it may be better to have different routines for hbh and dest 6110 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6111 * may have same value in different namespaces. Or is it same namespace ?? 
6112 * Current code checks for each opt_type (other than pads) if it is in 6113 * the expected nexthdr (hbh or dest) 6114 */ 6115 static int 6116 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6117 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6118 { 6119 uint8_t opt_type; 6120 uint_t optused; 6121 int ret = 0; 6122 mblk_t *first_mp; 6123 const char *errtype; 6124 zoneid_t zoneid; 6125 ill_t *ill = q->q_ptr; 6126 ipif_t *ipif; 6127 6128 first_mp = mp; 6129 if (mp->b_datap->db_type == M_CTL) { 6130 mp = mp->b_cont; 6131 } 6132 6133 while (optlen != 0) { 6134 opt_type = *optptr; 6135 if (opt_type == IP6OPT_PAD1) { 6136 optused = 1; 6137 } else { 6138 if (optlen < 2) 6139 goto bad_opt; 6140 errtype = "malformed"; 6141 if (opt_type == ip6opt_ls) { 6142 optused = 2 + optptr[1]; 6143 if (optused > optlen) 6144 goto bad_opt; 6145 } else switch (opt_type) { 6146 case IP6OPT_PADN: 6147 /* 6148 * Note:We don't verify that (N-2) pad octets 6149 * are zero as required by spec. Adhere to 6150 * "be liberal in what you accept..." part of 6151 * implementation philosophy (RFC791,RFC1122) 6152 */ 6153 optused = 2 + optptr[1]; 6154 if (optused > optlen) 6155 goto bad_opt; 6156 break; 6157 6158 case IP6OPT_JUMBO: 6159 if (hdr_type != IPPROTO_HOPOPTS) 6160 goto opt_error; 6161 goto opt_error; /* XXX Not implemented! 
*/ 6162 6163 case IP6OPT_ROUTER_ALERT: { 6164 struct ip6_opt_router *or; 6165 6166 if (hdr_type != IPPROTO_HOPOPTS) 6167 goto opt_error; 6168 optused = 2 + optptr[1]; 6169 if (optused > optlen) 6170 goto bad_opt; 6171 or = (struct ip6_opt_router *)optptr; 6172 /* Check total length and alignment */ 6173 if (optused != sizeof (*or) || 6174 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6175 goto opt_error; 6176 /* Check value */ 6177 switch (*((uint16_t *)or->ip6or_value)) { 6178 case IP6_ALERT_MLD: 6179 case IP6_ALERT_RSVP: 6180 ret = 1; 6181 } 6182 break; 6183 } 6184 case IP6OPT_HOME_ADDRESS: { 6185 /* 6186 * Minimal support for the home address option 6187 * (which is required by all IPv6 nodes). 6188 * Implement by just swapping the home address 6189 * and source address. 6190 * XXX Note: this has IPsec implications since 6191 * AH needs to take this into account. 6192 * Also, when IPsec is used we need to ensure 6193 * that this is only processed once 6194 * in the received packet (to avoid swapping 6195 * back and forth). 6196 * NOTE:This option processing is considered 6197 * to be unsafe and prone to a denial of 6198 * service attack. 6199 * The current processing is not safe even with 6200 * IPsec secured IP packets. Since the home 6201 * address option processing requirement still 6202 * is in the IETF draft and in the process of 6203 * being redefined for its usage, it has been 6204 * decided to turn off the option by default. 6205 * If this section of code needs to be executed, 6206 * ndd variable ip6_ignore_home_address_opt 6207 * should be set to 0 at the user's own risk. 6208 */ 6209 struct ip6_opt_home_address *oh; 6210 in6_addr_t tmp; 6211 6212 if (ipst->ips_ipv6_ignore_home_address_opt) 6213 goto opt_error; 6214 6215 if (hdr_type != IPPROTO_DSTOPTS) 6216 goto opt_error; 6217 optused = 2 + optptr[1]; 6218 if (optused > optlen) 6219 goto bad_opt; 6220 6221 /* 6222 * We did this dest. opt the first time 6223 * around (i.e. before AH processing). 
6224 * If we've done AH... stop now. 6225 */ 6226 if (first_mp != mp) { 6227 ipsec_in_t *ii; 6228 6229 ii = (ipsec_in_t *)first_mp->b_rptr; 6230 if (ii->ipsec_in_ah_sa != NULL) 6231 break; 6232 } 6233 6234 oh = (struct ip6_opt_home_address *)optptr; 6235 /* Check total length and alignment */ 6236 if (optused < sizeof (*oh) || 6237 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6238 goto opt_error; 6239 /* Swap ip6_src and the home address */ 6240 tmp = ip6h->ip6_src; 6241 /* XXX Note: only 8 byte alignment option */ 6242 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6243 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6244 break; 6245 } 6246 6247 case IP6OPT_TUNNEL_LIMIT: 6248 if (hdr_type != IPPROTO_DSTOPTS) { 6249 goto opt_error; 6250 } 6251 optused = 2 + optptr[1]; 6252 if (optused > optlen) { 6253 goto bad_opt; 6254 } 6255 if (optused != 3) { 6256 goto opt_error; 6257 } 6258 break; 6259 6260 default: 6261 errtype = "unknown"; 6262 /* FALLTHROUGH */ 6263 opt_error: 6264 /* Determine which zone should send error */ 6265 zoneid = ipif_lookup_addr_zoneid_v6( 6266 &ip6h->ip6_dst, ill, ipst); 6267 switch (IP6OPT_TYPE(opt_type)) { 6268 case IP6OPT_TYPE_SKIP: 6269 optused = 2 + optptr[1]; 6270 if (optused > optlen) 6271 goto bad_opt; 6272 ip1dbg(("ip_process_options_v6: %s " 6273 "opt 0x%x skipped\n", 6274 errtype, opt_type)); 6275 break; 6276 case IP6OPT_TYPE_DISCARD: 6277 ip1dbg(("ip_process_options_v6: %s " 6278 "opt 0x%x; packet dropped\n", 6279 errtype, opt_type)); 6280 freemsg(first_mp); 6281 return (-1); 6282 case IP6OPT_TYPE_ICMP: 6283 if (zoneid == ALL_ZONES) { 6284 freemsg(first_mp); 6285 return (-1); 6286 } 6287 icmp_param_problem_v6(WR(q), first_mp, 6288 ICMP6_PARAMPROB_OPTION, 6289 (uint32_t)(optptr - 6290 (uint8_t *)ip6h), 6291 B_FALSE, B_FALSE, zoneid, ipst); 6292 return (-1); 6293 case IP6OPT_TYPE_FORCEICMP: 6294 /* 6295 * If we don't have a zone and the dst 6296 * addr is multicast, then pick a zone 6297 * based on the inbound interface. 
6298 */ 6299 if (zoneid == ALL_ZONES && 6300 IN6_IS_ADDR_MULTICAST( 6301 &ip6h->ip6_dst)) { 6302 ipif = ipif_select_source_v6( 6303 ill, &ip6h->ip6_src, 6304 B_TRUE, 6305 IPV6_PREFER_SRC_DEFAULT, 6306 ALL_ZONES); 6307 if (ipif != NULL) { 6308 zoneid = 6309 ipif->ipif_zoneid; 6310 ipif_refrele(ipif); 6311 } 6312 } 6313 if (zoneid == ALL_ZONES) { 6314 freemsg(first_mp); 6315 return (-1); 6316 } 6317 icmp_param_problem_v6(WR(q), first_mp, 6318 ICMP6_PARAMPROB_OPTION, 6319 (uint32_t)(optptr - 6320 (uint8_t *)ip6h), 6321 B_FALSE, B_TRUE, zoneid, ipst); 6322 return (-1); 6323 default: 6324 ASSERT(0); 6325 } 6326 } 6327 } 6328 optlen -= optused; 6329 optptr += optused; 6330 } 6331 return (ret); 6332 6333 bad_opt: 6334 /* Determine which zone should send error */ 6335 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6336 if (zoneid == ALL_ZONES) { 6337 freemsg(first_mp); 6338 } else { 6339 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6340 (uint32_t)(optptr - (uint8_t *)ip6h), 6341 B_FALSE, B_FALSE, zoneid, ipst); 6342 } 6343 return (-1); 6344 } 6345 6346 /* 6347 * Process a routing header that is not yet empty. 6348 * Only handles type 0 routing headers. 6349 */ 6350 static void 6351 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6352 ill_t *ill, uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6353 { 6354 ip6_rthdr0_t *rthdr; 6355 uint_t ehdrlen; 6356 uint_t numaddr; 6357 in6_addr_t *addrptr; 6358 in6_addr_t tmp; 6359 ip_stack_t *ipst = ill->ill_ipst; 6360 6361 ASSERT(rth->ip6r_segleft != 0); 6362 6363 if (!ipst->ips_ipv6_forward_src_routed) { 6364 /* XXX Check for source routed out same interface? 
*/ 6365 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6366 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6367 freemsg(hada_mp); 6368 freemsg(mp); 6369 return; 6370 } 6371 6372 if (rth->ip6r_type != 0) { 6373 if (hada_mp != NULL) 6374 goto hada_drop; 6375 /* Sent by forwarding path, and router is global zone */ 6376 icmp_param_problem_v6(WR(q), mp, 6377 ICMP6_PARAMPROB_HEADER, 6378 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 6379 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6380 return; 6381 } 6382 rthdr = (ip6_rthdr0_t *)rth; 6383 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 6384 ASSERT(mp->b_rptr + ehdrlen <= mp->b_wptr); 6385 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 6386 /* rthdr->ip6r0_len is twice the number of addresses in the header */ 6387 if (rthdr->ip6r0_len & 0x1) { 6388 /* An odd length is impossible */ 6389 if (hada_mp != NULL) 6390 goto hada_drop; 6391 /* Sent by forwarding path, and router is global zone */ 6392 icmp_param_problem_v6(WR(q), mp, 6393 ICMP6_PARAMPROB_HEADER, 6394 (uint32_t)((uchar_t *)&rthdr->ip6r0_len - (uchar_t *)ip6h), 6395 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6396 return; 6397 } 6398 numaddr = rthdr->ip6r0_len / 2; 6399 if (rthdr->ip6r0_segleft > numaddr) { 6400 /* segleft exceeds number of addresses in routing header */ 6401 if (hada_mp != NULL) 6402 goto hada_drop; 6403 /* Sent by forwarding path, and router is global zone */ 6404 icmp_param_problem_v6(WR(q), mp, 6405 ICMP6_PARAMPROB_HEADER, 6406 (uint32_t)((uchar_t *)&rthdr->ip6r0_segleft - 6407 (uchar_t *)ip6h), 6408 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6409 return; 6410 } 6411 addrptr += (numaddr - rthdr->ip6r0_segleft); 6412 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) || 6413 IN6_IS_ADDR_MULTICAST(addrptr)) { 6414 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6415 freemsg(hada_mp); 6416 freemsg(mp); 6417 return; 6418 } 6419 /* Swap */ 6420 tmp = *addrptr; 6421 *addrptr = ip6h->ip6_dst; 6422 ip6h->ip6_dst = tmp; 6423 rthdr->ip6r0_segleft--; 
6424 /* Don't allow any mapped addresses - ip_wput_v6 can't handle them */ 6425 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6426 if (hada_mp != NULL) 6427 goto hada_drop; 6428 /* Sent by forwarding path, and router is global zone */ 6429 icmp_unreachable_v6(WR(q), mp, ICMP6_DST_UNREACH_NOROUTE, 6430 B_FALSE, B_FALSE, GLOBAL_ZONEID, ipst); 6431 return; 6432 } 6433 if (ip_check_v6_mblk(mp, ill) == IP6_MBLK_OK) { 6434 ip6h = (ip6_t *)mp->b_rptr; 6435 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6436 } else { 6437 freemsg(mp); 6438 } 6439 return; 6440 hada_drop: 6441 /* IPsec kstats: bean counter? */ 6442 freemsg(hada_mp); 6443 freemsg(mp); 6444 } 6445 6446 /* 6447 * Read side put procedure for IPv6 module. 6448 */ 6449 void 6450 ip_rput_v6(queue_t *q, mblk_t *mp) 6451 { 6452 mblk_t *first_mp; 6453 mblk_t *hada_mp = NULL; 6454 ip6_t *ip6h; 6455 boolean_t ll_multicast = B_FALSE; 6456 boolean_t mctl_present = B_FALSE; 6457 ill_t *ill; 6458 struct iocblk *iocp; 6459 uint_t flags = 0; 6460 mblk_t *dl_mp; 6461 ip_stack_t *ipst; 6462 int check; 6463 6464 ill = (ill_t *)q->q_ptr; 6465 ipst = ill->ill_ipst; 6466 if (ill->ill_state_flags & ILL_CONDEMNED) { 6467 union DL_primitives *dl; 6468 6469 dl = (union DL_primitives *)mp->b_rptr; 6470 /* 6471 * Things are opening or closing - only accept DLPI 6472 * ack messages. If the stream is closing and ip_wsrv 6473 * has completed, ip_close is out of the qwait, but has 6474 * not yet completed qprocsoff. Don't proceed any further 6475 * because the ill has been cleaned up and things hanging 6476 * off the ill have been freed. 6477 */ 6478 if ((mp->b_datap->db_type != M_PCPROTO) || 6479 (dl->dl_primitive == DL_UNITDATA_IND)) { 6480 inet_freemsg(mp); 6481 return; 6482 } 6483 } 6484 6485 dl_mp = NULL; 6486 switch (mp->b_datap->db_type) { 6487 case M_DATA: { 6488 int hlen; 6489 uchar_t *ucp; 6490 struct ether_header *eh; 6491 dl_unitdata_ind_t *dui; 6492 6493 /* 6494 * This is a work-around for CR 6451644, a bug in Nemo. 
It 6495 * should be removed when that problem is fixed. 6496 */ 6497 if (ill->ill_mactype == DL_ETHER && 6498 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6499 (ucp = mp->b_rptr)[-1] == (IP6_DL_SAP & 0xFF) && 6500 ucp[-2] == (IP6_DL_SAP >> 8)) { 6501 if (hlen >= sizeof (struct ether_vlan_header) && 6502 ucp[-5] == 0 && ucp[-6] == 0x81) 6503 ucp -= sizeof (struct ether_vlan_header); 6504 else 6505 ucp -= sizeof (struct ether_header); 6506 /* 6507 * If it's a group address, then fabricate a 6508 * DL_UNITDATA_IND message. 6509 */ 6510 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6511 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6512 BPRI_HI)) != NULL) { 6513 eh = (struct ether_header *)ucp; 6514 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6515 DB_TYPE(dl_mp) = M_PROTO; 6516 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6517 dui->dl_primitive = DL_UNITDATA_IND; 6518 dui->dl_dest_addr_length = 8; 6519 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6520 dui->dl_src_addr_length = 8; 6521 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6522 8; 6523 dui->dl_group_address = 1; 6524 ucp = (uchar_t *)(dui + 1); 6525 if (ill->ill_sap_length > 0) 6526 ucp += ill->ill_sap_length; 6527 bcopy(&eh->ether_dhost, ucp, 6); 6528 bcopy(&eh->ether_shost, ucp + 8, 6); 6529 ucp = (uchar_t *)(dui + 1); 6530 if (ill->ill_sap_length < 0) 6531 ucp += 8 + ill->ill_sap_length; 6532 bcopy(&eh->ether_type, ucp, 2); 6533 bcopy(&eh->ether_type, ucp + 8, 2); 6534 } 6535 } 6536 break; 6537 } 6538 6539 case M_PROTO: 6540 case M_PCPROTO: 6541 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6542 DL_UNITDATA_IND) { 6543 /* Go handle anything other than data elsewhere. */ 6544 ip_rput_dlpi(q, mp); 6545 return; 6546 } 6547 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6548 6549 /* Save the DLPI header. 
*/ 6550 dl_mp = mp; 6551 mp = mp->b_cont; 6552 dl_mp->b_cont = NULL; 6553 break; 6554 case M_BREAK: 6555 panic("ip_rput_v6: got an M_BREAK"); 6556 /*NOTREACHED*/ 6557 case M_IOCACK: 6558 iocp = (struct iocblk *)mp->b_rptr; 6559 switch (iocp->ioc_cmd) { 6560 case DL_IOC_HDR_INFO: 6561 ill = (ill_t *)q->q_ptr; 6562 ill_fastpath_ack(ill, mp); 6563 return; 6564 6565 case SIOCGTUNPARAM: 6566 case OSIOCGTUNPARAM: 6567 ip_rput_other(NULL, q, mp, NULL); 6568 return; 6569 6570 case SIOCSTUNPARAM: 6571 case OSIOCSTUNPARAM: 6572 /* Go through qwriter */ 6573 break; 6574 default: 6575 putnext(q, mp); 6576 return; 6577 } 6578 /* FALLTHRU */ 6579 case M_ERROR: 6580 case M_HANGUP: 6581 mutex_enter(&ill->ill_lock); 6582 if (ill->ill_state_flags & ILL_CONDEMNED) { 6583 mutex_exit(&ill->ill_lock); 6584 freemsg(mp); 6585 return; 6586 } 6587 ill_refhold_locked(ill); 6588 mutex_exit(&ill->ill_lock); 6589 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6590 return; 6591 case M_CTL: 6592 if ((MBLKL(mp) > sizeof (int)) && 6593 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6594 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6595 mctl_present = B_TRUE; 6596 break; 6597 } 6598 putnext(q, mp); 6599 return; 6600 case M_IOCNAK: 6601 iocp = (struct iocblk *)mp->b_rptr; 6602 switch (iocp->ioc_cmd) { 6603 case DL_IOC_HDR_INFO: 6604 case SIOCGTUNPARAM: 6605 case OSIOCGTUNPARAM: 6606 ip_rput_other(NULL, q, mp, NULL); 6607 return; 6608 6609 case SIOCSTUNPARAM: 6610 case OSIOCSTUNPARAM: 6611 mutex_enter(&ill->ill_lock); 6612 if (ill->ill_state_flags & ILL_CONDEMNED) { 6613 mutex_exit(&ill->ill_lock); 6614 freemsg(mp); 6615 return; 6616 } 6617 ill_refhold_locked(ill); 6618 mutex_exit(&ill->ill_lock); 6619 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6620 return; 6621 default: 6622 break; 6623 } 6624 /* FALLTHRU */ 6625 default: 6626 putnext(q, mp); 6627 return; 6628 } 6629 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6630 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6631 
(mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6632 /* 6633 * if db_ref > 1 then copymsg and free original. Packet may be 6634 * changed and do not want other entity who has a reference to this 6635 * message to trip over the changes. This is a blind change because 6636 * trying to catch all places that might change packet is too 6637 * difficult (since it may be a module above this one). 6638 */ 6639 if (mp->b_datap->db_ref > 1) { 6640 mblk_t *mp1; 6641 6642 mp1 = copymsg(mp); 6643 freemsg(mp); 6644 if (mp1 == NULL) { 6645 first_mp = NULL; 6646 goto discard; 6647 } 6648 mp = mp1; 6649 } 6650 first_mp = mp; 6651 if (mctl_present) { 6652 hada_mp = first_mp; 6653 mp = first_mp->b_cont; 6654 } 6655 6656 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6657 freemsg(mp); 6658 return; 6659 } 6660 6661 ip6h = (ip6_t *)mp->b_rptr; 6662 6663 /* 6664 * ip:::receive must see ipv6 packets with a full header, 6665 * and so is placed after the IP6_MBLK_HDR_ERR check. 6666 */ 6667 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6668 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6669 int, 0); 6670 6671 if (check != IP6_MBLK_OK) { 6672 freemsg(mp); 6673 return; 6674 } 6675 6676 DTRACE_PROBE4(ip6__physical__in__start, 6677 ill_t *, ill, ill_t *, NULL, 6678 ip6_t *, ip6h, mblk_t *, first_mp); 6679 6680 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6681 ipst->ips_ipv6firewall_physical_in, 6682 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6683 6684 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6685 6686 if (first_mp == NULL) 6687 return; 6688 6689 /* 6690 * Attach any necessary label information to this packet. 6691 */ 6692 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6693 if (ip6opt_ls != 0) 6694 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6695 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6696 goto discard; 6697 } 6698 6699 /* IP observability hook. 
*/ 6700 if (ipst->ips_ipobs_enabled) { 6701 zoneid_t dzone; 6702 6703 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6704 ALL_ZONES); 6705 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6706 IPV6_VERSION, 0, ipst); 6707 } 6708 6709 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6710 IPV6_DEFAULT_VERS_AND_FLOW) { 6711 /* 6712 * It may be a bit too expensive to do this mapped address 6713 * check here, but in the interest of robustness, it seems 6714 * like the correct place. 6715 * TODO: Avoid this check for e.g. connected TCP sockets 6716 */ 6717 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6718 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6719 goto discard; 6720 } 6721 6722 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6723 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6724 goto discard; 6725 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6726 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6727 goto discard; 6728 } 6729 6730 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6731 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6732 } else { 6733 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6734 goto discard; 6735 } 6736 freemsg(dl_mp); 6737 return; 6738 6739 discard: 6740 if (dl_mp != NULL) 6741 freeb(dl_mp); 6742 freemsg(first_mp); 6743 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6744 } 6745 6746 /* 6747 * Walk through the IPv6 packet in mp and see if there's an AH header 6748 * in it. See if the AH header needs to get done before other headers in 6749 * the packet. (Worker function for ipsec_early_ah_v6().) 
6750 */ 6751 #define IPSEC_HDR_DONT_PROCESS 0 6752 #define IPSEC_HDR_PROCESS 1 6753 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6754 static int 6755 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6756 { 6757 uint_t length; 6758 uint_t ehdrlen; 6759 uint8_t *whereptr; 6760 uint8_t *endptr; 6761 uint8_t *nexthdrp; 6762 ip6_dest_t *desthdr; 6763 ip6_rthdr_t *rthdr; 6764 ip6_t *ip6h; 6765 6766 /* 6767 * For now just pullup everything. In general, the less pullups, 6768 * the better, but there's so much squirrelling through anyway, 6769 * it's just easier this way. 6770 */ 6771 if (!pullupmsg(mp, -1)) { 6772 return (IPSEC_MEMORY_ERROR); 6773 } 6774 6775 ip6h = (ip6_t *)mp->b_rptr; 6776 length = IPV6_HDR_LEN; 6777 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6778 endptr = mp->b_wptr; 6779 6780 /* 6781 * We can't just use the argument nexthdr in the place 6782 * of nexthdrp becaue we don't dereference nexthdrp 6783 * till we confirm whether it is a valid address. 6784 */ 6785 nexthdrp = &ip6h->ip6_nxt; 6786 while (whereptr < endptr) { 6787 /* Is there enough left for len + nexthdr? */ 6788 if (whereptr + MIN_EHDR_LEN > endptr) 6789 return (IPSEC_MEMORY_ERROR); 6790 6791 switch (*nexthdrp) { 6792 case IPPROTO_HOPOPTS: 6793 case IPPROTO_DSTOPTS: 6794 /* Assumes the headers are identical for hbh and dst */ 6795 desthdr = (ip6_dest_t *)whereptr; 6796 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6797 if ((uchar_t *)desthdr + ehdrlen > endptr) 6798 return (IPSEC_MEMORY_ERROR); 6799 /* 6800 * Return DONT_PROCESS because the destination 6801 * options header may be for each hop in a 6802 * routing-header, and we only want AH if we're 6803 * finished with routing headers. 
6804 */ 6805 if (*nexthdrp == IPPROTO_DSTOPTS) 6806 return (IPSEC_HDR_DONT_PROCESS); 6807 nexthdrp = &desthdr->ip6d_nxt; 6808 break; 6809 case IPPROTO_ROUTING: 6810 rthdr = (ip6_rthdr_t *)whereptr; 6811 6812 /* 6813 * If there's more hops left on the routing header, 6814 * return now with DON'T PROCESS. 6815 */ 6816 if (rthdr->ip6r_segleft > 0) 6817 return (IPSEC_HDR_DONT_PROCESS); 6818 6819 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6820 if ((uchar_t *)rthdr + ehdrlen > endptr) 6821 return (IPSEC_MEMORY_ERROR); 6822 nexthdrp = &rthdr->ip6r_nxt; 6823 break; 6824 case IPPROTO_FRAGMENT: 6825 /* Wait for reassembly */ 6826 return (IPSEC_HDR_DONT_PROCESS); 6827 case IPPROTO_AH: 6828 *nexthdr = IPPROTO_AH; 6829 return (IPSEC_HDR_PROCESS); 6830 case IPPROTO_NONE: 6831 /* No next header means we're finished */ 6832 default: 6833 return (IPSEC_HDR_DONT_PROCESS); 6834 } 6835 length += ehdrlen; 6836 whereptr += ehdrlen; 6837 } 6838 /* 6839 * Malformed/truncated packet. 6840 */ 6841 return (IPSEC_MEMORY_ERROR); 6842 } 6843 6844 /* 6845 * Path for AH if options are present. If this is the first time we are 6846 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6847 * Otherwise, just fanout. Return value answers the boolean question: 6848 * "Did I consume the mblk you sent me?" 6849 * 6850 * Sometimes AH needs to be done before other IPv6 headers for security 6851 * reasons. This function (and its ipsec_needs_processing_v6() above) 6852 * indicates if that is so, and fans out to the appropriate IPsec protocol 6853 * for the datagram passed in. 
6854 */ 6855 static boolean_t 6856 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6857 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6858 { 6859 mblk_t *mp; 6860 uint8_t nexthdr; 6861 ipsec_in_t *ii = NULL; 6862 ah_t *ah; 6863 ipsec_status_t ipsec_rc; 6864 ip_stack_t *ipst = ill->ill_ipst; 6865 netstack_t *ns = ipst->ips_netstack; 6866 ipsec_stack_t *ipss = ns->netstack_ipsec; 6867 6868 ASSERT((hada_mp == NULL) || (!mctl_present)); 6869 6870 switch (ipsec_needs_processing_v6( 6871 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6872 case IPSEC_MEMORY_ERROR: 6873 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6874 freemsg(hada_mp); 6875 freemsg(first_mp); 6876 return (B_TRUE); 6877 case IPSEC_HDR_DONT_PROCESS: 6878 return (B_FALSE); 6879 } 6880 6881 /* Default means send it to AH! */ 6882 ASSERT(nexthdr == IPPROTO_AH); 6883 if (!mctl_present) { 6884 mp = first_mp; 6885 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6886 if (first_mp == NULL) { 6887 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6888 "allocation failure.\n")); 6889 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6890 freemsg(hada_mp); 6891 freemsg(mp); 6892 return (B_TRUE); 6893 } 6894 /* 6895 * Store the ill_index so that when we come back 6896 * from IPSEC we ride on the same queue. 6897 */ 6898 ii = (ipsec_in_t *)first_mp->b_rptr; 6899 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6900 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6901 first_mp->b_cont = mp; 6902 } 6903 /* 6904 * Cache hardware acceleration info. 
6905 */ 6906 if (hada_mp != NULL) { 6907 ASSERT(ii != NULL); 6908 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6909 "caching data attr.\n")); 6910 ii->ipsec_in_accelerated = B_TRUE; 6911 ii->ipsec_in_da = hada_mp; 6912 } 6913 6914 if (!ipsec_loaded(ipss)) { 6915 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6916 return (B_TRUE); 6917 } 6918 6919 ah = ipsec_inbound_ah_sa(first_mp, ns); 6920 if (ah == NULL) 6921 return (B_TRUE); 6922 ASSERT(ii->ipsec_in_ah_sa != NULL); 6923 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6924 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6925 6926 switch (ipsec_rc) { 6927 case IPSEC_STATUS_SUCCESS: 6928 /* we're done with IPsec processing, send it up */ 6929 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6930 break; 6931 case IPSEC_STATUS_FAILED: 6932 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6933 break; 6934 case IPSEC_STATUS_PENDING: 6935 /* no action needed */ 6936 break; 6937 } 6938 return (B_TRUE); 6939 } 6940 6941 /* 6942 * Validate the IPv6 mblk for alignment. 6943 */ 6944 int 6945 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6946 { 6947 int pkt_len, ip6_len; 6948 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6949 6950 /* check for alignment and full IPv6 header */ 6951 if (!OK_32PTR((uchar_t *)ip6h) || 6952 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6953 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6954 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6955 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6956 return (IP6_MBLK_HDR_ERR); 6957 } 6958 ip6h = (ip6_t *)mp->b_rptr; 6959 } 6960 6961 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6962 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6963 6964 if (mp->b_cont == NULL) 6965 pkt_len = mp->b_wptr - mp->b_rptr; 6966 else 6967 pkt_len = msgdsize(mp); 6968 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6969 6970 /* 6971 * Check for bogus (too short packet) and packet which 6972 * was padded by the link layer. 
6973 */ 6974 if (ip6_len != pkt_len) { 6975 ssize_t diff; 6976 6977 if (ip6_len > pkt_len) { 6978 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6979 ip6_len, pkt_len)); 6980 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6981 return (IP6_MBLK_LEN_ERR); 6982 } 6983 diff = (ssize_t)(pkt_len - ip6_len); 6984 6985 if (!adjmsg(mp, -diff)) { 6986 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6987 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6988 return (IP6_MBLK_LEN_ERR); 6989 } 6990 } 6991 return (IP6_MBLK_OK); 6992 } 6993 6994 /* 6995 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6996 * ip_rput_v6 has already verified alignment, the min length, the version, 6997 * and db_ref = 1. 6998 * 6999 * The ill passed in (the arg named inill) is the ill that the packet 7000 * actually arrived on. We need to remember this when saving the 7001 * input interface index into potential IPV6_PKTINFO data in 7002 * ip_add_info_v6(). 7003 * 7004 * This routine doesn't free dl_mp; that's the caller's responsibility on 7005 * return. (Note that the callers are complex enough that there's no tail 7006 * recursion here anyway.) 
7007 */ 7008 void 7009 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 7010 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 7011 { 7012 ire_t *ire = NULL; 7013 ill_t *ill = inill; 7014 ill_t *outill; 7015 ipif_t *ipif; 7016 uint8_t *whereptr; 7017 uint8_t nexthdr; 7018 uint16_t remlen; 7019 uint_t prev_nexthdr_offset; 7020 uint_t used; 7021 size_t old_pkt_len; 7022 size_t pkt_len; 7023 uint16_t ip6_len; 7024 uint_t hdr_len; 7025 boolean_t mctl_present; 7026 mblk_t *first_mp; 7027 mblk_t *first_mp1; 7028 boolean_t no_forward; 7029 ip6_hbh_t *hbhhdr; 7030 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 7031 conn_t *connp; 7032 uint32_t ports; 7033 zoneid_t zoneid = GLOBAL_ZONEID; 7034 uint16_t hck_flags, reass_hck_flags; 7035 uint32_t reass_sum; 7036 boolean_t cksum_err; 7037 mblk_t *mp1; 7038 ip_stack_t *ipst = inill->ill_ipst; 7039 7040 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 7041 7042 if (hada_mp != NULL) { 7043 /* 7044 * It's an IPsec accelerated packet. 7045 * Keep a pointer to the data attributes around until 7046 * we allocate the ipsecinfo structure. 7047 */ 7048 IPSECHW_DEBUG(IPSECHW_PKT, 7049 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 7050 hada_mp->b_cont = NULL; 7051 /* 7052 * Since it is accelerated, it came directly from 7053 * the ill. 
7054 */ 7055 ASSERT(mctl_present == B_FALSE); 7056 ASSERT(mp->b_datap->db_type != M_CTL); 7057 } 7058 7059 ip6h = (ip6_t *)mp->b_rptr; 7060 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 7061 old_pkt_len = pkt_len = ip6_len; 7062 7063 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 7064 hck_flags = DB_CKSUMFLAGS(mp); 7065 else 7066 hck_flags = 0; 7067 7068 /* Clear checksum flags in case we need to forward */ 7069 DB_CKSUMFLAGS(mp) = 0; 7070 reass_sum = reass_hck_flags = 0; 7071 7072 nexthdr = ip6h->ip6_nxt; 7073 7074 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 7075 (uchar_t *)ip6h); 7076 whereptr = (uint8_t *)&ip6h[1]; 7077 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 7078 7079 /* Process hop by hop header options */ 7080 if (nexthdr == IPPROTO_HOPOPTS) { 7081 uint_t ehdrlen; 7082 uint8_t *optptr; 7083 7084 if (remlen < MIN_EHDR_LEN) 7085 goto pkt_too_short; 7086 if (mp->b_cont != NULL && 7087 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 7088 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 7089 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7090 freemsg(hada_mp); 7091 freemsg(first_mp); 7092 return; 7093 } 7094 ip6h = (ip6_t *)mp->b_rptr; 7095 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7096 } 7097 hbhhdr = (ip6_hbh_t *)whereptr; 7098 nexthdr = hbhhdr->ip6h_nxt; 7099 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 7100 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 7101 7102 if (remlen < ehdrlen) 7103 goto pkt_too_short; 7104 if (mp->b_cont != NULL && 7105 whereptr + ehdrlen > mp->b_wptr) { 7106 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 7107 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7108 freemsg(hada_mp); 7109 freemsg(first_mp); 7110 return; 7111 } 7112 ip6h = (ip6_t *)mp->b_rptr; 7113 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7114 hbhhdr = (ip6_hbh_t *)whereptr; 7115 } 7116 7117 optptr = whereptr + 2; 7118 whereptr += ehdrlen; 7119 remlen -= ehdrlen; 7120 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7121 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7122 case -1: 7123 /* 7124 * Packet has been consumed and any 7125 * needed ICMP messages sent. 7126 */ 7127 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7128 freemsg(hada_mp); 7129 return; 7130 case 0: 7131 /* no action needed */ 7132 break; 7133 case 1: 7134 /* Known router alert */ 7135 goto ipv6forus; 7136 } 7137 } 7138 7139 /* 7140 * On incoming v6 multicast packets we will bypass the ire table, 7141 * and assume that the read queue corresponds to the targetted 7142 * interface. 7143 * 7144 * The effect of this is the same as the IPv4 original code, but is 7145 * much cleaner I think. See ip_rput for how that was done. 7146 */ 7147 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7149 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7150 7151 /* 7152 * So that we don't end up with dups, only one ill in an IPMP 7153 * group is nominated to receive multicast data traffic. 7154 * However, link-locals on any underlying interfaces will have 7155 * joined their solicited-node multicast addresses and we must 7156 * accept those packets. (We don't attempt to precisely 7157 * filter out duplicate solicited-node multicast packets since 7158 * e.g. an IPMP interface and underlying interface may have 7159 * the same solicited-node multicast address.) Note that we 7160 * won't generally have duplicates because we only issue a 7161 * DL_ENABMULTI_REQ on one interface in a group; the exception 7162 * is when PHYI_MULTI_BCAST is set. 7163 */ 7164 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7165 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7166 goto drop_pkt; 7167 } 7168 7169 /* 7170 * XXX TODO Give to mrouted to for multicast forwarding. 
7171 */ 7172 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7173 ALL_ZONES) == NULL) { 7174 if (ip_debug > 3) { 7175 /* ip2dbg */ 7176 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7177 " which is not for us: %s\n", AF_INET6, 7178 &ip6h->ip6_dst); 7179 } 7180 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7181 freemsg(hada_mp); 7182 freemsg(first_mp); 7183 return; 7184 } 7185 if (ip_debug > 3) { 7186 /* ip2dbg */ 7187 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7188 AF_INET6, &ip6h->ip6_dst); 7189 } 7190 zoneid = GLOBAL_ZONEID; 7191 goto ipv6forus; 7192 } 7193 7194 ipif = ill->ill_ipif; 7195 7196 /* 7197 * If a packet was received on an interface that is a 6to4 tunnel, 7198 * incoming IPv6 packets, with a 6to4 addressed IPv6 destination, must 7199 * be checked to have a 6to4 prefix (2002:V4ADDR::/48) that is equal to 7200 * the 6to4 prefix of the address configured on the receiving interface. 7201 * Otherwise, the packet was delivered to this interface in error and 7202 * the packet must be dropped. 7203 */ 7204 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 7205 7206 if (!IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 7207 &ip6h->ip6_dst)) { 7208 if (ip_debug > 2) { 7209 /* ip1dbg */ 7210 pr_addr_dbg("ip_rput_data_v6: received 6to4 " 7211 "addressed packet which is not for us: " 7212 "%s\n", AF_INET6, &ip6h->ip6_dst); 7213 } 7214 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7215 freemsg(first_mp); 7216 return; 7217 } 7218 } 7219 7220 /* 7221 * Find an ire that matches destination. For link-local addresses 7222 * we have to match the ill. 7223 * TBD for site local addresses. 
7224 */ 7225 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7226 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7227 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7228 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7229 } else { 7230 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7231 msg_getlabel(mp), ipst); 7232 7233 if (ire != NULL && ire->ire_stq != NULL && 7234 ire->ire_zoneid != GLOBAL_ZONEID && 7235 ire->ire_zoneid != ALL_ZONES) { 7236 /* 7237 * Should only use IREs that are visible from the 7238 * global zone for forwarding. 7239 */ 7240 ire_refrele(ire); 7241 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7242 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7243 } 7244 } 7245 7246 if (ire == NULL) { 7247 /* 7248 * No matching IRE found. Mark this packet as having 7249 * originated externally. 7250 */ 7251 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7252 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7253 if (!(ill->ill_flags & ILLF_ROUTER)) { 7254 BUMP_MIB(ill->ill_ip_mib, 7255 ipIfStatsInAddrErrors); 7256 } 7257 freemsg(hada_mp); 7258 freemsg(first_mp); 7259 return; 7260 } 7261 if (ip6h->ip6_hops <= 1) { 7262 if (hada_mp != NULL) 7263 goto hada_drop; 7264 /* Sent by forwarding path, and router is global zone */ 7265 icmp_time_exceeded_v6(WR(q), first_mp, 7266 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7267 GLOBAL_ZONEID, ipst); 7268 return; 7269 } 7270 /* 7271 * Per RFC 3513 section 2.5.2, we must not forward packets with 7272 * an unspecified source address. 7273 */ 7274 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7276 freemsg(hada_mp); 7277 freemsg(first_mp); 7278 return; 7279 } 7280 mp->b_prev = (mblk_t *)(uintptr_t) 7281 ill->ill_phyint->phyint_ifindex; 7282 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7283 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? 
ill : NULL, 7284 GLOBAL_ZONEID, ipst); 7285 return; 7286 } 7287 /* we have a matching IRE */ 7288 if (ire->ire_stq != NULL) { 7289 /* 7290 * To be quicker, we may wish not to chase pointers 7291 * (ire->ire_ipif->ipif_ill...) and instead store the 7292 * forwarding policy in the ire. An unfortunate side- 7293 * effect of this would be requiring an ire flush whenever 7294 * the ILLF_ROUTER flag changes. For now, chase pointers 7295 * once and store in the boolean no_forward. 7296 * 7297 * This appears twice to keep it out of the non-forwarding, 7298 * yes-it's-for-us-on-the-right-interface case. 7299 */ 7300 no_forward = ((ill->ill_flags & 7301 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7302 7303 ASSERT(first_mp == mp); 7304 /* 7305 * This ire has a send-to queue - forward the packet. 7306 */ 7307 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7308 freemsg(hada_mp); 7309 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7310 if (no_forward) { 7311 BUMP_MIB(ill->ill_ip_mib, 7312 ipIfStatsInAddrErrors); 7313 } 7314 freemsg(mp); 7315 ire_refrele(ire); 7316 return; 7317 } 7318 /* 7319 * ipIfStatsHCInForwDatagrams should only be increment if there 7320 * will be an attempt to forward the packet, which is why we 7321 * increment after the above condition has been checked. 7322 */ 7323 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7324 if (ip6h->ip6_hops <= 1) { 7325 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7326 /* Sent by forwarding path, and router is global zone */ 7327 icmp_time_exceeded_v6(WR(q), mp, 7328 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7329 GLOBAL_ZONEID, ipst); 7330 ire_refrele(ire); 7331 return; 7332 } 7333 /* 7334 * Per RFC 3513 section 2.5.2, we must not forward packets with 7335 * an unspecified source address. 
7336 */ 7337 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7338 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7339 freemsg(mp); 7340 ire_refrele(ire); 7341 return; 7342 } 7343 7344 if (is_system_labeled()) { 7345 mblk_t *mp1; 7346 7347 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7348 BUMP_MIB(ill->ill_ip_mib, 7349 ipIfStatsForwProhibits); 7350 freemsg(mp); 7351 ire_refrele(ire); 7352 return; 7353 } 7354 /* Size may have changed */ 7355 mp = mp1; 7356 ip6h = (ip6_t *)mp->b_rptr; 7357 pkt_len = msgdsize(mp); 7358 } 7359 7360 if (pkt_len > ire->ire_max_frag) { 7361 int max_frag = ire->ire_max_frag; 7362 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7363 /* 7364 * Handle labeled packet resizing. 7365 */ 7366 if (is_system_labeled()) { 7367 max_frag = tsol_pmtu_adjust(mp, max_frag, 7368 pkt_len - old_pkt_len, AF_INET6); 7369 } 7370 7371 /* Sent by forwarding path, and router is global zone */ 7372 icmp_pkt2big_v6(WR(q), mp, max_frag, 7373 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7374 ire_refrele(ire); 7375 return; 7376 } 7377 7378 /* 7379 * Check to see if we're forwarding the packet to a 7380 * different link from which it came. If so, check the 7381 * source and destination addresses since routers must not 7382 * forward any packets with link-local source or 7383 * destination addresses to other links. Otherwise (if 7384 * we're forwarding onto the same link), conditionally send 7385 * a redirect message. 7386 */ 7387 if (ire->ire_rfq != q && 7388 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7389 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7390 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7391 BUMP_MIB(ill->ill_ip_mib, 7392 ipIfStatsInAddrErrors); 7393 freemsg(mp); 7394 ire_refrele(ire); 7395 return; 7396 } 7397 /* TBD add site-local check at site boundary? 
*/ 7398 } else if (ipst->ips_ipv6_send_redirects) { 7399 in6_addr_t *v6targ; 7400 in6_addr_t gw_addr_v6; 7401 ire_t *src_ire_v6 = NULL; 7402 7403 /* 7404 * Don't send a redirect when forwarding a source 7405 * routed packet. 7406 */ 7407 if (ip_source_routed_v6(ip6h, mp, ipst)) 7408 goto forward; 7409 7410 mutex_enter(&ire->ire_lock); 7411 gw_addr_v6 = ire->ire_gateway_addr_v6; 7412 mutex_exit(&ire->ire_lock); 7413 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7414 v6targ = &gw_addr_v6; 7415 /* 7416 * We won't send redirects to a router 7417 * that doesn't have a link local 7418 * address, but will forward. 7419 */ 7420 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7421 BUMP_MIB(ill->ill_ip_mib, 7422 ipIfStatsInAddrErrors); 7423 goto forward; 7424 } 7425 } else { 7426 v6targ = &ip6h->ip6_dst; 7427 } 7428 7429 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7430 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7431 GLOBAL_ZONEID, 0, NULL, 7432 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7433 ipst); 7434 7435 if (src_ire_v6 != NULL) { 7436 /* 7437 * The source is directly connected. 
7438 */ 7439 mp1 = copymsg(mp); 7440 if (mp1 != NULL) { 7441 icmp_send_redirect_v6(WR(q), 7442 mp1, v6targ, &ip6h->ip6_dst, 7443 ill, B_FALSE); 7444 } 7445 ire_refrele(src_ire_v6); 7446 } 7447 } 7448 7449 forward: 7450 /* Hoplimit verified above */ 7451 ip6h->ip6_hops--; 7452 7453 outill = ire->ire_ipif->ipif_ill; 7454 7455 DTRACE_PROBE4(ip6__forwarding__start, 7456 ill_t *, inill, ill_t *, outill, 7457 ip6_t *, ip6h, mblk_t *, mp); 7458 7459 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7460 ipst->ips_ipv6firewall_forwarding, 7461 inill, outill, ip6h, mp, mp, 0, ipst); 7462 7463 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7464 7465 if (mp != NULL) { 7466 UPDATE_IB_PKT_COUNT(ire); 7467 ire->ire_last_used_time = lbolt; 7468 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7469 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7470 } 7471 IRE_REFRELE(ire); 7472 return; 7473 } 7474 7475 /* 7476 * Need to put on correct queue for reassembly to find it. 7477 * No need to use put() since reassembly has its own locks. 7478 * Note: multicast packets and packets destined to addresses 7479 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7480 * the arriving ill. Unlike the IPv4 case, enabling strict 7481 * destination multihoming will prevent accepting packets 7482 * addressed to an IRE_LOCAL on lo0. 7483 */ 7484 if (ire->ire_rfq != q) { 7485 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7486 == NULL) { 7487 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7488 freemsg(hada_mp); 7489 freemsg(first_mp); 7490 return; 7491 } 7492 if (ire->ire_rfq != NULL) { 7493 q = ire->ire_rfq; 7494 ill = (ill_t *)q->q_ptr; 7495 ASSERT(ill != NULL); 7496 } 7497 } 7498 7499 zoneid = ire->ire_zoneid; 7500 UPDATE_IB_PKT_COUNT(ire); 7501 ire->ire_last_used_time = lbolt; 7502 /* Don't use the ire after this point, we'll NULL it out to be sure. 
*/ 7503 ire_refrele(ire); 7504 ire = NULL; 7505 ipv6forus: 7506 /* 7507 * Looks like this packet is for us one way or another. 7508 * This is where we'll process destination headers etc. 7509 */ 7510 for (; ; ) { 7511 switch (nexthdr) { 7512 case IPPROTO_TCP: { 7513 uint16_t *up; 7514 uint32_t sum; 7515 int offset; 7516 7517 hdr_len = pkt_len - remlen; 7518 7519 if (hada_mp != NULL) { 7520 ip0dbg(("tcp hada drop\n")); 7521 goto hada_drop; 7522 } 7523 7524 7525 /* TCP needs all of the TCP header */ 7526 if (remlen < TCP_MIN_HEADER_LENGTH) 7527 goto pkt_too_short; 7528 if (mp->b_cont != NULL && 7529 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7530 if (!pullupmsg(mp, 7531 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7532 BUMP_MIB(ill->ill_ip_mib, 7533 ipIfStatsInDiscards); 7534 freemsg(first_mp); 7535 return; 7536 } 7537 hck_flags = 0; 7538 ip6h = (ip6_t *)mp->b_rptr; 7539 whereptr = (uint8_t *)ip6h + hdr_len; 7540 } 7541 /* 7542 * Extract the offset field from the TCP header. 7543 */ 7544 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7545 if (offset != 5) { 7546 if (offset < 5) { 7547 ip1dbg(("ip_rput_data_v6: short " 7548 "TCP data offset")); 7549 BUMP_MIB(ill->ill_ip_mib, 7550 ipIfStatsInDiscards); 7551 freemsg(first_mp); 7552 return; 7553 } 7554 /* 7555 * There must be TCP options. 7556 * Make sure we can grab them. 7557 */ 7558 offset <<= 2; 7559 if (remlen < offset) 7560 goto pkt_too_short; 7561 if (mp->b_cont != NULL && 7562 whereptr + offset > mp->b_wptr) { 7563 if (!pullupmsg(mp, 7564 hdr_len + offset)) { 7565 BUMP_MIB(ill->ill_ip_mib, 7566 ipIfStatsInDiscards); 7567 freemsg(first_mp); 7568 return; 7569 } 7570 hck_flags = 0; 7571 ip6h = (ip6_t *)mp->b_rptr; 7572 whereptr = (uint8_t *)ip6h + hdr_len; 7573 } 7574 } 7575 7576 up = (uint16_t *)&ip6h->ip6_src; 7577 /* 7578 * TCP checksum calculation. 
First sum up the 7579 * pseudo-header fields: 7580 * - Source IPv6 address 7581 * - Destination IPv6 address 7582 * - TCP payload length 7583 * - TCP protocol ID 7584 */ 7585 sum = htons(IPPROTO_TCP + remlen) + 7586 up[0] + up[1] + up[2] + up[3] + 7587 up[4] + up[5] + up[6] + up[7] + 7588 up[8] + up[9] + up[10] + up[11] + 7589 up[12] + up[13] + up[14] + up[15]; 7590 7591 /* Fold initial sum */ 7592 sum = (sum & 0xffff) + (sum >> 16); 7593 7594 mp1 = mp->b_cont; 7595 7596 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7597 IP6_STAT(ipst, ip6_in_sw_cksum); 7598 7599 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7600 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7601 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7602 mp, mp1, cksum_err); 7603 7604 if (cksum_err) { 7605 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7606 7607 if (hck_flags & HCK_FULLCKSUM) { 7608 IP6_STAT(ipst, 7609 ip6_tcp_in_full_hw_cksum_err); 7610 } else if (hck_flags & HCK_PARTIALCKSUM) { 7611 IP6_STAT(ipst, 7612 ip6_tcp_in_part_hw_cksum_err); 7613 } else { 7614 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7615 } 7616 freemsg(first_mp); 7617 return; 7618 } 7619 tcp_fanout: 7620 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7621 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7622 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7623 return; 7624 } 7625 case IPPROTO_SCTP: 7626 { 7627 sctp_hdr_t *sctph; 7628 uint32_t calcsum, pktsum; 7629 uint_t hdr_len = pkt_len - remlen; 7630 sctp_stack_t *sctps; 7631 7632 sctps = inill->ill_ipst->ips_netstack->netstack_sctp; 7633 7634 /* SCTP needs all of the SCTP header */ 7635 if (remlen < sizeof (*sctph)) { 7636 goto pkt_too_short; 7637 } 7638 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7639 ASSERT(mp->b_cont != NULL); 7640 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7641 BUMP_MIB(ill->ill_ip_mib, 7642 ipIfStatsInDiscards); 7643 freemsg(mp); 7644 return; 7645 } 7646 ip6h = (ip6_t *)mp->b_rptr; 7647 whereptr = (uint8_t *)ip6h + hdr_len; 7648 } 7649 7650 sctph = 
(sctp_hdr_t *)(mp->b_rptr + hdr_len); 7651 /* checksum */ 7652 pktsum = sctph->sh_chksum; 7653 sctph->sh_chksum = 0; 7654 calcsum = sctp_cksum(mp, hdr_len); 7655 if (calcsum != pktsum) { 7656 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7657 freemsg(mp); 7658 return; 7659 } 7660 sctph->sh_chksum = pktsum; 7661 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7662 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7663 ports, zoneid, mp, sctps)) == NULL) { 7664 ip_fanout_sctp_raw(first_mp, ill, 7665 (ipha_t *)ip6h, B_FALSE, ports, 7666 mctl_present, 7667 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7668 B_TRUE, zoneid); 7669 return; 7670 } 7671 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7672 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7673 B_FALSE, mctl_present); 7674 return; 7675 } 7676 case IPPROTO_UDP: { 7677 uint16_t *up; 7678 uint32_t sum; 7679 7680 hdr_len = pkt_len - remlen; 7681 7682 if (hada_mp != NULL) { 7683 ip0dbg(("udp hada drop\n")); 7684 goto hada_drop; 7685 } 7686 7687 /* Verify that at least the ports are present */ 7688 if (remlen < UDPH_SIZE) 7689 goto pkt_too_short; 7690 if (mp->b_cont != NULL && 7691 whereptr + UDPH_SIZE > mp->b_wptr) { 7692 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7693 BUMP_MIB(ill->ill_ip_mib, 7694 ipIfStatsInDiscards); 7695 freemsg(first_mp); 7696 return; 7697 } 7698 hck_flags = 0; 7699 ip6h = (ip6_t *)mp->b_rptr; 7700 whereptr = (uint8_t *)ip6h + hdr_len; 7701 } 7702 7703 /* 7704 * Before going through the regular checksum 7705 * calculation, make sure the received checksum 7706 * is non-zero. RFC 2460 says, a 0x0000 checksum 7707 * in a UDP packet (within IPv6 packet) is invalid 7708 * and should be replaced by 0xffff. This makes 7709 * sense as regular checksum calculation will 7710 * pass for both the cases i.e. 0x0000 and 0xffff. 7711 * Removing one of the case makes error detection 7712 * stronger. 
7713 */ 7714 7715 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7716 /* 0x0000 checksum is invalid */ 7717 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7718 "checksum value 0x0000\n")); 7719 BUMP_MIB(ill->ill_ip_mib, 7720 udpIfStatsInCksumErrs); 7721 freemsg(first_mp); 7722 return; 7723 } 7724 7725 up = (uint16_t *)&ip6h->ip6_src; 7726 7727 /* 7728 * UDP checksum calculation. First sum up the 7729 * pseudo-header fields: 7730 * - Source IPv6 address 7731 * - Destination IPv6 address 7732 * - UDP payload length 7733 * - UDP protocol ID 7734 */ 7735 7736 sum = htons(IPPROTO_UDP + remlen) + 7737 up[0] + up[1] + up[2] + up[3] + 7738 up[4] + up[5] + up[6] + up[7] + 7739 up[8] + up[9] + up[10] + up[11] + 7740 up[12] + up[13] + up[14] + up[15]; 7741 7742 /* Fold initial sum */ 7743 sum = (sum & 0xffff) + (sum >> 16); 7744 7745 if (reass_hck_flags != 0) { 7746 hck_flags = reass_hck_flags; 7747 7748 IP_CKSUM_RECV_REASS(hck_flags, 7749 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7750 sum, reass_sum, cksum_err); 7751 } else { 7752 mp1 = mp->b_cont; 7753 7754 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7755 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7756 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7757 mp, mp1, cksum_err); 7758 } 7759 7760 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7761 IP6_STAT(ipst, ip6_in_sw_cksum); 7762 7763 if (cksum_err) { 7764 BUMP_MIB(ill->ill_ip_mib, 7765 udpIfStatsInCksumErrs); 7766 7767 if (hck_flags & HCK_FULLCKSUM) 7768 IP6_STAT(ipst, 7769 ip6_udp_in_full_hw_cksum_err); 7770 else if (hck_flags & HCK_PARTIALCKSUM) 7771 IP6_STAT(ipst, 7772 ip6_udp_in_part_hw_cksum_err); 7773 else 7774 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7775 7776 freemsg(first_mp); 7777 return; 7778 } 7779 goto udp_fanout; 7780 } 7781 case IPPROTO_ICMPV6: { 7782 uint16_t *up; 7783 uint32_t sum; 7784 uint_t hdr_len = pkt_len - remlen; 7785 7786 if (hada_mp != NULL) { 7787 ip0dbg(("icmp hada drop\n")); 7788 goto hada_drop; 7789 } 7790 7791 up = (uint16_t 
*)&ip6h->ip6_src; 7792 sum = htons(IPPROTO_ICMPV6 + remlen) + 7793 up[0] + up[1] + up[2] + up[3] + 7794 up[4] + up[5] + up[6] + up[7] + 7795 up[8] + up[9] + up[10] + up[11] + 7796 up[12] + up[13] + up[14] + up[15]; 7797 sum = (sum & 0xffff) + (sum >> 16); 7798 sum = IP_CSUM(mp, hdr_len, sum); 7799 if (sum != 0) { 7800 /* IPv6 ICMP checksum failed */ 7801 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7802 "failed %x\n", 7803 sum)); 7804 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7805 BUMP_MIB(ill->ill_icmp6_mib, 7806 ipv6IfIcmpInErrors); 7807 freemsg(first_mp); 7808 return; 7809 } 7810 7811 icmp_fanout: 7812 /* Check variable for testing applications */ 7813 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7814 freemsg(first_mp); 7815 return; 7816 } 7817 /* 7818 * Assume that there is always at least one conn for 7819 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7820 * where there is no conn. 7821 */ 7822 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7823 ilm_t *ilm; 7824 ilm_walker_t ilw; 7825 7826 ASSERT(!IS_LOOPBACK(ill)); 7827 /* 7828 * In the multicast case, applications may have 7829 * joined the group from different zones, so we 7830 * need to deliver the packet to each of them. 7831 * Loop through the multicast memberships 7832 * structures (ilm) on the receive ill and send 7833 * a copy of the packet up each matching one. 
7834 */ 7835 ilm = ilm_walker_start(&ilw, inill); 7836 for (; ilm != NULL; 7837 ilm = ilm_walker_step(&ilw, ilm)) { 7838 if (!IN6_ARE_ADDR_EQUAL( 7839 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7840 continue; 7841 if (!ipif_lookup_zoneid( 7842 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7843 IPIF_UP, NULL)) 7844 continue; 7845 7846 first_mp1 = ip_copymsg(first_mp); 7847 if (first_mp1 == NULL) 7848 continue; 7849 icmp_inbound_v6(q, first_mp1, 7850 ilw.ilw_walk_ill, inill, 7851 hdr_len, mctl_present, 0, 7852 ilm->ilm_zoneid, dl_mp); 7853 } 7854 ilm_walker_finish(&ilw); 7855 } else { 7856 first_mp1 = ip_copymsg(first_mp); 7857 if (first_mp1 != NULL) 7858 icmp_inbound_v6(q, first_mp1, ill, 7859 inill, hdr_len, mctl_present, 0, 7860 zoneid, dl_mp); 7861 } 7862 } 7863 /* FALLTHRU */ 7864 default: { 7865 /* 7866 * Handle protocols with which IPv6 is less intimate. 7867 */ 7868 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7869 7870 if (hada_mp != NULL) { 7871 ip0dbg(("default hada drop\n")); 7872 goto hada_drop; 7873 } 7874 7875 /* 7876 * Enable sending ICMP for "Unknown" nexthdr 7877 * case. i.e. where we did not FALLTHRU from 7878 * IPPROTO_ICMPV6 processing case above. 7879 * If we did FALLTHRU, then the packet has already been 7880 * processed for IPPF, don't process it again in 7881 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7882 * flags 7883 */ 7884 if (nexthdr != IPPROTO_ICMPV6) 7885 proto_flags |= IP_FF_SEND_ICMP; 7886 else 7887 proto_flags |= IP6_NO_IPPOLICY; 7888 7889 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7890 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7891 mctl_present, zoneid); 7892 return; 7893 } 7894 7895 case IPPROTO_DSTOPTS: { 7896 uint_t ehdrlen; 7897 uint8_t *optptr; 7898 ip6_dest_t *desthdr; 7899 7900 /* If packet is too short, look no further */ 7901 if (remlen < MIN_EHDR_LEN) 7902 goto pkt_too_short; 7903 7904 /* Check if AH is present. 
*/ 7905 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7906 inill, hada_mp, zoneid)) { 7907 return; 7908 } 7909 7910 /* 7911 * Reinitialize pointers, as ipsec_early_ah_v6() does 7912 * complete pullups. We don't have to do more pullups 7913 * as a result. 7914 */ 7915 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7916 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7917 ip6h = (ip6_t *)mp->b_rptr; 7918 7919 desthdr = (ip6_dest_t *)whereptr; 7920 nexthdr = desthdr->ip6d_nxt; 7921 prev_nexthdr_offset = (uint_t)(whereptr - 7922 (uint8_t *)ip6h); 7923 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7924 if (remlen < ehdrlen) 7925 goto pkt_too_short; 7926 optptr = whereptr + 2; 7927 /* 7928 * Note: XXX This code does not seem to make 7929 * distinction between Destination Options Header 7930 * being before/after Routing Header which can 7931 * happen if we are at the end of source route. 7932 * This may become significant in future. 7933 * (No real significant Destination Options are 7934 * defined/implemented yet ). 7935 */ 7936 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7937 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7938 case -1: 7939 /* 7940 * Packet has been consumed and any needed 7941 * ICMP errors sent. 
7942 */ 7943 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7944 freemsg(hada_mp); 7945 return; 7946 case 0: 7947 /* No action needed continue */ 7948 break; 7949 case 1: 7950 /* 7951 * Unnexpected return value 7952 * (Router alert is a Hop-by-Hop option) 7953 */ 7954 #ifdef DEBUG 7955 panic("ip_rput_data_v6: router " 7956 "alert hbh opt indication in dest opt"); 7957 /*NOTREACHED*/ 7958 #else 7959 freemsg(hada_mp); 7960 freemsg(first_mp); 7961 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7962 return; 7963 #endif 7964 } 7965 used = ehdrlen; 7966 break; 7967 } 7968 case IPPROTO_FRAGMENT: { 7969 ip6_frag_t *fraghdr; 7970 size_t no_frag_hdr_len; 7971 7972 if (hada_mp != NULL) { 7973 ip0dbg(("frag hada drop\n")); 7974 goto hada_drop; 7975 } 7976 7977 ASSERT(first_mp == mp); 7978 if (remlen < sizeof (ip6_frag_t)) 7979 goto pkt_too_short; 7980 7981 if (mp->b_cont != NULL && 7982 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7983 if (!pullupmsg(mp, 7984 pkt_len - remlen + sizeof (ip6_frag_t))) { 7985 BUMP_MIB(ill->ill_ip_mib, 7986 ipIfStatsInDiscards); 7987 freemsg(mp); 7988 return; 7989 } 7990 hck_flags = 0; 7991 ip6h = (ip6_t *)mp->b_rptr; 7992 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7993 } 7994 7995 fraghdr = (ip6_frag_t *)whereptr; 7996 used = (uint_t)sizeof (ip6_frag_t); 7997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 7998 7999 /* 8000 * Invoke the CGTP (multirouting) filtering module to 8001 * process the incoming packet. Packets identified as 8002 * duplicates must be discarded. Filtering is active 8003 * only if the the ip_cgtp_filter ndd variable is 8004 * non-zero. 
8005 */ 8006 if (ipst->ips_ip_cgtp_filter && 8007 ipst->ips_ip_cgtp_filter_ops != NULL) { 8008 int cgtp_flt_pkt; 8009 netstackid_t stackid; 8010 8011 stackid = ipst->ips_netstack->netstack_stackid; 8012 8013 cgtp_flt_pkt = 8014 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 8015 stackid, inill->ill_phyint->phyint_ifindex, 8016 ip6h, fraghdr); 8017 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 8018 freemsg(mp); 8019 return; 8020 } 8021 } 8022 8023 /* Restore the flags */ 8024 DB_CKSUMFLAGS(mp) = hck_flags; 8025 8026 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 8027 remlen - used, &prev_nexthdr_offset, 8028 &reass_sum, &reass_hck_flags); 8029 if (mp == NULL) { 8030 /* Reassembly is still pending */ 8031 return; 8032 } 8033 /* The first mblk are the headers before the frag hdr */ 8034 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 8035 8036 first_mp = mp; /* mp has most likely changed! */ 8037 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 8038 ip6h = (ip6_t *)mp->b_rptr; 8039 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 8040 whereptr = mp->b_rptr + no_frag_hdr_len; 8041 remlen = ntohs(ip6h->ip6_plen) + 8042 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 8043 pkt_len = msgdsize(mp); 8044 used = 0; 8045 break; 8046 } 8047 case IPPROTO_HOPOPTS: { 8048 if (hada_mp != NULL) { 8049 ip0dbg(("hop hada drop\n")); 8050 goto hada_drop; 8051 } 8052 /* 8053 * Illegal header sequence. 8054 * (Hop-by-hop headers are processed above 8055 * and required to immediately follow IPv6 header) 8056 */ 8057 icmp_param_problem_v6(WR(q), first_mp, 8058 ICMP6_PARAMPROB_NEXTHEADER, 8059 prev_nexthdr_offset, 8060 B_FALSE, B_FALSE, zoneid, ipst); 8061 return; 8062 } 8063 case IPPROTO_ROUTING: { 8064 uint_t ehdrlen; 8065 ip6_rthdr_t *rthdr; 8066 8067 /* If packet is too short, look no further */ 8068 if (remlen < MIN_EHDR_LEN) 8069 goto pkt_too_short; 8070 8071 /* Check if AH is present. 
*/ 8072 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 8073 inill, hada_mp, zoneid)) { 8074 return; 8075 } 8076 8077 /* 8078 * Reinitialize pointers, as ipsec_early_ah_v6() does 8079 * complete pullups. We don't have to do more pullups 8080 * as a result. 8081 */ 8082 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 8083 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 8084 ip6h = (ip6_t *)mp->b_rptr; 8085 8086 rthdr = (ip6_rthdr_t *)whereptr; 8087 nexthdr = rthdr->ip6r_nxt; 8088 prev_nexthdr_offset = (uint_t)(whereptr - 8089 (uint8_t *)ip6h); 8090 ehdrlen = 8 * (rthdr->ip6r_len + 1); 8091 if (remlen < ehdrlen) 8092 goto pkt_too_short; 8093 if (rthdr->ip6r_segleft != 0) { 8094 /* Not end of source route */ 8095 if (ll_multicast) { 8096 BUMP_MIB(ill->ill_ip_mib, 8097 ipIfStatsForwProhibits); 8098 freemsg(hada_mp); 8099 freemsg(mp); 8100 return; 8101 } 8102 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 8103 flags, hada_mp, dl_mp); 8104 return; 8105 } 8106 used = ehdrlen; 8107 break; 8108 } 8109 case IPPROTO_AH: 8110 case IPPROTO_ESP: { 8111 /* 8112 * Fast path for AH/ESP. If this is the first time 8113 * we are sending a datagram to AH/ESP, allocate 8114 * a IPSEC_IN message and prepend it. Otherwise, 8115 * just fanout. 8116 */ 8117 8118 ipsec_in_t *ii; 8119 int ipsec_rc; 8120 ipsec_stack_t *ipss; 8121 8122 ipss = ipst->ips_netstack->netstack_ipsec; 8123 if (!mctl_present) { 8124 ASSERT(first_mp == mp); 8125 first_mp = ipsec_in_alloc(B_FALSE, 8126 ipst->ips_netstack); 8127 if (first_mp == NULL) { 8128 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8129 "allocation failure.\n")); 8130 BUMP_MIB(ill->ill_ip_mib, 8131 ipIfStatsInDiscards); 8132 freemsg(mp); 8133 return; 8134 } 8135 /* 8136 * Store the ill_index so that when we come back 8137 * from IPSEC we ride on the same queue. 
8138 */ 8139 ii = (ipsec_in_t *)first_mp->b_rptr; 8140 ii->ipsec_in_ill_index = 8141 ill->ill_phyint->phyint_ifindex; 8142 ii->ipsec_in_rill_index = 8143 inill->ill_phyint->phyint_ifindex; 8144 first_mp->b_cont = mp; 8145 /* 8146 * Cache hardware acceleration info. 8147 */ 8148 if (hada_mp != NULL) { 8149 IPSECHW_DEBUG(IPSECHW_PKT, 8150 ("ip_rput_data_v6: " 8151 "caching data attr.\n")); 8152 ii->ipsec_in_accelerated = B_TRUE; 8153 ii->ipsec_in_da = hada_mp; 8154 hada_mp = NULL; 8155 } 8156 } else { 8157 ii = (ipsec_in_t *)first_mp->b_rptr; 8158 } 8159 8160 if (!ipsec_loaded(ipss)) { 8161 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8162 zoneid, ipst); 8163 return; 8164 } 8165 8166 /* select inbound SA and have IPsec process the pkt */ 8167 if (nexthdr == IPPROTO_ESP) { 8168 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8169 ipst->ips_netstack); 8170 if (esph == NULL) 8171 return; 8172 ASSERT(ii->ipsec_in_esp_sa != NULL); 8173 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8174 NULL); 8175 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8176 first_mp, esph); 8177 } else { 8178 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8179 ipst->ips_netstack); 8180 if (ah == NULL) 8181 return; 8182 ASSERT(ii->ipsec_in_ah_sa != NULL); 8183 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8184 NULL); 8185 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8186 first_mp, ah); 8187 } 8188 8189 switch (ipsec_rc) { 8190 case IPSEC_STATUS_SUCCESS: 8191 break; 8192 case IPSEC_STATUS_FAILED: 8193 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8194 /* FALLTHRU */ 8195 case IPSEC_STATUS_PENDING: 8196 return; 8197 } 8198 /* we're done with IPsec processing, send it up */ 8199 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8200 return; 8201 } 8202 case IPPROTO_NONE: 8203 /* All processing is done. Count as "delivered". 
*/ 8204 freemsg(hada_mp); 8205 freemsg(first_mp); 8206 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8207 return; 8208 } 8209 whereptr += used; 8210 ASSERT(remlen >= used); 8211 remlen -= used; 8212 } 8213 /* NOTREACHED */ 8214 8215 pkt_too_short: 8216 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8217 ip6_len, pkt_len, remlen)); 8218 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8219 freemsg(hada_mp); 8220 freemsg(first_mp); 8221 return; 8222 udp_fanout: 8223 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8224 connp = NULL; 8225 } else { 8226 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8227 ipst); 8228 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8229 CONN_DEC_REF(connp); 8230 connp = NULL; 8231 } 8232 } 8233 8234 if (connp == NULL) { 8235 uint32_t ports; 8236 8237 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8238 UDP_PORTS_OFFSET); 8239 IP6_STAT(ipst, ip6_udp_slow_path); 8240 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8241 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8242 zoneid); 8243 return; 8244 } 8245 8246 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8247 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8248 freemsg(first_mp); 8249 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8250 CONN_DEC_REF(connp); 8251 return; 8252 } 8253 8254 /* Initiate IPPF processing */ 8255 if (IP6_IN_IPP(flags, ipst)) { 8256 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8257 if (mp == NULL) { 8258 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8259 CONN_DEC_REF(connp); 8260 return; 8261 } 8262 } 8263 8264 if (connp->conn_ip_recvpktinfo || 8265 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8266 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8267 if (mp == NULL) { 8268 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8269 CONN_DEC_REF(connp); 8270 return; 8271 } 8272 } 8273 8274 IP6_STAT(ipst, ip6_udp_fast_path); 8275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 

	/* Send it upstream */
	(connp->conn_recv)(connp, mp, NULL);

	CONN_DEC_REF(connp);
	freemsg(hada_mp);
	return;

hada_drop:
	ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n"));
	/* IPsec kstats: bump counter here */
	freemsg(hada_mp);
	freemsg(first_mp);
}

/*
 * Reassemble fragment.
 * When it returns a completed message the first mblk will only contain
 * the headers prior to the fragment header.
 *
 * prev_nexthdr_offset is an offset indication of where the nexthdr field is
 * of the preceding header.  This is needed to patch the previous header's
 * nexthdr field when reassembly completes.  It is read when the first
 * fragment (offset 0) is queued and overwritten with the stored value when
 * a queued packet completes.
 *
 * Arguments:
 *	ill	- ill whose fragment hash table, MIB counters and
 *		  ill_frag_count are used for this fragment.
 *	inill	- must be non-NULL; consulted for hardware checksum capability.
 *	mp	- the fragment; ip6h and fraghdr point into it.
 *	remlen	- added to the fragment offset to obtain the end offset of
 *		  this fragment's data.
 *	cksum_val, cksum_flags - optional (may be NULL) outputs.  Both are
 *		  zeroed on entry and filled in with the accumulated checksum
 *		  state when a message is returned.
 *
 * Returns NULL when mp was freed, was queued pending further fragments, or
 * was passed to icmp_param_problem_v6(); otherwise returns the message with
 * the fragment header removed.
 */
static mblk_t *
ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h,
    ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset,
    uint32_t *cksum_val, uint16_t *cksum_flags)
{
	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
	uint16_t	offset;
	boolean_t	more_frags;
	uint8_t		nexthdr = fraghdr->ip6f_nxt;
	in6_addr_t	*v6dst_ptr;
	in6_addr_t	*v6src_ptr;
	uint_t		end;
	uint_t		hdr_length;
	size_t		count;
	ipf_t		*ipf;
	ipf_t		**ipfp;
	ipfb_t		*ipfb;
	mblk_t		*mp1;
	uint8_t		ecn_info = 0;
	size_t		msg_len;
	mblk_t		*tail_mp;
	mblk_t		*t_mp;
	boolean_t	pruned = B_FALSE;
	uint32_t	sum_val;
	uint16_t	sum_flags;
	ip_stack_t	*ipst = ill->ill_ipst;

	if (cksum_val != NULL)
		*cksum_val = 0;
	if (cksum_flags != NULL)
		*cksum_flags = 0;

	/*
	 * We utilize hardware computed checksum info only for UDP since
	 * IP fragmentation is a normal occurrence for the protocol.  In
	 * addition, checksum offload support for IP fragments carrying
	 * UDP payload is commonly implemented across network adapters.
	 */
	ASSERT(inill != NULL);
	if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) &&
	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
		mblk_t *mp1 = mp->b_cont;
		int32_t len;

		/* Record checksum information from the packet */
		sum_val = (uint32_t)DB_CKSUM16(mp);
		sum_flags = DB_CKSUMFLAGS(mp);

		/* fragmented payload offset from beginning of mblk */
		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);

		if ((sum_flags & HCK_PARTIALCKSUM) &&
		    (mp1 == NULL || mp1->b_cont == NULL) &&
		    offset >= (uint16_t)DB_CKSUMSTART(mp) &&
		    ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) {
			uint32_t adj;
			/*
			 * Partial checksum has been calculated by hardware
			 * and attached to the packet; in addition, any
			 * prepended extraneous data is even byte aligned.
			 * If any such data exists, we adjust the checksum;
			 * this would also handle any postpended data.
			 */
			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
			    mp, mp1, len, adj);

			/* One's complement subtract extraneous checksum */
			if (adj >= sum_val)
				sum_val = ~(adj - sum_val) & 0xFFFF;
			else
				sum_val -= adj;
		}
	} else {
		sum_val = 0;
		sum_flags = 0;
	}

	/* Clear hardware checksumming flag */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Note: Fragment offset in header is in 8-octet units.
	 * Clearing least significant 3 bits not only extracts
	 * it but also gets it in units of octets.
	 */
	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);

	/*
	 * Is the more frags flag on and the payload length not a multiple
	 * of eight?
	 */
	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
		zoneid_t zoneid;

		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
		if (zoneid == ALL_ZONES) {
			freemsg(mp);
			return (NULL);
		}
		/* Point the parameter problem at the payload length field. */
		icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&ip6h->ip6_plen -
		    (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst);
		return (NULL);
	}

	v6src_ptr = &ip6h->ip6_src;
	v6dst_ptr = &ip6h->ip6_dst;
	end = remlen;

	/* Length of everything up to and including the fragment header. */
	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
	end += offset;

	/*
	 * Would fragment cause reassembled packet to have a payload length
	 * greater than IP_MAXPACKET - the max payload size?
	 */
	if (end > IP_MAXPACKET) {
		zoneid_t	zoneid;

		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
		if (zoneid == ALL_ZONES) {
			freemsg(mp);
			return (NULL);
		}
		/* Point the parameter problem at the fragment offset field. */
		icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
		    (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst);
		return (NULL);
	}

	/*
	 * This packet just has one fragment. Reassembly not
	 * needed.
	 *
	 * NOTE(review): this jump skips the ECN capture below, so ecn_info
	 * still holds its initial 0 when the ECN bits of ip6_vcf are
	 * rewritten after reass_done -- confirm that clearing ECN on
	 * single-fragment packets is intended.
	 */
	if (!more_frags && offset == 0) {
		goto reass_done;
	}

	/*
	 * Drop the fragment as early as possible, if
	 * we don't have resource(s) to re-assemble.
	 */
	if (ipst->ips_ip_reass_queue_bytes == 0) {
		freemsg(mp);
		return (NULL);
	}

	/* Record the ECN field info. */
	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
	/*
	 * If this is not the first fragment, dump the unfragmentable
	 * portion of the packet.
	 */
	if (offset)
		mp->b_rptr = (uchar_t *)&fraghdr[1];

	/*
	 * Fragmentation reassembly.  Each ILL has a hash table for
	 * queueing packets undergoing reassembly for all IPIFs
	 * associated with the ILL.  The hash is based on the packet
	 * IP ident field.  The ILL frag hash table was allocated
	 * as a timer block at the time the ILL was created.  Whenever
	 * there is anything on the reassembly queue, the timer will
	 * be running.
	 */
	/* Total the mblk sizes of this fragment and find its last mblk. */
	msg_len = MBLKSIZE(mp);
	tail_mp = mp;
	while (tail_mp->b_cont != NULL) {
		tail_mp = tail_mp->b_cont;
		msg_len += MBLKSIZE(tail_mp);
	}
	/*
	 * If the reassembly list for this ILL will get too big
	 * prune it.
	 */

	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
	    ipst->ips_ip_reass_queue_bytes) {
		ill_frag_prune(ill,
		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
		    (ipst->ips_ip_reass_queue_bytes - msg_len));
		pruned = B_TRUE;
	}

	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
	mutex_enter(&ipfb->ipfb_lock);

	ipfp = &ipfb->ipfb_ipf;
	/* Try to find an existing fragment queue for this packet. */
	for (;;) {
		ipf = ipfp[0];
		if (ipf) {
			/*
			 * It has to match on ident, source address, and
			 * dest address.
			 */
			if (ipf->ipf_ident == ident &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {

				/*
				 * If we have received too many
				 * duplicate fragments for this packet
				 * free it.
				 */
				if (ipf->ipf_num_dups > ip_max_frag_dups) {
					ill_frag_free_pkts(ill, ipfb, ipf, 1);
					freemsg(mp);
					mutex_exit(&ipfb->ipfb_lock);
					return (NULL);
				}

				break;
			}
			ipfp = &ipf->ipf_hash_next;
			continue;
		}

		/*
		 * End of the bucket chain without a match: from here to the
		 * bottom of the loop body a new reassembly entry is set up.
		 */

		/*
		 * If we pruned the list, do we want to store this new
		 * fragment?. We apply an optimization here based on the
		 * fact that most fragments will be received in order.
		 * So if the offset of this incoming fragment is zero,
		 * it is the first fragment of a new packet. We will
		 * keep it.  Otherwise drop the fragment, as we have
		 * probably pruned the packet already (since the
		 * packet cannot be found).
		 */

		if (pruned && offset != 0) {
			mutex_exit(&ipfb->ipfb_lock);
			freemsg(mp);
			return (NULL);
		}

		/* New guy.  Allocate a frag message. */
		mp1 = allocb(sizeof (*ipf), BPRI_MED);
		if (!mp1) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			freemsg(mp);
			/*
			 * Shared exit for every path that leaves with the
			 * bucket lock held and no completed packet; it is
			 * the target of gotos from outside this if-body.
			 */
partial_reass_done:
			mutex_exit(&ipfb->ipfb_lock);
			return (NULL);
		}

		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) {
			/*
			 * Too many fragmented packets in this hash bucket.
			 * Free the oldest.
			 */
			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
		}

		mp1->b_cont = mp;

		/* Initialize the fragment header. */
		ipf = (ipf_t *)mp1->b_rptr;
		ipf->ipf_mp = mp1;
		ipf->ipf_ptphn = ipfp;
		ipfp[0] = ipf;
		ipf->ipf_hash_next = NULL;
		ipf->ipf_ident = ident;
		ipf->ipf_v6src = *v6src_ptr;
		ipf->ipf_v6dst = *v6dst_ptr;
		/* Record reassembly start time. */
		ipf->ipf_timestamp = gethrestime_sec();
		/* Record ipf generation and account for frag header */
		ipf->ipf_gen = ill->ill_ipf_gen++;
		ipf->ipf_count = MBLKSIZE(mp1);
		ipf->ipf_protocol = nexthdr;
		ipf->ipf_nf_hdr_len = 0;
		ipf->ipf_prev_nexthdr_offset = 0;
		ipf->ipf_last_frag_seen = B_FALSE;
		ipf->ipf_ecn = ecn_info;
		ipf->ipf_num_dups = 0;
		ipfb->ipfb_frag_pkts++;
		ipf->ipf_checksum = 0;
		ipf->ipf_checksum_flags = 0;

		/* Store checksum value in fragment header */
		if (sum_flags != 0) {
			/* Fold the 32-bit sum down to 16 bits (twice for carry) */
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			ipf->ipf_checksum = sum_val;
			ipf->ipf_checksum_flags = sum_flags;
		}

		/*
		 * We handle reassembly two ways.  In the easy case,
		 * where all the fragments show up in order, we do
		 * minimal bookkeeping, and just clip new pieces on
		 * the end.  If we ever see a hole, then we go off
		 * to ip_reassemble which has to mark the pieces and
		 * keep track of the number of holes, etc.  Obviously,
		 * the point of having both mechanisms is so we can
		 * handle the easy case as efficiently as possible.
		 */
		if (offset == 0) {
			/* Easy case, in-order reassembly so far. */
			/* Update the byte count */
			ipf->ipf_count += msg_len;
			ipf->ipf_tail_mp = tail_mp;
			/*
			 * Keep track of next expected offset in
			 * ipf_end.
			 */
			ipf->ipf_end = end;
			ipf->ipf_nf_hdr_len = hdr_length;
			ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset;
		} else {
			/* Hard case, hole at the beginning. */
			ipf->ipf_tail_mp = NULL;
			/*
			 * ipf_end == 0 means that we have given up
			 * on easy reassembly.
			 */
			ipf->ipf_end = 0;

			/* Forget checksum offload from now on */
			ipf->ipf_checksum_flags = 0;

			/*
			 * ipf_hole_cnt is set by ip_reassemble.
			 * ipf_count is updated by ip_reassemble.
			 * No need to check for return value here
			 * as we don't expect reassembly to complete or
			 * fail for the first fragment itself.
			 */
			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
			    msg_len);
		}
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += ipf->ipf_count;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
		/* If the frag timer wasn't already going, start it. */
		mutex_enter(&ill->ill_lock);
		ill_frag_timer_start(ill);
		mutex_exit(&ill->ill_lock);
		goto partial_reass_done;
	}

	/*
	 * If the packet's flag has changed (it could be coming up
	 * from an interface different than the previous, therefore
	 * possibly different checksum capability), then forget about
	 * any stored checksum states.  Otherwise add the value to
	 * the existing one stored in the fragment header.
	 */
	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
		sum_val += ipf->ipf_checksum;
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		ipf->ipf_checksum = sum_val;
	} else if (ipf->ipf_checksum_flags != 0) {
		/* Forget checksum offload from now on */
		ipf->ipf_checksum_flags = 0;
	}

	/*
	 * We have a new piece of a datagram which is already being
	 * reassembled.  Update the ECN info if all IP fragments
	 * are ECN capable.  If there is one which is not, clear
	 * all the info.  If there is at least one which has CE
	 * code point, IP needs to report that up to transport.
	 */
	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
		if (ecn_info == IPH_ECN_CE)
			ipf->ipf_ecn = IPH_ECN_CE;
	} else {
		ipf->ipf_ecn = IPH_ECN_NECT;
	}

	if (offset && ipf->ipf_end == offset) {
		/* The new fragment fits at the end */
		ipf->ipf_tail_mp->b_cont = mp;
		/* Update the byte count */
		ipf->ipf_count += msg_len;
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += msg_len;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, msg_len);
		if (more_frags) {
			/* More to come. */
			ipf->ipf_end = end;
			ipf->ipf_tail_mp = tail_mp;
			goto partial_reass_done;
		}
	} else {
		/*
		 * Go do the hard cases.
		 * Call ip_reassemble().
		 */
		int ret;

		if (offset == 0) {
			/*
			 * First fragment arriving after later ones: record
			 * the unfragmentable header info if not yet set.
			 */
			if (ipf->ipf_prev_nexthdr_offset == 0) {
				ipf->ipf_nf_hdr_len = hdr_length;
				ipf->ipf_prev_nexthdr_offset =
				    *prev_nexthdr_offset;
			}
		}
		/* Save current byte count */
		count = ipf->ipf_count;
		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);

		/* Count of bytes added and subtracted (freeb()ed) */
		count = ipf->ipf_count - count;
		if (count) {
			/* Update per ipfb and ill byte counts */
			ipfb->ipfb_count += count;
			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
			atomic_add_32(&ill->ill_frag_count, count);
		}
		if (ret == IP_REASS_PARTIAL) {
			goto partial_reass_done;
		} else if (ret == IP_REASS_FAILED) {
			/* Reassembly failed. Free up all resources */
			ill_frag_free_pkts(ill, ipfb, ipf, 1);
			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
				IP_REASS_SET_START(t_mp, 0);
				IP_REASS_SET_END(t_mp, 0);
			}
			freemsg(mp);
			goto partial_reass_done;
		}

		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
	}
	/*
	 * We have completed reassembly.  Unhook the frag header from
	 * the reassembly list.
	 *
	 * Grab the unfragmentable header length and next header value out
	 * of the first fragment
	 */
	ASSERT(ipf->ipf_nf_hdr_len != 0);
	hdr_length = ipf->ipf_nf_hdr_len;

	/*
	 * Before we free the frag header, record the ECN info
	 * to report back to the transport.
	 */
	ecn_info = ipf->ipf_ecn;

	/*
	 * Store the nextheader field in the header preceding the fragment
	 * header
	 */
	nexthdr = ipf->ipf_protocol;
	*prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
	ipfp = ipf->ipf_ptphn;

	/* We need to supply these to caller */
	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
		sum_val = ipf->ipf_checksum;
	else
		sum_val = 0;

	/* Unlink ipf from the bucket chain and undo its byte accounting. */
	mp1 = ipf->ipf_mp;
	count = ipf->ipf_count;
	ipf = ipf->ipf_hash_next;
	if (ipf)
		ipf->ipf_ptphn = ipfp;
	ipfp[0] = ipf;
	atomic_add_32(&ill->ill_frag_count, -count);
	ASSERT(ipfb->ipfb_count >= count);
	ipfb->ipfb_count -= count;
	ipfb->ipfb_frag_pkts--;
	mutex_exit(&ipfb->ipfb_lock);
	/* Ditch the frag header. */
	mp = mp1->b_cont;
	freeb(mp1);

	/*
	 * Make sure the packet is good by doing some sanity
	 * check. If bad we can silently drop the packet.
	 */
reass_done:
	if (hdr_length < sizeof (ip6_frag_t)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip1dbg(("ip_rput_frag_v6: bad packet\n"));
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Remove the fragment header from the initial header by
	 * splitting the mblk into the non-fragmentable header and
	 * everything after the fragment extension header.  This has the
	 * side effect of putting all the headers that need destination
	 * processing into the b_cont block-- on return this fact is
	 * used in order to avoid having to look at the extensions
	 * already processed.
8798 * 8799 * Note that this code assumes that the unfragmentable portion 8800 * of the header is in the first mblk and increments 8801 * the read pointer past it. If this assumption is broken 8802 * this code fails badly. 8803 */ 8804 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8805 mblk_t *nmp; 8806 8807 if (!(nmp = dupb(mp))) { 8808 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8809 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8810 freemsg(mp); 8811 return (NULL); 8812 } 8813 nmp->b_cont = mp->b_cont; 8814 mp->b_cont = nmp; 8815 nmp->b_rptr += hdr_length; 8816 } 8817 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8818 8819 ip6h = (ip6_t *)mp->b_rptr; 8820 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8821 8822 /* Restore original IP length in header. */ 8823 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8824 /* Record the ECN info. */ 8825 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8826 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8827 8828 /* Reassembly is successful; return checksum information if needed */ 8829 if (cksum_val != NULL) 8830 *cksum_val = sum_val; 8831 if (cksum_flags != NULL) 8832 *cksum_flags = sum_flags; 8833 8834 return (mp); 8835 } 8836 8837 /* 8838 * Walk through the options to see if there is a routing header. 8839 * If present get the destination which is the last address of 8840 * the option. 8841 */ 8842 in6_addr_t 8843 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8844 { 8845 mblk_t *current_mp = mp; 8846 uint8_t nexthdr; 8847 uint8_t *whereptr; 8848 ip6_hbh_t *hbhhdr; 8849 ip6_dest_t *dsthdr; 8850 ip6_rthdr0_t *rthdr; 8851 ip6_frag_t *fraghdr; 8852 int ehdrlen; 8853 int left; 8854 in6_addr_t *ap, rv; 8855 8856 rv = ip6h->ip6_dst; 8857 8858 if (is_fragment != NULL) 8859 *is_fragment = B_FALSE; 8860 8861 if ((uint8_t *)ip6h >= current_mp->b_wptr || 8862 (uint8_t *)ip6h < current_mp->b_rptr) { 8863 /* Bad packet. Return what we can. 
*/ 8864 DTRACE_PROBE2(ip_get_dst_v6_badpkt1, mblk_t *, mp, ip6_t *, 8865 ip6h); 8866 goto done; 8867 } 8868 8869 nexthdr = ip6h->ip6_nxt; 8870 8871 whereptr = (uint8_t *)ip6h; 8872 ehdrlen = sizeof (ip6_t); 8873 8874 for (;;) { 8875 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8876 ehdrlen -= (current_mp->b_wptr - whereptr); 8877 current_mp = current_mp->b_cont; 8878 if (current_mp == NULL) { 8879 /* Bad packet. Return what we can. */ 8880 DTRACE_PROBE3(ip_get_dst_v6_badpkt2, 8881 mblk_t *, mp, mblk_t *, current_mp, ip6_t *, 8882 ip6h); 8883 goto done; 8884 } 8885 whereptr = current_mp->b_rptr; 8886 } 8887 whereptr += ehdrlen; 8888 8889 /* Enough room for next-header and length (2 bytes)? */ 8890 if (whereptr + 2 > current_mp->b_wptr) { 8891 whereptr -= (uintptr_t)current_mp->b_rptr; 8892 /* Grumble -- eat the pullup. */ 8893 if (!pullupmsg(current_mp, -1)) { 8894 DTRACE_PROBE3(ip_get_dst_v6_pullup_failed, 8895 mblk_t *, mp, mblk_t *, current_mp, ip6_t *, 8896 ip6h); 8897 goto done; 8898 } 8899 whereptr += (uintptr_t)current_mp->b_rptr; 8900 if (whereptr + 2 > current_mp->b_wptr) { 8901 /* Bad packet. Return what we can. */ 8902 DTRACE_PROBE3(ip_get_dst_v6_badpkt3, 8903 mblk_t *, mp, mblk_t *, current_mp, ip6_t *, 8904 ip6h); 8905 goto done; 8906 } 8907 } 8908 8909 ASSERT(nexthdr != IPPROTO_RAW); 8910 switch (nexthdr) { 8911 case IPPROTO_HOPOPTS: 8912 hbhhdr = (ip6_hbh_t *)whereptr; 8913 nexthdr = hbhhdr->ip6h_nxt; 8914 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 8915 break; 8916 case IPPROTO_DSTOPTS: 8917 dsthdr = (ip6_dest_t *)whereptr; 8918 nexthdr = dsthdr->ip6d_nxt; 8919 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 8920 break; 8921 case IPPROTO_ROUTING: 8922 rthdr = (ip6_rthdr0_t *)whereptr; 8923 nexthdr = rthdr->ip6r0_nxt; 8924 ehdrlen = 8 * (rthdr->ip6r0_len + 1); 8925 if (nexthdr + ehdrlen > (uintptr_t)current_mp->b_wptr) { 8926 /* Bad packet. Return what we can. 
*/ 8927 DTRACE_PROBE3(ip_get_dst_v6_badpkt4, 8928 mblk_t *, mp, mblk_t *, current_mp, ip6_t *, 8929 ip6h); 8930 goto done; 8931 } 8932 8933 left = rthdr->ip6r0_segleft; 8934 ap = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 8935 rv = *(ap + left - 1); 8936 /* 8937 * If the caller doesn't care whether the packet 8938 * is a fragment or not, we can stop here since 8939 * we have our destination. 8940 */ 8941 if (is_fragment == NULL) 8942 goto done; 8943 break; 8944 case IPPROTO_FRAGMENT: 8945 fraghdr = (ip6_frag_t *)whereptr; 8946 nexthdr = fraghdr->ip6f_nxt; 8947 ehdrlen = sizeof (ip6_frag_t); 8948 if (is_fragment != NULL) 8949 *is_fragment = B_TRUE; 8950 /* FALLTHRU */ 8951 default: 8952 goto done; 8953 } 8954 } 8955 8956 done: 8957 return (rv); 8958 } 8959 8960 /* 8961 * ip_source_routed_v6: 8962 * This function is called by redirect code in ip_rput_data_v6 to 8963 * know whether this packet is source routed through this node i.e 8964 * whether this node (router) is part of the journey. This 8965 * function is called under two cases : 8966 * 8967 * case 1 : Routing header was processed by this node and 8968 * ip_process_rthdr replaced ip6_dst with the next hop 8969 * and we are forwarding the packet to the next hop. 8970 * 8971 * case 2 : Routing header was not processed by this node and we 8972 * are just forwarding the packet. 8973 * 8974 * For case (1) we don't want to send redirects. For case(2) we 8975 * want to send redirects. 
8976 */ 8977 static boolean_t 8978 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8979 { 8980 uint8_t nexthdr; 8981 in6_addr_t *addrptr; 8982 ip6_rthdr0_t *rthdr; 8983 uint8_t numaddr; 8984 ip6_hbh_t *hbhhdr; 8985 uint_t ehdrlen; 8986 uint8_t *byteptr; 8987 8988 ip2dbg(("ip_source_routed_v6\n")); 8989 nexthdr = ip6h->ip6_nxt; 8990 ehdrlen = IPV6_HDR_LEN; 8991 8992 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8993 while (nexthdr == IPPROTO_HOPOPTS || 8994 nexthdr == IPPROTO_DSTOPTS) { 8995 byteptr = (uint8_t *)ip6h + ehdrlen; 8996 /* 8997 * Check if we have already processed 8998 * packets or we are just a forwarding 8999 * router which only pulled up msgs up 9000 * to IPV6HDR and one HBH ext header 9001 */ 9002 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9003 ip2dbg(("ip_source_routed_v6: Extension" 9004 " headers not processed\n")); 9005 return (B_FALSE); 9006 } 9007 hbhhdr = (ip6_hbh_t *)byteptr; 9008 nexthdr = hbhhdr->ip6h_nxt; 9009 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 9010 } 9011 switch (nexthdr) { 9012 case IPPROTO_ROUTING: 9013 byteptr = (uint8_t *)ip6h + ehdrlen; 9014 /* 9015 * If for some reason, we haven't pulled up 9016 * the routing hdr data mblk, then we must 9017 * not have processed it at all. So for sure 9018 * we are not part of the source routed journey. 9019 */ 9020 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 9021 ip2dbg(("ip_source_routed_v6: Routing" 9022 " header not processed\n")); 9023 return (B_FALSE); 9024 } 9025 rthdr = (ip6_rthdr0_t *)byteptr; 9026 /* 9027 * Either we are an intermediate router or the 9028 * last hop before destination and we have 9029 * already processed the routing header. 9030 * If segment_left is greater than or equal to zero, 9031 * then we must be the (numaddr - segleft) entry 9032 * of the routing header. Although ip6r0_segleft 9033 * is a unit8_t variable, we still check for zero 9034 * or greater value, if in case the data type 9035 * is changed someday in future. 
9036 */ 9037 if (rthdr->ip6r0_segleft > 0 || 9038 rthdr->ip6r0_segleft == 0) { 9039 ire_t *ire = NULL; 9040 9041 numaddr = rthdr->ip6r0_len / 2; 9042 addrptr = (in6_addr_t *)((char *)rthdr + 9043 sizeof (*rthdr)); 9044 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 9045 if (addrptr != NULL) { 9046 ire = ire_ctable_lookup_v6(addrptr, NULL, 9047 IRE_LOCAL, NULL, ALL_ZONES, NULL, 9048 MATCH_IRE_TYPE, 9049 ipst); 9050 if (ire != NULL) { 9051 ire_refrele(ire); 9052 return (B_TRUE); 9053 } 9054 ip1dbg(("ip_source_routed_v6: No ire found\n")); 9055 } 9056 } 9057 /* FALLTHRU */ 9058 default: 9059 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 9060 return (B_FALSE); 9061 } 9062 } 9063 9064 /* 9065 * ip_wput_v6 -- Packets sent down from transport modules show up here. 9066 * Assumes that the following set of headers appear in the first 9067 * mblk: 9068 * ip6i_t (if present) CAN also appear as a separate mblk. 9069 * ip6_t 9070 * Any extension headers 9071 * TCP/UDP/SCTP header (if present) 9072 * The routine can handle an ICMPv6 header that is not in the first mblk. 9073 * 9074 * The order to determine the outgoing interface is as follows: 9075 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 9076 * 2. If q is an ill queue and (link local or multicast destination) then 9077 * use that ill. 9078 * 3. If IPV6_BOUND_IF has been set use that ill. 9079 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 9080 * look for the best IRE match for the unspecified group to determine 9081 * the ill. 9082 * 5. For unicast: Just do an IRE lookup for the best match. 9083 * 9084 * arg2 is always a queue_t *. 9085 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 9086 * the zoneid. 9087 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
9088 */ 9089 void 9090 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 9091 { 9092 conn_t *connp = NULL; 9093 queue_t *q = (queue_t *)arg2; 9094 ire_t *ire = NULL; 9095 ire_t *sctp_ire = NULL; 9096 ip6_t *ip6h; 9097 in6_addr_t *v6dstp; 9098 ill_t *ill = NULL; 9099 ipif_t *ipif; 9100 ip6i_t *ip6i; 9101 int cksum_request; /* -1 => normal. */ 9102 /* 1 => Skip TCP/UDP/SCTP checksum */ 9103 /* Otherwise contains insert offset for checksum */ 9104 int unspec_src; 9105 boolean_t do_outrequests; /* Increment OutRequests? */ 9106 mib2_ipIfStatsEntry_t *mibptr; 9107 int match_flags = MATCH_IRE_ILL; 9108 mblk_t *first_mp; 9109 boolean_t mctl_present; 9110 ipsec_out_t *io; 9111 boolean_t multirt_need_resolve = B_FALSE; 9112 mblk_t *copy_mp = NULL; 9113 int err = 0; 9114 int ip6i_flags = 0; 9115 zoneid_t zoneid; 9116 ill_t *saved_ill = NULL; 9117 boolean_t conn_lock_held; 9118 boolean_t need_decref = B_FALSE; 9119 ip_stack_t *ipst; 9120 9121 if (q->q_next != NULL) { 9122 ill = (ill_t *)q->q_ptr; 9123 ipst = ill->ill_ipst; 9124 } else { 9125 connp = (conn_t *)arg; 9126 ASSERT(connp != NULL); 9127 ipst = connp->conn_netstack->netstack_ip; 9128 } 9129 9130 /* 9131 * Highest bit in version field is Reachability Confirmation bit 9132 * used by NUD in ip_xmit_v6(). 9133 */ 9134 #ifdef _BIG_ENDIAN 9135 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 9136 #else 9137 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9138 #endif 9139 9140 /* 9141 * M_CTL comes from 6 places 9142 * 9143 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9144 * both V4 and V6 datagrams. 9145 * 9146 * 2) AH/ESP sends down M_CTL after doing their job with both 9147 * V4 and V6 datagrams. 9148 * 9149 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9150 * attached. 
9151 * 9152 * 4) Notifications from an external resolver (for XRESOLV ifs) 9153 * 9154 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9155 * IPsec hardware acceleration support. 9156 * 9157 * 6) TUN_HELLO. 9158 * 9159 * We need to handle (1)'s IPv6 case and (3) here. For the 9160 * IPv4 case in (1), and (2), IPSEC processing has already 9161 * started. The code in ip_wput() already knows how to handle 9162 * continuing IPSEC processing (for IPv4 and IPv6). All other 9163 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9164 * for handling. 9165 */ 9166 first_mp = mp; 9167 mctl_present = B_FALSE; 9168 io = NULL; 9169 9170 /* Multidata transmit? */ 9171 if (DB_TYPE(mp) == M_MULTIDATA) { 9172 /* 9173 * We should never get here, since all Multidata messages 9174 * originating from tcp should have been directed over to 9175 * tcp_multisend() in the first place. 9176 */ 9177 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9178 freemsg(mp); 9179 return; 9180 } else if (DB_TYPE(mp) == M_CTL) { 9181 uint32_t mctltype = 0; 9182 uint32_t mlen = MBLKL(first_mp); 9183 9184 mp = mp->b_cont; 9185 mctl_present = B_TRUE; 9186 io = (ipsec_out_t *)first_mp->b_rptr; 9187 9188 /* 9189 * Validate this M_CTL message. The only three types of 9190 * M_CTL messages we expect to see in this code path are 9191 * ipsec_out_t or ipsec_in_t structures (allocated as 9192 * ipsec_info_t unions), or ipsec_ctl_t structures. 9193 * The ipsec_out_type and ipsec_in_type overlap in the two 9194 * data structures, and they are either set to IPSEC_OUT 9195 * or IPSEC_IN depending on which data structure it is. 9196 * ipsec_ctl_t is an IPSEC_CTL. 9197 * 9198 * All other M_CTL messages are sent to ip_wput_nondata() 9199 * for handling. 
9200 */ 9201 if (mlen >= sizeof (io->ipsec_out_type)) 9202 mctltype = io->ipsec_out_type; 9203 9204 if ((mlen == sizeof (ipsec_ctl_t)) && 9205 (mctltype == IPSEC_CTL)) { 9206 ip_output(arg, first_mp, arg2, caller); 9207 return; 9208 } 9209 9210 if ((mlen < sizeof (ipsec_info_t)) || 9211 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9212 mp == NULL) { 9213 ip_wput_nondata(NULL, q, first_mp, NULL); 9214 return; 9215 } 9216 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9217 if (q->q_next == NULL) { 9218 ip6h = (ip6_t *)mp->b_rptr; 9219 /* 9220 * For a freshly-generated TCP dgram that needs IPV6 9221 * processing, don't call ip_wput immediately. We can 9222 * tell this by the ipsec_out_proc_begin. In-progress 9223 * IPSEC_OUT messages have proc_begin set to TRUE, 9224 * and we want to send all IPSEC_IN messages to 9225 * ip_wput() for IPsec processing or finishing. 9226 */ 9227 if (mctltype == IPSEC_IN || 9228 IPVER(ip6h) != IPV6_VERSION || 9229 io->ipsec_out_proc_begin) { 9230 mibptr = &ipst->ips_ip6_mib; 9231 goto notv6; 9232 } 9233 } 9234 } else if (DB_TYPE(mp) != M_DATA) { 9235 ip_wput_nondata(NULL, q, mp, NULL); 9236 return; 9237 } 9238 9239 ip6h = (ip6_t *)mp->b_rptr; 9240 9241 if (IPVER(ip6h) != IPV6_VERSION) { 9242 mibptr = &ipst->ips_ip6_mib; 9243 goto notv6; 9244 } 9245 9246 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9247 (connp == NULL || !connp->conn_ulp_labeled)) { 9248 cred_t *cr; 9249 9250 if (connp != NULL) { 9251 ASSERT(CONN_CRED(connp) != NULL); 9252 err = tsol_check_label_v6(BEST_CRED(mp, connp), 9253 &mp, connp->conn_mac_exempt, ipst); 9254 } else if ((cr = msg_getcred(mp, NULL)) != NULL) { 9255 err = tsol_check_label_v6(cr, &mp, B_FALSE, ipst); 9256 } 9257 if (mctl_present) 9258 first_mp->b_cont = mp; 9259 else 9260 first_mp = mp; 9261 if (err != 0) { 9262 DTRACE_PROBE3( 9263 tsol_ip_log_drop_checklabel_ip6, char *, 9264 "conn(1), failed to check/update mp(2)", 9265 conn_t, connp, mblk_t, mp); 9266 freemsg(first_mp); 9267 
return; 9268 } 9269 ip6h = (ip6_t *)mp->b_rptr; 9270 } 9271 if (q->q_next != NULL) { 9272 /* 9273 * We don't know if this ill will be used for IPv6 9274 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9275 * ipif_set_values() sets the ill_isv6 flag to true if 9276 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9277 * just drop the packet. 9278 */ 9279 if (!ill->ill_isv6) { 9280 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9281 "ILLF_IPV6 was set\n")); 9282 freemsg(first_mp); 9283 return; 9284 } 9285 /* For uniformity do a refhold */ 9286 mutex_enter(&ill->ill_lock); 9287 if (!ILL_CAN_LOOKUP(ill)) { 9288 mutex_exit(&ill->ill_lock); 9289 freemsg(first_mp); 9290 return; 9291 } 9292 ill_refhold_locked(ill); 9293 mutex_exit(&ill->ill_lock); 9294 mibptr = ill->ill_ip_mib; 9295 9296 ASSERT(mibptr != NULL); 9297 unspec_src = 0; 9298 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9299 do_outrequests = B_FALSE; 9300 zoneid = (zoneid_t)(uintptr_t)arg; 9301 } else { 9302 ASSERT(connp != NULL); 9303 zoneid = connp->conn_zoneid; 9304 9305 /* is queue flow controlled? */ 9306 if ((q->q_first || connp->conn_draining) && 9307 (caller == IP_WPUT)) { 9308 /* 9309 * 1) TCP sends down M_CTL for detached connections. 9310 * 2) AH/ESP sends down M_CTL. 9311 * 9312 * We don't flow control either of the above. Only 9313 * UDP and others are flow controlled for which we 9314 * can't have a M_CTL. 9315 */ 9316 ASSERT(first_mp == mp); 9317 (void) putq(q, mp); 9318 return; 9319 } 9320 mibptr = &ipst->ips_ip6_mib; 9321 unspec_src = connp->conn_unspec_src; 9322 do_outrequests = B_TRUE; 9323 if (mp->b_flag & MSGHASREF) { 9324 mp->b_flag &= ~MSGHASREF; 9325 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9326 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9327 need_decref = B_TRUE; 9328 } 9329 9330 /* 9331 * If there is a policy, try to attach an ipsec_out in 9332 * the front. At the end, first_mp either points to a 9333 * M_DATA message or IPSEC_OUT message linked to a 9334 * M_DATA message. 
We have to do it now as we might 9335 * lose the "conn" if we go through ip_newroute. 9336 */ 9337 if (!mctl_present && 9338 (connp->conn_out_enforce_policy || 9339 connp->conn_latch != NULL)) { 9340 ASSERT(first_mp == mp); 9341 /* XXX Any better way to get the protocol fast ? */ 9342 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9343 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9344 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9345 if (need_decref) 9346 CONN_DEC_REF(connp); 9347 return; 9348 } else { 9349 ASSERT(mp->b_datap->db_type == M_CTL); 9350 first_mp = mp; 9351 mp = mp->b_cont; 9352 mctl_present = B_TRUE; 9353 io = (ipsec_out_t *)first_mp->b_rptr; 9354 } 9355 } 9356 } 9357 9358 /* check for alignment and full IPv6 header */ 9359 if (!OK_32PTR((uchar_t *)ip6h) || 9360 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9361 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9362 if (do_outrequests) 9363 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9364 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9365 freemsg(first_mp); 9366 if (ill != NULL) 9367 ill_refrele(ill); 9368 if (need_decref) 9369 CONN_DEC_REF(connp); 9370 return; 9371 } 9372 v6dstp = &ip6h->ip6_dst; 9373 cksum_request = -1; 9374 ip6i = NULL; 9375 9376 /* 9377 * Once neighbor discovery has completed, ndp_process() will provide 9378 * locally generated packets for which processing can be reattempted. 9379 * In these cases, connp is NULL and the original zone is part of a 9380 * prepended ipsec_out_t. 9381 */ 9382 if (io != NULL) { 9383 /* 9384 * When coming from icmp_input_v6, the zoneid might not match 9385 * for the loopback case, because inside icmp_input_v6 the 9386 * queue_t is a conn queue from the sending side. 9387 */ 9388 zoneid = io->ipsec_out_zoneid; 9389 ASSERT(zoneid != ALL_ZONES); 9390 } 9391 9392 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9393 /* 9394 * This is an ip6i_t header followed by an ip6_hdr. 9395 * Check which fields are set. 
9396 * 9397 * When the packet comes from a transport we should have 9398 * all needed headers in the first mblk. However, when 9399 * going through ip_newroute*_v6 the ip6i might be in 9400 * a separate mblk when we return here. In that case 9401 * we pullup everything to ensure that extension and transport 9402 * headers "stay" in the first mblk. 9403 */ 9404 ip6i = (ip6i_t *)ip6h; 9405 ip6i_flags = ip6i->ip6i_flags; 9406 9407 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9408 ((mp->b_wptr - (uchar_t *)ip6i) >= 9409 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9410 9411 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9412 if (!pullupmsg(mp, -1)) { 9413 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9414 if (do_outrequests) { 9415 BUMP_MIB(mibptr, 9416 ipIfStatsHCOutRequests); 9417 } 9418 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9419 freemsg(first_mp); 9420 if (ill != NULL) 9421 ill_refrele(ill); 9422 if (need_decref) 9423 CONN_DEC_REF(connp); 9424 return; 9425 } 9426 ip6h = (ip6_t *)mp->b_rptr; 9427 v6dstp = &ip6h->ip6_dst; 9428 ip6i = (ip6i_t *)ip6h; 9429 } 9430 ip6h = (ip6_t *)&ip6i[1]; 9431 9432 /* 9433 * Advance rptr past the ip6i_t to get ready for 9434 * transmitting the packet. However, if the packet gets 9435 * passed to ip_newroute*_v6 then rptr is moved back so 9436 * that the ip6i_t header can be inspected when the 9437 * packet comes back here after passing through 9438 * ire_add_then_send. 
9439 */ 9440 mp->b_rptr = (uchar_t *)ip6h; 9441 9442 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9443 ASSERT(ip6i->ip6i_ifindex != 0); 9444 if (ill != NULL) 9445 ill_refrele(ill); 9446 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9447 NULL, NULL, NULL, NULL, ipst); 9448 if (ill == NULL) { 9449 if (do_outrequests) { 9450 BUMP_MIB(mibptr, 9451 ipIfStatsHCOutRequests); 9452 } 9453 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9454 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9455 ip6i->ip6i_ifindex)); 9456 if (need_decref) 9457 CONN_DEC_REF(connp); 9458 freemsg(first_mp); 9459 return; 9460 } 9461 mibptr = ill->ill_ip_mib; 9462 /* 9463 * Preserve the index so that when we return from 9464 * IPSEC processing, we know where to send the packet. 9465 */ 9466 if (mctl_present) { 9467 ASSERT(io != NULL); 9468 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9469 } 9470 } 9471 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9472 cred_t *cr = msg_getcred(mp, NULL); 9473 9474 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9475 if (secpolicy_net_rawaccess(cr) != 0) { 9476 /* 9477 * Use IPCL_ZONEID to honor SO_ALLZONES. 9478 */ 9479 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9480 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9481 NULL, connp != NULL ? 
9482 IPCL_ZONEID(connp) : zoneid, NULL, 9483 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9484 if (ire == NULL) { 9485 if (do_outrequests) 9486 BUMP_MIB(mibptr, 9487 ipIfStatsHCOutRequests); 9488 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9489 ip1dbg(("ip_wput_v6: bad source " 9490 "addr\n")); 9491 freemsg(first_mp); 9492 if (ill != NULL) 9493 ill_refrele(ill); 9494 if (need_decref) 9495 CONN_DEC_REF(connp); 9496 return; 9497 } 9498 ire_refrele(ire); 9499 } 9500 /* No need to verify again when using ip_newroute */ 9501 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9502 } 9503 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9504 /* 9505 * Make sure they match since ip_newroute*_v6 etc might 9506 * (unknown to them) inspect ip6i_nexthop when 9507 * they think they access ip6_dst. 9508 */ 9509 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9510 } 9511 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9512 cksum_request = 1; 9513 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9514 cksum_request = ip6i->ip6i_checksum_off; 9515 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9516 unspec_src = 1; 9517 9518 if (do_outrequests && ill != NULL) { 9519 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9520 do_outrequests = B_FALSE; 9521 } 9522 /* 9523 * Store ip6i_t info that we need after we come back 9524 * from IPSEC processing. 9525 */ 9526 if (mctl_present) { 9527 ASSERT(io != NULL); 9528 io->ipsec_out_unspec_src = unspec_src; 9529 } 9530 } 9531 if (connp != NULL && connp->conn_dontroute) 9532 ip6h->ip6_hops = 1; 9533 9534 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9535 goto ipv6multicast; 9536 9537 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9538 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9539 ASSERT(ill != NULL); 9540 goto send_from_ill; 9541 } 9542 9543 /* 9544 * 2. If q is an ill queue and there's a link-local destination 9545 * then use that ill. 9546 */ 9547 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9548 goto send_from_ill; 9549 9550 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9551 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9552 ill_t *conn_outgoing_ill; 9553 9554 conn_outgoing_ill = conn_get_held_ill(connp, 9555 &connp->conn_outgoing_ill, &err); 9556 if (err == ILL_LOOKUP_FAILED) { 9557 if (ill != NULL) 9558 ill_refrele(ill); 9559 if (need_decref) 9560 CONN_DEC_REF(connp); 9561 freemsg(first_mp); 9562 return; 9563 } 9564 if (ill != NULL) 9565 ill_refrele(ill); 9566 ill = conn_outgoing_ill; 9567 mibptr = ill->ill_ip_mib; 9568 goto send_from_ill; 9569 } 9570 9571 /* 9572 * 4. For unicast: Just do an IRE lookup for the best match. 9573 * If we get here for a link-local address it is rather random 9574 * what interface we pick on a multihomed host. 9575 * *If* there is an IRE_CACHE (and the link-local address 9576 * isn't duplicated on multi links) this will find the IRE_CACHE. 9577 * Otherwise it will use one of the matching IRE_INTERFACE routes 9578 * for the link-local prefix. Hence, applications 9579 * *should* be encouraged to specify an outgoing interface when sending 9580 * to a link local address. 9581 */ 9582 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9583 !connp->conn_fully_bound)) { 9584 /* 9585 * We cache IRE_CACHEs to avoid lookups. We don't do 9586 * this for the tcp global queue and listen end point 9587 * as it does not really have a real destination to 9588 * talk to. 9589 */ 9590 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9591 ipst); 9592 } else { 9593 /* 9594 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9595 * grab a lock here to check for CONDEMNED as it is okay 9596 * to send a packet or two with the IRE_CACHE that is going 9597 * away. 9598 */ 9599 mutex_enter(&connp->conn_lock); 9600 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9601 if (ire != NULL && 9602 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9603 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9604 9605 IRE_REFHOLD(ire); 9606 mutex_exit(&connp->conn_lock); 9607 9608 } else { 9609 boolean_t cached = B_FALSE; 9610 9611 connp->conn_ire_cache = NULL; 9612 mutex_exit(&connp->conn_lock); 9613 /* Release the old ire */ 9614 if (ire != NULL && sctp_ire == NULL) 9615 IRE_REFRELE_NOTR(ire); 9616 9617 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9618 msg_getlabel(mp), ipst); 9619 if (ire != NULL) { 9620 IRE_REFHOLD_NOTR(ire); 9621 9622 mutex_enter(&connp->conn_lock); 9623 if (CONN_CACHE_IRE(connp) && 9624 (connp->conn_ire_cache == NULL)) { 9625 rw_enter(&ire->ire_bucket->irb_lock, 9626 RW_READER); 9627 if (!(ire->ire_marks & 9628 IRE_MARK_CONDEMNED)) { 9629 connp->conn_ire_cache = ire; 9630 cached = B_TRUE; 9631 } 9632 rw_exit(&ire->ire_bucket->irb_lock); 9633 } 9634 mutex_exit(&connp->conn_lock); 9635 9636 /* 9637 * We can continue to use the ire but since it 9638 * was not cached, we should drop the extra 9639 * reference. 9640 */ 9641 if (!cached) 9642 IRE_REFRELE_NOTR(ire); 9643 } 9644 } 9645 } 9646 9647 if (ire != NULL) { 9648 if (do_outrequests) { 9649 /* Handle IRE_LOCAL's that might appear here */ 9650 if (ire->ire_type == IRE_CACHE) { 9651 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9652 ill_ip_mib; 9653 } else { 9654 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9655 } 9656 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9657 } 9658 9659 /* 9660 * Check if the ire has the RTF_MULTIRT flag, inherited 9661 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9662 */ 9663 if (ire->ire_flags & RTF_MULTIRT) { 9664 /* 9665 * Force hop limit of multirouted packets if required. 9666 * The hop limit of such packets is bounded by the 9667 * ip_multirt_ttl ndd variable. 9668 * NDP packets must have a hop limit of 255; don't 9669 * change the hop limit in that case. 
9670 */ 9671 if ((ipst->ips_ip_multirt_ttl > 0) && 9672 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9673 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9674 if (ip_debug > 3) { 9675 ip2dbg(("ip_wput_v6: forcing multirt " 9676 "hop limit to %d (was %d) ", 9677 ipst->ips_ip_multirt_ttl, 9678 ip6h->ip6_hops)); 9679 pr_addr_dbg("v6dst %s\n", AF_INET6, 9680 &ire->ire_addr_v6); 9681 } 9682 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9683 } 9684 9685 /* 9686 * We look at this point if there are pending 9687 * unresolved routes. ire_multirt_need_resolve_v6() 9688 * checks in O(n) that all IRE_OFFSUBNET ire 9689 * entries for the packet's destination and 9690 * flagged RTF_MULTIRT are currently resolved. 9691 * If some remain unresolved, we do a copy 9692 * of the current message. It will be used 9693 * to initiate additional route resolutions. 9694 */ 9695 multirt_need_resolve = 9696 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9697 msg_getlabel(first_mp), ipst); 9698 ip2dbg(("ip_wput_v6: ire %p, " 9699 "multirt_need_resolve %d, first_mp %p\n", 9700 (void *)ire, multirt_need_resolve, 9701 (void *)first_mp)); 9702 if (multirt_need_resolve) { 9703 copy_mp = copymsg(first_mp); 9704 if (copy_mp != NULL) { 9705 MULTIRT_DEBUG_TAG(copy_mp); 9706 } 9707 } 9708 } 9709 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9710 connp, caller, ip6i_flags, zoneid); 9711 if (need_decref) { 9712 CONN_DEC_REF(connp); 9713 connp = NULL; 9714 } 9715 IRE_REFRELE(ire); 9716 9717 /* 9718 * Try to resolve another multiroute if 9719 * ire_multirt_need_resolve_v6() deemed it necessary. 9720 * copy_mp will be consumed (sent or freed) by 9721 * ip_newroute_v6(). 
9722 */ 9723 if (copy_mp != NULL) { 9724 if (mctl_present) { 9725 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9726 } else { 9727 ip6h = (ip6_t *)copy_mp->b_rptr; 9728 } 9729 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9730 &ip6h->ip6_src, NULL, zoneid, ipst); 9731 } 9732 if (ill != NULL) 9733 ill_refrele(ill); 9734 return; 9735 } 9736 9737 /* 9738 * No full IRE for this destination. Send it to 9739 * ip_newroute_v6 to see if anything else matches. 9740 * Mark this packet as having originated on this 9741 * machine. 9742 * Update rptr if there was an ip6i_t header. 9743 */ 9744 mp->b_prev = NULL; 9745 mp->b_next = NULL; 9746 if (ip6i != NULL) 9747 mp->b_rptr -= sizeof (ip6i_t); 9748 9749 if (unspec_src) { 9750 if (ip6i == NULL) { 9751 /* 9752 * Add ip6i_t header to carry unspec_src 9753 * until the packet comes back in ip_wput_v6. 9754 */ 9755 mp = ip_add_info_v6(mp, NULL, v6dstp); 9756 if (mp == NULL) { 9757 if (do_outrequests) 9758 BUMP_MIB(mibptr, 9759 ipIfStatsHCOutRequests); 9760 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9761 if (mctl_present) 9762 freeb(first_mp); 9763 if (ill != NULL) 9764 ill_refrele(ill); 9765 if (need_decref) 9766 CONN_DEC_REF(connp); 9767 return; 9768 } 9769 ip6i = (ip6i_t *)mp->b_rptr; 9770 9771 if (mctl_present) { 9772 ASSERT(first_mp != mp); 9773 first_mp->b_cont = mp; 9774 } else { 9775 first_mp = mp; 9776 } 9777 9778 if ((mp->b_wptr - (uchar_t *)ip6i) == 9779 sizeof (ip6i_t)) { 9780 /* 9781 * ndp_resolver called from ip_newroute_v6 9782 * expects pulled up message. 
9783 */ 9784 if (!pullupmsg(mp, -1)) { 9785 ip1dbg(("ip_wput_v6: pullupmsg" 9786 " failed\n")); 9787 if (do_outrequests) { 9788 BUMP_MIB(mibptr, 9789 ipIfStatsHCOutRequests); 9790 } 9791 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9792 freemsg(first_mp); 9793 if (ill != NULL) 9794 ill_refrele(ill); 9795 if (need_decref) 9796 CONN_DEC_REF(connp); 9797 return; 9798 } 9799 ip6i = (ip6i_t *)mp->b_rptr; 9800 } 9801 ip6h = (ip6_t *)&ip6i[1]; 9802 v6dstp = &ip6h->ip6_dst; 9803 } 9804 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9805 if (mctl_present) { 9806 ASSERT(io != NULL); 9807 io->ipsec_out_unspec_src = unspec_src; 9808 } 9809 } 9810 if (do_outrequests) 9811 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9812 if (need_decref) 9813 CONN_DEC_REF(connp); 9814 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9815 if (ill != NULL) 9816 ill_refrele(ill); 9817 return; 9818 9819 9820 /* 9821 * Handle multicast packets with or without an conn. 9822 * Assumes that the transports set ip6_hops taking 9823 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9824 * into account. 9825 */ 9826 ipv6multicast: 9827 ip2dbg(("ip_wput_v6: multicast\n")); 9828 9829 /* 9830 * Hold the conn_lock till we refhold the ill of interest that is 9831 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9832 * while holding any locks, postpone the refrele until after the 9833 * conn_lock is dropped. 9834 */ 9835 if (connp != NULL) { 9836 mutex_enter(&connp->conn_lock); 9837 conn_lock_held = B_TRUE; 9838 } else { 9839 conn_lock_held = B_FALSE; 9840 } 9841 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9842 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9843 ASSERT(ill != NULL); 9844 } else if (ill != NULL) { 9845 /* 9846 * 2. If q is an ill queue and (link local or multicast 9847 * destination) then use that ill. 9848 * We don't need the ipif initialization here. 
9849 * This useless assert below is just to prevent lint from 9850 * reporting a null body if statement. 9851 */ 9852 ASSERT(ill != NULL); 9853 } else if (connp != NULL) { 9854 /* 9855 * 3. If IPV6_BOUND_IF has been set use that ill. 9856 * 9857 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9858 * Otherwise look for the best IRE match for the unspecified 9859 * group to determine the ill. 9860 * 9861 * conn_multicast_ill is used for only IPv6 packets. 9862 * conn_multicast_ipif is used for only IPv4 packets. 9863 * Thus a PF_INET6 socket send both IPv4 and IPv6 9864 * multicast packets using different IP*_MULTICAST_IF 9865 * interfaces. 9866 */ 9867 if (connp->conn_outgoing_ill != NULL) { 9868 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9869 if (err == ILL_LOOKUP_FAILED) { 9870 ip1dbg(("ip_output_v6: multicast" 9871 " conn_outgoing_ill no ipif\n")); 9872 multicast_discard: 9873 ASSERT(saved_ill == NULL); 9874 if (conn_lock_held) 9875 mutex_exit(&connp->conn_lock); 9876 if (ill != NULL) 9877 ill_refrele(ill); 9878 freemsg(first_mp); 9879 if (do_outrequests) 9880 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9881 if (need_decref) 9882 CONN_DEC_REF(connp); 9883 return; 9884 } 9885 ill = connp->conn_outgoing_ill; 9886 } else if (connp->conn_multicast_ill != NULL) { 9887 err = ill_check_and_refhold(connp->conn_multicast_ill); 9888 if (err == ILL_LOOKUP_FAILED) { 9889 ip1dbg(("ip_output_v6: multicast" 9890 " conn_multicast_ill no ipif\n")); 9891 goto multicast_discard; 9892 } 9893 ill = connp->conn_multicast_ill; 9894 } else { 9895 mutex_exit(&connp->conn_lock); 9896 conn_lock_held = B_FALSE; 9897 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9898 if (ipif == NULL) { 9899 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9900 goto multicast_discard; 9901 } 9902 /* 9903 * We have a ref to this ipif, so we can safely 9904 * access ipif_ill. 
9905 */ 9906 ill = ipif->ipif_ill; 9907 mutex_enter(&ill->ill_lock); 9908 if (!ILL_CAN_LOOKUP(ill)) { 9909 mutex_exit(&ill->ill_lock); 9910 ipif_refrele(ipif); 9911 ill = NULL; 9912 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9913 goto multicast_discard; 9914 } 9915 ill_refhold_locked(ill); 9916 mutex_exit(&ill->ill_lock); 9917 ipif_refrele(ipif); 9918 /* 9919 * Save binding until IPV6_MULTICAST_IF 9920 * changes it 9921 */ 9922 mutex_enter(&connp->conn_lock); 9923 connp->conn_multicast_ill = ill; 9924 mutex_exit(&connp->conn_lock); 9925 } 9926 } 9927 if (conn_lock_held) 9928 mutex_exit(&connp->conn_lock); 9929 9930 if (saved_ill != NULL) 9931 ill_refrele(saved_ill); 9932 9933 ASSERT(ill != NULL); 9934 /* 9935 * For multicast loopback interfaces replace the multicast address 9936 * with a unicast address for the ire lookup. 9937 */ 9938 if (IS_LOOPBACK(ill)) 9939 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9940 9941 mibptr = ill->ill_ip_mib; 9942 if (do_outrequests) { 9943 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9944 do_outrequests = B_FALSE; 9945 } 9946 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9947 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9948 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9949 9950 /* 9951 * As we may lose the conn by the time we reach ip_wput_ire_v6 9952 * we copy conn_multicast_loop and conn_dontroute on to an 9953 * ipsec_out. In case if this datagram goes out secure, 9954 * we need the ill_index also. Copy that also into the 9955 * ipsec_out. 
9956 */ 9957 if (mctl_present) { 9958 io = (ipsec_out_t *)first_mp->b_rptr; 9959 ASSERT(first_mp->b_datap->db_type == M_CTL); 9960 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9961 } else { 9962 ASSERT(mp == first_mp); 9963 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9964 NULL) { 9965 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9966 freemsg(mp); 9967 if (ill != NULL) 9968 ill_refrele(ill); 9969 if (need_decref) 9970 CONN_DEC_REF(connp); 9971 return; 9972 } 9973 io = (ipsec_out_t *)first_mp->b_rptr; 9974 /* This is not a secure packet */ 9975 io->ipsec_out_secure = B_FALSE; 9976 io->ipsec_out_use_global_policy = B_TRUE; 9977 io->ipsec_out_zoneid = 9978 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9979 first_mp->b_cont = mp; 9980 mctl_present = B_TRUE; 9981 } 9982 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9983 io->ipsec_out_unspec_src = unspec_src; 9984 if (connp != NULL) 9985 io->ipsec_out_dontroute = connp->conn_dontroute; 9986 9987 send_from_ill: 9988 ASSERT(ill != NULL); 9989 ASSERT(mibptr == ill->ill_ip_mib); 9990 9991 if (do_outrequests) { 9992 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9993 do_outrequests = B_FALSE; 9994 } 9995 9996 /* 9997 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 9998 * an underlying interface, IS_UNDER_IPMP() may be true even when 9999 * building IREs that will be used for data traffic. As such, use the 10000 * packet's source address to determine whether the traffic is test 10001 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 10002 * 10003 * Separately, we also need to mark probe packets so that ND can 10004 * process them specially; see the comments in nce_queue_mp_common(). 
10005 */ 10006 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10007 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 10008 if (ip6i == NULL) { 10009 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 10010 if (mctl_present) 10011 freeb(first_mp); 10012 goto discard; 10013 } 10014 10015 if (mctl_present) 10016 first_mp->b_cont = mp; 10017 else 10018 first_mp = mp; 10019 10020 /* ndp_resolver() expects a pulled-up message */ 10021 if (MBLKL(mp) == sizeof (ip6i_t) && 10022 pullupmsg(mp, -1) == 0) { 10023 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 10024 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10025 ill_refrele(ill); 10026 if (need_decref) 10027 CONN_DEC_REF(connp); 10028 return; 10029 } 10030 ip6i = (ip6i_t *)mp->b_rptr; 10031 ip6h = (ip6_t *)&ip6i[1]; 10032 v6dstp = &ip6h->ip6_dst; 10033 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 10034 } 10035 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 10036 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 10037 } 10038 10039 if (io != NULL) 10040 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 10041 10042 /* 10043 * When a specific ill is specified (using IPV6_PKTINFO, 10044 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 10045 * on routing entries (ftable and ctable) that have a matching 10046 * ire->ire_ipif->ipif_ill. Thus this can only be used 10047 * for destinations that are on-link for the specific ill 10048 * and that can appear on multiple links. Thus it is useful 10049 * for multicast destinations, link-local destinations, and 10050 * at some point perhaps for site-local destinations (if the 10051 * node sits at a site boundary). 10052 * We create the cache entries in the regular ctable since 10053 * it can not "confuse" things for other destinations. 10054 * table. 10055 * 10056 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 10057 * It is used only when ire_cache_lookup is used above. 
10058 */ 10059 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 10060 zoneid, msg_getlabel(mp), match_flags, ipst); 10061 if (ire != NULL) { 10062 /* 10063 * Check if the ire has the RTF_MULTIRT flag, inherited 10064 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 10065 */ 10066 if (ire->ire_flags & RTF_MULTIRT) { 10067 /* 10068 * Force hop limit of multirouted packets if required. 10069 * The hop limit of such packets is bounded by the 10070 * ip_multirt_ttl ndd variable. 10071 * NDP packets must have a hop limit of 255; don't 10072 * change the hop limit in that case. 10073 */ 10074 if ((ipst->ips_ip_multirt_ttl > 0) && 10075 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 10076 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 10077 if (ip_debug > 3) { 10078 ip2dbg(("ip_wput_v6: forcing multirt " 10079 "hop limit to %d (was %d) ", 10080 ipst->ips_ip_multirt_ttl, 10081 ip6h->ip6_hops)); 10082 pr_addr_dbg("v6dst %s\n", AF_INET6, 10083 &ire->ire_addr_v6); 10084 } 10085 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 10086 } 10087 10088 /* 10089 * We look at this point if there are pending 10090 * unresolved routes. ire_multirt_need_resolve_v6() 10091 * checks in O(n) that all IRE_OFFSUBNET ire 10092 * entries for the packet's destination and 10093 * flagged RTF_MULTIRT are currently resolved. 10094 * If some remain unresolved, we make a copy 10095 * of the current message. It will be used 10096 * to initiate additional route resolutions. 
10097 */ 10098 multirt_need_resolve = 10099 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 10100 msg_getlabel(first_mp), ipst); 10101 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 10102 "multirt_need_resolve %d, first_mp %p\n", 10103 (void *)ire, multirt_need_resolve, 10104 (void *)first_mp)); 10105 if (multirt_need_resolve) { 10106 copy_mp = copymsg(first_mp); 10107 if (copy_mp != NULL) { 10108 MULTIRT_DEBUG_TAG(copy_mp); 10109 } 10110 } 10111 } 10112 10113 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 10114 ill->ill_name, (void *)ire, 10115 ill->ill_phyint->phyint_ifindex)); 10116 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 10117 connp, caller, ip6i_flags, zoneid); 10118 ire_refrele(ire); 10119 if (need_decref) { 10120 CONN_DEC_REF(connp); 10121 connp = NULL; 10122 } 10123 10124 /* 10125 * Try to resolve another multiroute if 10126 * ire_multirt_need_resolve_v6() deemed it necessary. 10127 * copy_mp will be consumed (sent or freed) by 10128 * ip_newroute_[ipif_]v6(). 10129 */ 10130 if (copy_mp != NULL) { 10131 if (mctl_present) { 10132 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10133 } else { 10134 ip6h = (ip6_t *)copy_mp->b_rptr; 10135 } 10136 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10137 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10138 zoneid, ipst); 10139 if (ipif == NULL) { 10140 ip1dbg(("ip_wput_v6: No ipif for " 10141 "multicast\n")); 10142 MULTIRT_DEBUG_UNTAG(copy_mp); 10143 freemsg(copy_mp); 10144 return; 10145 } 10146 ip_newroute_ipif_v6(q, copy_mp, ipif, 10147 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10148 zoneid); 10149 ipif_refrele(ipif); 10150 } else { 10151 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10152 &ip6h->ip6_src, ill, zoneid, ipst); 10153 } 10154 } 10155 ill_refrele(ill); 10156 return; 10157 } 10158 if (need_decref) { 10159 CONN_DEC_REF(connp); 10160 connp = NULL; 10161 } 10162 10163 /* Update rptr if there was an ip6i_t header. 
*/ 10164 if (ip6i != NULL) 10165 mp->b_rptr -= sizeof (ip6i_t); 10166 if (unspec_src) { 10167 if (ip6i == NULL) { 10168 /* 10169 * Add ip6i_t header to carry unspec_src 10170 * until the packet comes back in ip_wput_v6. 10171 */ 10172 if (mctl_present) { 10173 first_mp->b_cont = 10174 ip_add_info_v6(mp, NULL, v6dstp); 10175 mp = first_mp->b_cont; 10176 if (mp == NULL) 10177 freeb(first_mp); 10178 } else { 10179 first_mp = mp = ip_add_info_v6(mp, NULL, 10180 v6dstp); 10181 } 10182 if (mp == NULL) { 10183 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10184 ill_refrele(ill); 10185 return; 10186 } 10187 ip6i = (ip6i_t *)mp->b_rptr; 10188 if ((mp->b_wptr - (uchar_t *)ip6i) == 10189 sizeof (ip6i_t)) { 10190 /* 10191 * ndp_resolver called from ip_newroute_v6 10192 * expects a pulled up message. 10193 */ 10194 if (!pullupmsg(mp, -1)) { 10195 ip1dbg(("ip_wput_v6: pullupmsg" 10196 " failed\n")); 10197 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10198 freemsg(first_mp); 10199 return; 10200 } 10201 ip6i = (ip6i_t *)mp->b_rptr; 10202 } 10203 ip6h = (ip6_t *)&ip6i[1]; 10204 v6dstp = &ip6h->ip6_dst; 10205 } 10206 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10207 if (mctl_present) { 10208 ASSERT(io != NULL); 10209 io->ipsec_out_unspec_src = unspec_src; 10210 } 10211 } 10212 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10213 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10214 &ip6h->ip6_src, unspec_src, zoneid); 10215 } else { 10216 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10217 zoneid, ipst); 10218 } 10219 ill_refrele(ill); 10220 return; 10221 10222 notv6: 10223 /* FIXME?: assume the caller calls the right version of ip_output? */ 10224 if (q->q_next == NULL) { 10225 connp = Q_TO_CONN(q); 10226 10227 /* 10228 * We can change conn_send for all types of conn, even 10229 * though only TCP uses it right now. 10230 * FIXME: sctp could use conn_send but doesn't currently. 
10231 */ 10232 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10233 } 10234 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10235 (void) ip_output(arg, first_mp, arg2, caller); 10236 if (ill != NULL) 10237 ill_refrele(ill); 10238 } 10239 10240 /* 10241 * If this is a conn_t queue, then we pass in the conn. This includes the 10242 * zoneid. 10243 * Otherwise, this is a message for an ill_t queue, 10244 * in which case we use the global zoneid since those are all part of 10245 * the global zone. 10246 */ 10247 void 10248 ip_wput_v6(queue_t *q, mblk_t *mp) 10249 { 10250 if (CONN_Q(q)) 10251 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10252 else 10253 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10254 } 10255 10256 /* 10257 * NULL send-to queue - packet is to be delivered locally. 10258 */ 10259 void 10260 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10261 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10262 { 10263 uint32_t ports; 10264 mblk_t *mp = first_mp, *first_mp1; 10265 boolean_t mctl_present; 10266 uint8_t nexthdr; 10267 uint16_t hdr_length; 10268 ipsec_out_t *io; 10269 mib2_ipIfStatsEntry_t *mibptr; 10270 ilm_t *ilm; 10271 uint_t nexthdr_offset; 10272 ip_stack_t *ipst = ill->ill_ipst; 10273 10274 if (DB_TYPE(mp) == M_CTL) { 10275 io = (ipsec_out_t *)mp->b_rptr; 10276 if (!io->ipsec_out_secure) { 10277 mp = mp->b_cont; 10278 freeb(first_mp); 10279 first_mp = mp; 10280 mctl_present = B_FALSE; 10281 } else { 10282 mctl_present = B_TRUE; 10283 mp = first_mp->b_cont; 10284 ipsec_out_to_in(first_mp); 10285 } 10286 } else { 10287 mctl_present = B_FALSE; 10288 } 10289 10290 /* 10291 * Remove reachability confirmation bit from version field 10292 * before passing the packet on to any firewall hooks or 10293 * looping back the packet. 
10294 */ 10295 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10296 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10297 10298 DTRACE_PROBE4(ip6__loopback__in__start, 10299 ill_t *, ill, ill_t *, NULL, 10300 ip6_t *, ip6h, mblk_t *, first_mp); 10301 10302 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10303 ipst->ips_ipv6firewall_loopback_in, 10304 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10305 10306 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10307 10308 if (first_mp == NULL) 10309 return; 10310 10311 if (ipst->ips_ipobs_enabled) { 10312 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10313 zoneid_t stackzoneid = netstackid_to_zoneid( 10314 ipst->ips_netstack->netstack_stackid); 10315 10316 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10317 /* 10318 * ::1 is special, as we cannot lookup its zoneid by 10319 * address. For this case, restrict the lookup to the 10320 * source zone. 10321 */ 10322 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10323 lookup_zoneid = zoneid; 10324 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10325 lookup_zoneid); 10326 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10327 IPV6_VERSION, 0, ipst); 10328 } 10329 10330 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10331 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10332 int, 1); 10333 10334 nexthdr = ip6h->ip6_nxt; 10335 mibptr = ill->ill_ip_mib; 10336 10337 /* Fastpath */ 10338 switch (nexthdr) { 10339 case IPPROTO_TCP: 10340 case IPPROTO_UDP: 10341 case IPPROTO_ICMPV6: 10342 case IPPROTO_SCTP: 10343 hdr_length = IPV6_HDR_LEN; 10344 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10345 (uchar_t *)ip6h); 10346 break; 10347 default: { 10348 uint8_t *nexthdrp; 10349 10350 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10351 &hdr_length, &nexthdrp)) { 10352 /* Malformed packet */ 10353 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10354 freemsg(first_mp); 10355 return; 10356 } 10357 nexthdr = *nexthdrp; 10358 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10359 break; 10360 } 10361 } 10362 10363 UPDATE_OB_PKT_COUNT(ire); 10364 ire->ire_last_used_time = lbolt; 10365 10366 switch (nexthdr) { 10367 case IPPROTO_TCP: 10368 if (DB_TYPE(mp) == M_DATA) { 10369 /* 10370 * M_DATA mblk, so init mblk (chain) for 10371 * no struio(). 10372 */ 10373 mblk_t *mp1 = mp; 10374 10375 do { 10376 mp1->b_datap->db_struioflag = 0; 10377 } while ((mp1 = mp1->b_cont) != NULL); 10378 } 10379 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10380 TCP_PORTS_OFFSET); 10381 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10382 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10383 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10384 hdr_length, mctl_present, ire->ire_zoneid); 10385 return; 10386 10387 case IPPROTO_UDP: 10388 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10389 UDP_PORTS_OFFSET); 10390 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10391 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10392 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10393 return; 10394 10395 case IPPROTO_SCTP: 10396 { 10397 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10398 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10399 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10400 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10401 return; 10402 } 10403 case IPPROTO_ICMPV6: { 10404 icmp6_t *icmp6; 10405 10406 /* check for full IPv6+ICMPv6 header */ 10407 if ((mp->b_wptr - mp->b_rptr) < 10408 (hdr_length + ICMP6_MINLEN)) { 10409 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10410 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10411 " failed\n")); 10412 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10413 freemsg(first_mp); 10414 return; 10415 } 10416 ip6h = (ip6_t *)mp->b_rptr; 10417 } 10418 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10419 10420 /* Update output mib stats */ 10421 icmp_update_out_mib_v6(ill, icmp6); 10422 10423 /* Check variable for testing applications */ 10424 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10425 
freemsg(first_mp); 10426 return; 10427 } 10428 /* 10429 * Assume that there is always at least one conn for 10430 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10431 * where there is no conn. 10432 */ 10433 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10434 !IS_LOOPBACK(ill)) { 10435 ilm_walker_t ilw; 10436 10437 /* 10438 * In the multicast case, applications may have 10439 * joined the group from different zones, so we 10440 * need to deliver the packet to each of them. 10441 * Loop through the multicast memberships 10442 * structures (ilm) on the receive ill and send 10443 * a copy of the packet up each matching one. 10444 * However, we don't do this for multicasts sent 10445 * on the loopback interface (PHYI_LOOPBACK flag 10446 * set) as they must stay in the sender's zone. 10447 */ 10448 ilm = ilm_walker_start(&ilw, ill); 10449 for (; ilm != NULL; 10450 ilm = ilm_walker_step(&ilw, ilm)) { 10451 if (!IN6_ARE_ADDR_EQUAL( 10452 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10453 continue; 10454 if ((fanout_flags & 10455 IP_FF_NO_MCAST_LOOP) && 10456 ilm->ilm_zoneid == ire->ire_zoneid) 10457 continue; 10458 if (!ipif_lookup_zoneid( 10459 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10460 IPIF_UP, NULL)) 10461 continue; 10462 10463 first_mp1 = ip_copymsg(first_mp); 10464 if (first_mp1 == NULL) 10465 continue; 10466 icmp_inbound_v6(q, first_mp1, 10467 ilw.ilw_walk_ill, ill, hdr_length, 10468 mctl_present, IP6_NO_IPPOLICY, 10469 ilm->ilm_zoneid, NULL); 10470 } 10471 ilm_walker_finish(&ilw); 10472 } else { 10473 first_mp1 = ip_copymsg(first_mp); 10474 if (first_mp1 != NULL) 10475 icmp_inbound_v6(q, first_mp1, ill, ill, 10476 hdr_length, mctl_present, 10477 IP6_NO_IPPOLICY, ire->ire_zoneid, 10478 NULL); 10479 } 10480 } 10481 /* FALLTHRU */ 10482 default: { 10483 /* 10484 * Handle protocols with which IPv6 is less intimate. 10485 */ 10486 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10487 10488 /* 10489 * Enable sending ICMP for "Unknown" nexthdr 10490 * case. i.e. 
where we did not FALLTHRU from 10491 * IPPROTO_ICMPV6 processing case above. 10492 */ 10493 if (nexthdr != IPPROTO_ICMPV6) 10494 fanout_flags |= IP_FF_SEND_ICMP; 10495 /* 10496 * Note: There can be more than one stream bound 10497 * to a particular protocol. When this is the case, 10498 * each one gets a copy of any incoming packets. 10499 */ 10500 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10501 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10502 mctl_present, ire->ire_zoneid); 10503 return; 10504 } 10505 } 10506 } 10507 10508 /* 10509 * Send packet using IRE. 10510 * Checksumming is controlled by cksum_request: 10511 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10512 * 1 => Skip TCP/UDP/SCTP checksum 10513 * Otherwise => checksum_request contains insert offset for checksum 10514 * 10515 * Assumes that the following set of headers appear in the first 10516 * mblk: 10517 * ip6_t 10518 * Any extension headers 10519 * TCP/UDP/SCTP header (if present) 10520 * The routine can handle an ICMPv6 header that is not in the first mblk. 10521 * 10522 * NOTE : This function does not ire_refrele the ire passed in as the 10523 * argument unlike ip_wput_ire where the REFRELE is done. 10524 * Refer to ip_wput_ire for more on this. 10525 */ 10526 static void 10527 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10528 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10529 { 10530 ip6_t *ip6h; 10531 uint8_t nexthdr; 10532 uint16_t hdr_length; 10533 uint_t reachable = 0x0; 10534 ill_t *ill; 10535 mib2_ipIfStatsEntry_t *mibptr; 10536 mblk_t *first_mp; 10537 boolean_t mctl_present; 10538 ipsec_out_t *io; 10539 boolean_t conn_dontroute; /* conn value for multicast */ 10540 boolean_t conn_multicast_loop; /* conn value for multicast */ 10541 boolean_t multicast_forward; /* Should we forward ? 
*/ 10542 int max_frag; 10543 ip_stack_t *ipst = ire->ire_ipst; 10544 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10545 10546 ill = ire_to_ill(ire); 10547 first_mp = mp; 10548 multicast_forward = B_FALSE; 10549 10550 if (mp->b_datap->db_type != M_CTL) { 10551 ip6h = (ip6_t *)first_mp->b_rptr; 10552 } else { 10553 io = (ipsec_out_t *)first_mp->b_rptr; 10554 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10555 /* 10556 * Grab the zone id now because the M_CTL can be discarded by 10557 * ip_wput_ire_parse_ipsec_out() below. 10558 */ 10559 ASSERT(zoneid == io->ipsec_out_zoneid); 10560 ASSERT(zoneid != ALL_ZONES); 10561 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10562 /* 10563 * For the multicast case, ipsec_out carries conn_dontroute and 10564 * conn_multicast_loop as conn may not be available here. We 10565 * need this for multicast loopback and forwarding which is done 10566 * later in the code. 10567 */ 10568 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10569 conn_dontroute = io->ipsec_out_dontroute; 10570 conn_multicast_loop = io->ipsec_out_multicast_loop; 10571 /* 10572 * If conn_dontroute is not set or conn_multicast_loop 10573 * is set, we need to do forwarding/loopback. For 10574 * datagrams from ip_wput_multicast, conn_dontroute is 10575 * set to B_TRUE and conn_multicast_loop is set to 10576 * B_FALSE so that we neither do forwarding nor 10577 * loopback. 10578 */ 10579 if (!conn_dontroute || conn_multicast_loop) 10580 multicast_forward = B_TRUE; 10581 } 10582 } 10583 10584 /* 10585 * If the sender didn't supply the hop limit and there is a default 10586 * unicast hop limit associated with the output interface, we use 10587 * that if the packet is unicast. Interface specific unicast hop 10588 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10589 */ 10590 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10591 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10592 ip6h->ip6_hops = ill->ill_max_hops; 10593 } 10594 10595 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10596 ire->ire_zoneid != ALL_ZONES) { 10597 /* 10598 * When a zone sends a packet to another zone, we try to deliver 10599 * the packet under the same conditions as if the destination 10600 * was a real node on the network. To do so, we look for a 10601 * matching route in the forwarding table. 10602 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10603 * ip_newroute_v6() does. 10604 * Note that IRE_LOCAL are special, since they are used 10605 * when the zoneid doesn't match in some cases. This means that 10606 * we need to handle ipha_src differently since ire_src_addr 10607 * belongs to the receiving zone instead of the sending zone. 10608 * When ip_restrict_interzone_loopback is set, then 10609 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10610 * for loopback between zones when the logical "Ethernet" would 10611 * have looped them back. 
10612 */ 10613 ire_t *src_ire; 10614 10615 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10616 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10617 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10618 if (src_ire != NULL && 10619 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10620 (!ipst->ips_ip_restrict_interzone_loopback || 10621 ire_local_same_lan(ire, src_ire))) { 10622 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10623 !unspec_src) { 10624 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10625 } 10626 ire_refrele(src_ire); 10627 } else { 10628 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10629 if (src_ire != NULL) { 10630 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10631 ire_refrele(src_ire); 10632 freemsg(first_mp); 10633 return; 10634 } 10635 ire_refrele(src_ire); 10636 } 10637 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10638 /* Failed */ 10639 freemsg(first_mp); 10640 return; 10641 } 10642 icmp_unreachable_v6(q, first_mp, 10643 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10644 zoneid, ipst); 10645 return; 10646 } 10647 } 10648 10649 if (mp->b_datap->db_type == M_CTL || 10650 ipss->ipsec_outbound_v6_policy_present) { 10651 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10652 connp, unspec_src, zoneid); 10653 if (mp == NULL) { 10654 return; 10655 } 10656 } 10657 10658 first_mp = mp; 10659 if (mp->b_datap->db_type == M_CTL) { 10660 io = (ipsec_out_t *)mp->b_rptr; 10661 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10662 mp = mp->b_cont; 10663 mctl_present = B_TRUE; 10664 } else { 10665 mctl_present = B_FALSE; 10666 } 10667 10668 ip6h = (ip6_t *)mp->b_rptr; 10669 nexthdr = ip6h->ip6_nxt; 10670 mibptr = ill->ill_ip_mib; 10671 10672 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10673 ipif_t *ipif; 10674 10675 /* 10676 * Select the source address using ipif_select_source_v6. 
10677 */ 10678 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10679 IPV6_PREFER_SRC_DEFAULT, zoneid); 10680 if (ipif == NULL) { 10681 if (ip_debug > 2) { 10682 /* ip1dbg */ 10683 pr_addr_dbg("ip_wput_ire_v6: no src for " 10684 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10685 printf("through interface %s\n", ill->ill_name); 10686 } 10687 freemsg(first_mp); 10688 return; 10689 } 10690 ip6h->ip6_src = ipif->ipif_v6src_addr; 10691 ipif_refrele(ipif); 10692 } 10693 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10694 if ((connp != NULL && connp->conn_multicast_loop) || 10695 !IS_LOOPBACK(ill)) { 10696 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10697 ALL_ZONES) != NULL) { 10698 mblk_t *nmp; 10699 int fanout_flags = 0; 10700 10701 if (connp != NULL && 10702 !connp->conn_multicast_loop) { 10703 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10704 } 10705 ip1dbg(("ip_wput_ire_v6: " 10706 "Loopback multicast\n")); 10707 nmp = ip_copymsg(first_mp); 10708 if (nmp != NULL) { 10709 ip6_t *nip6h; 10710 mblk_t *mp_ip6h; 10711 10712 if (mctl_present) { 10713 nip6h = (ip6_t *) 10714 nmp->b_cont->b_rptr; 10715 mp_ip6h = nmp->b_cont; 10716 } else { 10717 nip6h = (ip6_t *)nmp->b_rptr; 10718 mp_ip6h = nmp; 10719 } 10720 10721 DTRACE_PROBE4( 10722 ip6__loopback__out__start, 10723 ill_t *, NULL, 10724 ill_t *, ill, 10725 ip6_t *, nip6h, 10726 mblk_t *, nmp); 10727 10728 FW_HOOKS6( 10729 ipst->ips_ip6_loopback_out_event, 10730 ipst->ips_ipv6firewall_loopback_out, 10731 NULL, ill, nip6h, nmp, mp_ip6h, 10732 0, ipst); 10733 10734 DTRACE_PROBE1( 10735 ip6__loopback__out__end, 10736 mblk_t *, nmp); 10737 10738 /* 10739 * DTrace this as ip:::send. A blocked 10740 * packet will fire the send probe, but 10741 * not the receive probe. 
10742 */ 10743 DTRACE_IP7(send, mblk_t *, nmp, 10744 conn_t *, NULL, void_ip_t *, nip6h, 10745 __dtrace_ipsr_ill_t *, ill, 10746 ipha_t *, NULL, ip6_t *, nip6h, 10747 int, 1); 10748 10749 if (nmp != NULL) { 10750 /* 10751 * Deliver locally and to 10752 * every local zone, except 10753 * the sending zone when 10754 * IPV6_MULTICAST_LOOP is 10755 * disabled. 10756 */ 10757 ip_wput_local_v6(RD(q), ill, 10758 nip6h, nmp, ire, 10759 fanout_flags, zoneid); 10760 } 10761 } else { 10762 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10763 ip1dbg(("ip_wput_ire_v6: " 10764 "copymsg failed\n")); 10765 } 10766 } 10767 } 10768 if (ip6h->ip6_hops == 0 || 10769 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10770 IS_LOOPBACK(ill)) { 10771 /* 10772 * Local multicast or just loopback on loopback 10773 * interface. 10774 */ 10775 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10776 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10777 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10778 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10779 freemsg(first_mp); 10780 return; 10781 } 10782 } 10783 10784 if (ire->ire_stq != NULL) { 10785 uint32_t sum; 10786 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10787 ill_phyint->phyint_ifindex; 10788 queue_t *dev_q = ire->ire_stq->q_next; 10789 10790 /* 10791 * non-NULL send-to queue - packet is to be sent 10792 * out an interface. 10793 */ 10794 10795 /* Driver is flow-controlling? */ 10796 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10797 DEV_Q_FLOW_BLOCKED(dev_q)) { 10798 /* 10799 * Queue packet if we have an conn to give back 10800 * pressure. We can't queue packets intended for 10801 * hardware acceleration since we've tossed that 10802 * state already. If the packet is being fed back 10803 * from ire_send_v6, we don't know the position in 10804 * the queue to enqueue the packet and we discard 10805 * the packet. 
10806 */ 10807 if (ipst->ips_ip_output_queue && connp != NULL && 10808 !mctl_present && caller != IRE_SEND) { 10809 if (caller == IP_WSRV) { 10810 idl_tx_list_t *idl_txl; 10811 10812 idl_txl = &ipst->ips_idl_tx_list[0]; 10813 connp->conn_did_putbq = 1; 10814 (void) putbq(connp->conn_wq, mp); 10815 conn_drain_insert(connp, idl_txl); 10816 /* 10817 * caller == IP_WSRV implies we are 10818 * the service thread, and the 10819 * queue is already noenabled. 10820 * The check for canput and 10821 * the putbq is not atomic. 10822 * So we need to check again. 10823 */ 10824 if (canput(dev_q)) 10825 connp->conn_did_putbq = 0; 10826 } else { 10827 (void) putq(connp->conn_wq, mp); 10828 } 10829 return; 10830 } 10831 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10832 freemsg(first_mp); 10833 return; 10834 } 10835 10836 /* 10837 * Look for reachability confirmations from the transport. 10838 */ 10839 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10840 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10841 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10842 if (mctl_present) 10843 io->ipsec_out_reachable = B_TRUE; 10844 } 10845 /* Fastpath */ 10846 switch (nexthdr) { 10847 case IPPROTO_TCP: 10848 case IPPROTO_UDP: 10849 case IPPROTO_ICMPV6: 10850 case IPPROTO_SCTP: 10851 hdr_length = IPV6_HDR_LEN; 10852 break; 10853 default: { 10854 uint8_t *nexthdrp; 10855 10856 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10857 &hdr_length, &nexthdrp)) { 10858 /* Malformed packet */ 10859 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10860 freemsg(first_mp); 10861 return; 10862 } 10863 nexthdr = *nexthdrp; 10864 break; 10865 } 10866 } 10867 10868 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10869 uint16_t *up; 10870 uint16_t *insp; 10871 10872 /* 10873 * The packet header is processed once for all, even 10874 * in the multirouting case. We disable hardware 10875 * checksum if the packet is multirouted, as it will be 10876 * replicated via several interfaces, and not all of 10877 * them may have this capability. 
10878 */ 10879 if (cksum_request == 1 && 10880 !(ire->ire_flags & RTF_MULTIRT)) { 10881 /* Skip the transport checksum */ 10882 goto cksum_done; 10883 } 10884 /* 10885 * Do user-configured raw checksum. 10886 * Compute checksum and insert at offset "cksum_request" 10887 */ 10888 10889 /* check for enough headers for checksum */ 10890 cksum_request += hdr_length; /* offset from rptr */ 10891 if ((mp->b_wptr - mp->b_rptr) < 10892 (cksum_request + sizeof (int16_t))) { 10893 if (!pullupmsg(mp, 10894 cksum_request + sizeof (int16_t))) { 10895 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10896 " failed\n")); 10897 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10898 freemsg(first_mp); 10899 return; 10900 } 10901 ip6h = (ip6_t *)mp->b_rptr; 10902 } 10903 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10904 ASSERT(((uintptr_t)insp & 0x1) == 0); 10905 up = (uint16_t *)&ip6h->ip6_src; 10906 /* 10907 * icmp has placed length and routing 10908 * header adjustment in *insp. 10909 */ 10910 sum = htons(nexthdr) + 10911 up[0] + up[1] + up[2] + up[3] + 10912 up[4] + up[5] + up[6] + up[7] + 10913 up[8] + up[9] + up[10] + up[11] + 10914 up[12] + up[13] + up[14] + up[15]; 10915 sum = (sum & 0xffff) + (sum >> 16); 10916 *insp = IP_CSUM(mp, hdr_length, sum); 10917 } else if (nexthdr == IPPROTO_TCP) { 10918 uint16_t *up; 10919 10920 /* 10921 * Check for full IPv6 header + enough TCP header 10922 * to get at the checksum field. 
10923 */ 10924 if ((mp->b_wptr - mp->b_rptr) < 10925 (hdr_length + TCP_CHECKSUM_OFFSET + 10926 TCP_CHECKSUM_SIZE)) { 10927 if (!pullupmsg(mp, hdr_length + 10928 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10929 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10930 " failed\n")); 10931 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10932 freemsg(first_mp); 10933 return; 10934 } 10935 ip6h = (ip6_t *)mp->b_rptr; 10936 } 10937 10938 up = (uint16_t *)&ip6h->ip6_src; 10939 /* 10940 * Note: The TCP module has stored the length value 10941 * into the tcp checksum field, so we don't 10942 * need to explicitly sum it in here. 10943 */ 10944 sum = up[0] + up[1] + up[2] + up[3] + 10945 up[4] + up[5] + up[6] + up[7] + 10946 up[8] + up[9] + up[10] + up[11] + 10947 up[12] + up[13] + up[14] + up[15]; 10948 10949 /* Fold the initial sum */ 10950 sum = (sum & 0xffff) + (sum >> 16); 10951 10952 up = (uint16_t *)(((uchar_t *)ip6h) + 10953 hdr_length + TCP_CHECKSUM_OFFSET); 10954 10955 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10956 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10957 ire->ire_max_frag, mctl_present, sum); 10958 10959 /* Software checksum? 
*/ 10960 if (DB_CKSUMFLAGS(mp) == 0) { 10961 IP6_STAT(ipst, ip6_out_sw_cksum); 10962 IP6_STAT_UPDATE(ipst, 10963 ip6_tcp_out_sw_cksum_bytes, 10964 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10965 hdr_length); 10966 } 10967 } else if (nexthdr == IPPROTO_UDP) { 10968 uint16_t *up; 10969 10970 /* 10971 * check for full IPv6 header + enough UDP header 10972 * to get at the UDP checksum field 10973 */ 10974 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10975 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10976 if (!pullupmsg(mp, hdr_length + 10977 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10978 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10979 " failed\n")); 10980 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10981 freemsg(first_mp); 10982 return; 10983 } 10984 ip6h = (ip6_t *)mp->b_rptr; 10985 } 10986 up = (uint16_t *)&ip6h->ip6_src; 10987 /* 10988 * Note: The UDP module has stored the length value 10989 * into the udp checksum field, so we don't 10990 * need to explicitly sum it in here. 10991 */ 10992 sum = up[0] + up[1] + up[2] + up[3] + 10993 up[4] + up[5] + up[6] + up[7] + 10994 up[8] + up[9] + up[10] + up[11] + 10995 up[12] + up[13] + up[14] + up[15]; 10996 10997 /* Fold the initial sum */ 10998 sum = (sum & 0xffff) + (sum >> 16); 10999 11000 up = (uint16_t *)(((uchar_t *)ip6h) + 11001 hdr_length + UDP_CHECKSUM_OFFSET); 11002 11003 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 11004 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11005 ire->ire_max_frag, mctl_present, sum); 11006 11007 /* Software checksum? 
*/ 11008 if (DB_CKSUMFLAGS(mp) == 0) { 11009 IP6_STAT(ipst, ip6_out_sw_cksum); 11010 IP6_STAT_UPDATE(ipst, 11011 ip6_udp_out_sw_cksum_bytes, 11012 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 11013 hdr_length); 11014 } 11015 } else if (nexthdr == IPPROTO_ICMPV6) { 11016 uint16_t *up; 11017 icmp6_t *icmp6; 11018 11019 /* check for full IPv6+ICMPv6 header */ 11020 if ((mp->b_wptr - mp->b_rptr) < 11021 (hdr_length + ICMP6_MINLEN)) { 11022 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 11023 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 11024 " failed\n")); 11025 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 11026 freemsg(first_mp); 11027 return; 11028 } 11029 ip6h = (ip6_t *)mp->b_rptr; 11030 } 11031 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 11032 up = (uint16_t *)&ip6h->ip6_src; 11033 /* 11034 * icmp has placed length and routing 11035 * header adjustment in icmp6_cksum. 11036 */ 11037 sum = htons(IPPROTO_ICMPV6) + 11038 up[0] + up[1] + up[2] + up[3] + 11039 up[4] + up[5] + up[6] + up[7] + 11040 up[8] + up[9] + up[10] + up[11] + 11041 up[12] + up[13] + up[14] + up[15]; 11042 sum = (sum & 0xffff) + (sum >> 16); 11043 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 11044 11045 /* Update output mib stats */ 11046 icmp_update_out_mib_v6(ill, icmp6); 11047 } else if (nexthdr == IPPROTO_SCTP) { 11048 sctp_hdr_t *sctph; 11049 11050 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 11051 if (!pullupmsg(mp, hdr_length + 11052 sizeof (*sctph))) { 11053 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 11054 " failed\n")); 11055 BUMP_MIB(ill->ill_ip_mib, 11056 ipIfStatsOutDiscards); 11057 freemsg(mp); 11058 return; 11059 } 11060 ip6h = (ip6_t *)mp->b_rptr; 11061 } 11062 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 11063 sctph->sh_chksum = 0; 11064 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 11065 } 11066 11067 cksum_done: 11068 /* 11069 * We force the insertion of a fragment header using the 11070 * IPH_FRAG_HDR flag in two cases: 11071 * - after reception of an ICMPv6 "packet too 
big" message 11072 * with a MTU < 1280 (cf. RFC 2460 section 5) 11073 * - for multirouted IPv6 packets, so that the receiver can 11074 * discard duplicates according to their fragment identifier 11075 * 11076 * Two flags modifed from the API can modify this behavior. 11077 * The first is IPV6_USE_MIN_MTU. With this API the user 11078 * can specify how to manage PMTUD for unicast and multicast. 11079 * 11080 * IPV6_DONTFRAG disallows fragmentation. 11081 */ 11082 max_frag = ire->ire_max_frag; 11083 switch (IP6I_USE_MIN_MTU_API(flags)) { 11084 case IPV6_USE_MIN_MTU_DEFAULT: 11085 case IPV6_USE_MIN_MTU_UNICAST: 11086 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11087 max_frag = IPV6_MIN_MTU; 11088 } 11089 break; 11090 11091 case IPV6_USE_MIN_MTU_NEVER: 11092 max_frag = IPV6_MIN_MTU; 11093 break; 11094 } 11095 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 11096 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 11097 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 11098 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11099 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11100 return; 11101 } 11102 11103 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 11104 (mp->b_cont ? 
msgdsize(mp) : 11105 mp->b_wptr - (uchar_t *)ip6h)) { 11106 ip0dbg(("Packet length mismatch: %d, %ld\n", 11107 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 11108 msgdsize(mp))); 11109 freemsg(first_mp); 11110 return; 11111 } 11112 /* Do IPSEC processing first */ 11113 if (mctl_present) { 11114 ipsec_out_process(q, first_mp, ire, ill_index); 11115 return; 11116 } 11117 ASSERT(mp->b_prev == NULL); 11118 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 11119 ntohs(ip6h->ip6_plen) + 11120 IPV6_HDR_LEN, max_frag)); 11121 ASSERT(mp == first_mp); 11122 /* Initiate IPPF processing */ 11123 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 11124 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11125 if (mp == NULL) { 11126 return; 11127 } 11128 } 11129 ip_wput_frag_v6(mp, ire, reachable, connp, 11130 caller, max_frag); 11131 return; 11132 } 11133 /* Do IPSEC processing first */ 11134 if (mctl_present) { 11135 int extra_len = ipsec_out_extra_length(first_mp); 11136 11137 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11138 max_frag) { 11139 /* 11140 * IPsec headers will push the packet over the 11141 * MTU limit. Issue an ICMPv6 Packet Too Big 11142 * message for this packet if the upper-layer 11143 * that issued this packet will be able to 11144 * react to the icmp_pkt2big_v6() that we'll 11145 * generate. 11146 */ 11147 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11148 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11149 return; 11150 } 11151 ipsec_out_process(q, first_mp, ire, ill_index); 11152 return; 11153 } 11154 /* 11155 * XXX multicast: add ip_mforward_v6() here. 11156 * Check conn_dontroute 11157 */ 11158 #ifdef lint 11159 /* 11160 * XXX The only purpose of this statement is to avoid lint 11161 * errors. See the above "XXX multicast". When that gets 11162 * fixed, remove this whole #ifdef lint section. 11163 */ 11164 ip3dbg(("multicast forward is %s.\n", 11165 (multicast_forward ? 
"TRUE" : "FALSE"))); 11166 #endif 11167 11168 UPDATE_OB_PKT_COUNT(ire); 11169 ire->ire_last_used_time = lbolt; 11170 ASSERT(mp == first_mp); 11171 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11172 } else { 11173 /* 11174 * DTrace this as ip:::send. A blocked packet will fire the 11175 * send probe, but not the receive probe. 11176 */ 11177 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11178 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11179 NULL, ip6_t *, ip6h, int, 1); 11180 DTRACE_PROBE4(ip6__loopback__out__start, 11181 ill_t *, NULL, ill_t *, ill, 11182 ip6_t *, ip6h, mblk_t *, first_mp); 11183 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11184 ipst->ips_ipv6firewall_loopback_out, 11185 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11186 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11187 if (first_mp != NULL) { 11188 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11189 zoneid); 11190 } 11191 } 11192 } 11193 11194 /* 11195 * Outbound IPv6 fragmentation routine using MDT. 11196 */ 11197 static void 11198 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11199 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11200 { 11201 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11202 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11203 mblk_t *hdr_mp, *md_mp = NULL; 11204 int i1; 11205 multidata_t *mmd; 11206 unsigned char *hdr_ptr, *pld_ptr; 11207 ip_pdescinfo_t pdi; 11208 uint32_t ident; 11209 size_t len; 11210 uint16_t offset; 11211 queue_t *stq = ire->ire_stq; 11212 ill_t *ill = (ill_t *)stq->q_ptr; 11213 ip_stack_t *ipst = ill->ill_ipst; 11214 11215 ASSERT(DB_TYPE(mp) == M_DATA); 11216 ASSERT(MBLKL(mp) > unfragmentable_len); 11217 11218 /* 11219 * Move read ptr past unfragmentable portion, we don't want this part 11220 * of the data in our fragments. 11221 */ 11222 mp->b_rptr += unfragmentable_len; 11223 11224 /* Calculate how many packets we will send out */ 11225 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11226 pkts = (i1 + max_chunk - 1) / max_chunk; 11227 ASSERT(pkts > 1); 11228 11229 /* Allocate a message block which will hold all the IP Headers. */ 11230 wroff = ipst->ips_ip_wroff_extra; 11231 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11232 11233 i1 = pkts * hdr_chunk_len; 11234 /* 11235 * Create the header buffer, Multidata and destination address 11236 * and SAP attribute that should be associated with it. 11237 */ 11238 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11239 ((hdr_mp->b_wptr += i1), 11240 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11241 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11242 freemsg(mp); 11243 if (md_mp == NULL) { 11244 freemsg(hdr_mp); 11245 } else { 11246 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11247 freemsg(md_mp); 11248 } 11249 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11250 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11251 return; 11252 } 11253 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11254 11255 /* 11256 * Add a payload buffer to the Multidata; this operation must not 11257 * fail, or otherwise our logic in this routine is broken. There 11258 * is no memory allocation done by the routine, so any returned 11259 * failure simply tells us that we've done something wrong. 11260 * 11261 * A failure tells us that either we're adding the same payload 11262 * buffer more than once, or we're trying to add more buffers than 11263 * allowed. None of the above cases should happen, and we panic 11264 * because either there's horrible heap corruption, and/or 11265 * programming mistake. 11266 */ 11267 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11268 goto pbuf_panic; 11269 } 11270 11271 hdr_ptr = hdr_mp->b_rptr; 11272 pld_ptr = mp->b_rptr; 11273 11274 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11275 11276 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11277 11278 /* 11279 * len is the total length of the fragmentable data in this 11280 * datagram. 
For each fragment sent, we will decrement len 11281 * by the amount of fragmentable data sent in that fragment 11282 * until len reaches zero. 11283 */ 11284 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11285 11286 offset = 0; 11287 prev_nexthdr_offset += wroff; 11288 11289 while (len != 0) { 11290 size_t mlen; 11291 ip6_t *fip6h; 11292 ip6_frag_t *fraghdr; 11293 int error; 11294 11295 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11296 mlen = MIN(len, max_chunk); 11297 len -= mlen; 11298 11299 fip6h = (ip6_t *)(hdr_ptr + wroff); 11300 ASSERT(OK_32PTR(fip6h)); 11301 bcopy(ip6h, fip6h, unfragmentable_len); 11302 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11303 11304 fip6h->ip6_plen = htons((uint16_t)(mlen + 11305 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11306 11307 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11308 unfragmentable_len); 11309 fraghdr->ip6f_nxt = nexthdr; 11310 fraghdr->ip6f_reserved = 0; 11311 fraghdr->ip6f_offlg = htons(offset) | 11312 ((len != 0) ? IP6F_MORE_FRAG : 0); 11313 fraghdr->ip6f_ident = ident; 11314 11315 /* 11316 * Record offset and size of header and data of the next packet 11317 * in the multidata message. 11318 */ 11319 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11320 unfragmentable_len + sizeof (ip6_frag_t), 0); 11321 PDESC_PLD_INIT(&pdi); 11322 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11323 ASSERT(i1 > 0); 11324 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11325 if (i1 == mlen) { 11326 pld_ptr += mlen; 11327 } else { 11328 i1 = mlen - i1; 11329 mp = mp->b_cont; 11330 ASSERT(mp != NULL); 11331 ASSERT(MBLKL(mp) >= i1); 11332 /* 11333 * Attach the next payload message block to the 11334 * multidata message. 
11335 */ 11336 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11337 goto pbuf_panic; 11338 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11339 pld_ptr = mp->b_rptr + i1; 11340 } 11341 11342 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11343 KM_NOSLEEP)) == NULL) { 11344 /* 11345 * Any failure other than ENOMEM indicates that we 11346 * have passed in invalid pdesc info or parameters 11347 * to mmd_addpdesc, which must not happen. 11348 * 11349 * EINVAL is a result of failure on boundary checks 11350 * against the pdesc info contents. It should not 11351 * happen, and we panic because either there's 11352 * horrible heap corruption, and/or programming 11353 * mistake. 11354 */ 11355 if (error != ENOMEM) { 11356 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11357 "pdesc logic error detected for " 11358 "mmd %p pinfo %p (%d)\n", 11359 (void *)mmd, (void *)&pdi, error); 11360 /* NOTREACHED */ 11361 } 11362 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11363 /* Free unattached payload message blocks as well */ 11364 md_mp->b_cont = mp->b_cont; 11365 goto free_mmd; 11366 } 11367 11368 /* Advance fragment offset. */ 11369 offset += mlen; 11370 11371 /* Advance to location for next header in the buffer. */ 11372 hdr_ptr += hdr_chunk_len; 11373 11374 /* Did we reach the next payload message block? */ 11375 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11376 mp = mp->b_cont; 11377 /* 11378 * Attach the next message block with payload 11379 * data to the multidata message. 
11380 */ 11381 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11382 goto pbuf_panic; 11383 pld_ptr = mp->b_rptr; 11384 } 11385 } 11386 11387 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11388 ASSERT(mp->b_wptr == pld_ptr); 11389 11390 /* Update IP statistics */ 11391 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11392 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11393 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11394 /* 11395 * The ipv6 header len is accounted for in unfragmentable_len so 11396 * when calculating the fragmentation overhead just add the frag 11397 * header len. 11398 */ 11399 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11400 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11401 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11402 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11403 11404 ire->ire_ob_pkt_count += pkts; 11405 if (ire->ire_ipif != NULL) 11406 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11407 11408 ire->ire_last_used_time = lbolt; 11409 /* Send it down */ 11410 putnext(stq, md_mp); 11411 return; 11412 11413 pbuf_panic: 11414 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11415 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11416 pbuf_idx); 11417 /* NOTREACHED */ 11418 } 11419 11420 /* 11421 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11422 * We have not optimized this in terms of number of mblks 11423 * allocated. For instance, for each fragment sent we always allocate a 11424 * mblk to hold the IPv6 header and fragment header. 11425 * 11426 * Assumes that all the extension headers are contained in the first mblk. 11427 * 11428 * The fragment header is inserted after an hop-by-hop options header 11429 * and after [an optional destinations header followed by] a routing header. 11430 * 11431 * NOTE : This function does not ire_refrele the ire passed in as 11432 * the argument. 
11433 */ 11434 void 11435 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11436 int caller, int max_frag) 11437 { 11438 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11439 ip6_t *fip6h; 11440 mblk_t *hmp; 11441 mblk_t *hmp0; 11442 mblk_t *dmp; 11443 ip6_frag_t *fraghdr; 11444 size_t unfragmentable_len; 11445 size_t len; 11446 size_t mlen; 11447 size_t max_chunk; 11448 uint32_t ident; 11449 uint16_t off_flags; 11450 uint16_t offset = 0; 11451 ill_t *ill; 11452 uint8_t nexthdr; 11453 uint_t prev_nexthdr_offset; 11454 uint8_t *ptr; 11455 ip_stack_t *ipst = ire->ire_ipst; 11456 11457 ASSERT(ire->ire_type == IRE_CACHE); 11458 ill = (ill_t *)ire->ire_stq->q_ptr; 11459 11460 if (max_frag <= 0) { 11461 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11462 freemsg(mp); 11463 return; 11464 } 11465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11466 11467 /* 11468 * Determine the length of the unfragmentable portion of this 11469 * datagram. This consists of the IPv6 header, a potential 11470 * hop-by-hop options header, a potential pre-routing-header 11471 * destination options header, and a potential routing header. 
11472 */ 11473 nexthdr = ip6h->ip6_nxt; 11474 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11475 ptr = (uint8_t *)&ip6h[1]; 11476 11477 if (nexthdr == IPPROTO_HOPOPTS) { 11478 ip6_hbh_t *hbh_hdr; 11479 uint_t hdr_len; 11480 11481 hbh_hdr = (ip6_hbh_t *)ptr; 11482 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11483 nexthdr = hbh_hdr->ip6h_nxt; 11484 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11485 - (uint8_t *)ip6h; 11486 ptr += hdr_len; 11487 } 11488 if (nexthdr == IPPROTO_DSTOPTS) { 11489 ip6_dest_t *dest_hdr; 11490 uint_t hdr_len; 11491 11492 dest_hdr = (ip6_dest_t *)ptr; 11493 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11494 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11495 nexthdr = dest_hdr->ip6d_nxt; 11496 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11497 - (uint8_t *)ip6h; 11498 ptr += hdr_len; 11499 } 11500 } 11501 if (nexthdr == IPPROTO_ROUTING) { 11502 ip6_rthdr_t *rthdr; 11503 uint_t hdr_len; 11504 11505 rthdr = (ip6_rthdr_t *)ptr; 11506 nexthdr = rthdr->ip6r_nxt; 11507 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11508 - (uint8_t *)ip6h; 11509 hdr_len = 8 * (rthdr->ip6r_len + 1); 11510 ptr += hdr_len; 11511 } 11512 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11513 11514 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11515 sizeof (ip6_frag_t)) & ~7; 11516 11517 /* Check if we can use MDT to send out the frags. */ 11518 ASSERT(!IRE_IS_LOCAL(ire)); 11519 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11520 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11521 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11522 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11523 nexthdr, prev_nexthdr_offset); 11524 return; 11525 } 11526 11527 /* 11528 * Allocate an mblk with enough room for the link-layer 11529 * header, the unfragmentable part of the datagram, and the 11530 * fragment header. 
This (or a copy) will be used as the 11531 * first mblk for each fragment we send. 11532 */ 11533 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11534 ipst->ips_ip_wroff_extra, mp); 11535 if (hmp == NULL) { 11536 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11537 freemsg(mp); 11538 return; 11539 } 11540 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11541 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11542 11543 fip6h = (ip6_t *)hmp->b_rptr; 11544 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11545 11546 bcopy(ip6h, fip6h, unfragmentable_len); 11547 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11548 11549 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11550 11551 fraghdr->ip6f_nxt = nexthdr; 11552 fraghdr->ip6f_reserved = 0; 11553 fraghdr->ip6f_offlg = 0; 11554 fraghdr->ip6f_ident = htonl(ident); 11555 11556 /* 11557 * len is the total length of the fragmentable data in this 11558 * datagram. For each fragment sent, we will decrement len 11559 * by the amount of fragmentable data sent in that fragment 11560 * until len reaches zero. 11561 */ 11562 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11563 11564 /* 11565 * Move read ptr past unfragmentable portion, we don't want this part 11566 * of the data in our fragments. 
11567 */ 11568 mp->b_rptr += unfragmentable_len; 11569 11570 while (len != 0) { 11571 mlen = MIN(len, max_chunk); 11572 len -= mlen; 11573 if (len != 0) { 11574 /* Not last */ 11575 hmp0 = copyb(hmp); 11576 if (hmp0 == NULL) { 11577 freeb(hmp); 11578 freemsg(mp); 11579 BUMP_MIB(ill->ill_ip_mib, 11580 ipIfStatsOutFragFails); 11581 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11582 return; 11583 } 11584 off_flags = IP6F_MORE_FRAG; 11585 } else { 11586 /* Last fragment */ 11587 hmp0 = hmp; 11588 hmp = NULL; 11589 off_flags = 0; 11590 } 11591 fip6h = (ip6_t *)(hmp0->b_rptr); 11592 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11593 11594 fip6h->ip6_plen = htons((uint16_t)(mlen + 11595 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11596 /* 11597 * Note: Optimization alert. 11598 * In IPv6 (and IPv4) protocol header, Fragment Offset 11599 * ("offset") is 13 bits wide and in 8-octet units. 11600 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11601 * it occupies the most significant 13 bits. 11602 * (least significant 13 bits in IPv4). 11603 * We do not do any shifts here. Not shifting is same effect 11604 * as taking offset value in octet units, dividing by 8 and 11605 * then shifting 3 bits left to line it up in place in proper 11606 * place protocol header. 
11607 */ 11608 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11609 11610 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11611 /* mp has already been freed by ip_carve_mp() */ 11612 if (hmp != NULL) 11613 freeb(hmp); 11614 freeb(hmp0); 11615 ip1dbg(("ip_carve_mp: failed\n")); 11616 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11617 return; 11618 } 11619 hmp0->b_cont = dmp; 11620 /* Get the priority marking, if any */ 11621 hmp0->b_band = dmp->b_band; 11622 UPDATE_OB_PKT_COUNT(ire); 11623 ire->ire_last_used_time = lbolt; 11624 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11625 caller, NULL); 11626 reachable = 0; /* No need to redo state machine in loop */ 11627 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11628 offset += mlen; 11629 } 11630 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11631 } 11632 11633 /* 11634 * Determine if the ill and multicast aspects of that packets 11635 * "matches" the conn. 11636 */ 11637 boolean_t 11638 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11639 zoneid_t zoneid) 11640 { 11641 ill_t *bound_ill; 11642 boolean_t wantpacket; 11643 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11644 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11645 11646 /* 11647 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11648 * unicast and multicast reception to conn_incoming_ill. 11649 * conn_wantpacket_v6 is called both for unicast and 11650 * multicast. 11651 */ 11652 bound_ill = connp->conn_incoming_ill; 11653 if (bound_ill != NULL) { 11654 if (IS_IPMP(bound_ill)) { 11655 if (bound_ill->ill_grp != ill->ill_grp) 11656 return (B_FALSE); 11657 } else { 11658 if (bound_ill != ill) 11659 return (B_FALSE); 11660 } 11661 } 11662 11663 if (connp->conn_multi_router) 11664 return (B_TRUE); 11665 11666 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11667 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11668 /* 11669 * Unicast case: we match the conn only if it's in the specified 11670 * zone. 
11671 */ 11672 return (IPCL_ZONE_MATCH(connp, zoneid)); 11673 } 11674 11675 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11676 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11677 /* 11678 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11679 * disabled, therefore we don't dispatch the multicast packet to 11680 * the sending zone. 11681 */ 11682 return (B_FALSE); 11683 } 11684 11685 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11686 zoneid != ALL_ZONES) { 11687 /* 11688 * Multicast packet on the loopback interface: we only match 11689 * conns who joined the group in the specified zone. 11690 */ 11691 return (B_FALSE); 11692 } 11693 11694 mutex_enter(&connp->conn_lock); 11695 wantpacket = 11696 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11697 mutex_exit(&connp->conn_lock); 11698 11699 return (wantpacket); 11700 } 11701 11702 11703 /* 11704 * Transmit a packet and update any NUD state based on the flags 11705 * XXX need to "recover" any ip6i_t when doing putq! 11706 * 11707 * NOTE : This function does not ire_refrele the ire passed in as the 11708 * argument. 
11709 */ 11710 void 11711 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11712 int caller, ipsec_out_t *io) 11713 { 11714 mblk_t *mp1; 11715 nce_t *nce = ire->ire_nce; 11716 ill_t *ill; 11717 ill_t *out_ill; 11718 uint64_t delta; 11719 ip6_t *ip6h; 11720 queue_t *stq = ire->ire_stq; 11721 ire_t *ire1 = NULL; 11722 ire_t *save_ire = ire; 11723 boolean_t multirt_send = B_FALSE; 11724 mblk_t *next_mp = NULL; 11725 ip_stack_t *ipst = ire->ire_ipst; 11726 boolean_t fp_prepend = B_FALSE; 11727 uint32_t hlen; 11728 11729 ip6h = (ip6_t *)mp->b_rptr; 11730 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11731 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11732 ASSERT(nce != NULL); 11733 ASSERT(mp->b_datap->db_type == M_DATA); 11734 ASSERT(stq != NULL); 11735 11736 ill = ire_to_ill(ire); 11737 if (!ill) { 11738 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11739 freemsg(mp); 11740 return; 11741 } 11742 11743 /* 11744 * If a packet is to be sent out an interface that is a 6to4 11745 * tunnel, outgoing IPv6 packets, with a 6to4 addressed IPv6 11746 * destination, must be checked to have a 6to4 prefix 11747 * (2002:V4ADDR::/48) that is NOT equal to the 6to4 prefix of 11748 * address configured on the sending interface. Otherwise, 11749 * the packet was delivered to this interface in error and the 11750 * packet must be dropped. 
11751 */ 11752 if ((ill->ill_is_6to4tun) && IN6_IS_ADDR_6TO4(&ip6h->ip6_dst)) { 11753 ipif_t *ipif = ill->ill_ipif; 11754 11755 if (IN6_ARE_6TO4_PREFIX_EQUAL(&ipif->ipif_v6lcl_addr, 11756 &ip6h->ip6_dst)) { 11757 if (ip_debug > 2) { 11758 /* ip1dbg */ 11759 pr_addr_dbg("ip_xmit_v6: attempting to " 11760 "send 6to4 addressed IPv6 " 11761 "destination (%s) out the wrong " 11762 "interface.\n", AF_INET6, 11763 &ip6h->ip6_dst); 11764 } 11765 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 11766 freemsg(mp); 11767 return; 11768 } 11769 } 11770 11771 /* Flow-control check has been done in ip_wput_ire_v6 */ 11772 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11773 caller == IP_WSRV || canput(stq->q_next)) { 11774 uint32_t ill_index; 11775 11776 /* 11777 * In most cases, the emission loop below is entered only 11778 * once. Only in the case where the ire holds the 11779 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11780 * flagged ires in the bucket, and send the packet 11781 * through all crossed RTF_MULTIRT routes. 11782 */ 11783 if (ire->ire_flags & RTF_MULTIRT) { 11784 /* 11785 * Multirouting case. The bucket where ire is stored 11786 * probably holds other RTF_MULTIRT flagged ires 11787 * to the destination. In this call to ip_xmit_v6, 11788 * we attempt to send the packet through all 11789 * those ires. Thus, we first ensure that ire is the 11790 * first RTF_MULTIRT ire in the bucket, 11791 * before walking the ire list. 11792 */ 11793 ire_t *first_ire; 11794 irb_t *irb = ire->ire_bucket; 11795 ASSERT(irb != NULL); 11796 multirt_send = B_TRUE; 11797 11798 /* Make sure we do not omit any multiroute ire. 
*/ 11799 IRB_REFHOLD(irb); 11800 for (first_ire = irb->irb_ire; 11801 first_ire != NULL; 11802 first_ire = first_ire->ire_next) { 11803 if ((first_ire->ire_flags & RTF_MULTIRT) && 11804 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11805 &ire->ire_addr_v6)) && 11806 !(first_ire->ire_marks & 11807 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11808 break; 11809 } 11810 11811 if ((first_ire != NULL) && (first_ire != ire)) { 11812 IRE_REFHOLD(first_ire); 11813 /* ire will be released by the caller */ 11814 ire = first_ire; 11815 nce = ire->ire_nce; 11816 stq = ire->ire_stq; 11817 ill = ire_to_ill(ire); 11818 } 11819 IRB_REFRELE(irb); 11820 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11821 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11822 ILL_MDT_USABLE(ill)) { 11823 /* 11824 * This tcp connection was marked as MDT-capable, but 11825 * it has been turned off due changes in the interface. 11826 * Now that the interface support is back, turn it on 11827 * by notifying tcp. We don't directly modify tcp_mdt, 11828 * since we leave all the details to the tcp code that 11829 * knows better. 11830 */ 11831 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11832 11833 if (mdimp == NULL) { 11834 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11835 "connp %p (ENOMEM)\n", (void *)connp)); 11836 } else { 11837 CONN_INC_REF(connp); 11838 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11839 tcp_input, connp, SQ_FILL, 11840 SQTAG_TCP_INPUT_MCTL); 11841 } 11842 } 11843 11844 do { 11845 mblk_t *mp_ip6h; 11846 11847 if (multirt_send) { 11848 irb_t *irb; 11849 /* 11850 * We are in a multiple send case, need to get 11851 * the next ire and make a duplicate of the 11852 * packet. ire1 holds here the next ire to 11853 * process in the bucket. If multirouting is 11854 * expected, any non-RTF_MULTIRT ire that has 11855 * the right destination address is ignored. 
11856 */ 11857 irb = ire->ire_bucket; 11858 ASSERT(irb != NULL); 11859 11860 IRB_REFHOLD(irb); 11861 for (ire1 = ire->ire_next; 11862 ire1 != NULL; 11863 ire1 = ire1->ire_next) { 11864 if (!(ire1->ire_flags & RTF_MULTIRT)) 11865 continue; 11866 if (!IN6_ARE_ADDR_EQUAL( 11867 &ire1->ire_addr_v6, 11868 &ire->ire_addr_v6)) 11869 continue; 11870 if (ire1->ire_marks & 11871 IRE_MARK_CONDEMNED) 11872 continue; 11873 11874 /* Got one */ 11875 if (ire1 != save_ire) { 11876 IRE_REFHOLD(ire1); 11877 } 11878 break; 11879 } 11880 IRB_REFRELE(irb); 11881 11882 if (ire1 != NULL) { 11883 next_mp = copyb(mp); 11884 if ((next_mp == NULL) || 11885 ((mp->b_cont != NULL) && 11886 ((next_mp->b_cont = 11887 dupmsg(mp->b_cont)) == NULL))) { 11888 freemsg(next_mp); 11889 next_mp = NULL; 11890 ire_refrele(ire1); 11891 ire1 = NULL; 11892 } 11893 } 11894 11895 /* Last multiroute ire; don't loop anymore. */ 11896 if (ire1 == NULL) { 11897 multirt_send = B_FALSE; 11898 } 11899 } 11900 11901 ill_index = 11902 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11903 11904 /* Initiate IPPF processing */ 11905 if (IP6_OUT_IPP(flags, ipst)) { 11906 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11907 if (mp == NULL) { 11908 BUMP_MIB(ill->ill_ip_mib, 11909 ipIfStatsOutDiscards); 11910 if (next_mp != NULL) 11911 freemsg(next_mp); 11912 if (ire != save_ire) { 11913 ire_refrele(ire); 11914 } 11915 return; 11916 } 11917 ip6h = (ip6_t *)mp->b_rptr; 11918 } 11919 mp_ip6h = mp; 11920 11921 /* 11922 * Check for fastpath, we need to hold nce_lock to 11923 * prevent fastpath update from chaining nce_fp_mp. 
11924 */ 11925 11926 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11927 mutex_enter(&nce->nce_lock); 11928 if ((mp1 = nce->nce_fp_mp) != NULL) { 11929 uchar_t *rptr; 11930 11931 hlen = MBLKL(mp1); 11932 rptr = mp->b_rptr - hlen; 11933 /* 11934 * make sure there is room for the fastpath 11935 * datalink header 11936 */ 11937 if (rptr < mp->b_datap->db_base) { 11938 mp1 = copyb(mp1); 11939 mutex_exit(&nce->nce_lock); 11940 if (mp1 == NULL) { 11941 BUMP_MIB(ill->ill_ip_mib, 11942 ipIfStatsOutDiscards); 11943 freemsg(mp); 11944 if (next_mp != NULL) 11945 freemsg(next_mp); 11946 if (ire != save_ire) { 11947 ire_refrele(ire); 11948 } 11949 return; 11950 } 11951 mp1->b_cont = mp; 11952 11953 /* Get the priority marking, if any */ 11954 mp1->b_band = mp->b_band; 11955 mp = mp1; 11956 } else { 11957 mp->b_rptr = rptr; 11958 /* 11959 * fastpath - pre-pend datalink 11960 * header 11961 */ 11962 bcopy(mp1->b_rptr, rptr, hlen); 11963 mutex_exit(&nce->nce_lock); 11964 fp_prepend = B_TRUE; 11965 } 11966 } else { 11967 /* 11968 * Get the DL_UNITDATA_REQ. 11969 */ 11970 mp1 = nce->nce_res_mp; 11971 if (mp1 == NULL) { 11972 mutex_exit(&nce->nce_lock); 11973 ip1dbg(("ip_xmit_v6: No resolution " 11974 "block ire = %p\n", (void *)ire)); 11975 freemsg(mp); 11976 if (next_mp != NULL) 11977 freemsg(next_mp); 11978 if (ire != save_ire) { 11979 ire_refrele(ire); 11980 } 11981 return; 11982 } 11983 /* 11984 * Prepend the DL_UNITDATA_REQ. 
11985 */ 11986 mp1 = copyb(mp1); 11987 mutex_exit(&nce->nce_lock); 11988 if (mp1 == NULL) { 11989 BUMP_MIB(ill->ill_ip_mib, 11990 ipIfStatsOutDiscards); 11991 freemsg(mp); 11992 if (next_mp != NULL) 11993 freemsg(next_mp); 11994 if (ire != save_ire) { 11995 ire_refrele(ire); 11996 } 11997 return; 11998 } 11999 mp1->b_cont = mp; 12000 12001 /* Get the priority marking, if any */ 12002 mp1->b_band = mp->b_band; 12003 mp = mp1; 12004 } 12005 12006 out_ill = (ill_t *)stq->q_ptr; 12007 12008 DTRACE_PROBE4(ip6__physical__out__start, 12009 ill_t *, NULL, ill_t *, out_ill, 12010 ip6_t *, ip6h, mblk_t *, mp); 12011 12012 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 12013 ipst->ips_ipv6firewall_physical_out, 12014 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 12015 12016 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 12017 12018 if (mp == NULL) { 12019 if (multirt_send) { 12020 ASSERT(ire1 != NULL); 12021 if (ire != save_ire) { 12022 ire_refrele(ire); 12023 } 12024 /* 12025 * Proceed with the next RTF_MULTIRT 12026 * ire, also set up the send-to queue 12027 * accordingly. 12028 */ 12029 ire = ire1; 12030 ire1 = NULL; 12031 stq = ire->ire_stq; 12032 nce = ire->ire_nce; 12033 ill = ire_to_ill(ire); 12034 mp = next_mp; 12035 next_mp = NULL; 12036 continue; 12037 } else { 12038 ASSERT(next_mp == NULL); 12039 ASSERT(ire1 == NULL); 12040 break; 12041 } 12042 } 12043 12044 if (ipst->ips_ipobs_enabled) { 12045 zoneid_t szone; 12046 12047 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 12048 mp_ip6h, out_ill, ipst, ALL_ZONES); 12049 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 12050 ALL_ZONES, out_ill, IPV6_VERSION, 12051 fp_prepend ? hlen : 0, ipst); 12052 } 12053 12054 /* 12055 * Update ire and MIB counters; for save_ire, this has 12056 * been done by the caller. 
12057 */ 12058 if (ire != save_ire) { 12059 UPDATE_OB_PKT_COUNT(ire); 12060 ire->ire_last_used_time = lbolt; 12061 12062 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 12063 BUMP_MIB(ill->ill_ip_mib, 12064 ipIfStatsHCOutMcastPkts); 12065 UPDATE_MIB(ill->ill_ip_mib, 12066 ipIfStatsHCOutMcastOctets, 12067 ntohs(ip6h->ip6_plen) + 12068 IPV6_HDR_LEN); 12069 } 12070 } 12071 12072 /* 12073 * Send it down. XXX Do we want to flow control AH/ESP 12074 * packets that carry TCP payloads? We don't flow 12075 * control TCP packets, but we should also not 12076 * flow-control TCP packets that have been protected. 12077 * We don't have an easy way to find out if an AH/ESP 12078 * packet was originally TCP or not currently. 12079 */ 12080 if (io == NULL) { 12081 BUMP_MIB(ill->ill_ip_mib, 12082 ipIfStatsHCOutTransmits); 12083 UPDATE_MIB(ill->ill_ip_mib, 12084 ipIfStatsHCOutOctets, 12085 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 12086 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 12087 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 12088 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 12089 int, 0); 12090 12091 putnext(stq, mp); 12092 } else { 12093 /* 12094 * Safety Pup says: make sure this is 12095 * going to the right interface! 
12096 */ 12097 if (io->ipsec_out_capab_ill_index != 12098 ill_index) { 12099 /* IPsec kstats: bump lose counter */ 12100 freemsg(mp1); 12101 } else { 12102 BUMP_MIB(ill->ill_ip_mib, 12103 ipIfStatsHCOutTransmits); 12104 UPDATE_MIB(ill->ill_ip_mib, 12105 ipIfStatsHCOutOctets, 12106 ntohs(ip6h->ip6_plen) + 12107 IPV6_HDR_LEN); 12108 DTRACE_IP7(send, mblk_t *, mp, 12109 conn_t *, NULL, void_ip_t *, ip6h, 12110 __dtrace_ipsr_ill_t *, out_ill, 12111 ipha_t *, NULL, ip6_t *, ip6h, int, 12112 0); 12113 ipsec_hw_putnext(stq, mp); 12114 } 12115 } 12116 12117 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 12118 if (ire != save_ire) { 12119 ire_refrele(ire); 12120 } 12121 if (multirt_send) { 12122 ASSERT(ire1 != NULL); 12123 /* 12124 * Proceed with the next RTF_MULTIRT 12125 * ire, also set up the send-to queue 12126 * accordingly. 12127 */ 12128 ire = ire1; 12129 ire1 = NULL; 12130 stq = ire->ire_stq; 12131 nce = ire->ire_nce; 12132 ill = ire_to_ill(ire); 12133 mp = next_mp; 12134 next_mp = NULL; 12135 continue; 12136 } 12137 ASSERT(next_mp == NULL); 12138 ASSERT(ire1 == NULL); 12139 return; 12140 } 12141 12142 ASSERT(nce->nce_state != ND_INCOMPLETE); 12143 12144 /* 12145 * Check for upper layer advice 12146 */ 12147 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 12148 /* 12149 * It should be o.k. to check the state without 12150 * a lock here, at most we lose an advice. 
12151 */ 12152 nce->nce_last = TICK_TO_MSEC(lbolt64); 12153 if (nce->nce_state != ND_REACHABLE) { 12154 12155 mutex_enter(&nce->nce_lock); 12156 nce->nce_state = ND_REACHABLE; 12157 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12158 mutex_exit(&nce->nce_lock); 12159 (void) untimeout(nce->nce_timeout_id); 12160 if (ip_debug > 2) { 12161 /* ip1dbg */ 12162 pr_addr_dbg("ip_xmit_v6: state" 12163 " for %s changed to" 12164 " REACHABLE\n", AF_INET6, 12165 &ire->ire_addr_v6); 12166 } 12167 } 12168 if (ire != save_ire) { 12169 ire_refrele(ire); 12170 } 12171 if (multirt_send) { 12172 ASSERT(ire1 != NULL); 12173 /* 12174 * Proceed with the next RTF_MULTIRT 12175 * ire, also set up the send-to queue 12176 * accordingly. 12177 */ 12178 ire = ire1; 12179 ire1 = NULL; 12180 stq = ire->ire_stq; 12181 nce = ire->ire_nce; 12182 ill = ire_to_ill(ire); 12183 mp = next_mp; 12184 next_mp = NULL; 12185 continue; 12186 } 12187 ASSERT(next_mp == NULL); 12188 ASSERT(ire1 == NULL); 12189 return; 12190 } 12191 12192 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12193 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12194 " ill_reachable_time = %d \n", delta, 12195 ill->ill_reachable_time)); 12196 if (delta > (uint64_t)ill->ill_reachable_time) { 12197 nce = ire->ire_nce; 12198 mutex_enter(&nce->nce_lock); 12199 switch (nce->nce_state) { 12200 case ND_REACHABLE: 12201 case ND_STALE: 12202 /* 12203 * ND_REACHABLE is identical to 12204 * ND_STALE in this specific case. If 12205 * reachable time has expired for this 12206 * neighbor (delta is greater than 12207 * reachable time), conceptually, the 12208 * neighbor cache is no longer in 12209 * REACHABLE state, but already in 12210 * STALE state. So the correct 12211 * transition here is to ND_DELAY. 
12212 */ 12213 nce->nce_state = ND_DELAY; 12214 mutex_exit(&nce->nce_lock); 12215 NDP_RESTART_TIMER(nce, 12216 ipst->ips_delay_first_probe_time); 12217 if (ip_debug > 3) { 12218 /* ip2dbg */ 12219 pr_addr_dbg("ip_xmit_v6: state" 12220 " for %s changed to" 12221 " DELAY\n", AF_INET6, 12222 &ire->ire_addr_v6); 12223 } 12224 break; 12225 case ND_DELAY: 12226 case ND_PROBE: 12227 mutex_exit(&nce->nce_lock); 12228 /* Timers have already started */ 12229 break; 12230 case ND_UNREACHABLE: 12231 /* 12232 * ndp timer has detected that this nce 12233 * is unreachable and initiated deleting 12234 * this nce and all its associated IREs. 12235 * This is a race where we found the 12236 * ire before it was deleted and have 12237 * just sent out a packet using this 12238 * unreachable nce. 12239 */ 12240 mutex_exit(&nce->nce_lock); 12241 break; 12242 default: 12243 ASSERT(0); 12244 } 12245 } 12246 12247 if (multirt_send) { 12248 ASSERT(ire1 != NULL); 12249 /* 12250 * Proceed with the next RTF_MULTIRT ire, 12251 * Also set up the send-to queue accordingly. 12252 */ 12253 if (ire != save_ire) { 12254 ire_refrele(ire); 12255 } 12256 ire = ire1; 12257 ire1 = NULL; 12258 stq = ire->ire_stq; 12259 nce = ire->ire_nce; 12260 ill = ire_to_ill(ire); 12261 mp = next_mp; 12262 next_mp = NULL; 12263 } 12264 } while (multirt_send); 12265 /* 12266 * In the multirouting case, release the last ire used for 12267 * emission. save_ire will be released by the caller. 12268 */ 12269 if (ire != save_ire) { 12270 ire_refrele(ire); 12271 } 12272 } else { 12273 /* 12274 * Can't apply backpressure, just discard the packet. 12275 */ 12276 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12277 freemsg(mp); 12278 return; 12279 } 12280 } 12281 12282 /* 12283 * pr_addr_dbg function provides the needed buffer space to call 12284 * inet_ntop() function's 3rd argument. This function should be 12285 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12286 * stack buffer space in it's own stack frame. 
This function uses 12287 * a buffer from it's own stack and prints the information. 12288 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12289 * 12290 * Note: This function can call inet_ntop() once. 12291 */ 12292 void 12293 pr_addr_dbg(char *fmt1, int af, const void *addr) 12294 { 12295 char buf[INET6_ADDRSTRLEN]; 12296 12297 if (fmt1 == NULL) { 12298 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12299 return; 12300 } 12301 12302 /* 12303 * This does not compare debug level and just prints 12304 * out. Thus it is the responsibility of the caller 12305 * to check the appropriate debug-level before calling 12306 * this function. 12307 */ 12308 if (ip_debug > 0) { 12309 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12310 } 12311 12312 12313 } 12314 12315 12316 /* 12317 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12318 * if needed and extension headers) that will be needed based on the 12319 * ip6_pkt_t structure passed by the caller. 12320 * 12321 * The returned length does not include the length of the upper level 12322 * protocol (ULP) header. 
12323 */ 12324 int 12325 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12326 { 12327 int len; 12328 12329 len = IPV6_HDR_LEN; 12330 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12331 len += sizeof (ip6i_t); 12332 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12333 ASSERT(ipp->ipp_hopoptslen != 0); 12334 len += ipp->ipp_hopoptslen; 12335 } 12336 if (ipp->ipp_fields & IPPF_RTHDR) { 12337 ASSERT(ipp->ipp_rthdrlen != 0); 12338 len += ipp->ipp_rthdrlen; 12339 } 12340 /* 12341 * En-route destination options 12342 * Only do them if there's a routing header as well 12343 */ 12344 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12345 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12346 ASSERT(ipp->ipp_rtdstoptslen != 0); 12347 len += ipp->ipp_rtdstoptslen; 12348 } 12349 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12350 ASSERT(ipp->ipp_dstoptslen != 0); 12351 len += ipp->ipp_dstoptslen; 12352 } 12353 return (len); 12354 } 12355 12356 /* 12357 * All-purpose routine to build a header chain of an IPv6 header 12358 * followed by any required extension headers and a proto header, 12359 * preceeded (where necessary) by an ip6i_t private header. 12360 * 12361 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12362 * will be filled in appropriately. 12363 * Thus the caller must fill in the rest of the IPv6 header, such as 12364 * traffic class/flowid, source address (if not set here), hoplimit (if not 12365 * set here) and destination address. 12366 * 12367 * The extension headers and ip6i_t header will all be fully filled in. 12368 */ 12369 void 12370 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12371 ip6_pkt_t *ipp, uint8_t protocol) 12372 { 12373 uint8_t *nxthdr_ptr; 12374 uint8_t *cp; 12375 ip6i_t *ip6i; 12376 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12377 12378 /* 12379 * If sending private ip6i_t header down (checksum info, nexthop, 12380 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12381 * then fill it in. (The checksum info will be filled in by icmp). 
12382 */ 12383 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12384 ip6i = (ip6i_t *)ip6h; 12385 ip6h = (ip6_t *)&ip6i[1]; 12386 12387 ip6i->ip6i_flags = 0; 12388 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12389 if (ipp->ipp_fields & IPPF_IFINDEX || 12390 ipp->ipp_fields & IPPF_SCOPE_ID) { 12391 ASSERT(ipp->ipp_ifindex != 0); 12392 ip6i->ip6i_flags |= IP6I_IFINDEX; 12393 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12394 } 12395 if (ipp->ipp_fields & IPPF_ADDR) { 12396 /* 12397 * Enable per-packet source address verification if 12398 * IPV6_PKTINFO specified the source address. 12399 * ip6_src is set in the transport's _wput function. 12400 */ 12401 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12402 &ipp->ipp_addr)); 12403 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12404 } 12405 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12406 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12407 /* 12408 * We need to set this flag so that IP doesn't 12409 * rewrite the IPv6 header's hoplimit with the 12410 * current default value. 12411 */ 12412 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12413 } 12414 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12415 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12416 &ipp->ipp_nexthop)); 12417 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12418 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12419 } 12420 /* 12421 * tell IP this is an ip6i_t private header 12422 */ 12423 ip6i->ip6i_nxt = IPPROTO_RAW; 12424 } 12425 /* Initialize IPv6 header */ 12426 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12427 if (ipp->ipp_fields & IPPF_TCLASS) { 12428 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12429 (ipp->ipp_tclass << 20); 12430 } 12431 if (ipp->ipp_fields & IPPF_ADDR) 12432 ip6h->ip6_src = ipp->ipp_addr; 12433 12434 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12435 cp = (uint8_t *)&ip6h[1]; 12436 /* 12437 * Here's where we have to start stringing together 12438 * any extension headers in the right order: 12439 * Hop-by-hop, destination, routing, and final destination opts. 
12440 */ 12441 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12442 /* Hop-by-hop options */ 12443 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12444 12445 *nxthdr_ptr = IPPROTO_HOPOPTS; 12446 nxthdr_ptr = &hbh->ip6h_nxt; 12447 12448 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12449 cp += ipp->ipp_hopoptslen; 12450 } 12451 /* 12452 * En-route destination options 12453 * Only do them if there's a routing header as well 12454 */ 12455 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12456 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12457 ip6_dest_t *dst = (ip6_dest_t *)cp; 12458 12459 *nxthdr_ptr = IPPROTO_DSTOPTS; 12460 nxthdr_ptr = &dst->ip6d_nxt; 12461 12462 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12463 cp += ipp->ipp_rtdstoptslen; 12464 } 12465 /* 12466 * Routing header next 12467 */ 12468 if (ipp->ipp_fields & IPPF_RTHDR) { 12469 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12470 12471 *nxthdr_ptr = IPPROTO_ROUTING; 12472 nxthdr_ptr = &rt->ip6r_nxt; 12473 12474 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12475 cp += ipp->ipp_rthdrlen; 12476 } 12477 /* 12478 * Do ultimate destination options 12479 */ 12480 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12481 ip6_dest_t *dest = (ip6_dest_t *)cp; 12482 12483 *nxthdr_ptr = IPPROTO_DSTOPTS; 12484 nxthdr_ptr = &dest->ip6d_nxt; 12485 12486 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12487 cp += ipp->ipp_dstoptslen; 12488 } 12489 /* 12490 * Now set the last header pointer to the proto passed in 12491 */ 12492 *nxthdr_ptr = protocol; 12493 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12494 } 12495 12496 /* 12497 * Return a pointer to the routing header extension header 12498 * in the IPv6 header(s) chain passed in. 
12499 * If none found, return NULL 12500 * Assumes that all extension headers are in same mblk as the v6 header 12501 */ 12502 ip6_rthdr_t * 12503 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12504 { 12505 ip6_dest_t *desthdr; 12506 ip6_frag_t *fraghdr; 12507 uint_t hdrlen; 12508 uint8_t nexthdr; 12509 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12510 12511 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12512 return ((ip6_rthdr_t *)ptr); 12513 12514 /* 12515 * The routing header will precede all extension headers 12516 * other than the hop-by-hop and destination options 12517 * extension headers, so if we see anything other than those, 12518 * we're done and didn't find it. 12519 * We could see a destination options header alone but no 12520 * routing header, in which case we'll return NULL as soon as 12521 * we see anything after that. 12522 * Hop-by-hop and destination option headers are identical, 12523 * so we can use either one we want as a template. 12524 */ 12525 nexthdr = ip6h->ip6_nxt; 12526 while (ptr < endptr) { 12527 /* Is there enough left for len + nexthdr? */ 12528 if (ptr + MIN_EHDR_LEN > endptr) 12529 return (NULL); 12530 12531 switch (nexthdr) { 12532 case IPPROTO_HOPOPTS: 12533 case IPPROTO_DSTOPTS: 12534 /* Assumes the headers are identical for hbh and dst */ 12535 desthdr = (ip6_dest_t *)ptr; 12536 hdrlen = 8 * (desthdr->ip6d_len + 1); 12537 nexthdr = desthdr->ip6d_nxt; 12538 break; 12539 12540 case IPPROTO_ROUTING: 12541 return ((ip6_rthdr_t *)ptr); 12542 12543 case IPPROTO_FRAGMENT: 12544 fraghdr = (ip6_frag_t *)ptr; 12545 hdrlen = sizeof (ip6_frag_t); 12546 nexthdr = fraghdr->ip6f_nxt; 12547 break; 12548 12549 default: 12550 return (NULL); 12551 } 12552 ptr += hdrlen; 12553 } 12554 return (NULL); 12555 } 12556 12557 /* 12558 * Called for source-routed packets originating on this node. 
12559 * Manipulates the original routing header by moving every entry up 12560 * one slot, placing the first entry in the v6 header's v6_dst field, 12561 * and placing the ultimate destination in the routing header's last 12562 * slot. 12563 * 12564 * Returns the checksum diference between the ultimate destination 12565 * (last hop in the routing header when the packet is sent) and 12566 * the first hop (ip6_dst when the packet is sent) 12567 */ 12568 /* ARGSUSED2 */ 12569 uint32_t 12570 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12571 { 12572 uint_t numaddr; 12573 uint_t i; 12574 in6_addr_t *addrptr; 12575 in6_addr_t tmp; 12576 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12577 uint32_t cksm; 12578 uint32_t addrsum = 0; 12579 uint16_t *ptr; 12580 12581 /* 12582 * Perform any processing needed for source routing. 12583 * We know that all extension headers will be in the same mblk 12584 * as the IPv6 header. 12585 */ 12586 12587 /* 12588 * If no segments left in header, or the header length field is zero, 12589 * don't move hop addresses around; 12590 * Checksum difference is zero. 12591 */ 12592 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12593 return (0); 12594 12595 ptr = (uint16_t *)&ip6h->ip6_dst; 12596 cksm = 0; 12597 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12598 cksm += ptr[i]; 12599 } 12600 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12601 12602 /* 12603 * Here's where the fun begins - we have to 12604 * move all addresses up one spot, take the 12605 * first hop and make it our first ip6_dst, 12606 * and place the ultimate destination in the 12607 * newly-opened last slot. 
12608 */ 12609 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12610 numaddr = rthdr->ip6r0_len / 2; 12611 tmp = *addrptr; 12612 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12613 *addrptr = addrptr[1]; 12614 } 12615 *addrptr = ip6h->ip6_dst; 12616 ip6h->ip6_dst = tmp; 12617 12618 /* 12619 * From the checksummed ultimate destination subtract the checksummed 12620 * current ip6_dst (the first hop address). Return that number. 12621 * (In the v4 case, the second part of this is done in each routine 12622 * that calls ip_massage_options(). We do it all in this one place 12623 * for v6). 12624 */ 12625 ptr = (uint16_t *)&ip6h->ip6_dst; 12626 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12627 addrsum += ptr[i]; 12628 } 12629 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12630 if ((int)cksm < 0) 12631 cksm--; 12632 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12633 12634 return (cksm); 12635 } 12636 12637 /* 12638 * Propagate a multicast group membership operation (join/leave) (*fn) on 12639 * all interfaces crossed by the related multirt routes. 12640 * The call is considered successful if the operation succeeds 12641 * on at least one interface. 12642 * The function is called if the destination address in the packet to send 12643 * is multirouted. 
12644 */ 12645 int 12646 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12647 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12648 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12649 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12650 { 12651 ire_t *ire_gw; 12652 irb_t *irb; 12653 int index, error = 0; 12654 opt_restart_t *or; 12655 ip_stack_t *ipst = ire->ire_ipst; 12656 12657 irb = ire->ire_bucket; 12658 ASSERT(irb != NULL); 12659 12660 ASSERT(DB_TYPE(first_mp) == M_CTL); 12661 or = (opt_restart_t *)first_mp->b_rptr; 12662 12663 IRB_REFHOLD(irb); 12664 for (; ire != NULL; ire = ire->ire_next) { 12665 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12666 continue; 12667 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12668 continue; 12669 12670 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12671 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12672 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12673 /* No resolver exists for the gateway; skip this ire. */ 12674 if (ire_gw == NULL) 12675 continue; 12676 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12677 /* 12678 * A resolver exists: we can get the interface on which we have 12679 * to apply the operation. 12680 */ 12681 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12682 first_mp); 12683 if (error == 0) 12684 or->or_private = CGTP_MCAST_SUCCESS; 12685 12686 if (ip_debug > 0) { 12687 ulong_t off; 12688 char *ksym; 12689 12690 ksym = kobj_getsymname((uintptr_t)fn, &off); 12691 ip2dbg(("ip_multirt_apply_membership_v6: " 12692 "called %s, multirt group 0x%08x via itf 0x%08x, " 12693 "error %d [success %u]\n", 12694 ksym ? 
ksym : "?", 12695 ntohl(V4_PART_OF_V6((*v6grp))), 12696 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12697 error, or->or_private)); 12698 } 12699 12700 ire_refrele(ire_gw); 12701 if (error == EINPROGRESS) { 12702 IRB_REFRELE(irb); 12703 return (error); 12704 } 12705 } 12706 IRB_REFRELE(irb); 12707 /* 12708 * Consider the call as successful if we succeeded on at least 12709 * one interface. Otherwise, return the last encountered error. 12710 */ 12711 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12712 } 12713 12714 void 12715 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12716 { 12717 kstat_t *ksp; 12718 12719 ip6_stat_t template = { 12720 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12721 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12722 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12723 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12724 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12725 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12726 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12727 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12728 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12729 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12730 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12731 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12732 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12733 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12734 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12735 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12736 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12737 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12738 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12739 }; 12740 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12741 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12742 KSTAT_FLAG_VIRTUAL, stackid); 12743 12744 if (ksp == NULL) 12745 return (NULL); 12746 12747 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12748 ksp->ks_data = (void *)ip6_statisticsp; 12749 ksp->ks_private = (void *)(uintptr_t)stackid; 12750 12751 kstat_install(ksp); 12752 return (ksp); 12753 } 12754 12755 void 12756 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12757 { 12758 if (ksp != NULL) { 12759 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12760 kstat_delete_netstack(ksp, stackid); 12761 } 12762 } 12763 12764 /* 12765 * The following two functions set and get the value for the 12766 * IPV6_SRC_PREFERENCES socket option. 12767 */ 12768 int 12769 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12770 { 12771 /* 12772 * We only support preferences that are covered by 12773 * IPV6_PREFER_SRC_MASK. 12774 */ 12775 if (prefs & ~IPV6_PREFER_SRC_MASK) 12776 return (EINVAL); 12777 12778 /* 12779 * Look for conflicting preferences or default preferences. If 12780 * both bits of a related pair are clear, the application wants the 12781 * system's default value for that pair. Both bits in a pair can't 12782 * be set. 
12783 */ 12784 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12785 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12786 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12787 IPV6_PREFER_SRC_MIPMASK) { 12788 return (EINVAL); 12789 } 12790 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12791 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12792 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12793 IPV6_PREFER_SRC_TMPMASK) { 12794 return (EINVAL); 12795 } 12796 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12797 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12798 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12799 IPV6_PREFER_SRC_CGAMASK) { 12800 return (EINVAL); 12801 } 12802 12803 connp->conn_src_preferences = prefs; 12804 return (0); 12805 } 12806 12807 size_t 12808 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12809 { 12810 *val = connp->conn_src_preferences; 12811 return (sizeof (connp->conn_src_preferences)); 12812 } 12813 12814 int 12815 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12816 { 12817 ire_t *ire; 12818 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12819 12820 /* 12821 * Verify the source address and ifindex. Privileged users can use 12822 * any source address. For ancillary data the source address is 12823 * checked in ip_wput_v6. 
12824 */ 12825 if (pkti->ipi6_ifindex != 0) { 12826 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12827 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12828 rw_exit(&ipst->ips_ill_g_lock); 12829 return (ENXIO); 12830 } 12831 rw_exit(&ipst->ips_ill_g_lock); 12832 } 12833 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12834 secpolicy_net_rawaccess(cr) != 0) { 12835 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12836 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12837 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12838 if (ire != NULL) 12839 ire_refrele(ire); 12840 else 12841 return (ENXIO); 12842 } 12843 return (0); 12844 } 12845 12846 /* 12847 * Get the size of the IP options (including the IP headers size) 12848 * without including the AH header's size. If till_ah is B_FALSE, 12849 * and if AH header is present, dest options beyond AH header will 12850 * also be included in the returned size. 12851 */ 12852 int 12853 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12854 { 12855 ip6_t *ip6h; 12856 uint8_t nexthdr; 12857 uint8_t *whereptr; 12858 ip6_hbh_t *hbhhdr; 12859 ip6_dest_t *dsthdr; 12860 ip6_rthdr_t *rthdr; 12861 int ehdrlen; 12862 int size; 12863 ah_t *ah; 12864 12865 ip6h = (ip6_t *)mp->b_rptr; 12866 size = IPV6_HDR_LEN; 12867 nexthdr = ip6h->ip6_nxt; 12868 whereptr = (uint8_t *)&ip6h[1]; 12869 for (;;) { 12870 /* Assume IP has already stripped it */ 12871 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12872 switch (nexthdr) { 12873 case IPPROTO_HOPOPTS: 12874 hbhhdr = (ip6_hbh_t *)whereptr; 12875 nexthdr = hbhhdr->ip6h_nxt; 12876 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12877 break; 12878 case IPPROTO_DSTOPTS: 12879 dsthdr = (ip6_dest_t *)whereptr; 12880 nexthdr = dsthdr->ip6d_nxt; 12881 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12882 break; 12883 case IPPROTO_ROUTING: 12884 rthdr = (ip6_rthdr_t *)whereptr; 12885 nexthdr = rthdr->ip6r_nxt; 12886 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12887 break; 12888 default : 12889 if (till_ah) { 12890 
ASSERT(nexthdr == IPPROTO_AH); 12891 return (size); 12892 } 12893 /* 12894 * If we don't have a AH header to traverse, 12895 * return now. This happens normally for 12896 * outbound datagrams where we have not inserted 12897 * the AH header. 12898 */ 12899 if (nexthdr != IPPROTO_AH) { 12900 return (size); 12901 } 12902 12903 /* 12904 * We don't include the AH header's size 12905 * to be symmetrical with other cases where 12906 * we either don't have a AH header (outbound) 12907 * or peek into the AH header yet (inbound and 12908 * not pulled up yet). 12909 */ 12910 ah = (ah_t *)whereptr; 12911 nexthdr = ah->ah_nexthdr; 12912 ehdrlen = (ah->ah_length << 2) + 8; 12913 12914 if (nexthdr == IPPROTO_DSTOPTS) { 12915 if (whereptr + ehdrlen >= mp->b_wptr) { 12916 /* 12917 * The destination options header 12918 * is not part of the first mblk. 12919 */ 12920 whereptr = mp->b_cont->b_rptr; 12921 } else { 12922 whereptr += ehdrlen; 12923 } 12924 12925 dsthdr = (ip6_dest_t *)whereptr; 12926 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12927 size += ehdrlen; 12928 } 12929 return (size); 12930 } 12931 whereptr += ehdrlen; 12932 size += ehdrlen; 12933 } 12934 } 12935 12936 /* 12937 * Utility routine that checks if `v6srcp' is a valid address on underlying 12938 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12939 * associated with `v6srcp' on success. NOTE: if this is not called from 12940 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12941 * group during or after this lookup. 
12942 */ 12943 static boolean_t 12944 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12945 { 12946 ipif_t *ipif; 12947 12948 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12949 if (ipif != NULL) { 12950 if (ipifp != NULL) 12951 *ipifp = ipif; 12952 else 12953 ipif_refrele(ipif); 12954 return (B_TRUE); 12955 } 12956 12957 if (ip_debug > 2) { 12958 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12959 "src %s\n", AF_INET6, v6srcp); 12960 } 12961 return (B_FALSE); 12962 } 12963