1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 #include <inet/sadb.h> 92 #include <inet/ipsec_impl.h> 93 #include <inet/iptun/iptun_impl.h> 94 #include <inet/sctp_ip.h> 95 #include <sys/pattr.h> 96 #include <inet/ipclassifier.h> 97 #include <inet/ipsecah.h> 98 #include <inet/rawip_impl.h> 99 #include <inet/rts_impl.h> 100 #include <sys/squeue_impl.h> 101 #include <sys/squeue.h> 102 103 #include <sys/tsol/label.h> 104 #include <sys/tsol/tnet.h> 105 
106 #include <rpc/pmap_prot.h> 107 108 /* Temporary; for CR 6451644 work-around */ 109 #include <sys/ethernet.h> 110 111 extern int ip_squeue_flag; 112 113 /* 114 * Naming conventions: 115 * These rules should be judiciously applied 116 * if there is a need to identify something as IPv6 versus IPv4 117 * IPv6 functions will end with _v6 in the ip module. 118 * IPv6 functions will end with _ipv6 in the transport modules. 119 * IPv6 macros: 120 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 121 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 122 * And then there are ..V4_PART_OF_V6. 123 * The intent is that macros in the ip module end with _V6. 124 * IPv6 global variables will start with ipv6_ 125 * IPv6 structures will start with ipv6 126 * IPv6 defined constants should start with IPV6_ 127 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 128 */ 129 130 /* 131 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 132 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 133 * from IANA. This mechanism will remain in effect until an official 134 * number is obtained. 
135 */ 136 uchar_t ip6opt_ls; 137 138 const in6_addr_t ipv6_all_ones = 139 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 140 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 170 #endif /* _BIG_ENDIAN */ 171 172 #ifdef _BIG_ENDIAN 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 175 #else /* _BIG_ENDIAN */ 176 const in6_addr_t ipv6_solicited_node_mcast = 177 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 178 #endif /* _BIG_ENDIAN */ 179 180 /* Leave room for ip_newroute to tack on the src and target addresses */ 181 #define OK_RESOLVER_MP_V6(mp) \ 182 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 183 184 #define IP6_MBLK_OK 0 185 #define IP6_MBLK_HDR_ERR 1 186 #define IP6_MBLK_LEN_ERR 2 187 188 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 189 
boolean_t, zoneid_t); 190 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 191 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 192 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 193 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 194 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 195 boolean_t, boolean_t, cred_t *); 196 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 197 iulp_t *, ip_stack_t *); 198 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 199 const in6_addr_t *, uint16_t, boolean_t); 200 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 201 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 202 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 203 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 204 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 205 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 206 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 207 uint8_t *, uint_t, uint8_t, ip_stack_t *); 208 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 209 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 210 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 211 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 212 conn_t *, int, int, zoneid_t); 213 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 214 ipif_t **); 215 216 /* 217 * A template for an IPv6 AR_ENTRY_QUERY 218 */ 219 static areq_t ipv6_areq_template = { 220 AR_ENTRY_QUERY, /* cmd */ 221 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 222 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 223 ETHERTYPE_IPV6, /* protocol, from arps perspective */ 224 sizeof (areq_t), /* target addr offset */ 225 IPV6_ADDR_LEN, /* target addr_length */ 226 0, /* flags */ 227 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 228 
IPV6_ADDR_LEN, /* sender addr length */ 229 6, /* xmit_count */ 230 1000, /* (re)xmit_interval in milliseconds */ 231 4 /* max # of requests to buffer */ 232 /* anything else filled in by the code */ 233 }; 234 235 /* 236 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 237 * The message has already been checksummed and if needed, 238 * a copy has been made to be sent any interested ICMP client (conn) 239 * Note that this is different than icmp_inbound() which does the fanout 240 * to conn's as well as local processing of the ICMP packets. 241 * 242 * All error messages are passed to the matching transport stream. 243 * 244 * Zones notes: 245 * The packet is only processed in the context of the specified zone: typically 246 * only this zone will reply to an echo request. This means that the caller must 247 * call icmp_inbound_v6() for each relevant zone. 248 */ 249 static void 250 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 251 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 252 mblk_t *dl_mp) 253 { 254 icmp6_t *icmp6; 255 ip6_t *ip6h; 256 boolean_t interested; 257 in6_addr_t origsrc; 258 mblk_t *first_mp; 259 ipsec_in_t *ii; 260 ip_stack_t *ipst = ill->ill_ipst; 261 262 ASSERT(ill != NULL); 263 first_mp = mp; 264 if (mctl_present) { 265 mp = first_mp->b_cont; 266 ASSERT(mp != NULL); 267 268 ii = (ipsec_in_t *)first_mp->b_rptr; 269 ASSERT(ii->ipsec_in_type == IPSEC_IN); 270 } 271 272 ip6h = (ip6_t *)mp->b_rptr; 273 274 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 275 276 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 277 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 278 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 279 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 280 freemsg(first_mp); 281 return; 282 } 283 ip6h = (ip6_t *)mp->b_rptr; 284 } 285 if (ipst->ips_icmp_accept_clear_messages == 0) { 286 first_mp = ipsec_check_global_policy(first_mp, NULL, 287 NULL, ip6h, 
mctl_present, ipst->ips_netstack); 288 if (first_mp == NULL) 289 return; 290 } 291 292 /* 293 * On a labeled system, we have to check whether the zone itself is 294 * permitted to receive raw traffic. 295 */ 296 if (is_system_labeled()) { 297 if (zoneid == ALL_ZONES) 298 zoneid = tsol_packet_to_zoneid(mp); 299 if (!tsol_can_accept_raw(mp, B_FALSE)) { 300 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 301 zoneid)); 302 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 303 freemsg(first_mp); 304 return; 305 } 306 } 307 308 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 309 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 310 icmp6->icmp6_code)); 311 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 312 313 /* Initiate IPPF processing here */ 314 if (IP6_IN_IPP(flags, ipst)) { 315 316 /* 317 * If the ifindex changes due to SIOCSLIFINDEX 318 * packet may return to IP on the wrong ill. 319 */ 320 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 321 if (mp == NULL) { 322 if (mctl_present) { 323 freeb(first_mp); 324 } 325 return; 326 } 327 } 328 329 switch (icmp6->icmp6_type) { 330 case ICMP6_DST_UNREACH: 331 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 332 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 334 break; 335 336 case ICMP6_TIME_EXCEEDED: 337 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 338 break; 339 340 case ICMP6_PARAM_PROB: 341 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 342 break; 343 344 case ICMP6_PACKET_TOO_BIG: 345 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 346 zoneid); 347 return; 348 case ICMP6_ECHO_REQUEST: 349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 350 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 351 !ipst->ips_ipv6_resp_echo_mcast) 352 break; 353 354 /* 355 * We must have exclusive use of the mblk to convert it to 356 * a response. 357 * If not, we copy it. 
358 */ 359 if (mp->b_datap->db_ref > 1) { 360 mblk_t *mp1; 361 362 mp1 = copymsg(mp); 363 freemsg(mp); 364 if (mp1 == NULL) { 365 BUMP_MIB(ill->ill_icmp6_mib, 366 ipv6IfIcmpInErrors); 367 if (mctl_present) 368 freeb(first_mp); 369 return; 370 } 371 mp = mp1; 372 ip6h = (ip6_t *)mp->b_rptr; 373 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 374 if (mctl_present) 375 first_mp->b_cont = mp; 376 else 377 first_mp = mp; 378 } 379 380 /* 381 * Turn the echo into an echo reply. 382 * Remove any extension headers (do not reverse a source route) 383 * and clear the flow id (keep traffic class for now). 384 */ 385 if (hdr_length != IPV6_HDR_LEN) { 386 int i; 387 388 for (i = 0; i < IPV6_HDR_LEN; i++) 389 mp->b_rptr[hdr_length - i - 1] = 390 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 391 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 392 ip6h = (ip6_t *)mp->b_rptr; 393 ip6h->ip6_nxt = IPPROTO_ICMPV6; 394 hdr_length = IPV6_HDR_LEN; 395 } 396 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 397 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 398 399 ip6h->ip6_plen = 400 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 401 origsrc = ip6h->ip6_src; 402 /* 403 * Reverse the source and destination addresses. 404 * If the return address is a multicast, zero out the source 405 * (ip_wput_v6 will set an address). 406 */ 407 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 408 ip6h->ip6_src = ipv6_all_zeros; 409 ip6h->ip6_dst = origsrc; 410 } else { 411 ip6h->ip6_src = ip6h->ip6_dst; 412 ip6h->ip6_dst = origsrc; 413 } 414 415 /* set the hop limit */ 416 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 417 418 /* 419 * Prepare for checksum by putting icmp length in the icmp 420 * checksum field. The checksum is calculated in ip_wput_v6. 421 */ 422 icmp6->icmp6_cksum = ip6h->ip6_plen; 423 424 if (!mctl_present) { 425 /* 426 * This packet should go out the same way as it 427 * came in i.e in clear. 
To make sure that global 428 * policy will not be applied to this in ip_wput, 429 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 430 */ 431 ASSERT(first_mp == mp); 432 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 433 if (first_mp == NULL) { 434 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 435 freemsg(mp); 436 return; 437 } 438 ii = (ipsec_in_t *)first_mp->b_rptr; 439 440 /* This is not a secure packet */ 441 ii->ipsec_in_secure = B_FALSE; 442 first_mp->b_cont = mp; 443 } 444 ii->ipsec_in_zoneid = zoneid; 445 ASSERT(zoneid != ALL_ZONES); 446 if (!ipsec_in_to_out(first_mp, NULL, ip6h)) { 447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 448 return; 449 } 450 put(WR(q), first_mp); 451 return; 452 453 case ICMP6_ECHO_REPLY: 454 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 455 break; 456 457 case ND_ROUTER_SOLICIT: 458 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 459 break; 460 461 case ND_ROUTER_ADVERT: 462 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 463 break; 464 465 case ND_NEIGHBOR_SOLICIT: 466 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 467 if (mctl_present) 468 freeb(first_mp); 469 /* XXX may wish to pass first_mp up to ndp_input someday. */ 470 ndp_input(inill, mp, dl_mp); 471 return; 472 473 case ND_NEIGHBOR_ADVERT: 474 BUMP_MIB(ill->ill_icmp6_mib, 475 ipv6IfIcmpInNeighborAdvertisements); 476 if (mctl_present) 477 freeb(first_mp); 478 /* XXX may wish to pass first_mp up to ndp_input someday. */ 479 ndp_input(inill, mp, dl_mp); 480 return; 481 482 case ND_REDIRECT: { 483 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 484 485 if (ipst->ips_ipv6_ignore_redirect) 486 break; 487 488 /* 489 * As there is no upper client to deliver, we don't 490 * need the first_mp any more. 
491 */ 492 if (mctl_present) 493 freeb(first_mp); 494 if (!pullupmsg(mp, -1)) { 495 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 496 break; 497 } 498 icmp_redirect_v6(q, mp, ill); 499 return; 500 } 501 502 /* 503 * The next three icmp messages will be handled by MLD. 504 * Pass all valid MLD packets up to any process(es) 505 * listening on a raw ICMP socket. MLD messages are 506 * freed by mld_input function. 507 */ 508 case MLD_LISTENER_QUERY: 509 case MLD_LISTENER_REPORT: 510 case MLD_LISTENER_REDUCTION: 511 if (mctl_present) 512 freeb(first_mp); 513 mld_input(q, mp, ill); 514 return; 515 default: 516 break; 517 } 518 if (interested) { 519 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 520 inill, mctl_present, zoneid); 521 } else { 522 freemsg(first_mp); 523 } 524 } 525 526 /* 527 * Process received IPv6 ICMP Packet too big. 528 * After updating any IRE it does the fanout to any matching transport streams. 529 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 530 */ 531 /* ARGSUSED */ 532 static void 533 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 534 boolean_t mctl_present, zoneid_t zoneid) 535 { 536 ip6_t *ip6h; 537 ip6_t *inner_ip6h; 538 icmp6_t *icmp6; 539 uint16_t hdr_length; 540 uint32_t mtu; 541 ire_t *ire, *first_ire; 542 mblk_t *first_mp; 543 ip_stack_t *ipst = ill->ill_ipst; 544 545 first_mp = mp; 546 if (mctl_present) 547 mp = first_mp->b_cont; 548 /* 549 * We must have exclusive use of the mblk to update the MTU 550 * in the packet. 551 * If not, we copy it. 552 * 553 * If there's an M_CTL present, we know that allocated first_mp 554 * earlier in this function, so we know first_mp has refcnt of one. 
555 */ 556 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 557 if (mp->b_datap->db_ref > 1) { 558 mblk_t *mp1; 559 560 mp1 = copymsg(mp); 561 freemsg(mp); 562 if (mp1 == NULL) { 563 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 564 if (mctl_present) 565 freeb(first_mp); 566 return; 567 } 568 mp = mp1; 569 if (mctl_present) 570 first_mp->b_cont = mp; 571 else 572 first_mp = mp; 573 } 574 ip6h = (ip6_t *)mp->b_rptr; 575 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 576 hdr_length = ip_hdr_length_v6(mp, ip6h); 577 else 578 hdr_length = IPV6_HDR_LEN; 579 580 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 581 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 582 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 583 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 584 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 585 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 586 freemsg(first_mp); 587 return; 588 } 589 ip6h = (ip6_t *)mp->b_rptr; 590 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 591 inner_ip6h = (ip6_t *)&icmp6[1]; 592 } 593 594 /* 595 * For link local destinations matching simply on IRE type is not 596 * sufficient. Same link local addresses for different ILL's is 597 * possible. 
598 */ 599 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 600 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 601 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 602 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 603 604 if (first_ire == NULL) { 605 if (ip_debug > 2) { 606 /* ip1dbg */ 607 pr_addr_dbg("icmp_inbound_too_big_v6:" 608 "no ire for dst %s\n", AF_INET6, 609 &inner_ip6h->ip6_dst); 610 } 611 freemsg(first_mp); 612 return; 613 } 614 615 mtu = ntohl(icmp6->icmp6_mtu); 616 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 617 for (ire = first_ire; ire != NULL && 618 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 619 ire = ire->ire_next) { 620 mutex_enter(&ire->ire_lock); 621 if (mtu < IPV6_MIN_MTU) { 622 ip1dbg(("Received mtu less than IPv6 " 623 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 624 mtu = IPV6_MIN_MTU; 625 /* 626 * If an mtu less than IPv6 min mtu is received, 627 * we must include a fragment header in 628 * subsequent packets. 629 */ 630 ire->ire_frag_flag |= IPH_FRAG_HDR; 631 } 632 ip1dbg(("Received mtu from router: %d\n", mtu)); 633 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 634 if (ire->ire_max_frag == mtu) { 635 /* Decreased it */ 636 ire->ire_marks |= IRE_MARK_PMTU; 637 } 638 /* Record the new max frag size for the ULP. */ 639 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 640 /* 641 * If we need a fragment header in every packet 642 * (above case or multirouting), make sure the 643 * ULP takes it into account when computing the 644 * payload size. 
645 */ 646 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 647 sizeof (ip6_frag_t)); 648 } else { 649 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 650 } 651 mutex_exit(&ire->ire_lock); 652 } 653 rw_exit(&first_ire->ire_bucket->irb_lock); 654 ire_refrele(first_ire); 655 } else { 656 irb_t *irb = NULL; 657 /* 658 * for non-link local destinations we match only on the IRE type 659 */ 660 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 661 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 662 ipst); 663 if (ire == NULL) { 664 if (ip_debug > 2) { 665 /* ip1dbg */ 666 pr_addr_dbg("icmp_inbound_too_big_v6:" 667 "no ire for dst %s\n", 668 AF_INET6, &inner_ip6h->ip6_dst); 669 } 670 freemsg(first_mp); 671 return; 672 } 673 irb = ire->ire_bucket; 674 ire_refrele(ire); 675 rw_enter(&irb->irb_lock, RW_READER); 676 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 677 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 678 &inner_ip6h->ip6_dst)) { 679 mtu = ntohl(icmp6->icmp6_mtu); 680 mutex_enter(&ire->ire_lock); 681 if (mtu < IPV6_MIN_MTU) { 682 ip1dbg(("Received mtu less than IPv6" 683 "min mtu %d: %d\n", 684 IPV6_MIN_MTU, mtu)); 685 mtu = IPV6_MIN_MTU; 686 /* 687 * If an mtu less than IPv6 min mtu is 688 * received, we must include a fragment 689 * header in subsequent packets. 690 */ 691 ire->ire_frag_flag |= IPH_FRAG_HDR; 692 } 693 694 ip1dbg(("Received mtu from router: %d\n", mtu)); 695 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 696 if (ire->ire_max_frag == mtu) { 697 /* Decreased it */ 698 ire->ire_marks |= IRE_MARK_PMTU; 699 } 700 /* Record the new max frag size for the ULP. */ 701 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 702 /* 703 * If we need a fragment header in 704 * every packet (above case or 705 * multirouting), make sure the ULP 706 * takes it into account when computing 707 * the payload size. 
708 */ 709 icmp6->icmp6_mtu = 710 htonl(ire->ire_max_frag - 711 sizeof (ip6_frag_t)); 712 } else { 713 icmp6->icmp6_mtu = 714 htonl(ire->ire_max_frag); 715 } 716 mutex_exit(&ire->ire_lock); 717 } 718 } 719 rw_exit(&irb->irb_lock); 720 } 721 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 722 mctl_present, zoneid); 723 } 724 725 /* 726 * Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a 727 * tunnel consumed the message, and B_FALSE otherwise. 728 */ 729 static boolean_t 730 icmp_inbound_iptun_fanout_v6(mblk_t *first_mp, ip6_t *rip6h, ill_t *ill, 731 ip_stack_t *ipst) 732 { 733 conn_t *connp; 734 735 if ((connp = ipcl_iptun_classify_v6(&rip6h->ip6_src, &rip6h->ip6_dst, 736 ipst)) == NULL) 737 return (B_FALSE); 738 739 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 740 connp->conn_recv(connp, first_mp, NULL); 741 CONN_DEC_REF(connp); 742 return (B_TRUE); 743 } 744 745 /* 746 * Fanout received ICMPv6 error packets to the transports. 747 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
748 */ 749 void 750 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 751 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 752 zoneid_t zoneid) 753 { 754 uint16_t *up; /* Pointer to ports in ULP header */ 755 uint32_t ports; /* reversed ports for fanout */ 756 ip6_t rip6h; /* With reversed addresses */ 757 uint16_t hdr_length; 758 uint8_t *nexthdrp; 759 uint8_t nexthdr; 760 mblk_t *first_mp; 761 ipsec_in_t *ii; 762 tcpha_t *tcpha; 763 conn_t *connp; 764 ip_stack_t *ipst = ill->ill_ipst; 765 766 first_mp = mp; 767 if (mctl_present) { 768 mp = first_mp->b_cont; 769 ASSERT(mp != NULL); 770 771 ii = (ipsec_in_t *)first_mp->b_rptr; 772 ASSERT(ii->ipsec_in_type == IPSEC_IN); 773 } else { 774 ii = NULL; 775 } 776 777 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 778 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 779 780 /* 781 * Need to pullup everything in order to use 782 * ip_hdr_length_nexthdr_v6() 783 */ 784 if (mp->b_cont != NULL) { 785 if (!pullupmsg(mp, -1)) { 786 ip1dbg(("icmp_inbound_error_fanout_v6: " 787 "pullupmsg failed\n")); 788 goto drop_pkt; 789 } 790 ip6h = (ip6_t *)mp->b_rptr; 791 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 792 } 793 794 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 795 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 796 goto drop_pkt; 797 798 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 799 goto drop_pkt; 800 nexthdr = *nexthdrp; 801 802 /* Set message type, must be done after pullups */ 803 mp->b_datap->db_type = M_CTL; 804 805 /* 806 * We need a separate IP header with the source and destination 807 * addresses reversed to do fanout/classification because the ip6h in 808 * the ICMPv6 error is in the form we sent it out. 
809 */ 810 rip6h.ip6_src = ip6h->ip6_dst; 811 rip6h.ip6_dst = ip6h->ip6_src; 812 rip6h.ip6_nxt = nexthdr; 813 814 /* Try to pass the ICMP message to clients who need it */ 815 switch (nexthdr) { 816 case IPPROTO_UDP: { 817 /* 818 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 819 * UDP header to get the port information. 820 */ 821 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 822 mp->b_wptr) { 823 break; 824 } 825 /* Attempt to find a client stream based on port. */ 826 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 827 ((uint16_t *)&ports)[0] = up[1]; 828 ((uint16_t *)&ports)[1] = up[0]; 829 830 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 831 IP6_NO_IPPOLICY, mctl_present, zoneid); 832 return; 833 } 834 case IPPROTO_TCP: { 835 /* 836 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 837 * the TCP header to get the port information. 838 */ 839 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 840 mp->b_wptr) { 841 break; 842 } 843 844 /* 845 * Attempt to find a client stream based on port. 846 * Note that we do a reverse lookup since the header is 847 * in the form we sent it out. 848 */ 849 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 850 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 851 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 852 if (connp == NULL) { 853 goto drop_pkt; 854 } 855 856 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 857 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 858 return; 859 860 } 861 case IPPROTO_SCTP: 862 /* 863 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 864 * the SCTP header to get the port information. 
865 */ 866 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 867 mp->b_wptr) { 868 break; 869 } 870 871 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 872 ((uint16_t *)&ports)[0] = up[1]; 873 ((uint16_t *)&ports)[1] = up[0]; 874 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 875 mctl_present, IP6_NO_IPPOLICY, zoneid); 876 return; 877 case IPPROTO_ESP: 878 case IPPROTO_AH: { 879 int ipsec_rc; 880 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 881 882 /* 883 * We need a IPSEC_IN in the front to fanout to AH/ESP. 884 * We will re-use the IPSEC_IN if it is already present as 885 * AH/ESP will not affect any fields in the IPSEC_IN for 886 * ICMP errors. If there is no IPSEC_IN, allocate a new 887 * one and attach it in the front. 888 */ 889 if (ii != NULL) { 890 /* 891 * ip_fanout_proto_again converts the ICMP errors 892 * that come back from AH/ESP to M_DATA so that 893 * if it is non-AH/ESP and we do a pullupmsg in 894 * this function, it would work. Convert it back 895 * to M_CTL before we send up as this is a ICMP 896 * error. This could have been generated locally or 897 * by some router. Validate the inner IPSEC 898 * headers. 899 * 900 * NOTE : ill_index is used by ip_fanout_proto_again 901 * to locate the ill. 902 */ 903 ASSERT(ill != NULL); 904 ii->ipsec_in_ill_index = 905 ill->ill_phyint->phyint_ifindex; 906 ii->ipsec_in_rill_index = 907 inill->ill_phyint->phyint_ifindex; 908 first_mp->b_cont->b_datap->db_type = M_CTL; 909 } else { 910 /* 911 * IPSEC_IN is not present. We attach a ipsec_in 912 * message and send up to IPSEC for validating 913 * and removing the IPSEC headers. Clear 914 * ipsec_in_secure so that when we return 915 * from IPSEC, we don't mistakenly think that this 916 * is a secure packet came from the network. 917 * 918 * NOTE : ill_index is used by ip_fanout_proto_again 919 * to locate the ill. 
920 */ 921 ASSERT(first_mp == mp); 922 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 923 ASSERT(ill != NULL); 924 if (first_mp == NULL) { 925 freemsg(mp); 926 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 927 return; 928 } 929 ii = (ipsec_in_t *)first_mp->b_rptr; 930 931 /* This is not a secure packet */ 932 ii->ipsec_in_secure = B_FALSE; 933 first_mp->b_cont = mp; 934 mp->b_datap->db_type = M_CTL; 935 ii->ipsec_in_ill_index = 936 ill->ill_phyint->phyint_ifindex; 937 ii->ipsec_in_rill_index = 938 inill->ill_phyint->phyint_ifindex; 939 } 940 941 if (!ipsec_loaded(ipss)) { 942 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 943 return; 944 } 945 946 if (nexthdr == IPPROTO_ESP) 947 ipsec_rc = ipsecesp_icmp_error(first_mp); 948 else 949 ipsec_rc = ipsecah_icmp_error(first_mp); 950 if (ipsec_rc == IPSEC_STATUS_FAILED) 951 return; 952 953 ip_fanout_proto_again(first_mp, ill, inill, NULL); 954 return; 955 } 956 case IPPROTO_ENCAP: 957 case IPPROTO_IPV6: 958 if ((uint8_t *)ip6h + hdr_length + 959 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 960 sizeof (ip6_t)) > mp->b_wptr) { 961 goto drop_pkt; 962 } 963 964 if (nexthdr == IPPROTO_ENCAP || 965 !IN6_ARE_ADDR_EQUAL( 966 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 967 &ip6h->ip6_src) || 968 !IN6_ARE_ADDR_EQUAL( 969 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 970 &ip6h->ip6_dst)) { 971 /* 972 * For tunnels that have used IPsec protection, 973 * we need to adjust the MTU to take into account 974 * the IPsec overhead. 975 */ 976 if (ii != NULL) { 977 icmp6->icmp6_mtu = htonl( 978 ntohl(icmp6->icmp6_mtu) - 979 ipsec_in_extra_length(first_mp)); 980 } 981 } else { 982 /* 983 * Self-encapsulated case. As in the ipv4 case, 984 * we need to strip the 2nd IP header. Since mp 985 * is already pulled-up, we can simply bcopy 986 * the 3rd header + data over the 2nd header. 
987 */ 988 uint16_t unused_len; 989 ip6_t *inner_ip6h = (ip6_t *) 990 ((uchar_t *)ip6h + hdr_length); 991 992 /* 993 * Make sure we don't do recursion more than once. 994 */ 995 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 996 &unused_len, &nexthdrp) || 997 *nexthdrp == IPPROTO_IPV6) { 998 goto drop_pkt; 999 } 1000 1001 /* 1002 * We are about to modify the packet. Make a copy if 1003 * someone else has a reference to it. 1004 */ 1005 if (DB_REF(mp) > 1) { 1006 mblk_t *mp1; 1007 uint16_t icmp6_offset; 1008 1009 mp1 = copymsg(mp); 1010 if (mp1 == NULL) { 1011 goto drop_pkt; 1012 } 1013 icmp6_offset = (uint16_t) 1014 ((uchar_t *)icmp6 - mp->b_rptr); 1015 freemsg(mp); 1016 mp = mp1; 1017 1018 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1019 ip6h = (ip6_t *)&icmp6[1]; 1020 inner_ip6h = (ip6_t *) 1021 ((uchar_t *)ip6h + hdr_length); 1022 1023 if (mctl_present) 1024 first_mp->b_cont = mp; 1025 else 1026 first_mp = mp; 1027 } 1028 1029 /* 1030 * Need to set db_type back to M_DATA before 1031 * refeeding mp into this function. 1032 */ 1033 DB_TYPE(mp) = M_DATA; 1034 1035 /* 1036 * Copy the 3rd header + remaining data on top 1037 * of the 2nd header. 1038 */ 1039 bcopy(inner_ip6h, ip6h, 1040 mp->b_wptr - (uchar_t *)inner_ip6h); 1041 1042 /* 1043 * Subtract length of the 2nd header. 1044 */ 1045 mp->b_wptr -= hdr_length; 1046 1047 /* 1048 * Now recurse, and see what I _really_ should be 1049 * doing here. 1050 */ 1051 icmp_inbound_error_fanout_v6(q, first_mp, 1052 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1053 mctl_present, zoneid); 1054 return; 1055 } 1056 if (icmp_inbound_iptun_fanout_v6(first_mp, &rip6h, ill, ipst)) 1057 return; 1058 /* 1059 * No IP tunnel is associated with this error. Perhaps a raw 1060 * socket will want it. 
/*
 * Process received IPv6 ICMP Redirect messages.
 *
 * q is unused (hence ARGSUSED); mp is the received packet starting at the
 * IPv6 header; ill is the interface whose MIB counters are bumped and on
 * which the neighbor cache entry is created.
 *
 * The message is validated (link-local source, hop limit of IPV6_MAX_HOPS,
 * code 0, at least sizeof (nd_redirect_t) bytes, destination neither
 * v4-mapped nor multicast, target either link-local or equal to the
 * destination, well-formed options).  A redirect is only honoured when a
 * route to the destination through the sender already exists (prev_ire).
 * On success a dynamic host route (redirect to a router) or an on-link
 * interface route (target == destination) is added, routing sockets are
 * notified, and any older dynamic IRE_HOST for the destination via the
 * sender is deleted.
 *
 * mp is always freed before returning.
 */
/* ARGSUSED */
static void
icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h;
	uint16_t	hdr_length;
	nd_redirect_t	*rd;
	ire_t		*ire;
	ire_t		*prev_ire;
	ire_t		*redir_ire;
	in6_addr_t	*src, *dst, *gateway;
	nd_opt_hdr_t	*opt;
	nce_t		*nce;
	int		nce_flags = 0;
	int		err = 0;
	boolean_t	redirect_to_router = B_FALSE;
	int		len;
	int		optlen;
	iulp_t		ulp_info = { 0 };
	ill_t		*prev_ire_ill;
	ipif_t		*ipif;
	ip_stack_t	*ipst = ill->ill_ipst;

	/* Locate the redirect header; skip extension headers if present. */
	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
		hdr_length = ip_hdr_length_v6(mp, ip6h);
	else
		hdr_length = IPV6_HDR_LEN;

	rd = (nd_redirect_t *)&mp->b_rptr[hdr_length];
	/* Bytes available in this mblk from the ICMPv6 header onwards. */
	len = mp->b_wptr - mp->b_rptr -  hdr_length;
	src = &ip6h->ip6_src;
	dst = &rd->nd_rd_dst;
	gateway = &rd->nd_rd_target;

	/* Verify if it is a valid redirect */
	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
	    (rd->nd_rd_code != 0) ||
	    (len < sizeof (nd_redirect_t)) ||
	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
	    (IN6_IS_ADDR_MULTICAST(dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		freemsg(mp);
		return;
	}

	/* The target must be link-local unless it is the destination itself. */
	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		freemsg(mp);
		return;
	}

	/* Anything past the fixed header is options; check their lengths. */
	if (len > sizeof (nd_redirect_t)) {
		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1],
		    len - sizeof (nd_redirect_t))) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
			freemsg(mp);
			return;
		}
	}

	/* target != destination means we are being pointed at a router. */
	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
		redirect_to_router = B_TRUE;
		nce_flags |= NCE_F_ISROUTER;
	}

	/* ipif will be refreleased afterwards */
	ipif = ipif_get_next_ipif(NULL, ill);
	if (ipif == NULL) {
		freemsg(mp);
		return;
	}

	/*
	 * Verify that the IP source address of the redirect is
	 * the same as the current first-hop router for the specified
	 * ICMP destination address.
	 * Also, Make sure we had a route for the dest in question and
	 * that route was pointing to the old gateway (the source of the
	 * redirect packet.)
	 */

	prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES,
	    NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst);

	/*
	 * Check that
	 *	the redirect was not from ourselves
	 *	old gateway is still directly reachable
	 */
	if (prev_ire == NULL ||
	    prev_ire->ire_type == IRE_LOCAL) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		ipif_refrele(ipif);
		goto fail_redirect;
	}
	prev_ire_ill = ire_to_ill(prev_ire);
	ASSERT(prev_ire_ill != NULL);
	if (prev_ire_ill->ill_flags & ILLF_NONUD)
		nce_flags |= NCE_F_NONUD;

	/*
	 * Should we use the old ULP info to create the new gateway?  From
	 * a user's perspective, we should inherit the info so that it
	 * is a "smooth" transition.  If we do not do that, then new
	 * connections going thru the new gateway will have no route metrics,
	 * which is counter-intuitive to user.  From a network point of
	 * view, this may or may not make sense even though the new gateway
	 * is still directly connected to us so the route metrics should not
	 * change much.
	 *
	 * But if the old ire_uinfo is not initialized, we do another
	 * recursive lookup on the dest using the new gateway.  There may
	 * be a route to that.  If so, use it to initialize the redirect
	 * route.
	 */
	if (prev_ire->ire_uinfo.iulp_set) {
		bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t));
	} else if (redirect_to_router) {
		/*
		 * Only do the following if the redirection is really to
		 * a router.
		 */
		ire_t *tmp_ire;
		ire_t *sire;

		/*
		 * NOTE(review): sire is not initialized here; this relies on
		 * ire_ftable_lookup_v6() always storing through its ire_t **
		 * argument -- confirm against its definition.
		 */
		tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire,
		    ALL_ZONES, 0, NULL,
		    (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT),
		    ipst);
		if (sire != NULL) {
			bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t));
			ASSERT(tmp_ire != NULL);
			ire_refrele(tmp_ire);
			ire_refrele(sire);
		} else if (tmp_ire != NULL) {
			bcopy(&tmp_ire->ire_uinfo, &ulp_info,
			    sizeof (iulp_t));
			ire_refrele(tmp_ire);
		}
	}

	/*
	 * If the redirect carries a target link-layer address option,
	 * create (or update) the neighbor cache entry for the target.
	 */
	optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t);
	opt = (nd_opt_hdr_t *)&rd[1];
	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
	if (opt != NULL) {
		err = ndp_lookup_then_add_v6(ill,
		    B_FALSE,			/* don't match across illgrp */
		    (uchar_t *)&opt[1],		/* Link layer address */
		    gateway,
		    &ipv6_all_ones,		/* prefix mask */
		    &ipv6_all_zeros,		/* Mapping mask */
		    0,
		    nce_flags,
		    ND_STALE,
		    &nce);
		switch (err) {
		case 0:
			NCE_REFRELE(nce);
			break;
		case EEXIST:
			/*
			 * Check to see if link layer address has changed and
			 * process the nce_state accordingly.
			 */
			ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE);
			NCE_REFRELE(nce);
			break;
		default:
			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
			    err));
			ipif_refrele(ipif);
			goto fail_redirect;
		}
	}
	if (redirect_to_router) {
		/* icmp_redirect_ok_v6() must have already verified this  */
		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));

		/*
		 * Create a Route Association.  This will allow us to remember
		 * a router told us to use the particular gateway.
		 */
		ire = ire_create_v6(
		    dst,
		    &ipv6_all_ones,		/* mask */
		    &prev_ire->ire_src_addr_v6,	/* source addr */
		    gateway,			/* gateway addr */
		    &prev_ire->ire_max_frag,	/* max frag */
		    NULL,			/* no src nce */
		    NULL, 			/* no rfq */
		    NULL,			/* no stq */
		    IRE_HOST,
		    prev_ire->ire_ipif,
		    NULL,
		    0,
		    0,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
		    &ulp_info,
		    NULL,
		    NULL,
		    ipst);
	} else {
		queue_t *stq;

		stq = (ipif->ipif_net_type == IRE_IF_RESOLVER)
		    ? ipif->ipif_rq : ipif->ipif_wq;

		/*
		 * Just create an on link entry, i.e. interface route.
		 */
		ire = ire_create_v6(
		    dst,				/* gateway == dst */
		    &ipv6_all_ones,			/* mask */
		    &prev_ire->ire_src_addr_v6,		/* source addr */
		    &ipv6_all_zeros,			/* gateway addr */
		    &prev_ire->ire_max_frag,		/* max frag */
		    NULL,				/* no src nce */
		    NULL,				/* ire rfq */
		    stq,				/* ire stq */
		    ipif->ipif_net_type,		/* IF_[NO]RESOLVER */
		    prev_ire->ire_ipif,
		    &ipv6_all_ones,
		    0,
		    0,
		    (RTF_DYNAMIC | RTF_HOST),
		    &ulp_info,
		    NULL,
		    NULL,
		    ipst);
	}

	/* Release reference from earlier ipif_get_next_ipif() */
	ipif_refrele(ipif);

	if (ire == NULL)
		goto fail_redirect;

	if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) {

		/* tell routing sockets that we received a redirect */
		ip_rts_change_v6(RTM_REDIRECT,
		    &rd->nd_rd_dst,
		    &rd->nd_rd_target,
		    &ipv6_all_ones, 0, &ire->ire_src_addr_v6,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);

		/*
		 * Delete any existing IRE_HOST type ires for this destination.
		 * This together with the added IRE has the effect of
		 * modifying an existing redirect.
		 */
		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
		    ire->ire_ipif, NULL, ALL_ZONES, 0, NULL,
		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst);

		ire_refrele(ire);		/* Held in ire_add_v6 */

		if (redir_ire != NULL) {
			if (redir_ire->ire_flags & RTF_DYNAMIC)
				ire_delete(redir_ire);
			ire_refrele(redir_ire);
		}
	}

	/*
	 * Success path: drop a cached route through the old gateway, release
	 * our reference, and clear prev_ire so the shared exit below does not
	 * release it a second time.
	 */
	if (prev_ire->ire_type == IRE_CACHE)
		ire_delete(prev_ire);
	ire_refrele(prev_ire);
	prev_ire = NULL;

fail_redirect:
	if (prev_ire != NULL)
		ire_refrele(prev_ire);
	freemsg(mp);
}

/*
 * Map a write-side queue to the ill to charge ICMPv6 work against.
 *
 * If q has a next queue, q->q_ptr is taken to be the ill and a reference
 * is taken on it when ILL_CAN_LOOKUP() allows; otherwise the loopback ill
 * is looked up by name.  Returns NULL (after logging) when no ill is
 * available.  Callers in this file release the result with ill_refrele(),
 * so the name lookup presumably also returns a held ill -- TODO confirm.
 */
static ill_t *
ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst)
{
	ill_t *ill;

	ASSERT(WR(q) == q);

	if (q->q_next != NULL) {
		ill = (ill_t *)q->q_ptr;
		if (ILL_CAN_LOOKUP(ill))
			ill_refhold(ill);
		else
			ill = NULL;
	} else {
		ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE,
		    NULL, NULL, NULL, NULL, NULL, ipst);
	}
	if (ill == NULL)
		ip0dbg(("ip_queue_to_ill_v6: no ill\n"));
	return (ill);
}
1386 */ 1387 static in6_addr_t * 1388 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1389 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1390 { 1391 ill_t *ill; 1392 ire_t *ire; 1393 ipif_t *ipif; 1394 1395 ASSERT(!(wq->q_flag & QREADR)); 1396 if (wq->q_next != NULL) { 1397 ill = (ill_t *)wq->q_ptr; 1398 } else { 1399 ill = NULL; 1400 } 1401 1402 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1403 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1404 ipst); 1405 if (ire != NULL) { 1406 /* Destined to one of our addresses */ 1407 *src = *origdst; 1408 ire_refrele(ire); 1409 return (src); 1410 } 1411 if (ire != NULL) { 1412 ire_refrele(ire); 1413 ire = NULL; 1414 } 1415 if (ill == NULL) { 1416 /* What is the route back to the original source? */ 1417 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1418 NULL, NULL, zoneid, NULL, 1419 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1420 if (ire == NULL) { 1421 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1422 return (NULL); 1423 } 1424 ASSERT(ire->ire_ipif != NULL); 1425 ill = ire->ire_ipif->ipif_ill; 1426 ire_refrele(ire); 1427 } 1428 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1429 IPV6_PREFER_SRC_DEFAULT, zoneid); 1430 if (ipif != NULL) { 1431 *src = ipif->ipif_v6src_addr; 1432 ipif_refrele(ipif); 1433 return (src); 1434 } 1435 /* 1436 * Unusual case - can't find a usable source address to reach the 1437 * original source. Use what in the route to the source. 
1438 */ 1439 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1440 NULL, NULL, zoneid, NULL, 1441 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1442 if (ire == NULL) { 1443 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1444 return (NULL); 1445 } 1446 ASSERT(ire != NULL); 1447 *src = ire->ire_src_addr_v6; 1448 ire_refrele(ire); 1449 return (src); 1450 } 1451 1452 /* 1453 * Build and ship an IPv6 ICMP message using the packet data in mp, 1454 * and the ICMP header pointed to by "stuff". (May be called as 1455 * writer.) 1456 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1457 * verify that an icmp error packet can be sent. 1458 * 1459 * If q is an ill write side queue (which is the case when packets 1460 * arrive from ip_rput) then ip_wput code will ensure that packets to 1461 * link-local destinations are sent out that ill. 1462 * 1463 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1464 * source address (see above function). 1465 */ 1466 static void 1467 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1468 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1469 ip_stack_t *ipst) 1470 { 1471 ip6_t *ip6h; 1472 in6_addr_t v6dst; 1473 size_t len_needed; 1474 size_t msg_len; 1475 mblk_t *mp1; 1476 icmp6_t *icmp6; 1477 ill_t *ill; 1478 in6_addr_t v6src; 1479 mblk_t *ipsec_mp; 1480 ipsec_out_t *io; 1481 1482 ill = ip_queue_to_ill_v6(q, ipst); 1483 if (ill == NULL) { 1484 freemsg(mp); 1485 return; 1486 } 1487 1488 if (mctl_present) { 1489 /* 1490 * If it is : 1491 * 1492 * 1) a IPSEC_OUT, then this is caused by outbound 1493 * datagram originating on this host. IPSEC processing 1494 * may or may not have been done. Refer to comments above 1495 * icmp_inbound_error_fanout for details. 1496 * 1497 * 2) a IPSEC_IN if we are generating a icmp_message 1498 * for an incoming datagram destined for us i.e called 1499 * from ip_fanout_send_icmp. 
1500 */ 1501 ipsec_info_t *in; 1502 1503 ipsec_mp = mp; 1504 mp = ipsec_mp->b_cont; 1505 1506 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1507 ip6h = (ip6_t *)mp->b_rptr; 1508 1509 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1510 in->ipsec_info_type == IPSEC_IN); 1511 1512 if (in->ipsec_info_type == IPSEC_IN) { 1513 /* 1514 * Convert the IPSEC_IN to IPSEC_OUT. 1515 */ 1516 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1517 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1518 ill_refrele(ill); 1519 return; 1520 } 1521 } else { 1522 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1523 io = (ipsec_out_t *)in; 1524 /* 1525 * Clear out ipsec_out_proc_begin, so we do a fresh 1526 * ire lookup. 1527 */ 1528 io->ipsec_out_proc_begin = B_FALSE; 1529 } 1530 } else { 1531 /* 1532 * This is in clear. The icmp message we are building 1533 * here should go out in clear. 1534 */ 1535 ipsec_in_t *ii; 1536 ASSERT(mp->b_datap->db_type == M_DATA); 1537 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1538 if (ipsec_mp == NULL) { 1539 freemsg(mp); 1540 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1541 ill_refrele(ill); 1542 return; 1543 } 1544 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1545 1546 /* This is not a secure packet */ 1547 ii->ipsec_in_secure = B_FALSE; 1548 /* 1549 * For trusted extensions using a shared IP address we can 1550 * send using any zoneid. 1551 */ 1552 if (zoneid == ALL_ZONES) 1553 ii->ipsec_in_zoneid = GLOBAL_ZONEID; 1554 else 1555 ii->ipsec_in_zoneid = zoneid; 1556 ipsec_mp->b_cont = mp; 1557 ip6h = (ip6_t *)mp->b_rptr; 1558 /* 1559 * Convert the IPSEC_IN to IPSEC_OUT. 
1560 */ 1561 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h)) { 1562 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1563 ill_refrele(ill); 1564 return; 1565 } 1566 } 1567 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1568 1569 if (v6src_ptr != NULL) { 1570 v6src = *v6src_ptr; 1571 } else { 1572 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1573 &v6src, zoneid, ipst) == NULL) { 1574 freemsg(ipsec_mp); 1575 ill_refrele(ill); 1576 return; 1577 } 1578 } 1579 v6dst = ip6h->ip6_src; 1580 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1581 msg_len = msgdsize(mp); 1582 if (msg_len > len_needed) { 1583 if (!adjmsg(mp, len_needed - msg_len)) { 1584 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1585 freemsg(ipsec_mp); 1586 ill_refrele(ill); 1587 return; 1588 } 1589 msg_len = len_needed; 1590 } 1591 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1592 if (mp1 == NULL) { 1593 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1594 freemsg(ipsec_mp); 1595 ill_refrele(ill); 1596 return; 1597 } 1598 ill_refrele(ill); 1599 mp1->b_cont = mp; 1600 mp = mp1; 1601 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1602 io->ipsec_out_type == IPSEC_OUT); 1603 ipsec_mp->b_cont = mp; 1604 1605 /* 1606 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1607 * node generates be accepted in peace by all on-host destinations. 1608 * If we do NOT assume that all on-host destinations trust 1609 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1610 * (Look for ipsec_out_icmp_loopback). 
1611 */ 1612 io->ipsec_out_icmp_loopback = B_TRUE; 1613 1614 ip6h = (ip6_t *)mp->b_rptr; 1615 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1616 1617 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1618 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1619 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1620 ip6h->ip6_dst = v6dst; 1621 ip6h->ip6_src = v6src; 1622 msg_len += IPV6_HDR_LEN + len; 1623 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1624 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1625 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1626 } 1627 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1628 icmp6 = (icmp6_t *)&ip6h[1]; 1629 bcopy(stuff, (char *)icmp6, len); 1630 /* 1631 * Prepare for checksum by putting icmp length in the icmp 1632 * checksum field. The checksum is calculated in ip_wput_v6. 1633 */ 1634 icmp6->icmp6_cksum = ip6h->ip6_plen; 1635 if (icmp6->icmp6_type == ND_REDIRECT) { 1636 ip6h->ip6_hops = IPV6_MAX_HOPS; 1637 } 1638 /* Send to V6 writeside put routine */ 1639 put(q, ipsec_mp); 1640 } 1641 1642 /* 1643 * Update the output mib when ICMPv6 packets are sent. 
1644 */ 1645 static void 1646 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1647 { 1648 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1649 1650 switch (icmp6->icmp6_type) { 1651 case ICMP6_DST_UNREACH: 1652 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1653 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1654 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1655 break; 1656 1657 case ICMP6_TIME_EXCEEDED: 1658 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1659 break; 1660 1661 case ICMP6_PARAM_PROB: 1662 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1663 break; 1664 1665 case ICMP6_PACKET_TOO_BIG: 1666 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1667 break; 1668 1669 case ICMP6_ECHO_REQUEST: 1670 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1671 break; 1672 1673 case ICMP6_ECHO_REPLY: 1674 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1675 break; 1676 1677 case ND_ROUTER_SOLICIT: 1678 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1679 break; 1680 1681 case ND_ROUTER_ADVERT: 1682 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1683 break; 1684 1685 case ND_NEIGHBOR_SOLICIT: 1686 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1687 break; 1688 1689 case ND_NEIGHBOR_ADVERT: 1690 BUMP_MIB(ill->ill_icmp6_mib, 1691 ipv6IfIcmpOutNeighborAdvertisements); 1692 break; 1693 1694 case ND_REDIRECT: 1695 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1696 break; 1697 1698 case MLD_LISTENER_QUERY: 1699 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1700 break; 1701 1702 case MLD_LISTENER_REPORT: 1703 case MLD_V2_LISTENER_REPORT: 1704 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1705 break; 1706 1707 case MLD_LISTENER_REDUCTION: 1708 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1709 break; 1710 } 1711 } 1712 1713 /* 1714 * Check if it is ok to send an ICMPv6 error packet in 1715 * response to the IP packet in 
mp. 1716 * Free the message and return null if no 1717 * ICMP error packet should be sent. 1718 */ 1719 static mblk_t * 1720 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1721 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1722 { 1723 ip6_t *ip6h; 1724 1725 if (!mp) 1726 return (NULL); 1727 1728 ip6h = (ip6_t *)mp->b_rptr; 1729 1730 /* Check if source address uniquely identifies the host */ 1731 1732 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1733 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1734 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1735 freemsg(mp); 1736 return (NULL); 1737 } 1738 1739 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1740 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1741 icmp6_t *icmp6; 1742 1743 if (mp->b_wptr - mp->b_rptr < len_needed) { 1744 if (!pullupmsg(mp, len_needed)) { 1745 ill_t *ill; 1746 1747 ill = ip_queue_to_ill_v6(q, ipst); 1748 if (ill == NULL) { 1749 BUMP_MIB(&ipst->ips_icmp6_mib, 1750 ipv6IfIcmpInErrors); 1751 } else { 1752 BUMP_MIB(ill->ill_icmp6_mib, 1753 ipv6IfIcmpInErrors); 1754 ill_refrele(ill); 1755 } 1756 freemsg(mp); 1757 return (NULL); 1758 } 1759 ip6h = (ip6_t *)mp->b_rptr; 1760 } 1761 icmp6 = (icmp6_t *)&ip6h[1]; 1762 /* Explicitly do not generate errors in response to redirects */ 1763 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1764 icmp6->icmp6_type == ND_REDIRECT) { 1765 freemsg(mp); 1766 return (NULL); 1767 } 1768 } 1769 /* 1770 * Check that the destination is not multicast and that the packet 1771 * was not sent on link layer broadcast or multicast. (Exception 1772 * is Packet too big message as per the draft - when mcast_ok is set.) 1773 */ 1774 if (!mcast_ok && 1775 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1776 freemsg(mp); 1777 return (NULL); 1778 } 1779 if (icmp_err_rate_limit(ipst)) { 1780 /* 1781 * Only send ICMP error packets every so often. 1782 * This should be done on a per port/source basis, 1783 * but for now this will suffice. 
1784 */ 1785 freemsg(mp); 1786 return (NULL); 1787 } 1788 return (mp); 1789 } 1790 1791 /* 1792 * Generate an ICMPv6 redirect message. 1793 * Include target link layer address option if it exits. 1794 * Always include redirect header. 1795 */ 1796 static void 1797 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1798 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1799 { 1800 nd_redirect_t *rd; 1801 nd_opt_rd_hdr_t *rdh; 1802 uchar_t *buf; 1803 nce_t *nce = NULL; 1804 nd_opt_hdr_t *opt; 1805 int len; 1806 int ll_opt_len = 0; 1807 int max_redir_hdr_data_len; 1808 int pkt_len; 1809 in6_addr_t *srcp; 1810 ip_stack_t *ipst = ill->ill_ipst; 1811 1812 /* 1813 * We are called from ip_rput where we could 1814 * not have attached an IPSEC_IN. 1815 */ 1816 ASSERT(mp->b_datap->db_type == M_DATA); 1817 1818 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1819 if (mp == NULL) 1820 return; 1821 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1822 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1823 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1824 ill->ill_phys_addr_length + 7)/8 * 8; 1825 } 1826 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1827 ASSERT(len % 4 == 0); 1828 buf = kmem_alloc(len, KM_NOSLEEP); 1829 if (buf == NULL) { 1830 if (nce != NULL) 1831 NCE_REFRELE(nce); 1832 freemsg(mp); 1833 return; 1834 } 1835 1836 rd = (nd_redirect_t *)buf; 1837 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1838 rd->nd_rd_code = 0; 1839 rd->nd_rd_reserved = 0; 1840 rd->nd_rd_target = *targetp; 1841 rd->nd_rd_dst = *dest; 1842 1843 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1844 if (nce != NULL && ll_opt_len != 0) { 1845 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1846 opt->nd_opt_len = ll_opt_len/8; 1847 bcopy((char *)nce->nce_res_mp->b_rptr + 1848 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1849 ill->ill_phys_addr_length); 1850 } 1851 if (nce != NULL) 1852 NCE_REFRELE(nce); 1853 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1854 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1855 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1856 max_redir_hdr_data_len = 1857 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1858 pkt_len = msgdsize(mp); 1859 /* Make sure mp is 8 byte aligned */ 1860 if (pkt_len > max_redir_hdr_data_len) { 1861 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1862 sizeof (nd_opt_rd_hdr_t))/8; 1863 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1864 } else { 1865 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1866 (void) adjmsg(mp, -(pkt_len % 8)); 1867 } 1868 rdh->nd_opt_rh_reserved1 = 0; 1869 rdh->nd_opt_rh_reserved2 = 0; 1870 /* ipif_v6src_addr contains the link-local source address */ 1871 srcp = &ill->ill_ipif->ipif_v6src_addr; 1872 1873 /* Redirects sent by router, and router is global zone */ 1874 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1875 kmem_free(buf, len); 1876 } 1877 1878 1879 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1880 void 1881 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1882 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1883 ip_stack_t *ipst) 1884 { 1885 icmp6_t icmp6; 1886 boolean_t mctl_present; 1887 mblk_t *first_mp; 1888 1889 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1890 1891 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1892 if (mp == NULL) { 1893 if (mctl_present) 1894 freeb(first_mp); 1895 return; 1896 } 1897 bzero(&icmp6, sizeof (icmp6_t)); 1898 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1899 icmp6.icmp6_code = code; 1900 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1901 zoneid, ipst); 1902 } 1903 1904 /* 1905 * Generate an ICMP unreachable message. 
1906 */ 1907 void 1908 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1909 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1910 ip_stack_t *ipst) 1911 { 1912 icmp6_t icmp6; 1913 boolean_t mctl_present; 1914 mblk_t *first_mp; 1915 1916 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1917 1918 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1919 if (mp == NULL) { 1920 if (mctl_present) 1921 freeb(first_mp); 1922 return; 1923 } 1924 bzero(&icmp6, sizeof (icmp6_t)); 1925 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1926 icmp6.icmp6_code = code; 1927 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1928 zoneid, ipst); 1929 } 1930 1931 /* 1932 * Generate an ICMP pkt too big message. 1933 */ 1934 static void 1935 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1936 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1937 { 1938 icmp6_t icmp6; 1939 mblk_t *first_mp; 1940 boolean_t mctl_present; 1941 1942 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1943 1944 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1945 if (mp == NULL) { 1946 if (mctl_present) 1947 freeb(first_mp); 1948 return; 1949 } 1950 bzero(&icmp6, sizeof (icmp6_t)); 1951 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1952 icmp6.icmp6_code = 0; 1953 icmp6.icmp6_mtu = htonl(mtu); 1954 1955 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1956 zoneid, ipst); 1957 } 1958 1959 /* 1960 * Generate an ICMP parameter problem message. (May be called as writer.) 1961 * 'offset' is the offset from the beginning of the packet in error. 
1962 */ 1963 static void 1964 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1965 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1966 ip_stack_t *ipst) 1967 { 1968 icmp6_t icmp6; 1969 boolean_t mctl_present; 1970 mblk_t *first_mp; 1971 1972 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1973 1974 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1975 if (mp == NULL) { 1976 if (mctl_present) 1977 freeb(first_mp); 1978 return; 1979 } 1980 bzero((char *)&icmp6, sizeof (icmp6_t)); 1981 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1982 icmp6.icmp6_code = code; 1983 icmp6.icmp6_pptr = htonl(offset); 1984 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1985 zoneid, ipst); 1986 } 1987 1988 /* 1989 * This code will need to take into account the possibility of binding 1990 * to a link local address on a multi-homed host, in which case the 1991 * outgoing interface (from the conn) will need to be used when getting 1992 * an ire for the dst. Going through proper outgoing interface and 1993 * choosing the source address corresponding to the outgoing interface 1994 * is necessary when the destination address is a link-local address and 1995 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1996 * This can happen when active connection is setup; thus ipp pointer 1997 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1998 * pointer is passed as ipp pointer. 
/*
 * This code will need to take into account the possibility of binding
 * to a link local address on a multi-homed host, in which case the
 * outgoing interface (from the conn) will need to be used when getting
 * an ire for the dst. Going through proper outgoing interface and
 * choosing the source address corresponding to the outgoing interface
 * is necessary when the destination address is a link-local address and
 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set.
 * This can happen when active connection is setup; thus ipp pointer
 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL
 * pointer is passed as ipp pointer.
 *
 * Handles a TPI T_bind_req for an IPv6 conn.  The last byte of the first
 * mblk carries the protocol number; the address part of the request
 * selects the operation by its length:
 *	0			register for all packets of the protocol
 *	IPV6_ADDR_LEN		verify a local address only
 *	sizeof (sin6_t)		local bind (address + port)
 *	sizeof (ipa6_conn_t)	connected bind, destination always verified
 *	sizeof (ipa6_conn_x_t)	connected bind, destination verified only
 *				if ACX_VERIFY_DST is set
 *
 * Returns the message to send upstream: mp turned into a T_BIND_ACK on
 * success, otherwise whatever mi_tpi_err_ack_alloc() produces (TSYSERR
 * with the errno when one is known, TBADADDR otherwise).
 */
mblk_t *
ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp)
{
	ssize_t			len;
	int			protocol;
	struct T_bind_req	*tbr;
	sin6_t			*sin6;
	ipa6_conn_t		*ac6;
	in6_addr_t		*v6srcp;
	in6_addr_t		*v6dstp;
	uint16_t		lport;
	uint16_t		fport;
	uchar_t			*ucp;
	int			error = 0;
	boolean_t		local_bind;
	ipa6_conn_x_t		*acx6;
	boolean_t		verify_dst;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	cred_t			*cr;

	/*
	 * All Solaris components should pass a db_credp
	 * for this TPI message, hence we ASSERT.
	 * But in case there is some other M_PROTO that looks
	 * like a TPI message sent by some other kernel
	 * component, we check and return an error.
	 */
	cr = msg_getcred(mp, NULL);
	ASSERT(cr != NULL);
	if (cr == NULL) {
		error = EINVAL;
		goto bad_addr;
	}

	ASSERT(connp->conn_af_isv6);
	len = mp->b_wptr - mp->b_rptr;
	/* Need the T_bind_req itself plus the trailing protocol byte. */
	if (len < (sizeof (*tbr) + 1)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "ip_bind_v6: bogus msg, len %ld", len);
		goto bad_addr;
	}
	/* Back up and extract the protocol identifier. */
	mp->b_wptr--;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/* Reset the message type in preparation for shipping it back. */
	mp->b_datap->db_type = M_PCPROTO;

	protocol = *mp->b_wptr & 0xFF;
	connp->conn_ulp = (uint8_t)protocol;

	/*
	 * Check for a zero length address.  This is from a protocol that
	 * wants to register to receive all packets of its type.
	 */
	if (tbr->ADDR_length == 0) {
		if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP ||
		    protocol == IPPROTO_ESP || protocol == IPPROTO_AH) &&
		    ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head !=
		    NULL) {
			/*
			 * TCP, SCTP, AH, and ESP have single protocol fanouts.
			 * Do not allow others to bind to these.
			 */
			goto bad_addr;
		}

		/*
		 *
		 * The udp module never sends down a zero-length address,
		 * and allowing this on a labeled system will break MLP
		 * functionality.
		 */
		if (is_system_labeled() && protocol == IPPROTO_UDP)
			goto bad_addr;

		/* Allow ipsec plumbing */
		if (connp->conn_mac_exempt && protocol != IPPROTO_AH &&
		    protocol != IPPROTO_ESP)
			goto bad_addr;

		connp->conn_srcv6 = ipv6_all_zeros;
		ipcl_proto_insert_v6(connp, protocol);

		tbr->PRIM_type = T_BIND_ACK;
		return (mp);
	}

	/* Extract the address pointer from the message. */
	ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset,
	    tbr->ADDR_length);
	if (ucp == NULL) {
		ip1dbg(("ip_bind_v6: no address\n"));
		goto bad_addr;
	}
	if (!OK_32PTR(ucp)) {
		ip1dbg(("ip_bind_v6: unaligned address\n"));
		goto bad_addr;
	}

	/*
	 * The address length identifies which structure was passed down.
	 * v6dstp, fport and verify_dst are only set (and only used) for the
	 * two connected forms.
	 */
	switch (tbr->ADDR_length) {
	default:
		ip1dbg(("ip_bind_v6: bad address length %d\n",
		    (int)tbr->ADDR_length));
		goto bad_addr;

	case IPV6_ADDR_LEN:
		/* Verification of local address only */
		v6srcp = (in6_addr_t *)ucp;
		lport = 0;
		local_bind = B_TRUE;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)ucp;
		v6srcp = &sin6->sin6_addr;
		lport = sin6->sin6_port;
		local_bind = B_TRUE;
		break;

	case sizeof (ipa6_conn_t):
		/*
		 * Verify that both the source and destination addresses
		 * are valid.
		 */
		ac6 = (ipa6_conn_t *)ucp;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		/* For raw socket, the local port is not set. */
		lport = ac6->ac6_lport != 0 ? ac6->ac6_lport :
		    connp->conn_lport;
		local_bind = B_FALSE;
		/* Always verify destination reachability. */
		verify_dst = B_TRUE;
		break;

	case sizeof (ipa6_conn_x_t):
		/*
		 * Verify that the source address is valid.
		 */
		acx6 = (ipa6_conn_x_t *)ucp;
		ac6 = &acx6->ac6x_conn;
		v6srcp = &ac6->ac6_laddr;
		v6dstp = &ac6->ac6_faddr;
		fport = ac6->ac6_fport;
		lport = ac6->ac6_lport;
		local_bind = B_FALSE;
		/*
		 * Client that passed ipa6_conn_x_t to us specifies whether to
		 * verify destination reachability.
		 */
		verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0;
		break;
	}
	if (local_bind) {
		/* A bare address (IPV6_ADDR_LEN) is verify-only: no fanout. */
		error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol,
		    v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN);
	} else {
		error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol,
		    v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr);
	}

	if (error == 0) {
		/* Send it home. */
		mp->b_datap->db_type = M_PCPROTO;
		tbr->PRIM_type = T_BIND_ACK;
		return (mp);
	}

bad_addr:
	/*
	 * error is still 0 for the address-format failures above, which
	 * are reported as TBADADDR; a positive errno becomes TSYSERR.
	 */
	ASSERT(error != EINPROGRESS);
	if (error > 0)
		mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
	else
		mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0);
	return (mp);
}
Note: the 2194 * application still has to issue an 2195 * IPV6_JOIN_GROUP socket option. 2196 * 2197 * In all the above cases, the bound address must be valid in the current zone. 2198 * When the address is loopback or multicast, there might be many matching IREs 2199 * so bind has to look up based on the zone. 2200 */ 2201 /* 2202 * Verify the local IP address. Does not change the conn_t except 2203 * conn_fully_bound and conn_policy_cached. 2204 */ 2205 static int 2206 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2207 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2208 { 2209 int error = 0; 2210 ire_t *src_ire = NULL; 2211 zoneid_t zoneid; 2212 mblk_t *mp = NULL; 2213 boolean_t ire_requested; 2214 boolean_t ipsec_policy_set; 2215 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2216 2217 if (mpp) 2218 mp = *mpp; 2219 2220 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2221 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2222 2223 /* 2224 * If it was previously connected, conn_fully_bound would have 2225 * been set. 2226 */ 2227 connp->conn_fully_bound = B_FALSE; 2228 2229 zoneid = IPCL_ZONEID(connp); 2230 2231 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2232 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2233 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2234 /* 2235 * If an address other than in6addr_any is requested, 2236 * we verify that it is a valid address for bind 2237 * Note: Following code is in if-else-if form for 2238 * readability compared to a condition check. 2239 */ 2240 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2241 /* LINTED - statement has no consequent */ 2242 if (IRE_IS_LOCAL(src_ire)) { 2243 /* 2244 * (2) Bind to address of local UP interface 2245 */ 2246 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2247 ipif_t *multi_ipif = NULL; 2248 ire_t *save_ire; 2249 /* 2250 * (4) bind to multicast address. 
2251 * Fake out the IRE returned to upper 2252 * layer to be a broadcast IRE in 2253 * ip_bind_insert_ire_v6(). 2254 * Pass other information that matches 2255 * the ipif (e.g. the source address). 2256 * conn_multicast_ill is only used for 2257 * IPv6 packets 2258 */ 2259 mutex_enter(&connp->conn_lock); 2260 if (connp->conn_multicast_ill != NULL) { 2261 (void) ipif_lookup_zoneid( 2262 connp->conn_multicast_ill, zoneid, 0, 2263 &multi_ipif); 2264 } else { 2265 /* 2266 * Look for default like 2267 * ip_wput_v6 2268 */ 2269 multi_ipif = ipif_lookup_group_v6( 2270 &ipv6_unspecified_group, zoneid, ipst); 2271 } 2272 mutex_exit(&connp->conn_lock); 2273 save_ire = src_ire; 2274 src_ire = NULL; 2275 if (multi_ipif == NULL || !ire_requested || 2276 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2277 src_ire = save_ire; 2278 error = EADDRNOTAVAIL; 2279 } else { 2280 ASSERT(src_ire != NULL); 2281 if (save_ire != NULL) 2282 ire_refrele(save_ire); 2283 } 2284 if (multi_ipif != NULL) 2285 ipif_refrele(multi_ipif); 2286 } else { 2287 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2288 /* 2289 * Not a valid address for bind 2290 */ 2291 error = EADDRNOTAVAIL; 2292 } 2293 } 2294 2295 if (error != 0) { 2296 /* Red Alert! Attempting to be a bogon! */ 2297 if (ip_debug > 2) { 2298 /* ip1dbg */ 2299 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2300 " address %s\n", AF_INET6, v6src); 2301 } 2302 goto bad_addr; 2303 } 2304 } 2305 2306 /* 2307 * Allow setting new policies. For example, disconnects come 2308 * down as ipa_t bind. As we would have set conn_policy_cached 2309 * to B_TRUE before, we should set it to B_FALSE, so that policy 2310 * can change after the disconnect. 2311 */ 2312 connp->conn_policy_cached = B_FALSE; 2313 2314 /* If not fanout_insert this was just an address verification */ 2315 if (fanout_insert) { 2316 /* 2317 * The addresses have been verified. Time to insert in 2318 * the correct fanout list. 
2319 */ 2320 connp->conn_srcv6 = *v6src; 2321 connp->conn_remv6 = ipv6_all_zeros; 2322 connp->conn_lport = lport; 2323 connp->conn_fport = 0; 2324 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2325 } 2326 if (error == 0) { 2327 if (ire_requested) { 2328 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2329 ipst)) { 2330 error = -1; 2331 goto bad_addr; 2332 } 2333 mp = *mpp; 2334 } else if (ipsec_policy_set) { 2335 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2336 error = -1; 2337 goto bad_addr; 2338 } 2339 } 2340 } 2341 bad_addr: 2342 if (error != 0) { 2343 if (connp->conn_anon_port) { 2344 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2345 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2346 B_FALSE); 2347 } 2348 connp->conn_mlp_type = mlptSingle; 2349 } 2350 2351 if (src_ire != NULL) 2352 ire_refrele(src_ire); 2353 2354 if (ipsec_policy_set) { 2355 ASSERT(mp != NULL); 2356 freeb(mp); 2357 /* 2358 * As of now assume that nothing else accompanies 2359 * IPSEC_POLICY_SET. 
2360 */ 2361 *mpp = NULL; 2362 } 2363 2364 return (error); 2365 } 2366 int 2367 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2368 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2369 { 2370 int error; 2371 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2372 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2373 2374 ASSERT(connp->conn_af_isv6); 2375 connp->conn_ulp = protocol; 2376 2377 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2378 /* Bind to IPv4 address */ 2379 ipaddr_t v4src; 2380 2381 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2382 2383 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2384 fanout_insert); 2385 if (error != 0) 2386 goto bad_addr; 2387 connp->conn_pkt_isv6 = B_FALSE; 2388 } else { 2389 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2390 error = 0; 2391 goto bad_addr; 2392 } 2393 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2394 lport, fanout_insert); 2395 if (error != 0) 2396 goto bad_addr; 2397 connp->conn_pkt_isv6 = B_TRUE; 2398 } 2399 2400 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2401 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2402 return (0); 2403 2404 bad_addr: 2405 if (error < 0) 2406 error = -TBADADDR; 2407 return (error); 2408 } 2409 2410 /* 2411 * Verify that both the source and destination addresses 2412 * are valid. If verify_dst, then destination address must also be reachable, 2413 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2414 * It takes ip6_pkt_t * as one of the arguments to determine correct 2415 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2416 * destination address. Note that parameter ipp is only useful for TCP connect 2417 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2418 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2419 * 2420 */ 2421 int 2422 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2423 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2424 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2425 boolean_t verify_dst, cred_t *cr) 2426 { 2427 ire_t *src_ire; 2428 ire_t *dst_ire; 2429 int error = 0; 2430 ire_t *sire = NULL; 2431 ire_t *md_dst_ire = NULL; 2432 ill_t *md_ill = NULL; 2433 ill_t *dst_ill = NULL; 2434 ipif_t *src_ipif = NULL; 2435 zoneid_t zoneid; 2436 boolean_t ill_held = B_FALSE; 2437 mblk_t *mp = NULL; 2438 boolean_t ire_requested = B_FALSE; 2439 boolean_t ipsec_policy_set = B_FALSE; 2440 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2441 ts_label_t *tsl = NULL; 2442 cred_t *effective_cred = NULL; 2443 2444 if (mpp) 2445 mp = *mpp; 2446 2447 if (mp != NULL) { 2448 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2449 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2450 } 2451 2452 src_ire = dst_ire = NULL; 2453 /* 2454 * If we never got a disconnect before, clear it now. 2455 */ 2456 connp->conn_fully_bound = B_FALSE; 2457 2458 zoneid = connp->conn_zoneid; 2459 2460 /* 2461 * Check whether Trusted Solaris policy allows communication with this 2462 * host, and pretend that the destination is unreachable if not. 2463 * 2464 * This is never a problem for TCP, since that transport is known to 2465 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2466 * handling. If the remote is unreachable, it will be detected at that 2467 * point, so there's no reason to check it here. 2468 * 2469 * Note that for sendto (and other datagram-oriented friends), this 2470 * check is done as part of the data path label computation instead. 2471 * The check here is just to make non-TCP connect() report the right 2472 * error. 
2473 */ 2474 if (is_system_labeled() && !IPCL_IS_TCP(connp)) { 2475 if ((error = tsol_check_dest(cr, v6dst, IPV6_VERSION, 2476 connp->conn_mac_exempt, &effective_cred)) != 0) { 2477 if (ip_debug > 2) { 2478 pr_addr_dbg( 2479 "ip_bind_connected: no label for dst %s\n", 2480 AF_INET6, v6dst); 2481 } 2482 goto bad_addr; 2483 } 2484 2485 /* 2486 * tsol_check_dest() may have created a new cred with 2487 * a modified security label. Use that cred if it exists 2488 * for ire lookups. 2489 */ 2490 if (effective_cred == NULL) { 2491 tsl = crgetlabel(cr); 2492 } else { 2493 tsl = crgetlabel(effective_cred); 2494 } 2495 } 2496 2497 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2498 ipif_t *ipif; 2499 2500 /* 2501 * Use an "emulated" IRE_BROADCAST to tell the transport it 2502 * is a multicast. 2503 * Pass other information that matches 2504 * the ipif (e.g. the source address). 2505 * 2506 * conn_multicast_ill is only used for IPv6 packets 2507 */ 2508 mutex_enter(&connp->conn_lock); 2509 if (connp->conn_multicast_ill != NULL) { 2510 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2511 zoneid, 0, &ipif); 2512 } else { 2513 /* Look for default like ip_wput_v6 */ 2514 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2515 } 2516 mutex_exit(&connp->conn_lock); 2517 if (ipif == NULL || ire_requested || 2518 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2519 if (ipif != NULL) 2520 ipif_refrele(ipif); 2521 if (ip_debug > 2) { 2522 /* ip1dbg */ 2523 pr_addr_dbg("ip_bind_connected_v6: bad " 2524 "connected multicast %s\n", AF_INET6, 2525 v6dst); 2526 } 2527 error = ENETUNREACH; 2528 goto bad_addr; 2529 } 2530 if (ipif != NULL) 2531 ipif_refrele(ipif); 2532 } else { 2533 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2534 NULL, &sire, zoneid, tsl, 2535 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2536 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2537 ipst); 2538 /* 2539 * We also prevent ire's with src address INADDR_ANY to 2540 * be used, which are created temporarily for 
2541 * sending out packets from endpoints that have 2542 * conn_unspec_src set. 2543 */ 2544 if (dst_ire == NULL || 2545 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2546 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2547 /* 2548 * When verifying destination reachability, we always 2549 * complain. 2550 * 2551 * When not verifying destination reachability but we 2552 * found an IRE, i.e. the destination is reachable, 2553 * then the other tests still apply and we complain. 2554 */ 2555 if (verify_dst || (dst_ire != NULL)) { 2556 if (ip_debug > 2) { 2557 /* ip1dbg */ 2558 pr_addr_dbg("ip_bind_connected_v6: bad" 2559 " connected dst %s\n", AF_INET6, 2560 v6dst); 2561 } 2562 if (dst_ire == NULL || 2563 !(dst_ire->ire_type & IRE_HOST)) { 2564 error = ENETUNREACH; 2565 } else { 2566 error = EHOSTUNREACH; 2567 } 2568 goto bad_addr; 2569 } 2570 } 2571 } 2572 2573 /* 2574 * If the app does a connect(), it means that it will most likely 2575 * send more than 1 packet to the destination. It makes sense 2576 * to clear the temporary flag. 2577 */ 2578 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2579 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2580 irb_t *irb = dst_ire->ire_bucket; 2581 2582 rw_enter(&irb->irb_lock, RW_WRITER); 2583 /* 2584 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2585 * the lock in order to guarantee irb_tmp_ire_cnt. 2586 */ 2587 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2588 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2589 irb->irb_tmp_ire_cnt--; 2590 } 2591 rw_exit(&irb->irb_lock); 2592 } 2593 2594 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2595 2596 /* 2597 * See if we should notify ULP about MDT; we do this whether or not 2598 * ire_requested is TRUE, in order to handle active connects; MDT 2599 * eligibility tests for passive connects are handled separately 2600 * through tcp_adapt_ire(). 
We do this before the source address 2601 * selection, because dst_ire may change after a call to 2602 * ipif_select_source_v6(). This is a best-effort check, as the 2603 * packet for this connection may not actually go through 2604 * dst_ire->ire_stq, and the exact IRE can only be known after 2605 * calling ip_newroute_v6(). This is why we further check on the 2606 * IRE during Multidata packet transmission in tcp_multisend(). 2607 */ 2608 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2609 dst_ire != NULL && 2610 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2611 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2612 ILL_MDT_CAPABLE(md_ill)) { 2613 md_dst_ire = dst_ire; 2614 IRE_REFHOLD(md_dst_ire); 2615 } 2616 2617 if (dst_ire != NULL && 2618 dst_ire->ire_type == IRE_LOCAL && 2619 dst_ire->ire_zoneid != zoneid && 2620 dst_ire->ire_zoneid != ALL_ZONES) { 2621 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2622 zoneid, 0, NULL, 2623 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2624 MATCH_IRE_RJ_BHOLE, ipst); 2625 if (src_ire == NULL) { 2626 error = EHOSTUNREACH; 2627 goto bad_addr; 2628 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2629 if (!(src_ire->ire_type & IRE_HOST)) 2630 error = ENETUNREACH; 2631 else 2632 error = EHOSTUNREACH; 2633 goto bad_addr; 2634 } 2635 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2636 src_ipif = src_ire->ire_ipif; 2637 ipif_refhold(src_ipif); 2638 *v6src = src_ipif->ipif_v6lcl_addr; 2639 } 2640 ire_refrele(src_ire); 2641 src_ire = NULL; 2642 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2643 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2644 *v6src = sire->ire_src_addr_v6; 2645 ire_refrele(dst_ire); 2646 dst_ire = sire; 2647 sire = NULL; 2648 } else if (dst_ire->ire_type == IRE_CACHE && 2649 (dst_ire->ire_flags & RTF_SETSRC)) { 2650 ASSERT(dst_ire->ire_zoneid == zoneid || 2651 dst_ire->ire_zoneid == ALL_ZONES); 2652 *v6src = dst_ire->ire_src_addr_v6; 
2653 } else { 2654 /* 2655 * Pick a source address so that a proper inbound load 2656 * spreading would happen. Use dst_ill specified by the 2657 * app. when socket option or scopeid is set. 2658 */ 2659 int err; 2660 2661 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2662 uint_t if_index; 2663 2664 /* 2665 * Scope id or IPV6_PKTINFO 2666 */ 2667 2668 if_index = ipp->ipp_ifindex; 2669 dst_ill = ill_lookup_on_ifindex( 2670 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2671 ipst); 2672 if (dst_ill == NULL) { 2673 ip1dbg(("ip_bind_connected_v6:" 2674 " bad ifindex %d\n", if_index)); 2675 error = EADDRNOTAVAIL; 2676 goto bad_addr; 2677 } 2678 ill_held = B_TRUE; 2679 } else if (connp->conn_outgoing_ill != NULL) { 2680 /* 2681 * For IPV6_BOUND_IF socket option, 2682 * conn_outgoing_ill should be set 2683 * already in TCP or UDP/ICMP. 2684 */ 2685 dst_ill = conn_get_held_ill(connp, 2686 &connp->conn_outgoing_ill, &err); 2687 if (err == ILL_LOOKUP_FAILED) { 2688 ip1dbg(("ip_bind_connected_v6:" 2689 "no ill for bound_if\n")); 2690 error = EADDRNOTAVAIL; 2691 goto bad_addr; 2692 } 2693 ill_held = B_TRUE; 2694 } else if (dst_ire->ire_stq != NULL) { 2695 /* No need to hold ill here */ 2696 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2697 } else { 2698 /* No need to hold ill here */ 2699 dst_ill = dst_ire->ire_ipif->ipif_ill; 2700 } 2701 if (ip6_asp_can_lookup(ipst)) { 2702 src_ipif = ipif_select_source_v6(dst_ill, 2703 v6dst, B_FALSE, connp->conn_src_preferences, 2704 zoneid); 2705 ip6_asp_table_refrele(ipst); 2706 if (src_ipif == NULL) { 2707 pr_addr_dbg("ip_bind_connected_v6: " 2708 "no usable source address for " 2709 "connection to %s\n", 2710 AF_INET6, v6dst); 2711 error = EADDRNOTAVAIL; 2712 goto bad_addr; 2713 } 2714 *v6src = src_ipif->ipif_v6lcl_addr; 2715 } else { 2716 error = EADDRNOTAVAIL; 2717 goto bad_addr; 2718 } 2719 } 2720 } 2721 2722 /* 2723 * We do ire_route_lookup_v6() here (and not an interface lookup) 2724 * as we assert that v6src should only come from an 2725 
* UP interface for hard binding. 2726 */ 2727 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2728 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2729 2730 /* src_ire must be a local|loopback */ 2731 if (!IRE_IS_LOCAL(src_ire)) { 2732 if (ip_debug > 2) { 2733 /* ip1dbg */ 2734 pr_addr_dbg("ip_bind_connected_v6: bad " 2735 "connected src %s\n", AF_INET6, v6src); 2736 } 2737 error = EADDRNOTAVAIL; 2738 goto bad_addr; 2739 } 2740 2741 /* 2742 * If the source address is a loopback address, the 2743 * destination had best be local or multicast. 2744 * The transports that can't handle multicast will reject 2745 * those addresses. 2746 */ 2747 if (src_ire->ire_type == IRE_LOOPBACK && 2748 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2749 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2750 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2751 error = -1; 2752 goto bad_addr; 2753 } 2754 /* 2755 * Allow setting new policies. For example, disconnects come 2756 * down as ipa_t bind. As we would have set conn_policy_cached 2757 * to B_TRUE before, we should set it to B_FALSE, so that policy 2758 * can change after the disconnect. 2759 */ 2760 connp->conn_policy_cached = B_FALSE; 2761 2762 /* 2763 * The addresses have been verified. Initialize the conn 2764 * before calling the policy as they expect the conns 2765 * initialized. 2766 */ 2767 connp->conn_srcv6 = *v6src; 2768 connp->conn_remv6 = *v6dst; 2769 connp->conn_lport = lport; 2770 connp->conn_fport = fport; 2771 2772 ASSERT(!(ipsec_policy_set && ire_requested)); 2773 if (ire_requested) { 2774 iulp_t *ulp_info = NULL; 2775 2776 /* 2777 * Note that sire will not be NULL if this is an off-link 2778 * connection and there is not cache for that dest yet. 2779 * 2780 * XXX Because of an existing bug, if there are multiple 2781 * default routes, the IRE returned now may not be the actual 2782 * default route used (default routes are chosen in a 2783 * round robin fashion). 
So if the metrics for different 2784 * default routes are different, we may return the wrong 2785 * metrics. This will not be a problem if the existing 2786 * bug is fixed. 2787 */ 2788 if (sire != NULL) 2789 ulp_info = &(sire->ire_uinfo); 2790 2791 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2792 ipst)) { 2793 error = -1; 2794 goto bad_addr; 2795 } 2796 } else if (ipsec_policy_set) { 2797 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2798 error = -1; 2799 goto bad_addr; 2800 } 2801 } 2802 2803 /* 2804 * Cache IPsec policy in this conn. If we have per-socket policy, 2805 * we'll cache that. If we don't, we'll inherit global policy. 2806 * 2807 * We can't insert until the conn reflects the policy. Note that 2808 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2809 * connections where we don't have a policy. This is to prevent 2810 * global policy lookups in the inbound path. 2811 * 2812 * If we insert before we set conn_policy_cached, 2813 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2814 * because global policy cound be non-empty. We normally call 2815 * ipsec_check_policy() for conn_policy_cached connections only if 2816 * conn_in_enforce_policy is set. But in this case, 2817 * conn_policy_cached can get set anytime since we made the 2818 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2819 * is called, which will make the above assumption false. Thus, we 2820 * need to insert after we set conn_policy_cached. 2821 */ 2822 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2823 goto bad_addr; 2824 2825 /* If not fanout_insert this was just an address verification */ 2826 if (fanout_insert) { 2827 /* 2828 * The addresses have been verified. Time to insert in 2829 * the correct fanout list. 2830 */ 2831 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2832 connp->conn_ports, 2833 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2834 } 2835 if (error == 0) { 2836 connp->conn_fully_bound = B_TRUE; 2837 /* 2838 * Our initial checks for MDT have passed; the IRE is not 2839 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2840 * be supporting MDT. Pass the IRE, IPC and ILL into 2841 * ip_mdinfo_return(), which performs further checks 2842 * against them and upon success, returns the MDT info 2843 * mblk which we will attach to the bind acknowledgment. 2844 */ 2845 if (md_dst_ire != NULL) { 2846 mblk_t *mdinfo_mp; 2847 2848 ASSERT(md_ill != NULL); 2849 ASSERT(md_ill->ill_mdt_capab != NULL); 2850 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2851 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2852 if (mp == NULL) { 2853 *mpp = mdinfo_mp; 2854 } else { 2855 linkb(mp, mdinfo_mp); 2856 } 2857 } 2858 } 2859 } 2860 bad_addr: 2861 if (ipsec_policy_set) { 2862 ASSERT(mp != NULL); 2863 freeb(mp); 2864 /* 2865 * As of now assume that nothing else accompanies 2866 * IPSEC_POLICY_SET. 
2867 */ 2868 *mpp = NULL; 2869 } 2870 refrele_and_quit: 2871 if (src_ire != NULL) 2872 IRE_REFRELE(src_ire); 2873 if (dst_ire != NULL) 2874 IRE_REFRELE(dst_ire); 2875 if (sire != NULL) 2876 IRE_REFRELE(sire); 2877 if (src_ipif != NULL) 2878 ipif_refrele(src_ipif); 2879 if (md_dst_ire != NULL) 2880 IRE_REFRELE(md_dst_ire); 2881 if (ill_held && dst_ill != NULL) 2882 ill_refrele(dst_ill); 2883 if (effective_cred != NULL) 2884 crfree(effective_cred); 2885 return (error); 2886 } 2887 2888 /* ARGSUSED */ 2889 int 2890 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2891 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2892 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2893 boolean_t verify_dst, cred_t *cr) 2894 { 2895 int error = 0; 2896 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2897 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2898 2899 ASSERT(connp->conn_af_isv6); 2900 connp->conn_ulp = protocol; 2901 2902 /* For raw socket, the local port is not set. */ 2903 lport = lport != 0 ? lport : connp->conn_lport; 2904 2905 /* 2906 * Bind to local and remote address. Local might be 2907 * unspecified in which case it will be extracted from 2908 * ire_src_addr_v6 2909 */ 2910 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2911 /* Connect to IPv4 address */ 2912 ipaddr_t v4src; 2913 ipaddr_t v4dst; 2914 2915 /* Is the source unspecified or mapped? */ 2916 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2917 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2918 ip1dbg(("ip_proto_bind_connected_v6: " 2919 "dst is mapped, but not the src\n")); 2920 goto bad_addr; 2921 } 2922 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2923 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2924 2925 /* Always verify destination reachability. 
*/ 2926 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2927 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2928 if (error != 0) 2929 goto bad_addr; 2930 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2931 connp->conn_pkt_isv6 = B_FALSE; 2932 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2933 ip1dbg(("ip_proto_bind_connected_v6: " 2934 "src is mapped, but not the dst\n")); 2935 goto bad_addr; 2936 } else { 2937 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2938 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2939 if (error != 0) 2940 goto bad_addr; 2941 connp->conn_pkt_isv6 = B_TRUE; 2942 } 2943 2944 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2945 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2946 2947 /* Send it home. */ 2948 return (0); 2949 2950 bad_addr: 2951 if (error == 0) 2952 error = -TBADADDR; 2953 return (error); 2954 } 2955 2956 /* 2957 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2958 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2959 */ 2960 /* ARGSUSED4 */ 2961 static boolean_t 2962 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2963 iulp_t *ulp_info, ip_stack_t *ipst) 2964 { 2965 mblk_t *mp = *mpp; 2966 ire_t *ret_ire; 2967 2968 ASSERT(mp != NULL); 2969 2970 if (ire != NULL) { 2971 /* 2972 * mp initialized above to IRE_DB_REQ_TYPE 2973 * appended mblk. Its <upper protocol>'s 2974 * job to make sure there is room. 
2975 */ 2976 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2977 return (B_FALSE); 2978 2979 mp->b_datap->db_type = IRE_DB_TYPE; 2980 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2981 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2982 ret_ire = (ire_t *)mp->b_rptr; 2983 if (IN6_IS_ADDR_MULTICAST(dst) || 2984 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 2985 ret_ire->ire_type = IRE_BROADCAST; 2986 ret_ire->ire_addr_v6 = *dst; 2987 } 2988 if (ulp_info != NULL) { 2989 bcopy(ulp_info, &(ret_ire->ire_uinfo), 2990 sizeof (iulp_t)); 2991 } 2992 ret_ire->ire_mp = mp; 2993 } else { 2994 /* 2995 * No IRE was found. Remove IRE mblk. 2996 */ 2997 *mpp = mp->b_cont; 2998 freeb(mp); 2999 } 3000 return (B_TRUE); 3001 } 3002 3003 /* 3004 * Add an ip6i_t header to the front of the mblk. 3005 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3006 * Returns NULL if allocation fails (and frees original message). 3007 * Used in outgoing path when going through ip_newroute_*v6(). 3008 * Used in incoming path to pass ifindex to transports. 3009 */ 3010 mblk_t * 3011 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3012 { 3013 mblk_t *mp1; 3014 ip6i_t *ip6i; 3015 ip6_t *ip6h; 3016 3017 ip6h = (ip6_t *)mp->b_rptr; 3018 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3019 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3020 mp->b_datap->db_ref > 1) { 3021 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3022 if (mp1 == NULL) { 3023 freemsg(mp); 3024 return (NULL); 3025 } 3026 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3027 mp1->b_cont = mp; 3028 mp = mp1; 3029 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3030 } 3031 mp->b_rptr = (uchar_t *)ip6i; 3032 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3033 ip6i->ip6i_nxt = IPPROTO_RAW; 3034 if (ill != NULL) { 3035 ip6i->ip6i_flags = IP6I_IFINDEX; 3036 /* 3037 * If `ill' is in an IPMP group, make sure we use the IPMP 3038 * interface index so that e.g. 
IPV6_RECVPKTINFO will get the 3039 * IPMP interface index and not an underlying interface index. 3040 */ 3041 if (IS_UNDER_IPMP(ill)) 3042 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3043 else 3044 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3045 } else { 3046 ip6i->ip6i_flags = 0; 3047 } 3048 ip6i->ip6i_nexthop = *dst; 3049 return (mp); 3050 } 3051 3052 /* 3053 * Handle protocols with which IP is less intimate. There 3054 * can be more than one stream bound to a particular 3055 * protocol. When this is the case, normally each one gets a copy 3056 * of any incoming packets. 3057 * 3058 * Zones notes: 3059 * Packets will be distributed to streams in all zones. This is really only 3060 * useful for ICMPv6 as only applications in the global zone can create raw 3061 * sockets for other protocols. 3062 */ 3063 static void 3064 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3065 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3066 boolean_t mctl_present, zoneid_t zoneid) 3067 { 3068 queue_t *rq; 3069 mblk_t *mp1, *first_mp1; 3070 in6_addr_t dst = ip6h->ip6_dst; 3071 in6_addr_t src = ip6h->ip6_src; 3072 mblk_t *first_mp = mp; 3073 boolean_t secure, shared_addr; 3074 conn_t *connp, *first_connp, *next_connp; 3075 connf_t *connfp; 3076 ip_stack_t *ipst = inill->ill_ipst; 3077 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3078 3079 if (mctl_present) { 3080 mp = first_mp->b_cont; 3081 secure = ipsec_in_is_secure(first_mp); 3082 ASSERT(mp != NULL); 3083 } else { 3084 secure = B_FALSE; 3085 } 3086 3087 shared_addr = (zoneid == ALL_ZONES); 3088 if (shared_addr) { 3089 /* 3090 * We don't allow multilevel ports for raw IP, so no need to 3091 * check for that here. 
3092 */ 3093 zoneid = tsol_packet_to_zoneid(mp); 3094 } 3095 3096 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3097 mutex_enter(&connfp->connf_lock); 3098 connp = connfp->connf_head; 3099 for (connp = connfp->connf_head; connp != NULL; 3100 connp = connp->conn_next) { 3101 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3102 zoneid) && 3103 (!is_system_labeled() || 3104 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3105 connp))) 3106 break; 3107 } 3108 3109 if (connp == NULL) { 3110 /* 3111 * No one bound to this port. Is 3112 * there a client that wants all 3113 * unclaimed datagrams? 3114 */ 3115 mutex_exit(&connfp->connf_lock); 3116 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3117 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3118 nexthdr_offset, mctl_present, zoneid, ipst)) { 3119 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3120 } 3121 3122 return; 3123 } 3124 3125 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3126 3127 CONN_INC_REF(connp); 3128 first_connp = connp; 3129 3130 /* 3131 * XXX: Fix the multiple protocol listeners case. We should not 3132 * be walking the conn->next list here. 3133 */ 3134 connp = connp->conn_next; 3135 for (;;) { 3136 while (connp != NULL) { 3137 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3138 flags, zoneid) && 3139 (!is_system_labeled() || 3140 tsol_receive_local(mp, &dst, IPV6_VERSION, 3141 shared_addr, connp))) 3142 break; 3143 connp = connp->conn_next; 3144 } 3145 3146 /* 3147 * Just copy the data part alone. The mctl part is 3148 * needed just for verifying policy and it is never 3149 * sent up. 3150 */ 3151 if (connp == NULL || 3152 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3153 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3154 /* 3155 * No more intested clients or memory 3156 * allocation failed 3157 */ 3158 connp = first_connp; 3159 break; 3160 } 3161 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3162 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3163 CONN_INC_REF(connp); 3164 mutex_exit(&connfp->connf_lock); 3165 rq = connp->conn_rq; 3166 /* 3167 * For link-local always add ifindex so that transport can set 3168 * sin6_scope_id. Avoid it for ICMP error fanout. 3169 */ 3170 if ((connp->conn_ip_recvpktinfo || 3171 IN6_IS_ADDR_LINKLOCAL(&src)) && 3172 (flags & IP_FF_IPINFO)) { 3173 /* Add header */ 3174 mp1 = ip_add_info_v6(mp1, inill, &dst); 3175 } 3176 if (mp1 == NULL) { 3177 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3178 } else if ( 3179 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3180 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3181 if (flags & IP_FF_RAWIP) { 3182 BUMP_MIB(ill->ill_ip_mib, 3183 rawipIfStatsInOverflows); 3184 } else { 3185 BUMP_MIB(ill->ill_icmp6_mib, 3186 ipv6IfIcmpInOverflows); 3187 } 3188 3189 freemsg(mp1); 3190 } else { 3191 ASSERT(!IPCL_IS_IPTUN(connp)); 3192 3193 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3194 secure) { 3195 first_mp1 = ipsec_check_inbound_policy( 3196 first_mp1, connp, NULL, ip6h, mctl_present); 3197 } 3198 if (first_mp1 != NULL) { 3199 if (mctl_present) 3200 freeb(first_mp1); 3201 BUMP_MIB(ill->ill_ip_mib, 3202 ipIfStatsHCInDelivers); 3203 (connp->conn_recv)(connp, mp1, NULL); 3204 } 3205 } 3206 mutex_enter(&connfp->connf_lock); 3207 /* Follow the next pointer before releasing the conn. */ 3208 next_connp = connp->conn_next; 3209 CONN_DEC_REF(connp); 3210 connp = next_connp; 3211 } 3212 3213 /* Last one. Send it upstream. 
*/ 3214 mutex_exit(&connfp->connf_lock); 3215 3216 /* Initiate IPPF processing */ 3217 if (IP6_IN_IPP(flags, ipst)) { 3218 uint_t ifindex; 3219 3220 mutex_enter(&ill->ill_lock); 3221 ifindex = ill->ill_phyint->phyint_ifindex; 3222 mutex_exit(&ill->ill_lock); 3223 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3224 if (mp == NULL) { 3225 CONN_DEC_REF(connp); 3226 if (mctl_present) 3227 freeb(first_mp); 3228 return; 3229 } 3230 } 3231 3232 /* 3233 * For link-local always add ifindex so that transport can set 3234 * sin6_scope_id. Avoid it for ICMP error fanout. 3235 */ 3236 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3237 (flags & IP_FF_IPINFO)) { 3238 /* Add header */ 3239 mp = ip_add_info_v6(mp, inill, &dst); 3240 if (mp == NULL) { 3241 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3242 CONN_DEC_REF(connp); 3243 if (mctl_present) 3244 freeb(first_mp); 3245 return; 3246 } else if (mctl_present) { 3247 first_mp->b_cont = mp; 3248 } else { 3249 first_mp = mp; 3250 } 3251 } 3252 3253 rq = connp->conn_rq; 3254 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3255 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3256 3257 if (flags & IP_FF_RAWIP) { 3258 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3259 } else { 3260 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3261 } 3262 3263 freemsg(first_mp); 3264 } else { 3265 ASSERT(!IPCL_IS_IPTUN(connp)); 3266 3267 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { 3268 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3269 NULL, ip6h, mctl_present); 3270 if (first_mp == NULL) { 3271 CONN_DEC_REF(connp); 3272 return; 3273 } 3274 } 3275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3276 (connp->conn_recv)(connp, mp, NULL); 3277 if (mctl_present) 3278 freeb(first_mp); 3279 } 3280 CONN_DEC_REF(connp); 3281 } 3282 3283 /* 3284 * Send an ICMP error after patching up the packet appropriately. Returns 3285 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3286 */ 3287 int 3288 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3289 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3290 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3291 { 3292 ip6_t *ip6h; 3293 mblk_t *first_mp; 3294 boolean_t secure; 3295 unsigned char db_type; 3296 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3297 3298 first_mp = mp; 3299 if (mctl_present) { 3300 mp = mp->b_cont; 3301 secure = ipsec_in_is_secure(first_mp); 3302 ASSERT(mp != NULL); 3303 } else { 3304 /* 3305 * If this is an ICMP error being reported - which goes 3306 * up as M_CTLs, we need to convert them to M_DATA till 3307 * we finish checking with global policy because 3308 * ipsec_check_global_policy() assumes M_DATA as clear 3309 * and M_CTL as secure. 3310 */ 3311 db_type = mp->b_datap->db_type; 3312 mp->b_datap->db_type = M_DATA; 3313 secure = B_FALSE; 3314 } 3315 /* 3316 * We are generating an icmp error for some inbound packet. 3317 * Called from all ip_fanout_(udp, tcp, proto) functions. 3318 * Before we generate an error, check with global policy 3319 * to see whether this is allowed to enter the system. As 3320 * there is no "conn", we are checking with global policy. 
3321 */ 3322 ip6h = (ip6_t *)mp->b_rptr; 3323 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3324 first_mp = ipsec_check_global_policy(first_mp, NULL, 3325 NULL, ip6h, mctl_present, ipst->ips_netstack); 3326 if (first_mp == NULL) 3327 return (0); 3328 } 3329 3330 if (!mctl_present) 3331 mp->b_datap->db_type = db_type; 3332 3333 if (flags & IP_FF_SEND_ICMP) { 3334 if (flags & IP_FF_HDR_COMPLETE) { 3335 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3336 freemsg(first_mp); 3337 return (1); 3338 } 3339 } 3340 switch (icmp_type) { 3341 case ICMP6_DST_UNREACH: 3342 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3343 B_FALSE, B_FALSE, zoneid, ipst); 3344 break; 3345 case ICMP6_PARAM_PROB: 3346 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3347 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3348 break; 3349 default: 3350 #ifdef DEBUG 3351 panic("ip_fanout_send_icmp_v6: wrong type"); 3352 /*NOTREACHED*/ 3353 #else 3354 freemsg(first_mp); 3355 break; 3356 #endif 3357 } 3358 } else { 3359 freemsg(first_mp); 3360 return (0); 3361 } 3362 3363 return (1); 3364 } 3365 3366 /* 3367 * Fanout for TCP packets 3368 * The caller puts <fport, lport> in the ports parameter. 3369 */ 3370 static void 3371 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3372 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3373 { 3374 mblk_t *first_mp; 3375 boolean_t secure; 3376 conn_t *connp; 3377 tcph_t *tcph; 3378 boolean_t syn_present = B_FALSE; 3379 ip_stack_t *ipst = inill->ill_ipst; 3380 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3381 3382 first_mp = mp; 3383 if (mctl_present) { 3384 mp = first_mp->b_cont; 3385 secure = ipsec_in_is_secure(first_mp); 3386 ASSERT(mp != NULL); 3387 } else { 3388 secure = B_FALSE; 3389 } 3390 3391 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3392 3393 if (connp == NULL || 3394 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3395 /* 3396 * No hard-bound match. 
Send Reset. 3397 */ 3398 dblk_t *dp = mp->b_datap; 3399 uint32_t ill_index; 3400 3401 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3402 3403 /* Initiate IPPf processing, if needed. */ 3404 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3405 (flags & IP6_NO_IPPOLICY)) { 3406 ill_index = ill->ill_phyint->phyint_ifindex; 3407 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3408 if (first_mp == NULL) { 3409 if (connp != NULL) 3410 CONN_DEC_REF(connp); 3411 return; 3412 } 3413 } 3414 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3415 if (connp != NULL) { 3416 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3417 ipst->ips_netstack->netstack_tcp, connp); 3418 CONN_DEC_REF(connp); 3419 } else { 3420 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3421 ipst->ips_netstack->netstack_tcp, NULL); 3422 } 3423 3424 return; 3425 } 3426 3427 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3428 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3429 if (IPCL_IS_TCP(connp)) { 3430 squeue_t *sqp; 3431 3432 /* 3433 * If the queue belongs to a conn, and fused tcp 3434 * loopback is enabled, assign the eager's squeue 3435 * to be that of the active connect's. 3436 */ 3437 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3438 CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) && 3439 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3440 !secure && 3441 !IP6_IN_IPP(flags, ipst)) { 3442 ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); 3443 sqp = Q_TO_CONN(q)->conn_sqp; 3444 } else { 3445 sqp = IP_SQUEUE_GET(lbolt); 3446 } 3447 3448 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3449 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3450 3451 /* 3452 * db_cksumstuff is unused in the incoming 3453 * path; Thus store the ifindex here. It will 3454 * be cleared in tcp_conn_create_v6(). 
3455 */ 3456 DB_CKSUMSTUFF(mp) = 3457 (intptr_t)ill->ill_phyint->phyint_ifindex; 3458 syn_present = B_TRUE; 3459 } 3460 } 3461 3462 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3463 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3464 if ((flags & TH_RST) || (flags & TH_URG)) { 3465 CONN_DEC_REF(connp); 3466 freemsg(first_mp); 3467 return; 3468 } 3469 if (flags & TH_ACK) { 3470 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3471 ipst->ips_netstack->netstack_tcp, connp); 3472 CONN_DEC_REF(connp); 3473 return; 3474 } 3475 3476 CONN_DEC_REF(connp); 3477 freemsg(first_mp); 3478 return; 3479 } 3480 3481 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3482 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3483 NULL, ip6h, mctl_present); 3484 if (first_mp == NULL) { 3485 CONN_DEC_REF(connp); 3486 return; 3487 } 3488 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3489 ASSERT(syn_present); 3490 if (mctl_present) { 3491 ASSERT(first_mp != mp); 3492 first_mp->b_datap->db_struioflag |= 3493 STRUIO_POLICY; 3494 } else { 3495 ASSERT(first_mp == mp); 3496 mp->b_datap->db_struioflag &= 3497 ~STRUIO_EAGER; 3498 mp->b_datap->db_struioflag |= 3499 STRUIO_POLICY; 3500 } 3501 } else { 3502 /* 3503 * Discard first_mp early since we're dealing with a 3504 * fully-connected conn_t and tcp doesn't do policy in 3505 * this case. Also, if someone is bound to IPPROTO_TCP 3506 * over raw IP, they don't expect to see a M_CTL. 
3507 */ 3508 if (mctl_present) { 3509 freeb(first_mp); 3510 mctl_present = B_FALSE; 3511 } 3512 first_mp = mp; 3513 } 3514 } 3515 3516 /* Initiate IPPF processing */ 3517 if (IP6_IN_IPP(flags, ipst)) { 3518 uint_t ifindex; 3519 3520 mutex_enter(&ill->ill_lock); 3521 ifindex = ill->ill_phyint->phyint_ifindex; 3522 mutex_exit(&ill->ill_lock); 3523 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3524 if (mp == NULL) { 3525 CONN_DEC_REF(connp); 3526 if (mctl_present) { 3527 freeb(first_mp); 3528 } 3529 return; 3530 } else if (mctl_present) { 3531 /* 3532 * ip_add_info_v6 might return a new mp. 3533 */ 3534 ASSERT(first_mp != mp); 3535 first_mp->b_cont = mp; 3536 } else { 3537 first_mp = mp; 3538 } 3539 } 3540 3541 /* 3542 * For link-local always add ifindex so that TCP can bind to that 3543 * interface. Avoid it for ICMP error fanout. 3544 */ 3545 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3546 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3547 (flags & IP_FF_IPINFO))) { 3548 /* Add header */ 3549 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3550 if (mp == NULL) { 3551 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3552 CONN_DEC_REF(connp); 3553 if (mctl_present) 3554 freeb(first_mp); 3555 return; 3556 } else if (mctl_present) { 3557 ASSERT(first_mp != mp); 3558 first_mp->b_cont = mp; 3559 } else { 3560 first_mp = mp; 3561 } 3562 } 3563 3564 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3565 if (IPCL_IS_TCP(connp)) { 3566 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3567 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3568 } else { 3569 /* SOCK_RAW, IPPROTO_TCP case */ 3570 (connp->conn_recv)(connp, first_mp, NULL); 3571 CONN_DEC_REF(connp); 3572 } 3573 } 3574 3575 /* 3576 * Fanout for UDP packets. 3577 * The caller puts <fport, lport> in the ports parameter. 3578 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3579 * 3580 * If SO_REUSEADDR is set all multicast and broadcast packets 3581 * will be delivered to all streams bound to the same port. 3582 * 3583 * Zones notes: 3584 * Multicast packets will be distributed to streams in all zones. 3585 */ 3586 static void 3587 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3588 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3589 zoneid_t zoneid) 3590 { 3591 uint32_t dstport, srcport; 3592 in6_addr_t dst; 3593 mblk_t *first_mp; 3594 boolean_t secure; 3595 conn_t *connp; 3596 connf_t *connfp; 3597 conn_t *first_conn; 3598 conn_t *next_conn; 3599 mblk_t *mp1, *first_mp1; 3600 in6_addr_t src; 3601 boolean_t shared_addr; 3602 ip_stack_t *ipst = inill->ill_ipst; 3603 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3604 3605 first_mp = mp; 3606 if (mctl_present) { 3607 mp = first_mp->b_cont; 3608 secure = ipsec_in_is_secure(first_mp); 3609 ASSERT(mp != NULL); 3610 } else { 3611 secure = B_FALSE; 3612 } 3613 3614 /* Extract ports in net byte order */ 3615 dstport = htons(ntohl(ports) & 0xFFFF); 3616 srcport = htons(ntohl(ports) >> 16); 3617 dst = ip6h->ip6_dst; 3618 src = ip6h->ip6_src; 3619 3620 shared_addr = (zoneid == ALL_ZONES); 3621 if (shared_addr) { 3622 /* 3623 * No need to handle exclusive-stack zones since ALL_ZONES 3624 * only applies to the shared stack. 3625 */ 3626 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3627 /* 3628 * If no shared MLP is found, tsol_mlp_findzone returns 3629 * ALL_ZONES. In that case, we assume it's SLP, and 3630 * search for the zone based on the packet label. 3631 * That will also return ALL_ZONES on failure, but 3632 * we never allow conn_zoneid to be set to ALL_ZONES. 3633 */ 3634 if (zoneid == ALL_ZONES) 3635 zoneid = tsol_packet_to_zoneid(mp); 3636 } 3637 3638 /* Attempt to find a client stream based on destination port. 
*/ 3639 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3640 mutex_enter(&connfp->connf_lock); 3641 connp = connfp->connf_head; 3642 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3643 /* 3644 * Not multicast. Send to the one (first) client we find. 3645 */ 3646 while (connp != NULL) { 3647 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3648 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3649 conn_wantpacket_v6(connp, ill, ip6h, 3650 flags, zoneid)) { 3651 break; 3652 } 3653 connp = connp->conn_next; 3654 } 3655 if (connp == NULL || connp->conn_upq == NULL) 3656 goto notfound; 3657 3658 if (is_system_labeled() && 3659 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3660 connp)) 3661 goto notfound; 3662 3663 /* Found a client */ 3664 CONN_INC_REF(connp); 3665 mutex_exit(&connfp->connf_lock); 3666 3667 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3668 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3669 freemsg(first_mp); 3670 CONN_DEC_REF(connp); 3671 return; 3672 } 3673 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3674 first_mp = ipsec_check_inbound_policy(first_mp, 3675 connp, NULL, ip6h, mctl_present); 3676 if (first_mp == NULL) { 3677 CONN_DEC_REF(connp); 3678 return; 3679 } 3680 } 3681 /* Initiate IPPF processing */ 3682 if (IP6_IN_IPP(flags, ipst)) { 3683 uint_t ifindex; 3684 3685 mutex_enter(&ill->ill_lock); 3686 ifindex = ill->ill_phyint->phyint_ifindex; 3687 mutex_exit(&ill->ill_lock); 3688 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3689 if (mp == NULL) { 3690 CONN_DEC_REF(connp); 3691 if (mctl_present) 3692 freeb(first_mp); 3693 return; 3694 } 3695 } 3696 /* 3697 * For link-local always add ifindex so that 3698 * transport can set sin6_scope_id. Avoid it for 3699 * ICMP error fanout. 
3700 */ 3701 if ((connp->conn_ip_recvpktinfo || 3702 IN6_IS_ADDR_LINKLOCAL(&src)) && 3703 (flags & IP_FF_IPINFO)) { 3704 /* Add header */ 3705 mp = ip_add_info_v6(mp, inill, &dst); 3706 if (mp == NULL) { 3707 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3708 CONN_DEC_REF(connp); 3709 if (mctl_present) 3710 freeb(first_mp); 3711 return; 3712 } else if (mctl_present) { 3713 first_mp->b_cont = mp; 3714 } else { 3715 first_mp = mp; 3716 } 3717 } 3718 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3719 3720 /* Send it upstream */ 3721 (connp->conn_recv)(connp, mp, NULL); 3722 3723 IP6_STAT(ipst, ip6_udp_fannorm); 3724 CONN_DEC_REF(connp); 3725 if (mctl_present) 3726 freeb(first_mp); 3727 return; 3728 } 3729 3730 while (connp != NULL) { 3731 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3732 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3733 (!is_system_labeled() || 3734 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3735 connp))) 3736 break; 3737 connp = connp->conn_next; 3738 } 3739 3740 if (connp == NULL || connp->conn_upq == NULL) 3741 goto notfound; 3742 3743 first_conn = connp; 3744 3745 CONN_INC_REF(connp); 3746 connp = connp->conn_next; 3747 for (;;) { 3748 while (connp != NULL) { 3749 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3750 src) && conn_wantpacket_v6(connp, ill, ip6h, 3751 flags, zoneid) && 3752 (!is_system_labeled() || 3753 tsol_receive_local(mp, &dst, IPV6_VERSION, 3754 shared_addr, connp))) 3755 break; 3756 connp = connp->conn_next; 3757 } 3758 /* 3759 * Just copy the data part alone. The mctl part is 3760 * needed just for verifying policy and it is never 3761 * sent up. 3762 */ 3763 if (connp == NULL || 3764 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3765 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3766 /* 3767 * No more interested clients or memory 3768 * allocation failed 3769 */ 3770 connp = first_conn; 3771 break; 3772 } 3773 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3774 CONN_INC_REF(connp); 3775 mutex_exit(&connfp->connf_lock); 3776 /* 3777 * For link-local always add ifindex so that transport 3778 * can set sin6_scope_id. Avoid it for ICMP error 3779 * fanout. 3780 */ 3781 if ((connp->conn_ip_recvpktinfo || 3782 IN6_IS_ADDR_LINKLOCAL(&src)) && 3783 (flags & IP_FF_IPINFO)) { 3784 /* Add header */ 3785 mp1 = ip_add_info_v6(mp1, inill, &dst); 3786 } 3787 /* mp1 could have changed */ 3788 if (mctl_present) 3789 first_mp1->b_cont = mp1; 3790 else 3791 first_mp1 = mp1; 3792 if (mp1 == NULL) { 3793 if (mctl_present) 3794 freeb(first_mp1); 3795 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3796 goto next_one; 3797 } 3798 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3799 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3800 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3801 freemsg(first_mp1); 3802 goto next_one; 3803 } 3804 3805 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3806 first_mp1 = ipsec_check_inbound_policy 3807 (first_mp1, connp, NULL, ip6h, 3808 mctl_present); 3809 } 3810 if (first_mp1 != NULL) { 3811 if (mctl_present) 3812 freeb(first_mp1); 3813 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3814 3815 /* Send it upstream */ 3816 (connp->conn_recv)(connp, mp1, NULL); 3817 } 3818 next_one: 3819 mutex_enter(&connfp->connf_lock); 3820 /* Follow the next pointer before releasing the conn. */ 3821 next_conn = connp->conn_next; 3822 IP6_STAT(ipst, ip6_udp_fanmb); 3823 CONN_DEC_REF(connp); 3824 connp = next_conn; 3825 } 3826 3827 /* Last one. Send it upstream. 
*/ 3828 mutex_exit(&connfp->connf_lock); 3829 3830 /* Initiate IPPF processing */ 3831 if (IP6_IN_IPP(flags, ipst)) { 3832 uint_t ifindex; 3833 3834 mutex_enter(&ill->ill_lock); 3835 ifindex = ill->ill_phyint->phyint_ifindex; 3836 mutex_exit(&ill->ill_lock); 3837 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3838 if (mp == NULL) { 3839 CONN_DEC_REF(connp); 3840 if (mctl_present) { 3841 freeb(first_mp); 3842 } 3843 return; 3844 } 3845 } 3846 3847 /* 3848 * For link-local always add ifindex so that transport can set 3849 * sin6_scope_id. Avoid it for ICMP error fanout. 3850 */ 3851 if ((connp->conn_ip_recvpktinfo || 3852 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3853 /* Add header */ 3854 mp = ip_add_info_v6(mp, inill, &dst); 3855 if (mp == NULL) { 3856 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3857 CONN_DEC_REF(connp); 3858 if (mctl_present) 3859 freeb(first_mp); 3860 return; 3861 } else if (mctl_present) { 3862 first_mp->b_cont = mp; 3863 } else { 3864 first_mp = mp; 3865 } 3866 } 3867 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3868 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3869 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3870 freemsg(mp); 3871 } else { 3872 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3873 first_mp = ipsec_check_inbound_policy(first_mp, 3874 connp, NULL, ip6h, mctl_present); 3875 if (first_mp == NULL) { 3876 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3877 CONN_DEC_REF(connp); 3878 return; 3879 } 3880 } 3881 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3882 3883 /* Send it upstream */ 3884 (connp->conn_recv)(connp, mp, NULL); 3885 } 3886 IP6_STAT(ipst, ip6_udp_fanmb); 3887 CONN_DEC_REF(connp); 3888 if (mctl_present) 3889 freeb(first_mp); 3890 return; 3891 3892 notfound: 3893 mutex_exit(&connfp->connf_lock); 3894 /* 3895 * No one bound to this port. Is 3896 * there a client that wants all 3897 * unclaimed datagrams? 
3898 */ 3899 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3900 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3901 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3902 zoneid); 3903 } else { 3904 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3905 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3906 mctl_present, zoneid, ipst)) { 3907 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3908 } 3909 } 3910 } 3911 3912 /* 3913 * int ip_find_hdr_v6() 3914 * 3915 * This routine is used by the upper layer protocols and the IP tunnel 3916 * module to: 3917 * - Set extension header pointers to appropriate locations 3918 * - Determine IPv6 header length and return it 3919 * - Return a pointer to the last nexthdr value 3920 * 3921 * The caller must initialize ipp_fields. 3922 * 3923 * NOTE: If multiple extension headers of the same type are present, 3924 * ip_find_hdr_v6() will set the respective extension header pointers 3925 * to the first one that it encounters in the IPv6 header. It also 3926 * skips fragment headers. This routine deals with malformed packets 3927 * of various sorts in which case the returned length is up to the 3928 * malformed part. 3929 */ 3930 int 3931 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3932 { 3933 uint_t length, ehdrlen; 3934 uint8_t nexthdr; 3935 uint8_t *whereptr, *endptr; 3936 ip6_dest_t *tmpdstopts; 3937 ip6_rthdr_t *tmprthdr; 3938 ip6_hbh_t *tmphopopts; 3939 ip6_frag_t *tmpfraghdr; 3940 3941 length = IPV6_HDR_LEN; 3942 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3943 endptr = mp->b_wptr; 3944 3945 nexthdr = ip6h->ip6_nxt; 3946 while (whereptr < endptr) { 3947 /* Is there enough left for len + nexthdr? 
*/ 3948 if (whereptr + MIN_EHDR_LEN > endptr) 3949 goto done; 3950 3951 switch (nexthdr) { 3952 case IPPROTO_HOPOPTS: 3953 tmphopopts = (ip6_hbh_t *)whereptr; 3954 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3955 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3956 goto done; 3957 nexthdr = tmphopopts->ip6h_nxt; 3958 /* return only 1st hbh */ 3959 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3960 ipp->ipp_fields |= IPPF_HOPOPTS; 3961 ipp->ipp_hopopts = tmphopopts; 3962 ipp->ipp_hopoptslen = ehdrlen; 3963 } 3964 break; 3965 case IPPROTO_DSTOPTS: 3966 tmpdstopts = (ip6_dest_t *)whereptr; 3967 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3968 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3969 goto done; 3970 nexthdr = tmpdstopts->ip6d_nxt; 3971 /* 3972 * ipp_dstopts is set to the destination header after a 3973 * routing header. 3974 * Assume it is a post-rthdr destination header 3975 * and adjust when we find an rthdr. 3976 */ 3977 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3978 ipp->ipp_fields |= IPPF_DSTOPTS; 3979 ipp->ipp_dstopts = tmpdstopts; 3980 ipp->ipp_dstoptslen = ehdrlen; 3981 } 3982 break; 3983 case IPPROTO_ROUTING: 3984 tmprthdr = (ip6_rthdr_t *)whereptr; 3985 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3986 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3987 goto done; 3988 nexthdr = tmprthdr->ip6r_nxt; 3989 /* return only 1st rthdr */ 3990 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3991 ipp->ipp_fields |= IPPF_RTHDR; 3992 ipp->ipp_rthdr = tmprthdr; 3993 ipp->ipp_rthdrlen = ehdrlen; 3994 } 3995 /* 3996 * Make any destination header we've seen be a 3997 * pre-rthdr destination header. 
3998 */ 3999 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4000 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4001 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4002 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 4003 ipp->ipp_dstopts = NULL; 4004 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 4005 ipp->ipp_dstoptslen = 0; 4006 } 4007 break; 4008 case IPPROTO_FRAGMENT: 4009 tmpfraghdr = (ip6_frag_t *)whereptr; 4010 ehdrlen = sizeof (ip6_frag_t); 4011 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4012 goto done; 4013 nexthdr = tmpfraghdr->ip6f_nxt; 4014 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4015 ipp->ipp_fields |= IPPF_FRAGHDR; 4016 ipp->ipp_fraghdr = tmpfraghdr; 4017 ipp->ipp_fraghdrlen = ehdrlen; 4018 } 4019 break; 4020 case IPPROTO_NONE: 4021 default: 4022 goto done; 4023 } 4024 length += ehdrlen; 4025 whereptr += ehdrlen; 4026 } 4027 done: 4028 if (nexthdrp != NULL) 4029 *nexthdrp = nexthdr; 4030 return (length); 4031 } 4032 4033 int 4034 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4035 { 4036 ire_t *ire; 4037 4038 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4039 ire = ire_lookup_local_v6(zoneid, ipst); 4040 if (ire == NULL) { 4041 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4042 return (1); 4043 } 4044 ip6h->ip6_src = ire->ire_addr_v6; 4045 ire_refrele(ire); 4046 } 4047 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4048 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4049 return (0); 4050 } 4051 4052 /* 4053 * Try to determine where and what are the IPv6 header length and 4054 * pointer to nexthdr value for the upper layer protocol (or an 4055 * unknown next hdr). 4056 * 4057 * Parameters returns a pointer to the nexthdr value; 4058 * Must handle malformed packets of various sorts. 4059 * Function returns failure for malformed cases. 
4060 */ 4061 boolean_t 4062 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4063 uint8_t **nexthdrpp) 4064 { 4065 uint16_t length; 4066 uint_t ehdrlen; 4067 uint8_t *nexthdrp; 4068 uint8_t *whereptr; 4069 uint8_t *endptr; 4070 ip6_dest_t *desthdr; 4071 ip6_rthdr_t *rthdr; 4072 ip6_frag_t *fraghdr; 4073 4074 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4075 length = IPV6_HDR_LEN; 4076 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4077 endptr = mp->b_wptr; 4078 4079 nexthdrp = &ip6h->ip6_nxt; 4080 while (whereptr < endptr) { 4081 /* Is there enough left for len + nexthdr? */ 4082 if (whereptr + MIN_EHDR_LEN > endptr) 4083 break; 4084 4085 switch (*nexthdrp) { 4086 case IPPROTO_HOPOPTS: 4087 case IPPROTO_DSTOPTS: 4088 /* Assumes the headers are identical for hbh and dst */ 4089 desthdr = (ip6_dest_t *)whereptr; 4090 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4091 if ((uchar_t *)desthdr + ehdrlen > endptr) 4092 return (B_FALSE); 4093 nexthdrp = &desthdr->ip6d_nxt; 4094 break; 4095 case IPPROTO_ROUTING: 4096 rthdr = (ip6_rthdr_t *)whereptr; 4097 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4098 if ((uchar_t *)rthdr + ehdrlen > endptr) 4099 return (B_FALSE); 4100 nexthdrp = &rthdr->ip6r_nxt; 4101 break; 4102 case IPPROTO_FRAGMENT: 4103 fraghdr = (ip6_frag_t *)whereptr; 4104 ehdrlen = sizeof (ip6_frag_t); 4105 if ((uchar_t *)&fraghdr[1] > endptr) 4106 return (B_FALSE); 4107 nexthdrp = &fraghdr->ip6f_nxt; 4108 break; 4109 case IPPROTO_NONE: 4110 /* No next header means we're finished */ 4111 default: 4112 *hdr_length_ptr = length; 4113 *nexthdrpp = nexthdrp; 4114 return (B_TRUE); 4115 } 4116 length += ehdrlen; 4117 whereptr += ehdrlen; 4118 *hdr_length_ptr = length; 4119 *nexthdrpp = nexthdrp; 4120 } 4121 switch (*nexthdrp) { 4122 case IPPROTO_HOPOPTS: 4123 case IPPROTO_DSTOPTS: 4124 case IPPROTO_ROUTING: 4125 case IPPROTO_FRAGMENT: 4126 /* 4127 * If any know extension headers are still to be processed, 4128 * the 
packet's malformed (or at least all the IP header(s) are 4129 * not in the same mblk - and that should never happen. 4130 */ 4131 return (B_FALSE); 4132 4133 default: 4134 /* 4135 * If we get here, we know that all of the IP headers were in 4136 * the same mblk, even if the ULP header is in the next mblk. 4137 */ 4138 *hdr_length_ptr = length; 4139 *nexthdrpp = nexthdrp; 4140 return (B_TRUE); 4141 } 4142 } 4143 4144 /* 4145 * Return the length of the IPv6 related headers (including extension headers) 4146 * Returns a length even if the packet is malformed. 4147 */ 4148 int 4149 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4150 { 4151 uint16_t hdr_len; 4152 uint8_t *nexthdrp; 4153 4154 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4155 return (hdr_len); 4156 } 4157 4158 /* 4159 * IPv6 - 4160 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4161 * to send out a packet to a destination address for which we do not have 4162 * specific routing information. 4163 * 4164 * Handle non-multicast packets. If ill is non-NULL the match is done 4165 * for that ill. 4166 * 4167 * When a specific ill is specified (using IPV6_PKTINFO, 4168 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4169 * on routing entries (ftable and ctable) that have a matching 4170 * ire->ire_ipif->ipif_ill. Thus this can only be used 4171 * for destinations that are on-link for the specific ill 4172 * and that can appear on multiple links. Thus it is useful 4173 * for multicast destinations, link-local destinations, and 4174 * at some point perhaps for site-local destinations (if the 4175 * node sits at a site boundary). 4176 * We create the cache entries in the regular ctable since 4177 * it can not "confuse" things for other destinations. 4178 * 4179 * NOTE : These are the scopes of some of the variables that point at IRE, 4180 * which needs to be followed while making any future modifications 4181 * to avoid memory leaks. 
4182 * 4183 * - ire and sire are the entries looked up initially by 4184 * ire_ftable_lookup_v6. 4185 * - ipif_ire is used to hold the interface ire associated with 4186 * the new cache ire. But it's scope is limited, so we always REFRELE 4187 * it before branching out to error paths. 4188 * - save_ire is initialized before ire_create, so that ire returned 4189 * by ire_create will not over-write the ire. We REFRELE save_ire 4190 * before breaking out of the switch. 4191 * 4192 * Thus on failures, we have to REFRELE only ire and sire, if they 4193 * are not NULL. 4194 */ 4195 /* ARGSUSED */ 4196 void 4197 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4198 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4199 { 4200 in6_addr_t v6gw; 4201 in6_addr_t dst; 4202 ire_t *ire = NULL; 4203 ipif_t *src_ipif = NULL; 4204 ill_t *dst_ill = NULL; 4205 ire_t *sire = NULL; 4206 ire_t *save_ire; 4207 ip6_t *ip6h; 4208 int err = 0; 4209 mblk_t *first_mp; 4210 ipsec_out_t *io; 4211 ushort_t ire_marks = 0; 4212 int match_flags; 4213 ire_t *first_sire = NULL; 4214 mblk_t *copy_mp = NULL; 4215 mblk_t *xmit_mp = NULL; 4216 in6_addr_t save_dst; 4217 uint32_t multirt_flags = 4218 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4219 boolean_t multirt_is_resolvable; 4220 boolean_t multirt_resolve_next; 4221 boolean_t need_rele = B_FALSE; 4222 boolean_t ip6_asp_table_held = B_FALSE; 4223 tsol_ire_gw_secattr_t *attrp = NULL; 4224 tsol_gcgrp_t *gcgrp = NULL; 4225 tsol_gcgrp_addr_t ga; 4226 4227 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4228 4229 first_mp = mp; 4230 if (mp->b_datap->db_type == M_CTL) { 4231 mp = mp->b_cont; 4232 io = (ipsec_out_t *)first_mp->b_rptr; 4233 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4234 } else { 4235 io = NULL; 4236 } 4237 4238 ip6h = (ip6_t *)mp->b_rptr; 4239 4240 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4241 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4242 goto icmp_err_ret; 4243 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4244 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4245 goto icmp_err_ret; 4246 } 4247 4248 /* 4249 * If this IRE is created for forwarding or it is not for 4250 * TCP traffic, mark it as temporary. 4251 * 4252 * Is it sufficient just to check the next header?? 4253 */ 4254 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4255 ire_marks |= IRE_MARK_TEMPORARY; 4256 4257 /* 4258 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4259 * chain until it gets the most specific information available. 4260 * For example, we know that there is no IRE_CACHE for this dest, 4261 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4262 * ire_ftable_lookup_v6 will look up the gateway, etc. 4263 */ 4264 4265 if (ill == NULL) { 4266 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4267 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4268 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4269 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4270 match_flags, ipst); 4271 } else { 4272 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4273 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4274 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4275 4276 /* 4277 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4278 * tied to an underlying interface, IS_UNDER_IPMP() may be 4279 * true even when building IREs that will be used for data 4280 * traffic. As such, use the packet's source address to 4281 * determine whether the traffic is test traffic, and set 4282 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4283 */ 4284 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4285 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4286 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4287 } 4288 4289 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4290 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4291 } 4292 4293 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4294 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4295 4296 /* 4297 * We enter a loop that will be run only once in most cases. 4298 * The loop is re-entered in the case where the destination 4299 * can be reached through multiple RTF_MULTIRT-flagged routes. 4300 * The intention is to compute multiple routes to a single 4301 * destination in a single ip_newroute_v6 call. 4302 * The information is contained in sire->ire_flags. 4303 */ 4304 do { 4305 multirt_resolve_next = B_FALSE; 4306 4307 if (dst_ill != NULL) { 4308 ill_refrele(dst_ill); 4309 dst_ill = NULL; 4310 } 4311 if (src_ipif != NULL) { 4312 ipif_refrele(src_ipif); 4313 src_ipif = NULL; 4314 } 4315 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4316 ip3dbg(("ip_newroute_v6: starting new resolution " 4317 "with first_mp %p, tag %d\n", 4318 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4319 4320 /* 4321 * We check if there are trailing unresolved routes for 4322 * the destination contained in sire. 4323 */ 4324 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4325 &sire, multirt_flags, msg_getlabel(mp), ipst); 4326 4327 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4328 "ire %p, sire %p\n", 4329 multirt_is_resolvable, (void *)ire, (void *)sire)); 4330 4331 if (!multirt_is_resolvable) { 4332 /* 4333 * No more multirt routes to resolve; give up 4334 * (all routes resolved or no more resolvable 4335 * routes). 
4336 */ 4337 if (ire != NULL) { 4338 ire_refrele(ire); 4339 ire = NULL; 4340 } 4341 } else { 4342 ASSERT(sire != NULL); 4343 ASSERT(ire != NULL); 4344 /* 4345 * We simply use first_sire as a flag that 4346 * indicates if a resolvable multirt route has 4347 * already been found during the preceding 4348 * loops. If it is not the case, we may have 4349 * to send an ICMP error to report that the 4350 * destination is unreachable. We do not 4351 * IRE_REFHOLD first_sire. 4352 */ 4353 if (first_sire == NULL) { 4354 first_sire = sire; 4355 } 4356 } 4357 } 4358 if ((ire == NULL) || (ire == sire)) { 4359 /* 4360 * either ire == NULL (the destination cannot be 4361 * resolved) or ire == sire (the gateway cannot be 4362 * resolved). At this point, there are no more routes 4363 * to resolve for the destination, thus we exit. 4364 */ 4365 if (ip_debug > 3) { 4366 /* ip2dbg */ 4367 pr_addr_dbg("ip_newroute_v6: " 4368 "can't resolve %s\n", AF_INET6, v6dstp); 4369 } 4370 ip3dbg(("ip_newroute_v6: " 4371 "ire %p, sire %p, first_sire %p\n", 4372 (void *)ire, (void *)sire, (void *)first_sire)); 4373 4374 if (sire != NULL) { 4375 ire_refrele(sire); 4376 sire = NULL; 4377 } 4378 4379 if (first_sire != NULL) { 4380 /* 4381 * At least one multirt route has been found 4382 * in the same ip_newroute() call; there is no 4383 * need to report an ICMP error. 4384 * first_sire was not IRE_REFHOLDed. 4385 */ 4386 MULTIRT_DEBUG_UNTAG(first_mp); 4387 freemsg(first_mp); 4388 return; 4389 } 4390 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4391 RTA_DST, ipst); 4392 goto icmp_err_ret; 4393 } 4394 4395 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4396 4397 /* 4398 * Verify that the returned IRE does not have either the 4399 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4400 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4401 */ 4402 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4403 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4404 goto icmp_err_ret; 4405 4406 /* 4407 * Increment the ire_ob_pkt_count field for ire if it is an 4408 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4409 * increment the same for the parent IRE, sire, if it is some 4410 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4411 */ 4412 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4413 UPDATE_OB_PKT_COUNT(ire); 4414 ire->ire_last_used_time = lbolt; 4415 } 4416 4417 if (sire != NULL) { 4418 mutex_enter(&sire->ire_lock); 4419 v6gw = sire->ire_gateway_addr_v6; 4420 mutex_exit(&sire->ire_lock); 4421 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4422 IRE_INTERFACE)) == 0); 4423 UPDATE_OB_PKT_COUNT(sire); 4424 sire->ire_last_used_time = lbolt; 4425 } else { 4426 v6gw = ipv6_all_zeros; 4427 } 4428 4429 /* 4430 * We have a route to reach the destination. Find the 4431 * appropriate ill, then get a source address that matches the 4432 * right scope via ipif_select_source_v6(). 4433 * 4434 * If we are here trying to create an IRE_CACHE for an offlink 4435 * destination and have an IRE_CACHE entry for VNI, then use 4436 * ire_stq instead since VNI's queue is a black hole. 4437 * 4438 * Note: While we pick a dst_ill we are really only interested 4439 * in the ill for load spreading. The source ipif is 4440 * determined by source address selection below. 
4441 */ 4442 if ((ire->ire_type == IRE_CACHE) && 4443 IS_VNI(ire->ire_ipif->ipif_ill)) { 4444 dst_ill = ire->ire_stq->q_ptr; 4445 ill_refhold(dst_ill); 4446 } else { 4447 ill_t *ill = ire->ire_ipif->ipif_ill; 4448 4449 if (IS_IPMP(ill)) { 4450 dst_ill = 4451 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4452 } else { 4453 dst_ill = ill; 4454 ill_refhold(dst_ill); 4455 } 4456 } 4457 4458 if (dst_ill == NULL) { 4459 if (ip_debug > 2) { 4460 pr_addr_dbg("ip_newroute_v6 : no dst " 4461 "ill for dst %s\n", AF_INET6, v6dstp); 4462 } 4463 goto icmp_err_ret; 4464 } 4465 4466 if (ill != NULL && dst_ill != ill && 4467 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4468 /* 4469 * We should have found a route matching "ill" 4470 * as we called ire_ftable_lookup_v6 with 4471 * MATCH_IRE_ILL. Rather than asserting when 4472 * there is a mismatch, we just drop the packet. 4473 */ 4474 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4475 "dst_ill %s ill %s\n", dst_ill->ill_name, 4476 ill->ill_name)); 4477 goto icmp_err_ret; 4478 } 4479 4480 /* 4481 * Pick a source address which matches the scope of the 4482 * destination address. 4483 * For RTF_SETSRC routes, the source address is imposed by the 4484 * parent ire (sire). 4485 */ 4486 ASSERT(src_ipif == NULL); 4487 4488 /* 4489 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4490 * tied to the underlying interface, IS_UNDER_IPMP() may be 4491 * true even when building IREs that will be used for data 4492 * traffic. As such, see if the packet's source address is a 4493 * test address, and if so use that test address's ipif for 4494 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4495 * ire_add_v6() can work properly. 
4496 */ 4497 if (ill != NULL && IS_UNDER_IPMP(ill)) 4498 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4499 4500 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4501 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4502 ip6_asp_can_lookup(ipst)) { 4503 /* 4504 * The ire cache entry we're adding is for the 4505 * gateway itself. The source address in this case 4506 * is relative to the gateway's address. 4507 */ 4508 ip6_asp_table_held = B_TRUE; 4509 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4510 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4511 if (src_ipif != NULL) 4512 ire_marks |= IRE_MARK_USESRC_CHECK; 4513 } else if (src_ipif == NULL) { 4514 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4515 /* 4516 * Check that the ipif matching the requested 4517 * source address still exists. 4518 */ 4519 src_ipif = ipif_lookup_addr_v6( 4520 &sire->ire_src_addr_v6, NULL, zoneid, 4521 NULL, NULL, NULL, NULL, ipst); 4522 } 4523 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4524 ip6_asp_table_held = B_TRUE; 4525 src_ipif = ipif_select_source_v6(dst_ill, 4526 v6dstp, B_FALSE, 4527 IPV6_PREFER_SRC_DEFAULT, zoneid); 4528 if (src_ipif != NULL) 4529 ire_marks |= IRE_MARK_USESRC_CHECK; 4530 } 4531 } 4532 4533 if (src_ipif == NULL) { 4534 if (ip_debug > 2) { 4535 /* ip1dbg */ 4536 pr_addr_dbg("ip_newroute_v6: no src for " 4537 "dst %s\n", AF_INET6, v6dstp); 4538 printf("ip_newroute_v6: interface name %s\n", 4539 dst_ill->ill_name); 4540 } 4541 goto icmp_err_ret; 4542 } 4543 4544 if (ip_debug > 3) { 4545 /* ip2dbg */ 4546 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4547 AF_INET6, &v6gw); 4548 } 4549 ip2dbg(("\tire type %s (%d)\n", 4550 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4551 4552 /* 4553 * At this point in ip_newroute_v6(), ire is either the 4554 * IRE_CACHE of the next-hop gateway for an off-subnet 4555 * destination or an IRE_INTERFACE type that should be used 4556 * to resolve an on-subnet destination or an on-subnet 4557 * 
next-hop gateway. 4558 * 4559 * In the IRE_CACHE case, we have the following : 4560 * 4561 * 1) src_ipif - used for getting a source address. 4562 * 4563 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4564 * means packets using this IRE_CACHE will go out on dst_ill. 4565 * 4566 * 3) The IRE sire will point to the prefix that is the longest 4567 * matching route for the destination. These prefix types 4568 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4569 * 4570 * The newly created IRE_CACHE entry for the off-subnet 4571 * destination is tied to both the prefix route and the 4572 * interface route used to resolve the next-hop gateway 4573 * via the ire_phandle and ire_ihandle fields, respectively. 4574 * 4575 * In the IRE_INTERFACE case, we have the following : 4576 * 4577 * 1) src_ipif - used for getting a source address. 4578 * 4579 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4580 * means packets using the IRE_CACHE that we will build 4581 * here will go out on dst_ill. 4582 * 4583 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4584 * to be created will only be tied to the IRE_INTERFACE that 4585 * was derived from the ire_ihandle field. 4586 * 4587 * If sire is non-NULL, it means the destination is off-link 4588 * and we will first create the IRE_CACHE for the gateway. 4589 * Next time through ip_newroute_v6, we will create the 4590 * IRE_CACHE for the final destination as described above. 4591 */ 4592 save_ire = ire; 4593 switch (ire->ire_type) { 4594 case IRE_CACHE: { 4595 ire_t *ipif_ire; 4596 4597 ASSERT(sire != NULL); 4598 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4599 mutex_enter(&ire->ire_lock); 4600 v6gw = ire->ire_gateway_addr_v6; 4601 mutex_exit(&ire->ire_lock); 4602 } 4603 /* 4604 * We need 3 ire's to create a new cache ire for an 4605 * off-link destination from the cache ire of the 4606 * gateway. 4607 * 4608 * 1. The prefix ire 'sire' 4609 * 2. The cache ire of the gateway 'ire' 4610 * 3. 
The interface ire 'ipif_ire' 4611 * 4612 * We have (1) and (2). We lookup (3) below. 4613 * 4614 * If there is no interface route to the gateway, 4615 * it is a race condition, where we found the cache 4616 * but the interface route has been deleted. 4617 */ 4618 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4619 if (ipif_ire == NULL) { 4620 ip1dbg(("ip_newroute_v6:" 4621 "ire_ihandle_lookup_offlink_v6 failed\n")); 4622 goto icmp_err_ret; 4623 } 4624 4625 /* 4626 * Note: the new ire inherits RTF_SETSRC 4627 * and RTF_MULTIRT to propagate these flags from prefix 4628 * to cache. 4629 */ 4630 4631 /* 4632 * Check cached gateway IRE for any security 4633 * attributes; if found, associate the gateway 4634 * credentials group to the destination IRE. 4635 */ 4636 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4637 mutex_enter(&attrp->igsa_lock); 4638 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4639 GCGRP_REFHOLD(gcgrp); 4640 mutex_exit(&attrp->igsa_lock); 4641 } 4642 4643 ire = ire_create_v6( 4644 v6dstp, /* dest address */ 4645 &ipv6_all_ones, /* mask */ 4646 &src_ipif->ipif_v6src_addr, /* source address */ 4647 &v6gw, /* gateway address */ 4648 &save_ire->ire_max_frag, 4649 NULL, /* src nce */ 4650 dst_ill->ill_rq, /* recv-from queue */ 4651 dst_ill->ill_wq, /* send-to queue */ 4652 IRE_CACHE, 4653 src_ipif, 4654 &sire->ire_mask_v6, /* Parent mask */ 4655 sire->ire_phandle, /* Parent handle */ 4656 ipif_ire->ire_ihandle, /* Interface handle */ 4657 sire->ire_flags & /* flags if any */ 4658 (RTF_SETSRC | RTF_MULTIRT), 4659 &(sire->ire_uinfo), 4660 NULL, 4661 gcgrp, 4662 ipst); 4663 4664 if (ire == NULL) { 4665 if (gcgrp != NULL) { 4666 GCGRP_REFRELE(gcgrp); 4667 gcgrp = NULL; 4668 } 4669 ire_refrele(save_ire); 4670 ire_refrele(ipif_ire); 4671 break; 4672 } 4673 4674 /* reference now held by IRE */ 4675 gcgrp = NULL; 4676 4677 ire->ire_marks |= ire_marks; 4678 4679 /* 4680 * Prevent sire and ipif_ire from getting deleted.
The 4681 * newly created ire is tied to both of them via the 4682 * phandle and ihandle respectively. 4683 */ 4684 IRB_REFHOLD(sire->ire_bucket); 4685 /* Has it been removed already ? */ 4686 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4687 IRB_REFRELE(sire->ire_bucket); 4688 ire_refrele(ipif_ire); 4689 ire_refrele(save_ire); 4690 break; 4691 } 4692 4693 IRB_REFHOLD(ipif_ire->ire_bucket); 4694 /* Has it been removed already ? */ 4695 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4696 IRB_REFRELE(ipif_ire->ire_bucket); 4697 IRB_REFRELE(sire->ire_bucket); 4698 ire_refrele(ipif_ire); 4699 ire_refrele(save_ire); 4700 break; 4701 } 4702 4703 xmit_mp = first_mp; 4704 if (ire->ire_flags & RTF_MULTIRT) { 4705 copy_mp = copymsg(first_mp); 4706 if (copy_mp != NULL) { 4707 xmit_mp = copy_mp; 4708 MULTIRT_DEBUG_TAG(first_mp); 4709 } 4710 } 4711 ire_add_then_send(q, ire, xmit_mp); 4712 if (ip6_asp_table_held) { 4713 ip6_asp_table_refrele(ipst); 4714 ip6_asp_table_held = B_FALSE; 4715 } 4716 ire_refrele(save_ire); 4717 4718 /* Assert that sire is not deleted yet. */ 4719 ASSERT(sire->ire_ptpn != NULL); 4720 IRB_REFRELE(sire->ire_bucket); 4721 4722 /* Assert that ipif_ire is not deleted yet. */ 4723 ASSERT(ipif_ire->ire_ptpn != NULL); 4724 IRB_REFRELE(ipif_ire->ire_bucket); 4725 ire_refrele(ipif_ire); 4726 4727 if (copy_mp != NULL) { 4728 /* 4729 * Search for the next unresolved 4730 * multirt route. 4731 */ 4732 copy_mp = NULL; 4733 ipif_ire = NULL; 4734 ire = NULL; 4735 /* re-enter the loop */ 4736 multirt_resolve_next = B_TRUE; 4737 continue; 4738 } 4739 ire_refrele(sire); 4740 ill_refrele(dst_ill); 4741 ipif_refrele(src_ipif); 4742 return; 4743 } 4744 case IRE_IF_NORESOLVER: 4745 /* 4746 * We have what we need to build an IRE_CACHE. 4747 * 4748 * handle the Gated case, where we create 4749 * a NORESOLVER route for loopback. 
4750 */ 4751 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4752 break; 4753 /* 4754 * TSol note: We are creating the ire cache for the 4755 * destination 'dst'. If 'dst' is offlink, going 4756 * through the first hop 'gw', the security attributes 4757 * of 'dst' must be set to point to the gateway 4758 * credentials of gateway 'gw'. If 'dst' is onlink, it 4759 * is possible that 'dst' is a potential gateway that is 4760 * referenced by some route that has some security 4761 * attributes. Thus in the former case, we need to do a 4762 * gcgrp_lookup of 'gw' while in the latter case we 4763 * need to do gcgrp_lookup of 'dst' itself. 4764 */ 4765 ga.ga_af = AF_INET6; 4766 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4767 ga.ga_addr = v6gw; 4768 else 4769 ga.ga_addr = *v6dstp; 4770 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4771 4772 /* 4773 * Note: the new ire inherits sire flags RTF_SETSRC 4774 * and RTF_MULTIRT to propagate those rules from prefix 4775 * to cache. 4776 */ 4777 ire = ire_create_v6( 4778 v6dstp, /* dest address */ 4779 &ipv6_all_ones, /* mask */ 4780 &src_ipif->ipif_v6src_addr, /* source address */ 4781 &v6gw, /* gateway address */ 4782 &save_ire->ire_max_frag, 4783 NULL, /* no src nce */ 4784 dst_ill->ill_rq, /* recv-from queue */ 4785 dst_ill->ill_wq, /* send-to queue */ 4786 IRE_CACHE, 4787 src_ipif, 4788 &save_ire->ire_mask_v6, /* Parent mask */ 4789 (sire != NULL) ? /* Parent handle */ 4790 sire->ire_phandle : 0, 4791 save_ire->ire_ihandle, /* Interface handle */ 4792 (sire != NULL) ? 
/* flags if any */ 4793 sire->ire_flags & 4794 (RTF_SETSRC | RTF_MULTIRT) : 0, 4795 &(save_ire->ire_uinfo), 4796 NULL, 4797 gcgrp, 4798 ipst); 4799 4800 if (ire == NULL) { 4801 if (gcgrp != NULL) { 4802 GCGRP_REFRELE(gcgrp); 4803 gcgrp = NULL; 4804 } 4805 ire_refrele(save_ire); 4806 break; 4807 } 4808 4809 /* reference now held by IRE */ 4810 gcgrp = NULL; 4811 4812 ire->ire_marks |= ire_marks; 4813 4814 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4815 dst = v6gw; 4816 else 4817 dst = *v6dstp; 4818 err = ndp_noresolver(dst_ill, &dst); 4819 if (err != 0) { 4820 ire_refrele(save_ire); 4821 break; 4822 } 4823 4824 /* Prevent save_ire from getting deleted */ 4825 IRB_REFHOLD(save_ire->ire_bucket); 4826 /* Has it been removed already ? */ 4827 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4828 IRB_REFRELE(save_ire->ire_bucket); 4829 ire_refrele(save_ire); 4830 break; 4831 } 4832 4833 xmit_mp = first_mp; 4834 /* 4835 * In case of MULTIRT, a copy of the current packet 4836 * to send is made to further re-enter the 4837 * loop and attempt another route resolution 4838 */ 4839 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4840 copy_mp = copymsg(first_mp); 4841 if (copy_mp != NULL) { 4842 xmit_mp = copy_mp; 4843 MULTIRT_DEBUG_TAG(first_mp); 4844 } 4845 } 4846 ire_add_then_send(q, ire, xmit_mp); 4847 if (ip6_asp_table_held) { 4848 ip6_asp_table_refrele(ipst); 4849 ip6_asp_table_held = B_FALSE; 4850 } 4851 4852 /* Assert that it is not deleted yet. */ 4853 ASSERT(save_ire->ire_ptpn != NULL); 4854 IRB_REFRELE(save_ire->ire_bucket); 4855 ire_refrele(save_ire); 4856 4857 if (copy_mp != NULL) { 4858 /* 4859 * If we found a (no)resolver, we ignore any 4860 * trailing top priority IRE_CACHE in 4861 * further loops. This ensures that we do not 4862 * omit any (no)resolver despite the priority 4863 * in this call. 4864 * IRE_CACHE, if any, will be processed 4865 * by another thread entering ip_newroute(), 4866 * (on resolver response, for example). 
4867 * We use this to force multiple parallel 4868 * resolution as soon as a packet needs to be 4869 * sent. The result is, after one packet 4870 * emission all reachable routes are generally 4871 * resolved. 4872 * Otherwise, complete resolution of MULTIRT 4873 * routes would require several emissions as 4874 * side effect. 4875 */ 4876 multirt_flags &= ~MULTIRT_CACHEGW; 4877 4878 /* 4879 * Search for the next unresolved multirt 4880 * route. 4881 */ 4882 copy_mp = NULL; 4883 save_ire = NULL; 4884 ire = NULL; 4885 /* re-enter the loop */ 4886 multirt_resolve_next = B_TRUE; 4887 continue; 4888 } 4889 4890 /* Don't need sire anymore */ 4891 if (sire != NULL) 4892 ire_refrele(sire); 4893 ill_refrele(dst_ill); 4894 ipif_refrele(src_ipif); 4895 return; 4896 4897 case IRE_IF_RESOLVER: 4898 /* 4899 * We can't build an IRE_CACHE yet, but at least we 4900 * found a resolver that can help. 4901 */ 4902 dst = *v6dstp; 4903 4904 /* 4905 * To be at this point in the code with a non-zero gw 4906 * means that dst is reachable through a gateway that 4907 * we have never resolved. By changing dst to the gw 4908 * addr we resolve the gateway first. When 4909 * ire_add_then_send() tries to put the IP dg to dst, 4910 * it will reenter ip_newroute() at which time we will 4911 * find the IRE_CACHE for the gw and create another 4912 * IRE_CACHE above (for dst itself). 4913 */ 4914 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4915 save_dst = dst; 4916 dst = v6gw; 4917 v6gw = ipv6_all_zeros; 4918 } 4919 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4920 /* 4921 * Ask the external resolver to do its thing. 
4922 * Make an mblk chain in the following form: 4923 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4924 */ 4925 mblk_t *ire_mp; 4926 mblk_t *areq_mp; 4927 areq_t *areq; 4928 in6_addr_t *addrp; 4929 4930 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4931 if (ip6_asp_table_held) { 4932 ip6_asp_table_refrele(ipst); 4933 ip6_asp_table_held = B_FALSE; 4934 } 4935 ire = ire_create_mp_v6( 4936 &dst, /* dest address */ 4937 &ipv6_all_ones, /* mask */ 4938 &src_ipif->ipif_v6src_addr, 4939 /* source address */ 4940 &v6gw, /* gateway address */ 4941 NULL, /* no src nce */ 4942 dst_ill->ill_rq, /* recv-from queue */ 4943 dst_ill->ill_wq, /* send-to queue */ 4944 IRE_CACHE, 4945 src_ipif, 4946 &save_ire->ire_mask_v6, /* Parent mask */ 4947 0, 4948 save_ire->ire_ihandle, 4949 /* Interface handle */ 4950 0, /* flags if any */ 4951 &(save_ire->ire_uinfo), 4952 NULL, 4953 NULL, 4954 ipst); 4955 4956 ire_refrele(save_ire); 4957 if (ire == NULL) { 4958 ip1dbg(("ip_newroute_v6:" 4959 "ire is NULL\n")); 4960 break; 4961 } 4962 4963 if ((sire != NULL) && 4964 (sire->ire_flags & RTF_MULTIRT)) { 4965 /* 4966 * processing a copy of the packet to 4967 * send for further resolution loops 4968 */ 4969 copy_mp = copymsg(first_mp); 4970 if (copy_mp != NULL) 4971 MULTIRT_DEBUG_TAG(copy_mp); 4972 } 4973 ire->ire_marks |= ire_marks; 4974 ire_mp = ire->ire_mp; 4975 /* 4976 * Now create or find an nce for this interface. 4977 * The hw addr will need to be set from 4978 * the reply to the AR_ENTRY_QUERY that 4979 * we're about to send. This will be done in 4980 * ire_add_v6(). 4981 */ 4982 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 4983 switch (err) { 4984 case 0: 4985 /* 4986 * New cache entry created. 4987 * Break, then ask the external 4988 * resolver. 4989 */ 4990 break; 4991 case EINPROGRESS: 4992 /* 4993 * Resolution in progress; 4994 * packet has been queued by 4995 * ndp_resolver(). 4996 */ 4997 ire_delete(ire); 4998 ire = NULL; 4999 /* 5000 * Check if another multirt 5001 * route must be resolved.
5002 */ 5003 if (copy_mp != NULL) { 5004 /* 5005 * If we found a resolver, we 5006 * ignore any trailing top 5007 * priority IRE_CACHE in 5008 * further loops. The reason is 5009 * the same as for noresolver. 5010 */ 5011 multirt_flags &= 5012 ~MULTIRT_CACHEGW; 5013 /* 5014 * Search for the next 5015 * unresolved multirt route. 5016 */ 5017 first_mp = copy_mp; 5018 copy_mp = NULL; 5019 mp = first_mp; 5020 if (mp->b_datap->db_type == 5021 M_CTL) { 5022 mp = mp->b_cont; 5023 } 5024 ASSERT(sire != NULL); 5025 dst = save_dst; 5026 /* 5027 * re-enter the loop 5028 */ 5029 multirt_resolve_next = 5030 B_TRUE; 5031 continue; 5032 } 5033 5034 if (sire != NULL) 5035 ire_refrele(sire); 5036 ill_refrele(dst_ill); 5037 ipif_refrele(src_ipif); 5038 return; 5039 default: 5040 /* 5041 * Transient error; packet will be 5042 * freed. 5043 */ 5044 ire_delete(ire); 5045 ire = NULL; 5046 break; 5047 } 5048 if (err != 0) 5049 break; 5050 /* 5051 * Now set up the AR_ENTRY_QUERY and send it. 5052 */ 5053 areq_mp = ill_arp_alloc(dst_ill, 5054 (uchar_t *)&ipv6_areq_template, 5055 (caddr_t)&dst); 5056 if (areq_mp == NULL) { 5057 ip1dbg(("ip_newroute_v6:" 5058 "areq_mp is NULL\n")); 5059 freemsg(ire_mp); 5060 break; 5061 } 5062 areq = (areq_t *)areq_mp->b_rptr; 5063 addrp = (in6_addr_t *)((char *)areq + 5064 areq->areq_target_addr_offset); 5065 *addrp = dst; 5066 addrp = (in6_addr_t *)((char *)areq + 5067 areq->areq_sender_addr_offset); 5068 *addrp = src_ipif->ipif_v6src_addr; 5069 /* 5070 * link the chain, then send up to the resolver. 5071 */ 5072 linkb(areq_mp, ire_mp); 5073 linkb(areq_mp, mp); 5074 ip1dbg(("ip_newroute_v6:" 5075 "putnext to resolver\n")); 5076 putnext(dst_ill->ill_rq, areq_mp); 5077 /* 5078 * Check if another multirt route 5079 * must be resolved. 5080 */ 5081 ire = NULL; 5082 if (copy_mp != NULL) { 5083 /* 5084 * If we find a resolver, we ignore any 5085 * trailing top priority IRE_CACHE in 5086 * further loops. The reason is the 5087 * same as for noresolver. 
5088 */ 5089 multirt_flags &= ~MULTIRT_CACHEGW; 5090 /* 5091 * Search for the next unresolved 5092 * multirt route. 5093 */ 5094 first_mp = copy_mp; 5095 copy_mp = NULL; 5096 mp = first_mp; 5097 if (mp->b_datap->db_type == M_CTL) { 5098 mp = mp->b_cont; 5099 } 5100 ASSERT(sire != NULL); 5101 dst = save_dst; 5102 /* 5103 * re-enter the loop 5104 */ 5105 multirt_resolve_next = B_TRUE; 5106 continue; 5107 } 5108 5109 if (sire != NULL) 5110 ire_refrele(sire); 5111 ill_refrele(dst_ill); 5112 ipif_refrele(src_ipif); 5113 return; 5114 } 5115 /* 5116 * Non-external resolver case. 5117 * 5118 * TSol note: Please see the note above the 5119 * IRE_IF_NORESOLVER case. 5120 */ 5121 ga.ga_af = AF_INET6; 5122 ga.ga_addr = dst; 5123 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5124 5125 ire = ire_create_v6( 5126 &dst, /* dest address */ 5127 &ipv6_all_ones, /* mask */ 5128 &src_ipif->ipif_v6src_addr, /* source address */ 5129 &v6gw, /* gateway address */ 5130 &save_ire->ire_max_frag, 5131 NULL, /* no src nce */ 5132 dst_ill->ill_rq, /* recv-from queue */ 5133 dst_ill->ill_wq, /* send-to queue */ 5134 IRE_CACHE, 5135 src_ipif, 5136 &save_ire->ire_mask_v6, /* Parent mask */ 5137 0, 5138 save_ire->ire_ihandle, /* Interface handle */ 5139 0, /* flags if any */ 5140 &(save_ire->ire_uinfo), 5141 NULL, 5142 gcgrp, 5143 ipst); 5144 5145 if (ire == NULL) { 5146 if (gcgrp != NULL) { 5147 GCGRP_REFRELE(gcgrp); 5148 gcgrp = NULL; 5149 } 5150 ire_refrele(save_ire); 5151 break; 5152 } 5153 5154 /* reference now held by IRE */ 5155 gcgrp = NULL; 5156 5157 if ((sire != NULL) && 5158 (sire->ire_flags & RTF_MULTIRT)) { 5159 copy_mp = copymsg(first_mp); 5160 if (copy_mp != NULL) 5161 MULTIRT_DEBUG_TAG(copy_mp); 5162 } 5163 5164 ire->ire_marks |= ire_marks; 5165 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5166 switch (err) { 5167 case 0: 5168 /* Prevent save_ire from getting deleted */ 5169 IRB_REFHOLD(save_ire->ire_bucket); 5170 /* Has it been removed already ? 
*/ 5171 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5172 IRB_REFRELE(save_ire->ire_bucket); 5173 ire_refrele(save_ire); 5174 break; 5175 } 5176 5177 /* 5178 * We have a resolved cache entry, 5179 * add in the IRE. 5180 */ 5181 ire_add_then_send(q, ire, first_mp); 5182 if (ip6_asp_table_held) { 5183 ip6_asp_table_refrele(ipst); 5184 ip6_asp_table_held = B_FALSE; 5185 } 5186 5187 /* Assert that it is not deleted yet. */ 5188 ASSERT(save_ire->ire_ptpn != NULL); 5189 IRB_REFRELE(save_ire->ire_bucket); 5190 ire_refrele(save_ire); 5191 /* 5192 * Check if another multirt route 5193 * must be resolved. 5194 */ 5195 ire = NULL; 5196 if (copy_mp != NULL) { 5197 /* 5198 * If we find a resolver, we ignore any 5199 * trailing top priority IRE_CACHE in 5200 * further loops. The reason is the 5201 * same as for noresolver. 5202 */ 5203 multirt_flags &= ~MULTIRT_CACHEGW; 5204 /* 5205 * Search for the next unresolved 5206 * multirt route. 5207 */ 5208 first_mp = copy_mp; 5209 copy_mp = NULL; 5210 mp = first_mp; 5211 if (mp->b_datap->db_type == M_CTL) { 5212 mp = mp->b_cont; 5213 } 5214 ASSERT(sire != NULL); 5215 dst = save_dst; 5216 /* 5217 * re-enter the loop 5218 */ 5219 multirt_resolve_next = B_TRUE; 5220 continue; 5221 } 5222 5223 if (sire != NULL) 5224 ire_refrele(sire); 5225 ill_refrele(dst_ill); 5226 ipif_refrele(src_ipif); 5227 return; 5228 5229 case EINPROGRESS: 5230 /* 5231 * mp was consumed - presumably queued. 5232 * No need for ire, presumably resolution is 5233 * in progress, and ire will be added when the 5234 * address is resolved. 5235 */ 5236 if (ip6_asp_table_held) { 5237 ip6_asp_table_refrele(ipst); 5238 ip6_asp_table_held = B_FALSE; 5239 } 5240 ASSERT(ire->ire_nce == NULL); 5241 ire_delete(ire); 5242 ire_refrele(save_ire); 5243 /* 5244 * Check if another multirt route 5245 * must be resolved. 
5246 */ 5247 ire = NULL; 5248 if (copy_mp != NULL) { 5249 /* 5250 * If we find a resolver, we ignore any 5251 * trailing top priority IRE_CACHE in 5252 * further loops. The reason is the 5253 * same as for noresolver. 5254 */ 5255 multirt_flags &= ~MULTIRT_CACHEGW; 5256 /* 5257 * Search for the next unresolved 5258 * multirt route. 5259 */ 5260 first_mp = copy_mp; 5261 copy_mp = NULL; 5262 mp = first_mp; 5263 if (mp->b_datap->db_type == M_CTL) { 5264 mp = mp->b_cont; 5265 } 5266 ASSERT(sire != NULL); 5267 dst = save_dst; 5268 /* 5269 * re-enter the loop 5270 */ 5271 multirt_resolve_next = B_TRUE; 5272 continue; 5273 } 5274 if (sire != NULL) 5275 ire_refrele(sire); 5276 ill_refrele(dst_ill); 5277 ipif_refrele(src_ipif); 5278 return; 5279 default: 5280 /* Some transient error */ 5281 ASSERT(ire->ire_nce == NULL); 5282 ire_refrele(save_ire); 5283 break; 5284 } 5285 break; 5286 default: 5287 break; 5288 } 5289 if (ip6_asp_table_held) { 5290 ip6_asp_table_refrele(ipst); 5291 ip6_asp_table_held = B_FALSE; 5292 } 5293 } while (multirt_resolve_next); 5294 5295 err_ret: 5296 ip1dbg(("ip_newroute_v6: dropped\n")); 5297 if (src_ipif != NULL) 5298 ipif_refrele(src_ipif); 5299 if (dst_ill != NULL) { 5300 need_rele = B_TRUE; 5301 ill = dst_ill; 5302 } 5303 if (ill != NULL) { 5304 if (mp->b_prev != NULL) { 5305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5306 } else { 5307 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5308 } 5309 5310 if (need_rele) 5311 ill_refrele(ill); 5312 } else { 5313 if (mp->b_prev != NULL) { 5314 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5315 } else { 5316 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5317 } 5318 } 5319 /* Did this packet originate externally? 
*/ 5320 if (mp->b_prev) { 5321 mp->b_next = NULL; 5322 mp->b_prev = NULL; 5323 } 5324 if (copy_mp != NULL) { 5325 MULTIRT_DEBUG_UNTAG(copy_mp); 5326 freemsg(copy_mp); 5327 } 5328 MULTIRT_DEBUG_UNTAG(first_mp); 5329 freemsg(first_mp); 5330 if (ire != NULL) 5331 ire_refrele(ire); 5332 if (sire != NULL) 5333 ire_refrele(sire); 5334 return; 5335 5336 icmp_err_ret: 5337 if (ip6_asp_table_held) 5338 ip6_asp_table_refrele(ipst); 5339 if (src_ipif != NULL) 5340 ipif_refrele(src_ipif); 5341 if (dst_ill != NULL) { 5342 need_rele = B_TRUE; 5343 ill = dst_ill; 5344 } 5345 ip1dbg(("ip_newroute_v6: no route\n")); 5346 if (sire != NULL) 5347 ire_refrele(sire); 5348 /* 5349 * We need to set sire to NULL to avoid double freeing if we 5350 * ever goto err_ret from below. 5351 */ 5352 sire = NULL; 5353 ip6h = (ip6_t *)mp->b_rptr; 5354 /* Skip ip6i_t header if present */ 5355 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5356 /* Make sure the IPv6 header is present */ 5357 if ((mp->b_wptr - (uchar_t *)ip6h) < 5358 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5359 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5360 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5361 goto err_ret; 5362 } 5363 } 5364 mp->b_rptr += sizeof (ip6i_t); 5365 ip6h = (ip6_t *)mp->b_rptr; 5366 } 5367 /* Did this packet originate externally? 
*/ 5368 if (mp->b_prev) { 5369 if (ill != NULL) { 5370 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5371 } else { 5372 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5373 } 5374 mp->b_next = NULL; 5375 mp->b_prev = NULL; 5376 q = WR(q); 5377 } else { 5378 if (ill != NULL) { 5379 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5380 } else { 5381 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5382 } 5383 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5384 /* Failed */ 5385 if (copy_mp != NULL) { 5386 MULTIRT_DEBUG_UNTAG(copy_mp); 5387 freemsg(copy_mp); 5388 } 5389 MULTIRT_DEBUG_UNTAG(first_mp); 5390 freemsg(first_mp); 5391 if (ire != NULL) 5392 ire_refrele(ire); 5393 if (need_rele) 5394 ill_refrele(ill); 5395 return; 5396 } 5397 } 5398 5399 if (need_rele) 5400 ill_refrele(ill); 5401 5402 /* 5403 * At this point we will have ire only if RTF_BLACKHOLE 5404 * or RTF_REJECT flags are set on the IRE. It will not 5405 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5406 */ 5407 if (ire != NULL) { 5408 if (ire->ire_flags & RTF_BLACKHOLE) { 5409 ire_refrele(ire); 5410 if (copy_mp != NULL) { 5411 MULTIRT_DEBUG_UNTAG(copy_mp); 5412 freemsg(copy_mp); 5413 } 5414 MULTIRT_DEBUG_UNTAG(first_mp); 5415 freemsg(first_mp); 5416 return; 5417 } 5418 ire_refrele(ire); 5419 } 5420 if (ip_debug > 3) { 5421 /* ip2dbg */ 5422 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5423 AF_INET6, v6dstp); 5424 } 5425 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5426 B_FALSE, B_FALSE, zoneid, ipst); 5427 } 5428 5429 /* 5430 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5431 * we need to send out a packet to a destination address for which we do not 5432 * have specific routing information. It is only used for multicast packets. 5433 * 5434 * If unspec_src we allow creating an IRE with source address zero. 5435 * ire_send_v6() will delete it after the packet is sent. 
5436 */ 5437 void 5438 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5439 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5440 zoneid_t zoneid) 5441 { 5442 ire_t *ire = NULL; 5443 ipif_t *src_ipif = NULL; 5444 int err = 0; 5445 ill_t *dst_ill = NULL; 5446 ire_t *save_ire; 5447 ipsec_out_t *io; 5448 ill_t *ill; 5449 mblk_t *first_mp; 5450 ire_t *fire = NULL; 5451 mblk_t *copy_mp = NULL; 5452 const in6_addr_t *ire_v6srcp; 5453 boolean_t probe = B_FALSE; 5454 boolean_t multirt_resolve_next; 5455 boolean_t ipif_held = B_FALSE; 5456 boolean_t ill_held = B_FALSE; 5457 boolean_t ip6_asp_table_held = B_FALSE; 5458 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5459 5460 /* 5461 * This loop is run only once in most cases. 5462 * We loop to resolve further routes only when the destination 5463 * can be reached through multiple RTF_MULTIRT-flagged ires. 5464 */ 5465 do { 5466 multirt_resolve_next = B_FALSE; 5467 if (dst_ill != NULL) { 5468 ill_refrele(dst_ill); 5469 dst_ill = NULL; 5470 } 5471 5472 if (src_ipif != NULL) { 5473 ipif_refrele(src_ipif); 5474 src_ipif = NULL; 5475 } 5476 ASSERT(ipif != NULL); 5477 ill = ipif->ipif_ill; 5478 5479 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5480 if (ip_debug > 2) { 5481 /* ip1dbg */ 5482 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5483 AF_INET6, v6dstp); 5484 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5485 ill->ill_name, ipif->ipif_isv6); 5486 } 5487 5488 first_mp = mp; 5489 if (mp->b_datap->db_type == M_CTL) { 5490 mp = mp->b_cont; 5491 io = (ipsec_out_t *)first_mp->b_rptr; 5492 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5493 } else { 5494 io = NULL; 5495 } 5496 5497 /* 5498 * If the interface is a pt-pt interface we look for an 5499 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5500 * local_address and the pt-pt destination address. 5501 * Otherwise we just match the local address. 
5502 */ 5503 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5504 goto err_ret; 5505 } 5506 5507 /* 5508 * We check if an IRE_OFFSUBNET for the addr that goes through 5509 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5510 * RTF_MULTIRT flags must be honored. 5511 */ 5512 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5513 ip2dbg(("ip_newroute_ipif_v6: " 5514 "ipif_lookup_multi_ire_v6(" 5515 "ipif %p, dst %08x) = fire %p\n", 5516 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5517 (void *)fire)); 5518 5519 ASSERT(src_ipif == NULL); 5520 5521 /* 5522 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5523 * tied to the underlying interface, IS_UNDER_IPMP() may be 5524 * true even when building IREs that will be used for data 5525 * traffic. As such, see if the packet's source address is a 5526 * test address, and if so use that test address's ipif for 5527 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5528 * ire_add_v6() can work properly. 5529 */ 5530 if (IS_UNDER_IPMP(ill)) 5531 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5532 5533 /* 5534 * Determine the outbound (destination) ill for this route. 5535 * If IPMP is not in use, that's the same as our ill. If IPMP 5536 * is in-use and we're on the IPMP interface, or we're on an 5537 * underlying ill but sending data traffic, use a suitable 5538 * destination ill from the group. The latter case covers a 5539 * subtle edge condition with multicast: when we bring up an 5540 * IPv6 data address, we will create an NCE on an underlying 5541 * interface, and send solicitations to ff02::1, which would 5542 * take us through here, and cause us to create an IRE for 5543 * ff02::1. To meet our defined semantics for multicast (and 5544 * ensure there aren't unexpected echoes), that IRE needs to 5545 * use the IPMP group's nominated multicast interface. 5546 * 5547 * Note: the source ipif is determined by source address 5548 * selection later.
5549 */ 5550 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5551 ill_t *ipmp_ill; 5552 ipmp_illgrp_t *illg; 5553 5554 if (IS_UNDER_IPMP(ill)) { 5555 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5556 } else { 5557 ipmp_ill = ill; 5558 ill_refhold(ipmp_ill); /* for symmetry */ 5559 } 5560 5561 if (ipmp_ill == NULL) 5562 goto err_ret; 5563 5564 illg = ipmp_ill->ill_grp; 5565 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5566 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5567 else 5568 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5569 5570 ill_refrele(ipmp_ill); 5571 } else { 5572 dst_ill = ill; 5573 ill_refhold(dst_ill); /* for symmetry */ 5574 } 5575 5576 if (dst_ill == NULL) { 5577 if (ip_debug > 2) { 5578 pr_addr_dbg("ip_newroute_ipif_v6: " 5579 "no dst ill for dst %s\n", 5580 AF_INET6, v6dstp); 5581 } 5582 goto err_ret; 5583 } 5584 5585 /* 5586 * Pick a source address which matches the scope of the 5587 * destination address. 5588 * For RTF_SETSRC routes, the source address is imposed by the 5589 * parent ire (fire). 5590 */ 5591 5592 if (src_ipif == NULL && fire != NULL && 5593 (fire->ire_flags & RTF_SETSRC)) { 5594 /* 5595 * Check that the ipif matching the requested source 5596 * address still exists. 
5597 */ 5598 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5599 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5600 } 5601 5602 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5603 ip6_asp_table_held = B_TRUE; 5604 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5605 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5606 } 5607 5608 if (src_ipif == NULL) { 5609 if (!unspec_src) { 5610 if (ip_debug > 2) { 5611 /* ip1dbg */ 5612 pr_addr_dbg("ip_newroute_ipif_v6: " 5613 "no src for dst %s\n", 5614 AF_INET6, v6dstp); 5615 printf(" through interface %s\n", 5616 dst_ill->ill_name); 5617 } 5618 goto err_ret; 5619 } 5620 ire_v6srcp = &ipv6_all_zeros; 5621 src_ipif = ipif; 5622 ipif_refhold(src_ipif); 5623 } else { 5624 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5625 } 5626 5627 ire = ipif_to_ire_v6(ipif); 5628 if (ire == NULL) { 5629 if (ip_debug > 2) { 5630 /* ip1dbg */ 5631 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5632 AF_INET6, &ipif->ipif_v6lcl_addr); 5633 printf("ip_newroute_ipif_v6: " 5634 "if %s\n", dst_ill->ill_name); 5635 } 5636 goto err_ret; 5637 } 5638 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5639 goto err_ret; 5640 5641 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5642 5643 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5644 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5645 if (ip_debug > 2) { 5646 /* ip1dbg */ 5647 pr_addr_dbg(" address %s\n", 5648 AF_INET6, &ire->ire_src_addr_v6); 5649 } 5650 save_ire = ire; 5651 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5652 (void *)ire, (void *)ipif)); 5653 5654 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5655 /* 5656 * an IRE_OFFSUBET was looked up 5657 * on that interface. 5658 * this ire has RTF_MULTIRT flag, 5659 * so the resolution loop 5660 * will be re-entered to resolve 5661 * additional routes on other 5662 * interfaces. For that purpose, 5663 * a copy of the packet is 5664 * made at this point. 
5665 */ 5666 fire->ire_last_used_time = lbolt; 5667 copy_mp = copymsg(first_mp); 5668 if (copy_mp) { 5669 MULTIRT_DEBUG_TAG(copy_mp); 5670 } 5671 } 5672 5673 switch (ire->ire_type) { 5674 case IRE_IF_NORESOLVER: { 5675 /* 5676 * We have what we need to build an IRE_CACHE. 5677 * 5678 * handle the Gated case, where we create 5679 * a NORESOLVER route for loopback. 5680 */ 5681 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5682 break; 5683 /* 5684 * The newly created ire will inherit the flags of the 5685 * parent ire, if any. 5686 */ 5687 ire = ire_create_v6( 5688 v6dstp, /* dest address */ 5689 &ipv6_all_ones, /* mask */ 5690 ire_v6srcp, /* source address */ 5691 NULL, /* gateway address */ 5692 &save_ire->ire_max_frag, 5693 NULL, /* no src nce */ 5694 dst_ill->ill_rq, /* recv-from queue */ 5695 dst_ill->ill_wq, /* send-to queue */ 5696 IRE_CACHE, 5697 src_ipif, 5698 NULL, 5699 (fire != NULL) ? /* Parent handle */ 5700 fire->ire_phandle : 0, 5701 save_ire->ire_ihandle, /* Interface handle */ 5702 (fire != NULL) ? 5703 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5704 0, 5705 &ire_uinfo_null, 5706 NULL, 5707 NULL, 5708 ipst); 5709 5710 if (ire == NULL) { 5711 ire_refrele(save_ire); 5712 break; 5713 } 5714 5715 err = ndp_noresolver(dst_ill, v6dstp); 5716 if (err != 0) { 5717 ire_refrele(save_ire); 5718 break; 5719 } 5720 5721 /* Prevent save_ire from getting deleted */ 5722 IRB_REFHOLD(save_ire->ire_bucket); 5723 /* Has it been removed already ? */ 5724 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5725 IRB_REFRELE(save_ire->ire_bucket); 5726 ire_refrele(save_ire); 5727 break; 5728 } 5729 5730 ire_add_then_send(q, ire, first_mp); 5731 if (ip6_asp_table_held) { 5732 ip6_asp_table_refrele(ipst); 5733 ip6_asp_table_held = B_FALSE; 5734 } 5735 5736 /* Assert that it is not deleted yet. 
*/ 5737 ASSERT(save_ire->ire_ptpn != NULL); 5738 IRB_REFRELE(save_ire->ire_bucket); 5739 ire_refrele(save_ire); 5740 if (fire != NULL) { 5741 ire_refrele(fire); 5742 fire = NULL; 5743 } 5744 5745 /* 5746 * The resolution loop is re-entered if we 5747 * actually are in a multirouting case. 5748 */ 5749 if (copy_mp != NULL) { 5750 boolean_t need_resolve = 5751 ire_multirt_need_resolve_v6(v6dstp, 5752 msg_getlabel(copy_mp), ipst); 5753 if (!need_resolve) { 5754 MULTIRT_DEBUG_UNTAG(copy_mp); 5755 freemsg(copy_mp); 5756 copy_mp = NULL; 5757 } else { 5758 /* 5759 * ipif_lookup_group_v6() calls 5760 * ire_lookup_multi_v6() that uses 5761 * ire_ftable_lookup_v6() to find 5762 * an IRE_INTERFACE for the group. 5763 * In the multirt case, 5764 * ire_lookup_multi_v6() then invokes 5765 * ire_multirt_lookup_v6() to find 5766 * the next resolvable ire. 5767 * As a result, we obtain a new 5768 * interface, derived from the 5769 * next ire. 5770 */ 5771 if (ipif_held) { 5772 ipif_refrele(ipif); 5773 ipif_held = B_FALSE; 5774 } 5775 ipif = ipif_lookup_group_v6(v6dstp, 5776 zoneid, ipst); 5777 ip2dbg(("ip_newroute_ipif: " 5778 "multirt dst %08x, ipif %p\n", 5779 ntohl(V4_PART_OF_V6((*v6dstp))), 5780 (void *)ipif)); 5781 if (ipif != NULL) { 5782 ipif_held = B_TRUE; 5783 mp = copy_mp; 5784 copy_mp = NULL; 5785 multirt_resolve_next = 5786 B_TRUE; 5787 continue; 5788 } else { 5789 freemsg(copy_mp); 5790 } 5791 } 5792 } 5793 ill_refrele(dst_ill); 5794 if (ipif_held) { 5795 ipif_refrele(ipif); 5796 ipif_held = B_FALSE; 5797 } 5798 if (src_ipif != NULL) 5799 ipif_refrele(src_ipif); 5800 return; 5801 } 5802 case IRE_IF_RESOLVER: { 5803 5804 ASSERT(dst_ill->ill_isv6); 5805 5806 /* 5807 * We obtain a partial IRE_CACHE which we will pass 5808 * along with the resolver query. When the response 5809 * comes back it will be there ready for us to add. 5810 */ 5811 /* 5812 * the newly created ire will inherit the flags of the 5813 * parent ire, if any. 
5814 */ 5815 ire = ire_create_v6( 5816 v6dstp, /* dest address */ 5817 &ipv6_all_ones, /* mask */ 5818 ire_v6srcp, /* source address */ 5819 NULL, /* gateway address */ 5820 &save_ire->ire_max_frag, 5821 NULL, /* src nce */ 5822 dst_ill->ill_rq, /* recv-from queue */ 5823 dst_ill->ill_wq, /* send-to queue */ 5824 IRE_CACHE, 5825 src_ipif, 5826 NULL, 5827 (fire != NULL) ? /* Parent handle */ 5828 fire->ire_phandle : 0, 5829 save_ire->ire_ihandle, /* Interface handle */ 5830 (fire != NULL) ? 5831 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5832 0, 5833 &ire_uinfo_null, 5834 NULL, 5835 NULL, 5836 ipst); 5837 5838 if (ire == NULL) { 5839 ire_refrele(save_ire); 5840 break; 5841 } 5842 5843 /* Resolve and add ire to the ctable */ 5844 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5845 switch (err) { 5846 case 0: 5847 /* Prevent save_ire from getting deleted */ 5848 IRB_REFHOLD(save_ire->ire_bucket); 5849 /* Has it been removed already ? */ 5850 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5851 IRB_REFRELE(save_ire->ire_bucket); 5852 ire_refrele(save_ire); 5853 break; 5854 } 5855 /* 5856 * We have a resolved cache entry, 5857 * add in the IRE. 5858 */ 5859 ire_add_then_send(q, ire, first_mp); 5860 if (ip6_asp_table_held) { 5861 ip6_asp_table_refrele(ipst); 5862 ip6_asp_table_held = B_FALSE; 5863 } 5864 5865 /* Assert that it is not deleted yet. */ 5866 ASSERT(save_ire->ire_ptpn != NULL); 5867 IRB_REFRELE(save_ire->ire_bucket); 5868 ire_refrele(save_ire); 5869 if (fire != NULL) { 5870 ire_refrele(fire); 5871 fire = NULL; 5872 } 5873 5874 /* 5875 * The resolution loop is re-entered if we 5876 * actually are in a multirouting case. 
5877 */ 5878 if (copy_mp != NULL) { 5879 boolean_t need_resolve = 5880 ire_multirt_need_resolve_v6(v6dstp, 5881 msg_getlabel(copy_mp), ipst); 5882 if (!need_resolve) { 5883 MULTIRT_DEBUG_UNTAG(copy_mp); 5884 freemsg(copy_mp); 5885 copy_mp = NULL; 5886 } else { 5887 /* 5888 * ipif_lookup_group_v6() calls 5889 * ire_lookup_multi_v6() that 5890 * uses ire_ftable_lookup_v6() 5891 * to find an IRE_INTERFACE for 5892 * the group. In the multirt 5893 * case, ire_lookup_multi_v6() 5894 * then invokes 5895 * ire_multirt_lookup_v6() to 5896 * find the next resolvable ire. 5897 * As a result, we obtain a new 5898 * interface, derived from the 5899 * next ire. 5900 */ 5901 if (ipif_held) { 5902 ipif_refrele(ipif); 5903 ipif_held = B_FALSE; 5904 } 5905 ipif = ipif_lookup_group_v6( 5906 v6dstp, zoneid, ipst); 5907 ip2dbg(("ip_newroute_ipif: " 5908 "multirt dst %08x, " 5909 "ipif %p\n", 5910 ntohl(V4_PART_OF_V6( 5911 (*v6dstp))), 5912 (void *)ipif)); 5913 if (ipif != NULL) { 5914 ipif_held = B_TRUE; 5915 mp = copy_mp; 5916 copy_mp = NULL; 5917 multirt_resolve_next = 5918 B_TRUE; 5919 continue; 5920 } else { 5921 freemsg(copy_mp); 5922 } 5923 } 5924 } 5925 ill_refrele(dst_ill); 5926 if (ipif_held) { 5927 ipif_refrele(ipif); 5928 ipif_held = B_FALSE; 5929 } 5930 if (src_ipif != NULL) 5931 ipif_refrele(src_ipif); 5932 return; 5933 5934 case EINPROGRESS: 5935 /* 5936 * mp was consumed - presumably queued. 5937 * No need for ire, presumably resolution is 5938 * in progress, and ire will be added when the 5939 * address is resolved. 5940 */ 5941 if (ip6_asp_table_held) { 5942 ip6_asp_table_refrele(ipst); 5943 ip6_asp_table_held = B_FALSE; 5944 } 5945 ire_delete(ire); 5946 ire_refrele(save_ire); 5947 if (fire != NULL) { 5948 ire_refrele(fire); 5949 fire = NULL; 5950 } 5951 5952 /* 5953 * The resolution loop is re-entered if we 5954 * actually are in a multirouting case. 
5955 */ 5956 if (copy_mp != NULL) { 5957 boolean_t need_resolve = 5958 ire_multirt_need_resolve_v6(v6dstp, 5959 msg_getlabel(copy_mp), ipst); 5960 if (!need_resolve) { 5961 MULTIRT_DEBUG_UNTAG(copy_mp); 5962 freemsg(copy_mp); 5963 copy_mp = NULL; 5964 } else { 5965 /* 5966 * ipif_lookup_group_v6() calls 5967 * ire_lookup_multi_v6() that 5968 * uses ire_ftable_lookup_v6() 5969 * to find an IRE_INTERFACE for 5970 * the group. In the multirt 5971 * case, ire_lookup_multi_v6() 5972 * then invokes 5973 * ire_multirt_lookup_v6() to 5974 * find the next resolvable ire. 5975 * As a result, we obtain a new 5976 * interface, derived from the 5977 * next ire. 5978 */ 5979 if (ipif_held) { 5980 ipif_refrele(ipif); 5981 ipif_held = B_FALSE; 5982 } 5983 ipif = ipif_lookup_group_v6( 5984 v6dstp, zoneid, ipst); 5985 ip2dbg(("ip_newroute_ipif: " 5986 "multirt dst %08x, " 5987 "ipif %p\n", 5988 ntohl(V4_PART_OF_V6( 5989 (*v6dstp))), 5990 (void *)ipif)); 5991 if (ipif != NULL) { 5992 ipif_held = B_TRUE; 5993 mp = copy_mp; 5994 copy_mp = NULL; 5995 multirt_resolve_next = 5996 B_TRUE; 5997 continue; 5998 } else { 5999 freemsg(copy_mp); 6000 } 6001 } 6002 } 6003 ill_refrele(dst_ill); 6004 if (ipif_held) { 6005 ipif_refrele(ipif); 6006 ipif_held = B_FALSE; 6007 } 6008 if (src_ipif != NULL) 6009 ipif_refrele(src_ipif); 6010 return; 6011 default: 6012 /* Some transient error */ 6013 ire_refrele(save_ire); 6014 break; 6015 } 6016 break; 6017 } 6018 default: 6019 break; 6020 } 6021 if (ip6_asp_table_held) { 6022 ip6_asp_table_refrele(ipst); 6023 ip6_asp_table_held = B_FALSE; 6024 } 6025 } while (multirt_resolve_next); 6026 6027 err_ret: 6028 if (ip6_asp_table_held) 6029 ip6_asp_table_refrele(ipst); 6030 if (ire != NULL) 6031 ire_refrele(ire); 6032 if (fire != NULL) 6033 ire_refrele(fire); 6034 if (ipif != NULL && ipif_held) 6035 ipif_refrele(ipif); 6036 if (src_ipif != NULL) 6037 ipif_refrele(src_ipif); 6038 6039 /* Multicast - no point in trying to generate ICMP error */ 6040 if (dst_ill != 
NULL) { 6041 ill = dst_ill; 6042 ill_held = B_TRUE; 6043 } 6044 if (mp->b_prev || mp->b_next) { 6045 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6046 } else { 6047 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6048 } 6049 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6050 mp->b_next = NULL; 6051 mp->b_prev = NULL; 6052 freemsg(first_mp); 6053 if (ill_held) 6054 ill_refrele(ill); 6055 } 6056 6057 /* 6058 * Parse and process any hop-by-hop or destination options. 6059 * 6060 * Assumes that q is an ill read queue so that ICMP errors for link-local 6061 * destinations are sent out the correct interface. 6062 * 6063 * Returns -1 if there was an error and mp has been consumed. 6064 * Returns 0 if no special action is needed. 6065 * Returns 1 if the packet contained a router alert option for this node 6066 * which is verified to be "interesting/known" for our implementation. 6067 * 6068 * XXX Note: In future as more hbh or dest options are defined, 6069 * it may be better to have different routines for hbh and dest 6070 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6071 * may have same value in different namespaces. Or is it same namespace ?? 
6072 * Current code checks for each opt_type (other than pads) if it is in 6073 * the expected nexthdr (hbh or dest) 6074 */ 6075 static int 6076 ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 6077 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst) 6078 { 6079 uint8_t opt_type; 6080 uint_t optused; 6081 int ret = 0; 6082 mblk_t *first_mp; 6083 const char *errtype; 6084 zoneid_t zoneid; 6085 ill_t *ill = q->q_ptr; 6086 ipif_t *ipif; 6087 6088 first_mp = mp; 6089 if (mp->b_datap->db_type == M_CTL) { 6090 mp = mp->b_cont; 6091 } 6092 6093 while (optlen != 0) { 6094 opt_type = *optptr; 6095 if (opt_type == IP6OPT_PAD1) { 6096 optused = 1; 6097 } else { 6098 if (optlen < 2) 6099 goto bad_opt; 6100 errtype = "malformed"; 6101 if (opt_type == ip6opt_ls) { 6102 optused = 2 + optptr[1]; 6103 if (optused > optlen) 6104 goto bad_opt; 6105 } else switch (opt_type) { 6106 case IP6OPT_PADN: 6107 /* 6108 * Note:We don't verify that (N-2) pad octets 6109 * are zero as required by spec. Adhere to 6110 * "be liberal in what you accept..." part of 6111 * implementation philosophy (RFC791,RFC1122) 6112 */ 6113 optused = 2 + optptr[1]; 6114 if (optused > optlen) 6115 goto bad_opt; 6116 break; 6117 6118 case IP6OPT_JUMBO: 6119 if (hdr_type != IPPROTO_HOPOPTS) 6120 goto opt_error; 6121 goto opt_error; /* XXX Not implemented! 
*/ 6122 6123 case IP6OPT_ROUTER_ALERT: { 6124 struct ip6_opt_router *or; 6125 6126 if (hdr_type != IPPROTO_HOPOPTS) 6127 goto opt_error; 6128 optused = 2 + optptr[1]; 6129 if (optused > optlen) 6130 goto bad_opt; 6131 or = (struct ip6_opt_router *)optptr; 6132 /* Check total length and alignment */ 6133 if (optused != sizeof (*or) || 6134 ((uintptr_t)or->ip6or_value & 0x1) != 0) 6135 goto opt_error; 6136 /* Check value */ 6137 switch (*((uint16_t *)or->ip6or_value)) { 6138 case IP6_ALERT_MLD: 6139 case IP6_ALERT_RSVP: 6140 ret = 1; 6141 } 6142 break; 6143 } 6144 case IP6OPT_HOME_ADDRESS: { 6145 /* 6146 * Minimal support for the home address option 6147 * (which is required by all IPv6 nodes). 6148 * Implement by just swapping the home address 6149 * and source address. 6150 * XXX Note: this has IPsec implications since 6151 * AH needs to take this into account. 6152 * Also, when IPsec is used we need to ensure 6153 * that this is only processed once 6154 * in the received packet (to avoid swapping 6155 * back and forth). 6156 * NOTE:This option processing is considered 6157 * to be unsafe and prone to a denial of 6158 * service attack. 6159 * The current processing is not safe even with 6160 * IPsec secured IP packets. Since the home 6161 * address option processing requirement still 6162 * is in the IETF draft and in the process of 6163 * being redefined for its usage, it has been 6164 * decided to turn off the option by default. 6165 * If this section of code needs to be executed, 6166 * ndd variable ip6_ignore_home_address_opt 6167 * should be set to 0 at the user's own risk. 6168 */ 6169 struct ip6_opt_home_address *oh; 6170 in6_addr_t tmp; 6171 6172 if (ipst->ips_ipv6_ignore_home_address_opt) 6173 goto opt_error; 6174 6175 if (hdr_type != IPPROTO_DSTOPTS) 6176 goto opt_error; 6177 optused = 2 + optptr[1]; 6178 if (optused > optlen) 6179 goto bad_opt; 6180 6181 /* 6182 * We did this dest. opt the first time 6183 * around (i.e. before AH processing). 
6184 * If we've done AH... stop now. 6185 */ 6186 if (first_mp != mp) { 6187 ipsec_in_t *ii; 6188 6189 ii = (ipsec_in_t *)first_mp->b_rptr; 6190 if (ii->ipsec_in_ah_sa != NULL) 6191 break; 6192 } 6193 6194 oh = (struct ip6_opt_home_address *)optptr; 6195 /* Check total length and alignment */ 6196 if (optused < sizeof (*oh) || 6197 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 6198 goto opt_error; 6199 /* Swap ip6_src and the home address */ 6200 tmp = ip6h->ip6_src; 6201 /* XXX Note: only 8 byte alignment option */ 6202 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 6203 *(in6_addr_t *)oh->ip6oh_addr = tmp; 6204 break; 6205 } 6206 6207 case IP6OPT_TUNNEL_LIMIT: 6208 if (hdr_type != IPPROTO_DSTOPTS) { 6209 goto opt_error; 6210 } 6211 optused = 2 + optptr[1]; 6212 if (optused > optlen) { 6213 goto bad_opt; 6214 } 6215 if (optused != 3) { 6216 goto opt_error; 6217 } 6218 break; 6219 6220 default: 6221 errtype = "unknown"; 6222 /* FALLTHROUGH */ 6223 opt_error: 6224 /* Determine which zone should send error */ 6225 zoneid = ipif_lookup_addr_zoneid_v6( 6226 &ip6h->ip6_dst, ill, ipst); 6227 switch (IP6OPT_TYPE(opt_type)) { 6228 case IP6OPT_TYPE_SKIP: 6229 optused = 2 + optptr[1]; 6230 if (optused > optlen) 6231 goto bad_opt; 6232 ip1dbg(("ip_process_options_v6: %s " 6233 "opt 0x%x skipped\n", 6234 errtype, opt_type)); 6235 break; 6236 case IP6OPT_TYPE_DISCARD: 6237 ip1dbg(("ip_process_options_v6: %s " 6238 "opt 0x%x; packet dropped\n", 6239 errtype, opt_type)); 6240 freemsg(first_mp); 6241 return (-1); 6242 case IP6OPT_TYPE_ICMP: 6243 if (zoneid == ALL_ZONES) { 6244 freemsg(first_mp); 6245 return (-1); 6246 } 6247 icmp_param_problem_v6(WR(q), first_mp, 6248 ICMP6_PARAMPROB_OPTION, 6249 (uint32_t)(optptr - 6250 (uint8_t *)ip6h), 6251 B_FALSE, B_FALSE, zoneid, ipst); 6252 return (-1); 6253 case IP6OPT_TYPE_FORCEICMP: 6254 /* 6255 * If we don't have a zone and the dst 6256 * addr is multicast, then pick a zone 6257 * based on the inbound interface. 
6258 */ 6259 if (zoneid == ALL_ZONES && 6260 IN6_IS_ADDR_MULTICAST( 6261 &ip6h->ip6_dst)) { 6262 ipif = ipif_select_source_v6( 6263 ill, &ip6h->ip6_src, 6264 B_TRUE, 6265 IPV6_PREFER_SRC_DEFAULT, 6266 ALL_ZONES); 6267 if (ipif != NULL) { 6268 zoneid = 6269 ipif->ipif_zoneid; 6270 ipif_refrele(ipif); 6271 } 6272 } 6273 if (zoneid == ALL_ZONES) { 6274 freemsg(first_mp); 6275 return (-1); 6276 } 6277 icmp_param_problem_v6(WR(q), first_mp, 6278 ICMP6_PARAMPROB_OPTION, 6279 (uint32_t)(optptr - 6280 (uint8_t *)ip6h), 6281 B_FALSE, B_TRUE, zoneid, ipst); 6282 return (-1); 6283 default: 6284 ASSERT(0); 6285 } 6286 } 6287 } 6288 optlen -= optused; 6289 optptr += optused; 6290 } 6291 return (ret); 6292 6293 bad_opt: 6294 /* Determine which zone should send error */ 6295 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 6296 if (zoneid == ALL_ZONES) { 6297 freemsg(first_mp); 6298 } else { 6299 icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION, 6300 (uint32_t)(optptr - (uint8_t *)ip6h), 6301 B_FALSE, B_FALSE, zoneid, ipst); 6302 } 6303 return (-1); 6304 } 6305 6306 /* 6307 * Process a routing header that is not yet empty. 6308 * Because of RFC 5095, we now reject all route headers. 6309 */ 6310 static void 6311 ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 6312 ill_t *ill, mblk_t *hada_mp) 6313 { 6314 ip_stack_t *ipst = ill->ill_ipst; 6315 6316 ASSERT(rth->ip6r_segleft != 0); 6317 6318 if (!ipst->ips_ipv6_forward_src_routed) { 6319 /* XXX Check for source routed out same interface? 
*/ 6320 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6321 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6322 freemsg(hada_mp); 6323 freemsg(mp); 6324 return; 6325 } 6326 if (hada_mp != NULL) { 6327 freemsg(hada_mp); 6328 freemsg(mp); 6329 return; 6330 } 6331 /* Sent by forwarding path, and router is global zone */ 6332 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 6333 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), B_FALSE, 6334 B_FALSE, GLOBAL_ZONEID, ipst); 6335 } 6336 6337 /* 6338 * Read side put procedure for IPv6 module. 6339 */ 6340 void 6341 ip_rput_v6(queue_t *q, mblk_t *mp) 6342 { 6343 mblk_t *first_mp; 6344 mblk_t *hada_mp = NULL; 6345 ip6_t *ip6h; 6346 boolean_t ll_multicast = B_FALSE; 6347 boolean_t mctl_present = B_FALSE; 6348 ill_t *ill; 6349 struct iocblk *iocp; 6350 uint_t flags = 0; 6351 mblk_t *dl_mp; 6352 ip_stack_t *ipst; 6353 int check; 6354 6355 ill = (ill_t *)q->q_ptr; 6356 ipst = ill->ill_ipst; 6357 if (ill->ill_state_flags & ILL_CONDEMNED) { 6358 union DL_primitives *dl; 6359 6360 dl = (union DL_primitives *)mp->b_rptr; 6361 /* 6362 * Things are opening or closing - only accept DLPI 6363 * ack messages. If the stream is closing and ip_wsrv 6364 * has completed, ip_close is out of the qwait, but has 6365 * not yet completed qprocsoff. Don't proceed any further 6366 * because the ill has been cleaned up and things hanging 6367 * off the ill have been freed. 6368 */ 6369 if ((mp->b_datap->db_type != M_PCPROTO) || 6370 (dl->dl_primitive == DL_UNITDATA_IND)) { 6371 inet_freemsg(mp); 6372 return; 6373 } 6374 } 6375 6376 dl_mp = NULL; 6377 switch (mp->b_datap->db_type) { 6378 case M_DATA: { 6379 int hlen; 6380 uchar_t *ucp; 6381 struct ether_header *eh; 6382 dl_unitdata_ind_t *dui; 6383 6384 /* 6385 * This is a work-around for CR 6451644, a bug in Nemo. It 6386 * should be removed when that problem is fixed. 
6387 */ 6388 if (ill->ill_mactype == DL_ETHER && 6389 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6390 (ucp = mp->b_rptr)[-1] == (ETHERTYPE_IPV6 & 0xFF) && 6391 ucp[-2] == (ETHERTYPE_IPV6 >> 8)) { 6392 if (hlen >= sizeof (struct ether_vlan_header) && 6393 ucp[-5] == 0 && ucp[-6] == 0x81) 6394 ucp -= sizeof (struct ether_vlan_header); 6395 else 6396 ucp -= sizeof (struct ether_header); 6397 /* 6398 * If it's a group address, then fabricate a 6399 * DL_UNITDATA_IND message. 6400 */ 6401 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6402 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6403 BPRI_HI)) != NULL) { 6404 eh = (struct ether_header *)ucp; 6405 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6406 DB_TYPE(dl_mp) = M_PROTO; 6407 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6408 dui->dl_primitive = DL_UNITDATA_IND; 6409 dui->dl_dest_addr_length = 8; 6410 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6411 dui->dl_src_addr_length = 8; 6412 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6413 8; 6414 dui->dl_group_address = 1; 6415 ucp = (uchar_t *)(dui + 1); 6416 if (ill->ill_sap_length > 0) 6417 ucp += ill->ill_sap_length; 6418 bcopy(&eh->ether_dhost, ucp, 6); 6419 bcopy(&eh->ether_shost, ucp + 8, 6); 6420 ucp = (uchar_t *)(dui + 1); 6421 if (ill->ill_sap_length < 0) 6422 ucp += 8 + ill->ill_sap_length; 6423 bcopy(&eh->ether_type, ucp, 2); 6424 bcopy(&eh->ether_type, ucp + 8, 2); 6425 } 6426 } 6427 break; 6428 } 6429 6430 case M_PROTO: 6431 case M_PCPROTO: 6432 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6433 DL_UNITDATA_IND) { 6434 /* Go handle anything other than data elsewhere. */ 6435 ip_rput_dlpi(q, mp); 6436 return; 6437 } 6438 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6439 6440 /* Save the DLPI header. 
*/ 6441 dl_mp = mp; 6442 mp = mp->b_cont; 6443 dl_mp->b_cont = NULL; 6444 break; 6445 case M_BREAK: 6446 panic("ip_rput_v6: got an M_BREAK"); 6447 /*NOTREACHED*/ 6448 case M_IOCACK: 6449 iocp = (struct iocblk *)mp->b_rptr; 6450 switch (iocp->ioc_cmd) { 6451 case DL_IOC_HDR_INFO: 6452 ill = (ill_t *)q->q_ptr; 6453 ill_fastpath_ack(ill, mp); 6454 return; 6455 default: 6456 putnext(q, mp); 6457 return; 6458 } 6459 /* FALLTHRU */ 6460 case M_ERROR: 6461 case M_HANGUP: 6462 mutex_enter(&ill->ill_lock); 6463 if (ill->ill_state_flags & ILL_CONDEMNED) { 6464 mutex_exit(&ill->ill_lock); 6465 freemsg(mp); 6466 return; 6467 } 6468 ill_refhold_locked(ill); 6469 mutex_exit(&ill->ill_lock); 6470 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6471 return; 6472 case M_CTL: 6473 if ((MBLKL(mp) > sizeof (int)) && 6474 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6475 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6476 mctl_present = B_TRUE; 6477 break; 6478 } 6479 putnext(q, mp); 6480 return; 6481 case M_IOCNAK: 6482 iocp = (struct iocblk *)mp->b_rptr; 6483 switch (iocp->ioc_cmd) { 6484 case DL_IOC_HDR_INFO: 6485 ip_rput_other(NULL, q, mp, NULL); 6486 return; 6487 default: 6488 break; 6489 } 6490 /* FALLTHRU */ 6491 default: 6492 putnext(q, mp); 6493 return; 6494 } 6495 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6496 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6497 (mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6498 /* 6499 * if db_ref > 1 then copymsg and free original. Packet may be 6500 * changed and do not want other entity who has a reference to this 6501 * message to trip over the changes. This is a blind change because 6502 * trying to catch all places that might change packet is too 6503 * difficult (since it may be a module above this one). 
6504 */ 6505 if (mp->b_datap->db_ref > 1) { 6506 mblk_t *mp1; 6507 6508 mp1 = copymsg(mp); 6509 freemsg(mp); 6510 if (mp1 == NULL) { 6511 first_mp = NULL; 6512 goto discard; 6513 } 6514 mp = mp1; 6515 } 6516 first_mp = mp; 6517 if (mctl_present) { 6518 hada_mp = first_mp; 6519 mp = first_mp->b_cont; 6520 } 6521 6522 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6523 freemsg(mp); 6524 return; 6525 } 6526 6527 ip6h = (ip6_t *)mp->b_rptr; 6528 6529 /* 6530 * ip:::receive must see ipv6 packets with a full header, 6531 * and so is placed after the IP6_MBLK_HDR_ERR check. 6532 */ 6533 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6534 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6535 int, 0); 6536 6537 if (check != IP6_MBLK_OK) { 6538 freemsg(mp); 6539 return; 6540 } 6541 6542 DTRACE_PROBE4(ip6__physical__in__start, 6543 ill_t *, ill, ill_t *, NULL, 6544 ip6_t *, ip6h, mblk_t *, first_mp); 6545 6546 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6547 ipst->ips_ipv6firewall_physical_in, 6548 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6549 6550 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6551 6552 if (first_mp == NULL) 6553 return; 6554 6555 /* 6556 * Attach any necessary label information to this packet. 6557 */ 6558 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6559 if (ip6opt_ls != 0) 6560 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6561 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6562 goto discard; 6563 } 6564 6565 /* IP observability hook. 
*/ 6566 if (ipst->ips_ipobs_enabled) { 6567 zoneid_t dzone; 6568 6569 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6570 ALL_ZONES); 6571 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, 6572 IPV6_VERSION, 0, ipst); 6573 } 6574 6575 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6576 IPV6_DEFAULT_VERS_AND_FLOW) { 6577 /* 6578 * It may be a bit too expensive to do this mapped address 6579 * check here, but in the interest of robustness, it seems 6580 * like the correct place. 6581 * TODO: Avoid this check for e.g. connected TCP sockets 6582 */ 6583 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6584 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6585 goto discard; 6586 } 6587 6588 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6589 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6590 goto discard; 6591 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6592 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6593 goto discard; 6594 } 6595 6596 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6597 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6598 } else { 6599 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6600 goto discard; 6601 } 6602 freemsg(dl_mp); 6603 return; 6604 6605 discard: 6606 if (dl_mp != NULL) 6607 freeb(dl_mp); 6608 freemsg(first_mp); 6609 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6610 } 6611 6612 /* 6613 * Walk through the IPv6 packet in mp and see if there's an AH header 6614 * in it. See if the AH header needs to get done before other headers in 6615 * the packet. (Worker function for ipsec_early_ah_v6().) 
6616 */ 6617 #define IPSEC_HDR_DONT_PROCESS 0 6618 #define IPSEC_HDR_PROCESS 1 6619 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6620 static int 6621 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6622 { 6623 uint_t length; 6624 uint_t ehdrlen; 6625 uint8_t *whereptr; 6626 uint8_t *endptr; 6627 uint8_t *nexthdrp; 6628 ip6_dest_t *desthdr; 6629 ip6_rthdr_t *rthdr; 6630 ip6_t *ip6h; 6631 6632 /* 6633 * For now just pullup everything. In general, the less pullups, 6634 * the better, but there's so much squirrelling through anyway, 6635 * it's just easier this way. 6636 */ 6637 if (!pullupmsg(mp, -1)) { 6638 return (IPSEC_MEMORY_ERROR); 6639 } 6640 6641 ip6h = (ip6_t *)mp->b_rptr; 6642 length = IPV6_HDR_LEN; 6643 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6644 endptr = mp->b_wptr; 6645 6646 /* 6647 * We can't just use the argument nexthdr in the place 6648 * of nexthdrp becaue we don't dereference nexthdrp 6649 * till we confirm whether it is a valid address. 6650 */ 6651 nexthdrp = &ip6h->ip6_nxt; 6652 while (whereptr < endptr) { 6653 /* Is there enough left for len + nexthdr? */ 6654 if (whereptr + MIN_EHDR_LEN > endptr) 6655 return (IPSEC_MEMORY_ERROR); 6656 6657 switch (*nexthdrp) { 6658 case IPPROTO_HOPOPTS: 6659 case IPPROTO_DSTOPTS: 6660 /* Assumes the headers are identical for hbh and dst */ 6661 desthdr = (ip6_dest_t *)whereptr; 6662 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6663 if ((uchar_t *)desthdr + ehdrlen > endptr) 6664 return (IPSEC_MEMORY_ERROR); 6665 /* 6666 * Return DONT_PROCESS because the destination 6667 * options header may be for each hop in a 6668 * routing-header, and we only want AH if we're 6669 * finished with routing headers. 
6670 */ 6671 if (*nexthdrp == IPPROTO_DSTOPTS) 6672 return (IPSEC_HDR_DONT_PROCESS); 6673 nexthdrp = &desthdr->ip6d_nxt; 6674 break; 6675 case IPPROTO_ROUTING: 6676 rthdr = (ip6_rthdr_t *)whereptr; 6677 6678 /* 6679 * If there's more hops left on the routing header, 6680 * return now with DON'T PROCESS. 6681 */ 6682 if (rthdr->ip6r_segleft > 0) 6683 return (IPSEC_HDR_DONT_PROCESS); 6684 6685 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6686 if ((uchar_t *)rthdr + ehdrlen > endptr) 6687 return (IPSEC_MEMORY_ERROR); 6688 nexthdrp = &rthdr->ip6r_nxt; 6689 break; 6690 case IPPROTO_FRAGMENT: 6691 /* Wait for reassembly */ 6692 return (IPSEC_HDR_DONT_PROCESS); 6693 case IPPROTO_AH: 6694 *nexthdr = IPPROTO_AH; 6695 return (IPSEC_HDR_PROCESS); 6696 case IPPROTO_NONE: 6697 /* No next header means we're finished */ 6698 default: 6699 return (IPSEC_HDR_DONT_PROCESS); 6700 } 6701 length += ehdrlen; 6702 whereptr += ehdrlen; 6703 } 6704 /* 6705 * Malformed/truncated packet. 6706 */ 6707 return (IPSEC_MEMORY_ERROR); 6708 } 6709 6710 /* 6711 * Path for AH if options are present. If this is the first time we are 6712 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6713 * Otherwise, just fanout. Return value answers the boolean question: 6714 * "Did I consume the mblk you sent me?" 6715 * 6716 * Sometimes AH needs to be done before other IPv6 headers for security 6717 * reasons. This function (and its ipsec_needs_processing_v6() above) 6718 * indicates if that is so, and fans out to the appropriate IPsec protocol 6719 * for the datagram passed in. 
6720 */ 6721 static boolean_t 6722 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6723 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6724 { 6725 mblk_t *mp; 6726 uint8_t nexthdr; 6727 ipsec_in_t *ii = NULL; 6728 ah_t *ah; 6729 ipsec_status_t ipsec_rc; 6730 ip_stack_t *ipst = ill->ill_ipst; 6731 netstack_t *ns = ipst->ips_netstack; 6732 ipsec_stack_t *ipss = ns->netstack_ipsec; 6733 6734 ASSERT((hada_mp == NULL) || (!mctl_present)); 6735 6736 switch (ipsec_needs_processing_v6( 6737 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6738 case IPSEC_MEMORY_ERROR: 6739 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6740 freemsg(hada_mp); 6741 freemsg(first_mp); 6742 return (B_TRUE); 6743 case IPSEC_HDR_DONT_PROCESS: 6744 return (B_FALSE); 6745 } 6746 6747 /* Default means send it to AH! */ 6748 ASSERT(nexthdr == IPPROTO_AH); 6749 if (!mctl_present) { 6750 mp = first_mp; 6751 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6752 if (first_mp == NULL) { 6753 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6754 "allocation failure.\n")); 6755 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6756 freemsg(hada_mp); 6757 freemsg(mp); 6758 return (B_TRUE); 6759 } 6760 /* 6761 * Store the ill_index so that when we come back 6762 * from IPSEC we ride on the same queue. 6763 */ 6764 ii = (ipsec_in_t *)first_mp->b_rptr; 6765 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6766 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6767 first_mp->b_cont = mp; 6768 } 6769 /* 6770 * Cache hardware acceleration info. 
6771 */ 6772 if (hada_mp != NULL) { 6773 ASSERT(ii != NULL); 6774 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6775 "caching data attr.\n")); 6776 ii->ipsec_in_accelerated = B_TRUE; 6777 ii->ipsec_in_da = hada_mp; 6778 } 6779 6780 if (!ipsec_loaded(ipss)) { 6781 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6782 return (B_TRUE); 6783 } 6784 6785 ah = ipsec_inbound_ah_sa(first_mp, ns); 6786 if (ah == NULL) 6787 return (B_TRUE); 6788 ASSERT(ii->ipsec_in_ah_sa != NULL); 6789 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6790 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6791 6792 switch (ipsec_rc) { 6793 case IPSEC_STATUS_SUCCESS: 6794 /* we're done with IPsec processing, send it up */ 6795 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6796 break; 6797 case IPSEC_STATUS_FAILED: 6798 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6799 break; 6800 case IPSEC_STATUS_PENDING: 6801 /* no action needed */ 6802 break; 6803 } 6804 return (B_TRUE); 6805 } 6806 6807 static boolean_t 6808 ip_iptun_input_v6(mblk_t *ipsec_mp, mblk_t *data_mp, 6809 size_t hdr_len, uint8_t nexthdr, zoneid_t zoneid, ill_t *ill, 6810 ip_stack_t *ipst) 6811 { 6812 conn_t *connp; 6813 6814 ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); 6815 6816 connp = ipcl_classify_v6(data_mp, nexthdr, hdr_len, zoneid, ipst); 6817 if (connp != NULL) { 6818 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 6819 connp->conn_recv(connp, ipsec_mp != NULL ? ipsec_mp : data_mp, 6820 NULL); 6821 CONN_DEC_REF(connp); 6822 return (B_TRUE); 6823 } 6824 return (B_FALSE); 6825 } 6826 6827 /* 6828 * Validate the IPv6 mblk for alignment. 
6829 */ 6830 int 6831 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6832 { 6833 int pkt_len, ip6_len; 6834 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6835 6836 /* check for alignment and full IPv6 header */ 6837 if (!OK_32PTR((uchar_t *)ip6h) || 6838 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6839 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6840 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6841 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6842 return (IP6_MBLK_HDR_ERR); 6843 } 6844 ip6h = (ip6_t *)mp->b_rptr; 6845 } 6846 6847 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6848 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6849 6850 if (mp->b_cont == NULL) 6851 pkt_len = mp->b_wptr - mp->b_rptr; 6852 else 6853 pkt_len = msgdsize(mp); 6854 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6855 6856 /* 6857 * Check for bogus (too short packet) and packet which 6858 * was padded by the link layer. 6859 */ 6860 if (ip6_len != pkt_len) { 6861 ssize_t diff; 6862 6863 if (ip6_len > pkt_len) { 6864 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6865 ip6_len, pkt_len)); 6866 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6867 return (IP6_MBLK_LEN_ERR); 6868 } 6869 diff = (ssize_t)(pkt_len - ip6_len); 6870 6871 if (!adjmsg(mp, -diff)) { 6872 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6873 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6874 return (IP6_MBLK_LEN_ERR); 6875 } 6876 6877 /* 6878 * adjmsg may have freed an mblk from the chain, hence 6879 * invalidate any hw checksum here. This will force IP to 6880 * calculate the checksum in sw, but only for this packet. 6881 */ 6882 DB_CKSUMFLAGS(mp) = 0; 6883 } 6884 return (IP6_MBLK_OK); 6885 } 6886 6887 /* 6888 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6889 * ip_rput_v6 has already verified alignment, the min length, the version, 6890 * and db_ref = 1. 6891 * 6892 * The ill passed in (the arg named inill) is the ill that the packet 6893 * actually arrived on. 
We need to remember this when saving the 6894 * input interface index into potential IPV6_PKTINFO data in 6895 * ip_add_info_v6(). 6896 * 6897 * This routine doesn't free dl_mp; that's the caller's responsibility on 6898 * return. (Note that the callers are complex enough that there's no tail 6899 * recursion here anyway.) 6900 */ 6901 void 6902 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6903 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6904 { 6905 ire_t *ire = NULL; 6906 ill_t *ill = inill; 6907 ill_t *outill; 6908 uint8_t *whereptr; 6909 uint8_t nexthdr; 6910 uint16_t remlen; 6911 uint_t prev_nexthdr_offset; 6912 uint_t used; 6913 size_t old_pkt_len; 6914 size_t pkt_len; 6915 uint16_t ip6_len; 6916 uint_t hdr_len; 6917 boolean_t mctl_present; 6918 mblk_t *first_mp; 6919 mblk_t *first_mp1; 6920 boolean_t no_forward; 6921 ip6_hbh_t *hbhhdr; 6922 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6923 conn_t *connp; 6924 uint32_t ports; 6925 zoneid_t zoneid = GLOBAL_ZONEID; 6926 uint16_t hck_flags, reass_hck_flags; 6927 uint32_t reass_sum; 6928 boolean_t cksum_err; 6929 mblk_t *mp1; 6930 ip_stack_t *ipst = inill->ill_ipst; 6931 6932 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6933 6934 if (hada_mp != NULL) { 6935 /* 6936 * It's an IPsec accelerated packet. 6937 * Keep a pointer to the data attributes around until 6938 * we allocate the ipsecinfo structure. 6939 */ 6940 IPSECHW_DEBUG(IPSECHW_PKT, 6941 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6942 hada_mp->b_cont = NULL; 6943 /* 6944 * Since it is accelerated, it came directly from 6945 * the ill. 
6946 */ 6947 ASSERT(mctl_present == B_FALSE); 6948 ASSERT(mp->b_datap->db_type != M_CTL); 6949 } 6950 6951 ip6h = (ip6_t *)mp->b_rptr; 6952 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6953 old_pkt_len = pkt_len = ip6_len; 6954 6955 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 6956 hck_flags = DB_CKSUMFLAGS(mp); 6957 else 6958 hck_flags = 0; 6959 6960 /* Clear checksum flags in case we need to forward */ 6961 DB_CKSUMFLAGS(mp) = 0; 6962 reass_sum = reass_hck_flags = 0; 6963 6964 nexthdr = ip6h->ip6_nxt; 6965 6966 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6967 (uchar_t *)ip6h); 6968 whereptr = (uint8_t *)&ip6h[1]; 6969 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6970 6971 /* Process hop by hop header options */ 6972 if (nexthdr == IPPROTO_HOPOPTS) { 6973 uint_t ehdrlen; 6974 uint8_t *optptr; 6975 6976 if (remlen < MIN_EHDR_LEN) 6977 goto pkt_too_short; 6978 if (mp->b_cont != NULL && 6979 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6980 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6981 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6982 freemsg(hada_mp); 6983 freemsg(first_mp); 6984 return; 6985 } 6986 ip6h = (ip6_t *)mp->b_rptr; 6987 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6988 } 6989 hbhhdr = (ip6_hbh_t *)whereptr; 6990 nexthdr = hbhhdr->ip6h_nxt; 6991 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6992 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6993 6994 if (remlen < ehdrlen) 6995 goto pkt_too_short; 6996 if (mp->b_cont != NULL && 6997 whereptr + ehdrlen > mp->b_wptr) { 6998 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6999 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7000 freemsg(hada_mp); 7001 freemsg(first_mp); 7002 return; 7003 } 7004 ip6h = (ip6_t *)mp->b_rptr; 7005 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7006 hbhhdr = (ip6_hbh_t *)whereptr; 7007 } 7008 7009 optptr = whereptr + 2; 7010 whereptr += ehdrlen; 7011 remlen -= ehdrlen; 7012 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7013 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7014 case -1: 7015 /* 7016 * Packet has been consumed and any 7017 * needed ICMP messages sent. 7018 */ 7019 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7020 freemsg(hada_mp); 7021 return; 7022 case 0: 7023 /* no action needed */ 7024 break; 7025 case 1: 7026 /* Known router alert */ 7027 goto ipv6forus; 7028 } 7029 } 7030 7031 /* 7032 * On incoming v6 multicast packets we will bypass the ire table, 7033 * and assume that the read queue corresponds to the targetted 7034 * interface. 7035 * 7036 * The effect of this is the same as the IPv4 original code, but is 7037 * much cleaner I think. See ip_rput for how that was done. 7038 */ 7039 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7040 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7041 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7042 7043 /* 7044 * So that we don't end up with dups, only one ill in an IPMP 7045 * group is nominated to receive multicast data traffic. 7046 * However, link-locals on any underlying interfaces will have 7047 * joined their solicited-node multicast addresses and we must 7048 * accept those packets. (We don't attempt to precisely 7049 * filter out duplicate solicited-node multicast packets since 7050 * e.g. an IPMP interface and underlying interface may have 7051 * the same solicited-node multicast address.) Note that we 7052 * won't generally have duplicates because we only issue a 7053 * DL_ENABMULTI_REQ on one interface in a group; the exception 7054 * is when PHYI_MULTI_BCAST is set. 7055 */ 7056 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7057 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7058 goto drop_pkt; 7059 } 7060 7061 /* 7062 * XXX TODO Give to mrouted to for multicast forwarding. 
7063 */ 7064 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7065 ALL_ZONES) == NULL) { 7066 if (ip_debug > 3) { 7067 /* ip2dbg */ 7068 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7069 " which is not for us: %s\n", AF_INET6, 7070 &ip6h->ip6_dst); 7071 } 7072 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7073 freemsg(hada_mp); 7074 freemsg(first_mp); 7075 return; 7076 } 7077 if (ip_debug > 3) { 7078 /* ip2dbg */ 7079 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7080 AF_INET6, &ip6h->ip6_dst); 7081 } 7082 zoneid = GLOBAL_ZONEID; 7083 goto ipv6forus; 7084 } 7085 7086 /* 7087 * Find an ire that matches destination. For link-local addresses 7088 * we have to match the ill. 7089 * TBD for site local addresses. 7090 */ 7091 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7092 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7093 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7094 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7095 } else { 7096 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7097 msg_getlabel(mp), ipst); 7098 7099 if (ire != NULL && ire->ire_stq != NULL && 7100 ire->ire_zoneid != GLOBAL_ZONEID && 7101 ire->ire_zoneid != ALL_ZONES) { 7102 /* 7103 * Should only use IREs that are visible from the 7104 * global zone for forwarding. 7105 */ 7106 ire_refrele(ire); 7107 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7108 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7109 } 7110 } 7111 7112 if (ire == NULL) { 7113 /* 7114 * No matching IRE found. Mark this packet as having 7115 * originated externally. 
7116 */ 7117 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7118 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7119 if (!(ill->ill_flags & ILLF_ROUTER)) { 7120 BUMP_MIB(ill->ill_ip_mib, 7121 ipIfStatsInAddrErrors); 7122 } 7123 freemsg(hada_mp); 7124 freemsg(first_mp); 7125 return; 7126 } 7127 if (ip6h->ip6_hops <= 1) { 7128 if (hada_mp != NULL) 7129 goto hada_drop; 7130 /* Sent by forwarding path, and router is global zone */ 7131 icmp_time_exceeded_v6(WR(q), first_mp, 7132 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7133 GLOBAL_ZONEID, ipst); 7134 return; 7135 } 7136 /* 7137 * Per RFC 3513 section 2.5.2, we must not forward packets with 7138 * an unspecified source address. 7139 */ 7140 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7141 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7142 freemsg(hada_mp); 7143 freemsg(first_mp); 7144 return; 7145 } 7146 mp->b_prev = (mblk_t *)(uintptr_t) 7147 ill->ill_phyint->phyint_ifindex; 7148 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7149 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7150 GLOBAL_ZONEID, ipst); 7151 return; 7152 } 7153 /* we have a matching IRE */ 7154 if (ire->ire_stq != NULL) { 7155 /* 7156 * To be quicker, we may wish not to chase pointers 7157 * (ire->ire_ipif->ipif_ill...) and instead store the 7158 * forwarding policy in the ire. An unfortunate side- 7159 * effect of this would be requiring an ire flush whenever 7160 * the ILLF_ROUTER flag changes. For now, chase pointers 7161 * once and store in the boolean no_forward. 7162 * 7163 * This appears twice to keep it out of the non-forwarding, 7164 * yes-it's-for-us-on-the-right-interface case. 7165 */ 7166 no_forward = ((ill->ill_flags & 7167 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7168 7169 ASSERT(first_mp == mp); 7170 /* 7171 * This ire has a send-to queue - forward the packet. 
7172 */ 7173 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7174 freemsg(hada_mp); 7175 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7176 if (no_forward) { 7177 BUMP_MIB(ill->ill_ip_mib, 7178 ipIfStatsInAddrErrors); 7179 } 7180 freemsg(mp); 7181 ire_refrele(ire); 7182 return; 7183 } 7184 /* 7185 * ipIfStatsHCInForwDatagrams should only be increment if there 7186 * will be an attempt to forward the packet, which is why we 7187 * increment after the above condition has been checked. 7188 */ 7189 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7190 if (ip6h->ip6_hops <= 1) { 7191 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7192 /* Sent by forwarding path, and router is global zone */ 7193 icmp_time_exceeded_v6(WR(q), mp, 7194 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7195 GLOBAL_ZONEID, ipst); 7196 ire_refrele(ire); 7197 return; 7198 } 7199 /* 7200 * Per RFC 3513 section 2.5.2, we must not forward packets with 7201 * an unspecified source address. 7202 */ 7203 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7204 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7205 freemsg(mp); 7206 ire_refrele(ire); 7207 return; 7208 } 7209 7210 if (is_system_labeled()) { 7211 mblk_t *mp1; 7212 7213 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7214 BUMP_MIB(ill->ill_ip_mib, 7215 ipIfStatsForwProhibits); 7216 freemsg(mp); 7217 ire_refrele(ire); 7218 return; 7219 } 7220 /* Size may have changed */ 7221 mp = mp1; 7222 ip6h = (ip6_t *)mp->b_rptr; 7223 pkt_len = msgdsize(mp); 7224 } 7225 7226 if (pkt_len > ire->ire_max_frag) { 7227 int max_frag = ire->ire_max_frag; 7228 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7229 /* 7230 * Handle labeled packet resizing. 
7231 */ 7232 if (is_system_labeled()) { 7233 max_frag = tsol_pmtu_adjust(mp, max_frag, 7234 pkt_len - old_pkt_len, AF_INET6); 7235 } 7236 7237 /* Sent by forwarding path, and router is global zone */ 7238 icmp_pkt2big_v6(WR(q), mp, max_frag, 7239 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7240 ire_refrele(ire); 7241 return; 7242 } 7243 7244 /* 7245 * Check to see if we're forwarding the packet to a 7246 * different link from which it came. If so, check the 7247 * source and destination addresses since routers must not 7248 * forward any packets with link-local source or 7249 * destination addresses to other links. Otherwise (if 7250 * we're forwarding onto the same link), conditionally send 7251 * a redirect message. 7252 */ 7253 if (ire->ire_rfq != q && 7254 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7255 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7256 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7257 BUMP_MIB(ill->ill_ip_mib, 7258 ipIfStatsInAddrErrors); 7259 freemsg(mp); 7260 ire_refrele(ire); 7261 return; 7262 } 7263 /* TBD add site-local check at site boundary? */ 7264 } else if (ipst->ips_ipv6_send_redirects) { 7265 in6_addr_t *v6targ; 7266 in6_addr_t gw_addr_v6; 7267 ire_t *src_ire_v6 = NULL; 7268 7269 /* 7270 * Don't send a redirect when forwarding a source 7271 * routed packet. 7272 */ 7273 if (ip_source_routed_v6(ip6h, mp, ipst)) 7274 goto forward; 7275 7276 mutex_enter(&ire->ire_lock); 7277 gw_addr_v6 = ire->ire_gateway_addr_v6; 7278 mutex_exit(&ire->ire_lock); 7279 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7280 v6targ = &gw_addr_v6; 7281 /* 7282 * We won't send redirects to a router 7283 * that doesn't have a link local 7284 * address, but will forward. 
7285 */ 7286 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7287 BUMP_MIB(ill->ill_ip_mib, 7288 ipIfStatsInAddrErrors); 7289 goto forward; 7290 } 7291 } else { 7292 v6targ = &ip6h->ip6_dst; 7293 } 7294 7295 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7296 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7297 GLOBAL_ZONEID, 0, NULL, 7298 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7299 ipst); 7300 7301 if (src_ire_v6 != NULL) { 7302 /* 7303 * The source is directly connected. 7304 */ 7305 mp1 = copymsg(mp); 7306 if (mp1 != NULL) { 7307 icmp_send_redirect_v6(WR(q), 7308 mp1, v6targ, &ip6h->ip6_dst, 7309 ill, B_FALSE); 7310 } 7311 ire_refrele(src_ire_v6); 7312 } 7313 } 7314 7315 forward: 7316 /* Hoplimit verified above */ 7317 ip6h->ip6_hops--; 7318 7319 outill = ire->ire_ipif->ipif_ill; 7320 7321 DTRACE_PROBE4(ip6__forwarding__start, 7322 ill_t *, inill, ill_t *, outill, 7323 ip6_t *, ip6h, mblk_t *, mp); 7324 7325 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7326 ipst->ips_ipv6firewall_forwarding, 7327 inill, outill, ip6h, mp, mp, 0, ipst); 7328 7329 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7330 7331 if (mp != NULL) { 7332 UPDATE_IB_PKT_COUNT(ire); 7333 ire->ire_last_used_time = lbolt; 7334 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7335 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7336 } 7337 IRE_REFRELE(ire); 7338 return; 7339 } 7340 7341 /* 7342 * Need to put on correct queue for reassembly to find it. 7343 * No need to use put() since reassembly has its own locks. 7344 * Note: multicast packets and packets destined to addresses 7345 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7346 * the arriving ill. Unlike the IPv4 case, enabling strict 7347 * destination multihoming will prevent accepting packets 7348 * addressed to an IRE_LOCAL on lo0. 
7349 */ 7350 if (ire->ire_rfq != q) { 7351 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7352 == NULL) { 7353 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7354 freemsg(hada_mp); 7355 freemsg(first_mp); 7356 return; 7357 } 7358 if (ire->ire_rfq != NULL) { 7359 q = ire->ire_rfq; 7360 ill = (ill_t *)q->q_ptr; 7361 ASSERT(ill != NULL); 7362 } 7363 } 7364 7365 zoneid = ire->ire_zoneid; 7366 UPDATE_IB_PKT_COUNT(ire); 7367 ire->ire_last_used_time = lbolt; 7368 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7369 ire_refrele(ire); 7370 ire = NULL; 7371 ipv6forus: 7372 /* 7373 * Looks like this packet is for us one way or another. 7374 * This is where we'll process destination headers etc. 7375 */ 7376 for (; ; ) { 7377 switch (nexthdr) { 7378 case IPPROTO_TCP: { 7379 uint16_t *up; 7380 uint32_t sum; 7381 int offset; 7382 7383 hdr_len = pkt_len - remlen; 7384 7385 if (hada_mp != NULL) { 7386 ip0dbg(("tcp hada drop\n")); 7387 goto hada_drop; 7388 } 7389 7390 7391 /* TCP needs all of the TCP header */ 7392 if (remlen < TCP_MIN_HEADER_LENGTH) 7393 goto pkt_too_short; 7394 if (mp->b_cont != NULL && 7395 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7396 if (!pullupmsg(mp, 7397 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7398 BUMP_MIB(ill->ill_ip_mib, 7399 ipIfStatsInDiscards); 7400 freemsg(first_mp); 7401 return; 7402 } 7403 hck_flags = 0; 7404 ip6h = (ip6_t *)mp->b_rptr; 7405 whereptr = (uint8_t *)ip6h + hdr_len; 7406 } 7407 /* 7408 * Extract the offset field from the TCP header. 7409 */ 7410 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7411 if (offset != 5) { 7412 if (offset < 5) { 7413 ip1dbg(("ip_rput_data_v6: short " 7414 "TCP data offset")); 7415 BUMP_MIB(ill->ill_ip_mib, 7416 ipIfStatsInDiscards); 7417 freemsg(first_mp); 7418 return; 7419 } 7420 /* 7421 * There must be TCP options. 7422 * Make sure we can grab them. 
7423 */ 7424 offset <<= 2; 7425 if (remlen < offset) 7426 goto pkt_too_short; 7427 if (mp->b_cont != NULL && 7428 whereptr + offset > mp->b_wptr) { 7429 if (!pullupmsg(mp, 7430 hdr_len + offset)) { 7431 BUMP_MIB(ill->ill_ip_mib, 7432 ipIfStatsInDiscards); 7433 freemsg(first_mp); 7434 return; 7435 } 7436 hck_flags = 0; 7437 ip6h = (ip6_t *)mp->b_rptr; 7438 whereptr = (uint8_t *)ip6h + hdr_len; 7439 } 7440 } 7441 7442 up = (uint16_t *)&ip6h->ip6_src; 7443 /* 7444 * TCP checksum calculation. First sum up the 7445 * pseudo-header fields: 7446 * - Source IPv6 address 7447 * - Destination IPv6 address 7448 * - TCP payload length 7449 * - TCP protocol ID 7450 */ 7451 sum = htons(IPPROTO_TCP + remlen) + 7452 up[0] + up[1] + up[2] + up[3] + 7453 up[4] + up[5] + up[6] + up[7] + 7454 up[8] + up[9] + up[10] + up[11] + 7455 up[12] + up[13] + up[14] + up[15]; 7456 7457 /* Fold initial sum */ 7458 sum = (sum & 0xffff) + (sum >> 16); 7459 7460 mp1 = mp->b_cont; 7461 7462 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7463 IP6_STAT(ipst, ip6_in_sw_cksum); 7464 7465 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7466 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7467 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7468 mp, mp1, cksum_err); 7469 7470 if (cksum_err) { 7471 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7472 7473 if (hck_flags & HCK_FULLCKSUM) { 7474 IP6_STAT(ipst, 7475 ip6_tcp_in_full_hw_cksum_err); 7476 } else if (hck_flags & HCK_PARTIALCKSUM) { 7477 IP6_STAT(ipst, 7478 ip6_tcp_in_part_hw_cksum_err); 7479 } else { 7480 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7481 } 7482 freemsg(first_mp); 7483 return; 7484 } 7485 tcp_fanout: 7486 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7487 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7488 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7489 return; 7490 } 7491 case IPPROTO_SCTP: 7492 { 7493 sctp_hdr_t *sctph; 7494 uint32_t calcsum, pktsum; 7495 uint_t hdr_len = pkt_len - remlen; 7496 sctp_stack_t *sctps; 7497 7498 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7499 7500 /* SCTP needs all of the SCTP header */ 7501 if (remlen < sizeof (*sctph)) { 7502 goto pkt_too_short; 7503 } 7504 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7505 ASSERT(mp->b_cont != NULL); 7506 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7507 BUMP_MIB(ill->ill_ip_mib, 7508 ipIfStatsInDiscards); 7509 freemsg(mp); 7510 return; 7511 } 7512 ip6h = (ip6_t *)mp->b_rptr; 7513 whereptr = (uint8_t *)ip6h + hdr_len; 7514 } 7515 7516 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7517 /* checksum */ 7518 pktsum = sctph->sh_chksum; 7519 sctph->sh_chksum = 0; 7520 calcsum = sctp_cksum(mp, hdr_len); 7521 if (calcsum != pktsum) { 7522 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7523 freemsg(mp); 7524 return; 7525 } 7526 sctph->sh_chksum = pktsum; 7527 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7528 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7529 ports, zoneid, mp, sctps)) == NULL) { 7530 ip_fanout_sctp_raw(first_mp, ill, 7531 (ipha_t *)ip6h, B_FALSE, ports, 7532 mctl_present, 7533 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7534 B_TRUE, zoneid); 7535 return; 7536 } 7537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7538 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7539 B_FALSE, mctl_present); 7540 return; 7541 } 7542 case IPPROTO_UDP: { 7543 uint16_t *up; 7544 uint32_t sum; 7545 7546 hdr_len = pkt_len - remlen; 7547 7548 if (hada_mp != NULL) { 7549 ip0dbg(("udp hada drop\n")); 7550 goto hada_drop; 7551 } 7552 7553 /* Verify that at least the ports are present */ 7554 if (remlen < UDPH_SIZE) 7555 goto pkt_too_short; 7556 if (mp->b_cont != NULL && 7557 whereptr + UDPH_SIZE > mp->b_wptr) { 7558 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7559 BUMP_MIB(ill->ill_ip_mib, 7560 ipIfStatsInDiscards); 7561 freemsg(first_mp); 7562 return; 7563 } 7564 hck_flags = 0; 7565 ip6h = (ip6_t *)mp->b_rptr; 7566 whereptr = (uint8_t *)ip6h + hdr_len; 7567 } 7568 7569 /* 7570 * Before going through the regular 
checksum 7571 * calculation, make sure the received checksum 7572 * is non-zero. RFC 2460 says, a 0x0000 checksum 7573 * in a UDP packet (within IPv6 packet) is invalid 7574 * and should be replaced by 0xffff. This makes 7575 * sense as regular checksum calculation will 7576 * pass for both the cases i.e. 0x0000 and 0xffff. 7577 * Removing one of the case makes error detection 7578 * stronger. 7579 */ 7580 7581 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7582 /* 0x0000 checksum is invalid */ 7583 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7584 "checksum value 0x0000\n")); 7585 BUMP_MIB(ill->ill_ip_mib, 7586 udpIfStatsInCksumErrs); 7587 freemsg(first_mp); 7588 return; 7589 } 7590 7591 up = (uint16_t *)&ip6h->ip6_src; 7592 7593 /* 7594 * UDP checksum calculation. First sum up the 7595 * pseudo-header fields: 7596 * - Source IPv6 address 7597 * - Destination IPv6 address 7598 * - UDP payload length 7599 * - UDP protocol ID 7600 */ 7601 7602 sum = htons(IPPROTO_UDP + remlen) + 7603 up[0] + up[1] + up[2] + up[3] + 7604 up[4] + up[5] + up[6] + up[7] + 7605 up[8] + up[9] + up[10] + up[11] + 7606 up[12] + up[13] + up[14] + up[15]; 7607 7608 /* Fold initial sum */ 7609 sum = (sum & 0xffff) + (sum >> 16); 7610 7611 if (reass_hck_flags != 0) { 7612 hck_flags = reass_hck_flags; 7613 7614 IP_CKSUM_RECV_REASS(hck_flags, 7615 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7616 sum, reass_sum, cksum_err); 7617 } else { 7618 mp1 = mp->b_cont; 7619 7620 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7621 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7622 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7623 mp, mp1, cksum_err); 7624 } 7625 7626 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7627 IP6_STAT(ipst, ip6_in_sw_cksum); 7628 7629 if (cksum_err) { 7630 BUMP_MIB(ill->ill_ip_mib, 7631 udpIfStatsInCksumErrs); 7632 7633 if (hck_flags & HCK_FULLCKSUM) 7634 IP6_STAT(ipst, 7635 ip6_udp_in_full_hw_cksum_err); 7636 else if (hck_flags & HCK_PARTIALCKSUM) 7637 IP6_STAT(ipst, 7638 
ip6_udp_in_part_hw_cksum_err); 7639 else 7640 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7641 7642 freemsg(first_mp); 7643 return; 7644 } 7645 goto udp_fanout; 7646 } 7647 case IPPROTO_ICMPV6: { 7648 uint16_t *up; 7649 uint32_t sum; 7650 uint_t hdr_len = pkt_len - remlen; 7651 7652 if (hada_mp != NULL) { 7653 ip0dbg(("icmp hada drop\n")); 7654 goto hada_drop; 7655 } 7656 7657 up = (uint16_t *)&ip6h->ip6_src; 7658 sum = htons(IPPROTO_ICMPV6 + remlen) + 7659 up[0] + up[1] + up[2] + up[3] + 7660 up[4] + up[5] + up[6] + up[7] + 7661 up[8] + up[9] + up[10] + up[11] + 7662 up[12] + up[13] + up[14] + up[15]; 7663 sum = (sum & 0xffff) + (sum >> 16); 7664 sum = IP_CSUM(mp, hdr_len, sum); 7665 if (sum != 0) { 7666 /* IPv6 ICMP checksum failed */ 7667 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7668 "failed %x\n", 7669 sum)); 7670 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7671 BUMP_MIB(ill->ill_icmp6_mib, 7672 ipv6IfIcmpInErrors); 7673 freemsg(first_mp); 7674 return; 7675 } 7676 7677 icmp_fanout: 7678 /* Check variable for testing applications */ 7679 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7680 freemsg(first_mp); 7681 return; 7682 } 7683 /* 7684 * Assume that there is always at least one conn for 7685 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7686 * where there is no conn. 7687 */ 7688 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7689 ilm_t *ilm; 7690 ilm_walker_t ilw; 7691 7692 ASSERT(!IS_LOOPBACK(ill)); 7693 /* 7694 * In the multicast case, applications may have 7695 * joined the group from different zones, so we 7696 * need to deliver the packet to each of them. 7697 * Loop through the multicast memberships 7698 * structures (ilm) on the receive ill and send 7699 * a copy of the packet up each matching one. 
7700 */ 7701 ilm = ilm_walker_start(&ilw, inill); 7702 for (; ilm != NULL; 7703 ilm = ilm_walker_step(&ilw, ilm)) { 7704 if (!IN6_ARE_ADDR_EQUAL( 7705 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7706 continue; 7707 if (!ipif_lookup_zoneid( 7708 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7709 IPIF_UP, NULL)) 7710 continue; 7711 7712 first_mp1 = ip_copymsg(first_mp); 7713 if (first_mp1 == NULL) 7714 continue; 7715 icmp_inbound_v6(q, first_mp1, 7716 ilw.ilw_walk_ill, inill, 7717 hdr_len, mctl_present, 0, 7718 ilm->ilm_zoneid, dl_mp); 7719 } 7720 ilm_walker_finish(&ilw); 7721 } else { 7722 first_mp1 = ip_copymsg(first_mp); 7723 if (first_mp1 != NULL) 7724 icmp_inbound_v6(q, first_mp1, ill, 7725 inill, hdr_len, mctl_present, 0, 7726 zoneid, dl_mp); 7727 } 7728 goto proto_fanout; 7729 } 7730 case IPPROTO_ENCAP: 7731 case IPPROTO_IPV6: 7732 if (ip_iptun_input_v6(mctl_present ? first_mp : NULL, 7733 mp, pkt_len - remlen, nexthdr, zoneid, ill, ipst)) { 7734 return; 7735 } 7736 /* 7737 * If there was no IP tunnel data-link bound to 7738 * receive this packet, then we fall through to 7739 * allow potential raw sockets bound to either of 7740 * these protocols to pick it up. 7741 */ 7742 /* FALLTHRU */ 7743 proto_fanout: 7744 default: { 7745 /* 7746 * Handle protocols with which IPv6 is less intimate. 7747 */ 7748 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7749 7750 if (hada_mp != NULL) { 7751 ip0dbg(("default hada drop\n")); 7752 goto hada_drop; 7753 } 7754 7755 /* 7756 * Enable sending ICMP for "Unknown" nexthdr 7757 * case. i.e. where we did not FALLTHRU from 7758 * IPPROTO_ICMPV6 processing case above. 
7759 * If we did FALLTHRU, then the packet has already been 7760 * processed for IPPF, don't process it again in 7761 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7762 * flags 7763 */ 7764 if (nexthdr != IPPROTO_ICMPV6) 7765 proto_flags |= IP_FF_SEND_ICMP; 7766 else 7767 proto_flags |= IP6_NO_IPPOLICY; 7768 7769 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7770 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7771 mctl_present, zoneid); 7772 return; 7773 } 7774 7775 case IPPROTO_DSTOPTS: { 7776 uint_t ehdrlen; 7777 uint8_t *optptr; 7778 ip6_dest_t *desthdr; 7779 7780 /* If packet is too short, look no further */ 7781 if (remlen < MIN_EHDR_LEN) 7782 goto pkt_too_short; 7783 7784 /* Check if AH is present. */ 7785 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7786 inill, hada_mp, zoneid)) { 7787 return; 7788 } 7789 7790 /* 7791 * Reinitialize pointers, as ipsec_early_ah_v6() does 7792 * complete pullups. We don't have to do more pullups 7793 * as a result. 7794 */ 7795 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7796 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7797 ip6h = (ip6_t *)mp->b_rptr; 7798 7799 desthdr = (ip6_dest_t *)whereptr; 7800 nexthdr = desthdr->ip6d_nxt; 7801 prev_nexthdr_offset = (uint_t)(whereptr - 7802 (uint8_t *)ip6h); 7803 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7804 if (remlen < ehdrlen) 7805 goto pkt_too_short; 7806 optptr = whereptr + 2; 7807 /* 7808 * Note: XXX This code does not seem to make 7809 * distinction between Destination Options Header 7810 * being before/after Routing Header which can 7811 * happen if we are at the end of source route. 7812 * This may become significant in future. 7813 * (No real significant Destination Options are 7814 * defined/implemented yet ). 7815 */ 7816 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7817 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7818 case -1: 7819 /* 7820 * Packet has been consumed and any needed 7821 * ICMP errors sent. 
7822 */ 7823 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7824 freemsg(hada_mp); 7825 return; 7826 case 0: 7827 /* No action needed continue */ 7828 break; 7829 case 1: 7830 /* 7831 * Unnexpected return value 7832 * (Router alert is a Hop-by-Hop option) 7833 */ 7834 #ifdef DEBUG 7835 panic("ip_rput_data_v6: router " 7836 "alert hbh opt indication in dest opt"); 7837 /*NOTREACHED*/ 7838 #else 7839 freemsg(hada_mp); 7840 freemsg(first_mp); 7841 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7842 return; 7843 #endif 7844 } 7845 used = ehdrlen; 7846 break; 7847 } 7848 case IPPROTO_FRAGMENT: { 7849 ip6_frag_t *fraghdr; 7850 size_t no_frag_hdr_len; 7851 7852 if (hada_mp != NULL) { 7853 ip0dbg(("frag hada drop\n")); 7854 goto hada_drop; 7855 } 7856 7857 ASSERT(first_mp == mp); 7858 if (remlen < sizeof (ip6_frag_t)) 7859 goto pkt_too_short; 7860 7861 if (mp->b_cont != NULL && 7862 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7863 if (!pullupmsg(mp, 7864 pkt_len - remlen + sizeof (ip6_frag_t))) { 7865 BUMP_MIB(ill->ill_ip_mib, 7866 ipIfStatsInDiscards); 7867 freemsg(mp); 7868 return; 7869 } 7870 hck_flags = 0; 7871 ip6h = (ip6_t *)mp->b_rptr; 7872 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7873 } 7874 7875 fraghdr = (ip6_frag_t *)whereptr; 7876 used = (uint_t)sizeof (ip6_frag_t); 7877 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 7878 7879 /* 7880 * Invoke the CGTP (multirouting) filtering module to 7881 * process the incoming packet. Packets identified as 7882 * duplicates must be discarded. Filtering is active 7883 * only if the the ip_cgtp_filter ndd variable is 7884 * non-zero. 
7885 */ 7886 if (ipst->ips_ip_cgtp_filter && 7887 ipst->ips_ip_cgtp_filter_ops != NULL) { 7888 int cgtp_flt_pkt; 7889 netstackid_t stackid; 7890 7891 stackid = ipst->ips_netstack->netstack_stackid; 7892 7893 cgtp_flt_pkt = 7894 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 7895 stackid, inill->ill_phyint->phyint_ifindex, 7896 ip6h, fraghdr); 7897 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7898 freemsg(mp); 7899 return; 7900 } 7901 } 7902 7903 /* Restore the flags */ 7904 DB_CKSUMFLAGS(mp) = hck_flags; 7905 7906 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 7907 remlen - used, &prev_nexthdr_offset, 7908 &reass_sum, &reass_hck_flags); 7909 if (mp == NULL) { 7910 /* Reassembly is still pending */ 7911 return; 7912 } 7913 /* The first mblk are the headers before the frag hdr */ 7914 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 7915 7916 first_mp = mp; /* mp has most likely changed! */ 7917 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7918 ip6h = (ip6_t *)mp->b_rptr; 7919 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7920 whereptr = mp->b_rptr + no_frag_hdr_len; 7921 remlen = ntohs(ip6h->ip6_plen) + 7922 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7923 pkt_len = msgdsize(mp); 7924 used = 0; 7925 break; 7926 } 7927 case IPPROTO_HOPOPTS: { 7928 if (hada_mp != NULL) { 7929 ip0dbg(("hop hada drop\n")); 7930 goto hada_drop; 7931 } 7932 /* 7933 * Illegal header sequence. 7934 * (Hop-by-hop headers are processed above 7935 * and required to immediately follow IPv6 header) 7936 */ 7937 icmp_param_problem_v6(WR(q), first_mp, 7938 ICMP6_PARAMPROB_NEXTHEADER, 7939 prev_nexthdr_offset, 7940 B_FALSE, B_FALSE, zoneid, ipst); 7941 return; 7942 } 7943 case IPPROTO_ROUTING: { 7944 uint_t ehdrlen; 7945 ip6_rthdr_t *rthdr; 7946 7947 /* If packet is too short, look no further */ 7948 if (remlen < MIN_EHDR_LEN) 7949 goto pkt_too_short; 7950 7951 /* Check if AH is present. 
*/ 7952 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7953 inill, hada_mp, zoneid)) { 7954 return; 7955 } 7956 7957 /* 7958 * Reinitialize pointers, as ipsec_early_ah_v6() does 7959 * complete pullups. We don't have to do more pullups 7960 * as a result. 7961 */ 7962 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7963 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7964 ip6h = (ip6_t *)mp->b_rptr; 7965 7966 rthdr = (ip6_rthdr_t *)whereptr; 7967 nexthdr = rthdr->ip6r_nxt; 7968 prev_nexthdr_offset = (uint_t)(whereptr - 7969 (uint8_t *)ip6h); 7970 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7971 if (remlen < ehdrlen) 7972 goto pkt_too_short; 7973 if (rthdr->ip6r_segleft != 0) { 7974 /* Not end of source route */ 7975 if (ll_multicast) { 7976 BUMP_MIB(ill->ill_ip_mib, 7977 ipIfStatsForwProhibits); 7978 freemsg(hada_mp); 7979 freemsg(mp); 7980 return; 7981 } 7982 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7983 hada_mp); 7984 return; 7985 } 7986 used = ehdrlen; 7987 break; 7988 } 7989 case IPPROTO_AH: 7990 case IPPROTO_ESP: { 7991 /* 7992 * Fast path for AH/ESP. If this is the first time 7993 * we are sending a datagram to AH/ESP, allocate 7994 * a IPSEC_IN message and prepend it. Otherwise, 7995 * just fanout. 7996 */ 7997 7998 ipsec_in_t *ii; 7999 int ipsec_rc; 8000 ipsec_stack_t *ipss; 8001 8002 ipss = ipst->ips_netstack->netstack_ipsec; 8003 if (!mctl_present) { 8004 ASSERT(first_mp == mp); 8005 first_mp = ipsec_in_alloc(B_FALSE, 8006 ipst->ips_netstack); 8007 if (first_mp == NULL) { 8008 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8009 "allocation failure.\n")); 8010 BUMP_MIB(ill->ill_ip_mib, 8011 ipIfStatsInDiscards); 8012 freemsg(mp); 8013 return; 8014 } 8015 /* 8016 * Store the ill_index so that when we come back 8017 * from IPSEC we ride on the same queue. 
8018 */ 8019 ii = (ipsec_in_t *)first_mp->b_rptr; 8020 ii->ipsec_in_ill_index = 8021 ill->ill_phyint->phyint_ifindex; 8022 ii->ipsec_in_rill_index = 8023 inill->ill_phyint->phyint_ifindex; 8024 first_mp->b_cont = mp; 8025 /* 8026 * Cache hardware acceleration info. 8027 */ 8028 if (hada_mp != NULL) { 8029 IPSECHW_DEBUG(IPSECHW_PKT, 8030 ("ip_rput_data_v6: " 8031 "caching data attr.\n")); 8032 ii->ipsec_in_accelerated = B_TRUE; 8033 ii->ipsec_in_da = hada_mp; 8034 hada_mp = NULL; 8035 } 8036 } else { 8037 ii = (ipsec_in_t *)first_mp->b_rptr; 8038 } 8039 8040 if (!ipsec_loaded(ipss)) { 8041 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8042 zoneid, ipst); 8043 return; 8044 } 8045 8046 /* select inbound SA and have IPsec process the pkt */ 8047 if (nexthdr == IPPROTO_ESP) { 8048 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8049 ipst->ips_netstack); 8050 if (esph == NULL) 8051 return; 8052 ASSERT(ii->ipsec_in_esp_sa != NULL); 8053 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8054 NULL); 8055 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8056 first_mp, esph); 8057 } else { 8058 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8059 ipst->ips_netstack); 8060 if (ah == NULL) 8061 return; 8062 ASSERT(ii->ipsec_in_ah_sa != NULL); 8063 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8064 NULL); 8065 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8066 first_mp, ah); 8067 } 8068 8069 switch (ipsec_rc) { 8070 case IPSEC_STATUS_SUCCESS: 8071 break; 8072 case IPSEC_STATUS_FAILED: 8073 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8074 /* FALLTHRU */ 8075 case IPSEC_STATUS_PENDING: 8076 return; 8077 } 8078 /* we're done with IPsec processing, send it up */ 8079 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8080 return; 8081 } 8082 case IPPROTO_NONE: 8083 /* All processing is done. Count as "delivered". 
*/ 8084 freemsg(hada_mp); 8085 freemsg(first_mp); 8086 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8087 return; 8088 } 8089 whereptr += used; 8090 ASSERT(remlen >= used); 8091 remlen -= used; 8092 } 8093 /* NOTREACHED */ 8094 8095 pkt_too_short: 8096 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8097 ip6_len, pkt_len, remlen)); 8098 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8099 freemsg(hada_mp); 8100 freemsg(first_mp); 8101 return; 8102 udp_fanout: 8103 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8104 connp = NULL; 8105 } else { 8106 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8107 ipst); 8108 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8109 CONN_DEC_REF(connp); 8110 connp = NULL; 8111 } 8112 } 8113 8114 if (connp == NULL) { 8115 uint32_t ports; 8116 8117 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8118 UDP_PORTS_OFFSET); 8119 IP6_STAT(ipst, ip6_udp_slow_path); 8120 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8121 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8122 zoneid); 8123 return; 8124 } 8125 8126 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8127 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8128 freemsg(first_mp); 8129 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8130 CONN_DEC_REF(connp); 8131 return; 8132 } 8133 8134 /* Initiate IPPF processing */ 8135 if (IP6_IN_IPP(flags, ipst)) { 8136 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8137 if (mp == NULL) { 8138 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8139 CONN_DEC_REF(connp); 8140 return; 8141 } 8142 } 8143 8144 if (connp->conn_ip_recvpktinfo || 8145 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8146 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8147 if (mp == NULL) { 8148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8149 CONN_DEC_REF(connp); 8150 return; 8151 } 8152 } 8153 8154 IP6_STAT(ipst, ip6_udp_fast_path); 8155 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8156 8157 /* Send it upstream */ 8158 (connp->conn_recv)(connp, mp, NULL); 8159 8160 CONN_DEC_REF(connp); 8161 freemsg(hada_mp); 8162 return; 8163 8164 hada_drop: 8165 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8166 /* IPsec kstats: bump counter here */ 8167 freemsg(hada_mp); 8168 freemsg(first_mp); 8169 } 8170 8171 /* 8172 * Reassemble fragment. 8173 * When it returns a completed message the first mblk will only contain 8174 * the headers prior to the fragment header. 8175 * 8176 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8177 * of the preceding header. This is needed to patch the previous header's 8178 * nexthdr field when reassembly completes. 8179 */ 8180 static mblk_t * 8181 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8182 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8183 uint32_t *cksum_val, uint16_t *cksum_flags) 8184 { 8185 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8186 uint16_t offset; 8187 boolean_t more_frags; 8188 uint8_t nexthdr = fraghdr->ip6f_nxt; 8189 in6_addr_t *v6dst_ptr; 8190 in6_addr_t *v6src_ptr; 8191 uint_t end; 8192 uint_t hdr_length; 8193 size_t count; 8194 ipf_t *ipf; 8195 ipf_t **ipfp; 8196 ipfb_t *ipfb; 8197 mblk_t *mp1; 8198 uint8_t ecn_info = 0; 8199 size_t msg_len; 8200 mblk_t *tail_mp; 8201 mblk_t *t_mp; 8202 boolean_t pruned = B_FALSE; 8203 uint32_t sum_val; 8204 uint16_t sum_flags; 8205 ip_stack_t *ipst = ill->ill_ipst; 8206 8207 if (cksum_val != NULL) 8208 *cksum_val = 0; 8209 if (cksum_flags != NULL) 8210 *cksum_flags = 0; 8211 8212 /* 8213 * We utilize hardware computed checksum info only for UDP since 8214 * IP fragmentation is a normal occurence for the protocol. In 8215 * addition, checksum offload support for IP fragments carrying 8216 * UDP payload is commonly implemented across network adapters. 
8217 */ 8218 ASSERT(inill != NULL); 8219 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8220 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8221 mblk_t *mp1 = mp->b_cont; 8222 int32_t len; 8223 8224 /* Record checksum information from the packet */ 8225 sum_val = (uint32_t)DB_CKSUM16(mp); 8226 sum_flags = DB_CKSUMFLAGS(mp); 8227 8228 /* fragmented payload offset from beginning of mblk */ 8229 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8230 8231 if ((sum_flags & HCK_PARTIALCKSUM) && 8232 (mp1 == NULL || mp1->b_cont == NULL) && 8233 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8234 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8235 uint32_t adj; 8236 /* 8237 * Partial checksum has been calculated by hardware 8238 * and attached to the packet; in addition, any 8239 * prepended extraneous data is even byte aligned. 8240 * If any such data exists, we adjust the checksum; 8241 * this would also handle any postpended data. 8242 */ 8243 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8244 mp, mp1, len, adj); 8245 8246 /* One's complement subtract extraneous checksum */ 8247 if (adj >= sum_val) 8248 sum_val = ~(adj - sum_val) & 0xFFFF; 8249 else 8250 sum_val -= adj; 8251 } 8252 } else { 8253 sum_val = 0; 8254 sum_flags = 0; 8255 } 8256 8257 /* Clear hardware checksumming flag */ 8258 DB_CKSUMFLAGS(mp) = 0; 8259 8260 /* 8261 * Note: Fragment offset in header is in 8-octet units. 8262 * Clearing least significant 3 bits not only extracts 8263 * it but also gets it in units of octets. 8264 */ 8265 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8266 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8267 8268 /* 8269 * Is the more frags flag on and the payload length not a multiple 8270 * of eight? 
8271 */ 8272 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8273 zoneid_t zoneid; 8274 8275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8276 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8277 if (zoneid == ALL_ZONES) { 8278 freemsg(mp); 8279 return (NULL); 8280 } 8281 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8282 (uint32_t)((char *)&ip6h->ip6_plen - 8283 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8284 return (NULL); 8285 } 8286 8287 v6src_ptr = &ip6h->ip6_src; 8288 v6dst_ptr = &ip6h->ip6_dst; 8289 end = remlen; 8290 8291 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8292 end += offset; 8293 8294 /* 8295 * Would fragment cause reassembled packet to have a payload length 8296 * greater than IP_MAXPACKET - the max payload size? 8297 */ 8298 if (end > IP_MAXPACKET) { 8299 zoneid_t zoneid; 8300 8301 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8302 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8303 if (zoneid == ALL_ZONES) { 8304 freemsg(mp); 8305 return (NULL); 8306 } 8307 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8308 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8309 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8310 return (NULL); 8311 } 8312 8313 /* 8314 * This packet just has one fragment. Reassembly not 8315 * needed. 8316 */ 8317 if (!more_frags && offset == 0) { 8318 goto reass_done; 8319 } 8320 8321 /* 8322 * Drop the fragmented as early as possible, if 8323 * we don't have resource(s) to re-assemble. 8324 */ 8325 if (ipst->ips_ip_reass_queue_bytes == 0) { 8326 freemsg(mp); 8327 return (NULL); 8328 } 8329 8330 /* Record the ECN field info. */ 8331 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8332 /* 8333 * If this is not the first fragment, dump the unfragmentable 8334 * portion of the packet. 8335 */ 8336 if (offset) 8337 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8338 8339 /* 8340 * Fragmentation reassembly. 
Each ILL has a hash table for 8341 * queueing packets undergoing reassembly for all IPIFs 8342 * associated with the ILL. The hash is based on the packet 8343 * IP ident field. The ILL frag hash table was allocated 8344 * as a timer block at the time the ILL was created. Whenever 8345 * there is anything on the reassembly queue, the timer will 8346 * be running. 8347 */ 8348 msg_len = MBLKSIZE(mp); 8349 tail_mp = mp; 8350 while (tail_mp->b_cont != NULL) { 8351 tail_mp = tail_mp->b_cont; 8352 msg_len += MBLKSIZE(tail_mp); 8353 } 8354 /* 8355 * If the reassembly list for this ILL will get too big 8356 * prune it. 8357 */ 8358 8359 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8360 ipst->ips_ip_reass_queue_bytes) { 8361 ill_frag_prune(ill, 8362 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8363 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8364 pruned = B_TRUE; 8365 } 8366 8367 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8368 mutex_enter(&ipfb->ipfb_lock); 8369 8370 ipfp = &ipfb->ipfb_ipf; 8371 /* Try to find an existing fragment queue for this packet. */ 8372 for (;;) { 8373 ipf = ipfp[0]; 8374 if (ipf) { 8375 /* 8376 * It has to match on ident, source address, and 8377 * dest address. 8378 */ 8379 if (ipf->ipf_ident == ident && 8380 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8381 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8382 8383 /* 8384 * If we have received too many 8385 * duplicate fragments for this packet 8386 * free it. 8387 */ 8388 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8389 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8390 freemsg(mp); 8391 mutex_exit(&ipfb->ipfb_lock); 8392 return (NULL); 8393 } 8394 8395 break; 8396 } 8397 ipfp = &ipf->ipf_hash_next; 8398 continue; 8399 } 8400 8401 8402 /* 8403 * If we pruned the list, do we want to store this new 8404 * fragment?. We apply an optimization here based on the 8405 * fact that most fragments will be received in order. 
8406 * So if the offset of this incoming fragment is zero, 8407 * it is the first fragment of a new packet. We will 8408 * keep it. Otherwise drop the fragment, as we have 8409 * probably pruned the packet already (since the 8410 * packet cannot be found). 8411 */ 8412 8413 if (pruned && offset != 0) { 8414 mutex_exit(&ipfb->ipfb_lock); 8415 freemsg(mp); 8416 return (NULL); 8417 } 8418 8419 /* New guy. Allocate a frag message. */ 8420 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8421 if (!mp1) { 8422 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8423 freemsg(mp); 8424 partial_reass_done: 8425 mutex_exit(&ipfb->ipfb_lock); 8426 return (NULL); 8427 } 8428 8429 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8430 /* 8431 * Too many fragmented packets in this hash bucket. 8432 * Free the oldest. 8433 */ 8434 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8435 } 8436 8437 mp1->b_cont = mp; 8438 8439 /* Initialize the fragment header. */ 8440 ipf = (ipf_t *)mp1->b_rptr; 8441 ipf->ipf_mp = mp1; 8442 ipf->ipf_ptphn = ipfp; 8443 ipfp[0] = ipf; 8444 ipf->ipf_hash_next = NULL; 8445 ipf->ipf_ident = ident; 8446 ipf->ipf_v6src = *v6src_ptr; 8447 ipf->ipf_v6dst = *v6dst_ptr; 8448 /* Record reassembly start time. 
*/ 8449 ipf->ipf_timestamp = gethrestime_sec(); 8450 /* Record ipf generation and account for frag header */ 8451 ipf->ipf_gen = ill->ill_ipf_gen++; 8452 ipf->ipf_count = MBLKSIZE(mp1); 8453 ipf->ipf_protocol = nexthdr; 8454 ipf->ipf_nf_hdr_len = 0; 8455 ipf->ipf_prev_nexthdr_offset = 0; 8456 ipf->ipf_last_frag_seen = B_FALSE; 8457 ipf->ipf_ecn = ecn_info; 8458 ipf->ipf_num_dups = 0; 8459 ipfb->ipfb_frag_pkts++; 8460 ipf->ipf_checksum = 0; 8461 ipf->ipf_checksum_flags = 0; 8462 8463 /* Store checksum value in fragment header */ 8464 if (sum_flags != 0) { 8465 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8466 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8467 ipf->ipf_checksum = sum_val; 8468 ipf->ipf_checksum_flags = sum_flags; 8469 } 8470 8471 /* 8472 * We handle reassembly two ways. In the easy case, 8473 * where all the fragments show up in order, we do 8474 * minimal bookkeeping, and just clip new pieces on 8475 * the end. If we ever see a hole, then we go off 8476 * to ip_reassemble which has to mark the pieces and 8477 * keep track of the number of holes, etc. Obviously, 8478 * the point of having both mechanisms is so we can 8479 * handle the easy case as efficiently as possible. 8480 */ 8481 if (offset == 0) { 8482 /* Easy case, in-order reassembly so far. */ 8483 /* Update the byte count */ 8484 ipf->ipf_count += msg_len; 8485 ipf->ipf_tail_mp = tail_mp; 8486 /* 8487 * Keep track of next expected offset in 8488 * ipf_end. 8489 */ 8490 ipf->ipf_end = end; 8491 ipf->ipf_nf_hdr_len = hdr_length; 8492 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8493 } else { 8494 /* Hard case, hole at the beginning. */ 8495 ipf->ipf_tail_mp = NULL; 8496 /* 8497 * ipf_end == 0 means that we have given up 8498 * on easy reassembly. 8499 */ 8500 ipf->ipf_end = 0; 8501 8502 /* Forget checksum offload from now on */ 8503 ipf->ipf_checksum_flags = 0; 8504 8505 /* 8506 * ipf_hole_cnt is set by ip_reassemble. 8507 * ipf_count is updated by ip_reassemble. 
8508 * No need to check for return value here 8509 * as we don't expect reassembly to complete or 8510 * fail for the first fragment itself. 8511 */ 8512 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8513 msg_len); 8514 } 8515 /* Update per ipfb and ill byte counts */ 8516 ipfb->ipfb_count += ipf->ipf_count; 8517 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8518 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8519 /* If the frag timer wasn't already going, start it. */ 8520 mutex_enter(&ill->ill_lock); 8521 ill_frag_timer_start(ill); 8522 mutex_exit(&ill->ill_lock); 8523 goto partial_reass_done; 8524 } 8525 8526 /* 8527 * If the packet's flag has changed (it could be coming up 8528 * from an interface different than the previous, therefore 8529 * possibly different checksum capability), then forget about 8530 * any stored checksum states. Otherwise add the value to 8531 * the existing one stored in the fragment header. 8532 */ 8533 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8534 sum_val += ipf->ipf_checksum; 8535 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8536 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8537 ipf->ipf_checksum = sum_val; 8538 } else if (ipf->ipf_checksum_flags != 0) { 8539 /* Forget checksum offload from now on */ 8540 ipf->ipf_checksum_flags = 0; 8541 } 8542 8543 /* 8544 * We have a new piece of a datagram which is already being 8545 * reassembled. Update the ECN info if all IP fragments 8546 * are ECN capable. If there is one which is not, clear 8547 * all the info. If there is at least one which has CE 8548 * code point, IP needs to report that up to transport. 
8549 */ 8550 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8551 if (ecn_info == IPH_ECN_CE) 8552 ipf->ipf_ecn = IPH_ECN_CE; 8553 } else { 8554 ipf->ipf_ecn = IPH_ECN_NECT; 8555 } 8556 8557 if (offset && ipf->ipf_end == offset) { 8558 /* The new fragment fits at the end */ 8559 ipf->ipf_tail_mp->b_cont = mp; 8560 /* Update the byte count */ 8561 ipf->ipf_count += msg_len; 8562 /* Update per ipfb and ill byte counts */ 8563 ipfb->ipfb_count += msg_len; 8564 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8565 atomic_add_32(&ill->ill_frag_count, msg_len); 8566 if (more_frags) { 8567 /* More to come. */ 8568 ipf->ipf_end = end; 8569 ipf->ipf_tail_mp = tail_mp; 8570 goto partial_reass_done; 8571 } 8572 } else { 8573 /* 8574 * Go do the hard cases. 8575 * Call ip_reassemble(). 8576 */ 8577 int ret; 8578 8579 if (offset == 0) { 8580 if (ipf->ipf_prev_nexthdr_offset == 0) { 8581 ipf->ipf_nf_hdr_len = hdr_length; 8582 ipf->ipf_prev_nexthdr_offset = 8583 *prev_nexthdr_offset; 8584 } 8585 } 8586 /* Save current byte count */ 8587 count = ipf->ipf_count; 8588 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8589 8590 /* Count of bytes added and subtracted (freeb()ed) */ 8591 count = ipf->ipf_count - count; 8592 if (count) { 8593 /* Update per ipfb and ill byte counts */ 8594 ipfb->ipfb_count += count; 8595 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8596 atomic_add_32(&ill->ill_frag_count, count); 8597 } 8598 if (ret == IP_REASS_PARTIAL) { 8599 goto partial_reass_done; 8600 } else if (ret == IP_REASS_FAILED) { 8601 /* Reassembly failed. Free up all resources */ 8602 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8603 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8604 IP_REASS_SET_START(t_mp, 0); 8605 IP_REASS_SET_END(t_mp, 0); 8606 } 8607 freemsg(mp); 8608 goto partial_reass_done; 8609 } 8610 8611 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8612 } 8613 /* 8614 * We have completed reassembly. 
Unhook the frag header from 8615 * the reassembly list. 8616 * 8617 * Grab the unfragmentable header length next header value out 8618 * of the first fragment 8619 */ 8620 ASSERT(ipf->ipf_nf_hdr_len != 0); 8621 hdr_length = ipf->ipf_nf_hdr_len; 8622 8623 /* 8624 * Before we free the frag header, record the ECN info 8625 * to report back to the transport. 8626 */ 8627 ecn_info = ipf->ipf_ecn; 8628 8629 /* 8630 * Store the nextheader field in the header preceding the fragment 8631 * header 8632 */ 8633 nexthdr = ipf->ipf_protocol; 8634 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8635 ipfp = ipf->ipf_ptphn; 8636 8637 /* We need to supply these to caller */ 8638 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8639 sum_val = ipf->ipf_checksum; 8640 else 8641 sum_val = 0; 8642 8643 mp1 = ipf->ipf_mp; 8644 count = ipf->ipf_count; 8645 ipf = ipf->ipf_hash_next; 8646 if (ipf) 8647 ipf->ipf_ptphn = ipfp; 8648 ipfp[0] = ipf; 8649 atomic_add_32(&ill->ill_frag_count, -count); 8650 ASSERT(ipfb->ipfb_count >= count); 8651 ipfb->ipfb_count -= count; 8652 ipfb->ipfb_frag_pkts--; 8653 mutex_exit(&ipfb->ipfb_lock); 8654 /* Ditch the frag header. */ 8655 mp = mp1->b_cont; 8656 freeb(mp1); 8657 8658 /* 8659 * Make sure the packet is good by doing some sanity 8660 * check. If bad we can silentely drop the packet. 8661 */ 8662 reass_done: 8663 if (hdr_length < sizeof (ip6_frag_t)) { 8664 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8665 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8666 freemsg(mp); 8667 return (NULL); 8668 } 8669 8670 /* 8671 * Remove the fragment header from the initial header by 8672 * splitting the mblk into the non-fragmentable header and 8673 * everthing after the fragment extension header. This has the 8674 * side effect of putting all the headers that need destination 8675 * processing into the b_cont block-- on return this fact is 8676 * used in order to avoid having to look at the extensions 8677 * already processed. 
8678 * 8679 * Note that this code assumes that the unfragmentable portion 8680 * of the header is in the first mblk and increments 8681 * the read pointer past it. If this assumption is broken 8682 * this code fails badly. 8683 */ 8684 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8685 mblk_t *nmp; 8686 8687 if (!(nmp = dupb(mp))) { 8688 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8689 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8690 freemsg(mp); 8691 return (NULL); 8692 } 8693 nmp->b_cont = mp->b_cont; 8694 mp->b_cont = nmp; 8695 nmp->b_rptr += hdr_length; 8696 } 8697 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8698 8699 ip6h = (ip6_t *)mp->b_rptr; 8700 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8701 8702 /* Restore original IP length in header. */ 8703 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8704 /* Record the ECN info. */ 8705 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8706 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8707 8708 /* Reassembly is successful; return checksum information if needed */ 8709 if (cksum_val != NULL) 8710 *cksum_val = sum_val; 8711 if (cksum_flags != NULL) 8712 *cksum_flags = sum_flags; 8713 8714 return (mp); 8715 } 8716 8717 /* 8718 * Given an mblk and a ptr, find the destination address in an IPv6 routing 8719 * header. 8720 */ 8721 static in6_addr_t 8722 pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 8723 { 8724 ip6_rthdr0_t *rt0; 8725 int segleft, numaddr; 8726 in6_addr_t *ap, rv = oldrv; 8727 8728 rt0 = (ip6_rthdr0_t *)whereptr; 8729 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 8730 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 8731 uint8_t *, whereptr); 8732 return (rv); 8733 } 8734 segleft = rt0->ip6r0_segleft; 8735 numaddr = rt0->ip6r0_len / 2; 8736 8737 if ((rt0->ip6r0_len & 0x1) || 8738 whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr || 8739 (segleft > rt0->ip6r0_len / 2)) { 8740 /* 8741 * Corrupt packet. 
Either the routing header length is odd 8742 * (can't happen) or mismatched compared to the packet, or the 8743 * number of addresses is. Return what we can. This will 8744 * only be a problem on forwarded packets that get squeezed 8745 * through an outbound tunnel enforcing IPsec Tunnel Mode. 8746 */ 8747 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 8748 whereptr); 8749 return (rv); 8750 } 8751 8752 if (segleft != 0) { 8753 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 8754 rv = ap[numaddr - 1]; 8755 } 8756 8757 return (rv); 8758 } 8759 8760 /* 8761 * Walk through the options to see if there is a routing header. 8762 * If present get the destination which is the last address of 8763 * the option. 8764 */ 8765 in6_addr_t 8766 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8767 { 8768 mblk_t *current_mp = mp; 8769 uint8_t nexthdr; 8770 uint8_t *whereptr; 8771 int ehdrlen; 8772 in6_addr_t rv; 8773 8774 whereptr = (uint8_t *)ip6h; 8775 ehdrlen = sizeof (ip6_t); 8776 8777 /* We assume at least the IPv6 base header is within one mblk. */ 8778 ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen); 8779 8780 rv = ip6h->ip6_dst; 8781 nexthdr = ip6h->ip6_nxt; 8782 if (is_fragment != NULL) 8783 *is_fragment = B_FALSE; 8784 8785 /* 8786 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 8787 * no extension headers will be split across mblks. 8788 */ 8789 8790 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 8791 nexthdr == IPPROTO_ROUTING) { 8792 if (nexthdr == IPPROTO_ROUTING) 8793 rv = pluck_out_dst(current_mp, whereptr, rv); 8794 8795 /* 8796 * All IPv6 extension headers have the next-header in byte 8797 * 0, and the (length - 8) in 8-byte-words. 8798 */ 8799 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8800 ehdrlen -= (current_mp->b_wptr - whereptr); 8801 current_mp = current_mp->b_cont; 8802 if (current_mp == NULL) { 8803 /* Bad packet. Return what we can. 
*/ 8804 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 8805 mp, mblk_t *, current_mp, ip6_t *, ip6h); 8806 goto done; 8807 } 8808 whereptr = current_mp->b_rptr; 8809 } 8810 whereptr += ehdrlen; 8811 8812 nexthdr = *whereptr; 8813 ASSERT(whereptr + 1 < current_mp->b_wptr); 8814 ehdrlen = (*(whereptr + 1) + 1) * 8; 8815 } 8816 8817 done: 8818 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 8819 *is_fragment = B_TRUE; 8820 return (rv); 8821 } 8822 8823 /* 8824 * ip_source_routed_v6: 8825 * This function is called by redirect code in ip_rput_data_v6 to 8826 * know whether this packet is source routed through this node i.e 8827 * whether this node (router) is part of the journey. This 8828 * function is called under two cases : 8829 * 8830 * case 1 : Routing header was processed by this node and 8831 * ip_process_rthdr replaced ip6_dst with the next hop 8832 * and we are forwarding the packet to the next hop. 8833 * 8834 * case 2 : Routing header was not processed by this node and we 8835 * are just forwarding the packet. 8836 * 8837 * For case (1) we don't want to send redirects. For case(2) we 8838 * want to send redirects. 
8839 */ 8840 static boolean_t 8841 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8842 { 8843 uint8_t nexthdr; 8844 in6_addr_t *addrptr; 8845 ip6_rthdr0_t *rthdr; 8846 uint8_t numaddr; 8847 ip6_hbh_t *hbhhdr; 8848 uint_t ehdrlen; 8849 uint8_t *byteptr; 8850 8851 ip2dbg(("ip_source_routed_v6\n")); 8852 nexthdr = ip6h->ip6_nxt; 8853 ehdrlen = IPV6_HDR_LEN; 8854 8855 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8856 while (nexthdr == IPPROTO_HOPOPTS || 8857 nexthdr == IPPROTO_DSTOPTS) { 8858 byteptr = (uint8_t *)ip6h + ehdrlen; 8859 /* 8860 * Check if we have already processed 8861 * packets or we are just a forwarding 8862 * router which only pulled up msgs up 8863 * to IPV6HDR and one HBH ext header 8864 */ 8865 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8866 ip2dbg(("ip_source_routed_v6: Extension" 8867 " headers not processed\n")); 8868 return (B_FALSE); 8869 } 8870 hbhhdr = (ip6_hbh_t *)byteptr; 8871 nexthdr = hbhhdr->ip6h_nxt; 8872 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8873 } 8874 switch (nexthdr) { 8875 case IPPROTO_ROUTING: 8876 byteptr = (uint8_t *)ip6h + ehdrlen; 8877 /* 8878 * If for some reason, we haven't pulled up 8879 * the routing hdr data mblk, then we must 8880 * not have processed it at all. So for sure 8881 * we are not part of the source routed journey. 8882 */ 8883 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8884 ip2dbg(("ip_source_routed_v6: Routing" 8885 " header not processed\n")); 8886 return (B_FALSE); 8887 } 8888 rthdr = (ip6_rthdr0_t *)byteptr; 8889 /* 8890 * Either we are an intermediate router or the 8891 * last hop before destination and we have 8892 * already processed the routing header. 8893 * If segment_left is greater than or equal to zero, 8894 * then we must be the (numaddr - segleft) entry 8895 * of the routing header. Although ip6r0_segleft 8896 * is a unit8_t variable, we still check for zero 8897 * or greater value, if in case the data type 8898 * is changed someday in future. 
8899 */ 8900 if (rthdr->ip6r0_segleft > 0 || 8901 rthdr->ip6r0_segleft == 0) { 8902 ire_t *ire = NULL; 8903 8904 numaddr = rthdr->ip6r0_len / 2; 8905 addrptr = (in6_addr_t *)((char *)rthdr + 8906 sizeof (*rthdr)); 8907 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8908 if (addrptr != NULL) { 8909 ire = ire_ctable_lookup_v6(addrptr, NULL, 8910 IRE_LOCAL, NULL, ALL_ZONES, NULL, 8911 MATCH_IRE_TYPE, 8912 ipst); 8913 if (ire != NULL) { 8914 ire_refrele(ire); 8915 return (B_TRUE); 8916 } 8917 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8918 } 8919 } 8920 /* FALLTHRU */ 8921 default: 8922 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8923 return (B_FALSE); 8924 } 8925 } 8926 8927 /* 8928 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8929 * Assumes that the following set of headers appear in the first 8930 * mblk: 8931 * ip6i_t (if present) CAN also appear as a separate mblk. 8932 * ip6_t 8933 * Any extension headers 8934 * TCP/UDP/SCTP header (if present) 8935 * The routine can handle an ICMPv6 header that is not in the first mblk. 8936 * 8937 * The order to determine the outgoing interface is as follows: 8938 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8939 * 2. If q is an ill queue and (link local or multicast destination) then 8940 * use that ill. 8941 * 3. If IPV6_BOUND_IF has been set use that ill. 8942 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8943 * look for the best IRE match for the unspecified group to determine 8944 * the ill. 8945 * 5. For unicast: Just do an IRE lookup for the best match. 8946 * 8947 * arg2 is always a queue_t *. 8948 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 8949 * the zoneid. 8950 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
8951 */ 8952 void 8953 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8954 { 8955 conn_t *connp = NULL; 8956 queue_t *q = (queue_t *)arg2; 8957 ire_t *ire = NULL; 8958 ire_t *sctp_ire = NULL; 8959 ip6_t *ip6h; 8960 in6_addr_t *v6dstp; 8961 ill_t *ill = NULL; 8962 ipif_t *ipif; 8963 ip6i_t *ip6i; 8964 int cksum_request; /* -1 => normal. */ 8965 /* 1 => Skip TCP/UDP/SCTP checksum */ 8966 /* Otherwise contains insert offset for checksum */ 8967 int unspec_src; 8968 boolean_t do_outrequests; /* Increment OutRequests? */ 8969 mib2_ipIfStatsEntry_t *mibptr; 8970 int match_flags = MATCH_IRE_ILL; 8971 mblk_t *first_mp; 8972 boolean_t mctl_present; 8973 ipsec_out_t *io; 8974 boolean_t multirt_need_resolve = B_FALSE; 8975 mblk_t *copy_mp = NULL; 8976 int err = 0; 8977 int ip6i_flags = 0; 8978 zoneid_t zoneid; 8979 ill_t *saved_ill = NULL; 8980 boolean_t conn_lock_held; 8981 boolean_t need_decref = B_FALSE; 8982 ip_stack_t *ipst; 8983 8984 if (q->q_next != NULL) { 8985 ill = (ill_t *)q->q_ptr; 8986 ipst = ill->ill_ipst; 8987 } else { 8988 connp = (conn_t *)arg; 8989 ASSERT(connp != NULL); 8990 ipst = connp->conn_netstack->netstack_ip; 8991 } 8992 8993 /* 8994 * Highest bit in version field is Reachability Confirmation bit 8995 * used by NUD in ip_xmit_v6(). 8996 */ 8997 #ifdef _BIG_ENDIAN 8998 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 8999 #else 9000 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 9001 #endif 9002 9003 /* 9004 * M_CTL comes from 5 places 9005 * 9006 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9007 * both V4 and V6 datagrams. 9008 * 9009 * 2) AH/ESP sends down M_CTL after doing their job with both 9010 * V4 and V6 datagrams. 9011 * 9012 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9013 * attached. 
9014 * 9015 * 4) Notifications from an external resolver (for XRESOLV ifs) 9016 * 9017 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9018 * IPsec hardware acceleration support. 9019 * 9020 * We need to handle (1)'s IPv6 case and (3) here. For the 9021 * IPv4 case in (1), and (2), IPSEC processing has already 9022 * started. The code in ip_wput() already knows how to handle 9023 * continuing IPSEC processing (for IPv4 and IPv6). All other 9024 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9025 * for handling. 9026 */ 9027 first_mp = mp; 9028 mctl_present = B_FALSE; 9029 io = NULL; 9030 9031 /* Multidata transmit? */ 9032 if (DB_TYPE(mp) == M_MULTIDATA) { 9033 /* 9034 * We should never get here, since all Multidata messages 9035 * originating from tcp should have been directed over to 9036 * tcp_multisend() in the first place. 9037 */ 9038 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9039 freemsg(mp); 9040 return; 9041 } else if (DB_TYPE(mp) == M_CTL) { 9042 uint32_t mctltype = 0; 9043 uint32_t mlen = MBLKL(first_mp); 9044 9045 mp = mp->b_cont; 9046 mctl_present = B_TRUE; 9047 io = (ipsec_out_t *)first_mp->b_rptr; 9048 9049 /* 9050 * Validate this M_CTL message. The only three types of 9051 * M_CTL messages we expect to see in this code path are 9052 * ipsec_out_t or ipsec_in_t structures (allocated as 9053 * ipsec_info_t unions), or ipsec_ctl_t structures. 9054 * The ipsec_out_type and ipsec_in_type overlap in the two 9055 * data structures, and they are either set to IPSEC_OUT 9056 * or IPSEC_IN depending on which data structure it is. 9057 * ipsec_ctl_t is an IPSEC_CTL. 9058 * 9059 * All other M_CTL messages are sent to ip_wput_nondata() 9060 * for handling. 
9061 */ 9062 if (mlen >= sizeof (io->ipsec_out_type)) 9063 mctltype = io->ipsec_out_type; 9064 9065 if ((mlen == sizeof (ipsec_ctl_t)) && 9066 (mctltype == IPSEC_CTL)) { 9067 ip_output(arg, first_mp, arg2, caller); 9068 return; 9069 } 9070 9071 if ((mlen < sizeof (ipsec_info_t)) || 9072 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9073 mp == NULL) { 9074 ip_wput_nondata(NULL, q, first_mp, NULL); 9075 return; 9076 } 9077 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9078 if (q->q_next == NULL) { 9079 ip6h = (ip6_t *)mp->b_rptr; 9080 /* 9081 * For a freshly-generated TCP dgram that needs IPV6 9082 * processing, don't call ip_wput immediately. We can 9083 * tell this by the ipsec_out_proc_begin. In-progress 9084 * IPSEC_OUT messages have proc_begin set to TRUE, 9085 * and we want to send all IPSEC_IN messages to 9086 * ip_wput() for IPsec processing or finishing. 9087 */ 9088 if (mctltype == IPSEC_IN || 9089 IPVER(ip6h) != IPV6_VERSION || 9090 io->ipsec_out_proc_begin) { 9091 mibptr = &ipst->ips_ip6_mib; 9092 goto notv6; 9093 } 9094 } 9095 } else if (DB_TYPE(mp) != M_DATA) { 9096 ip_wput_nondata(NULL, q, mp, NULL); 9097 return; 9098 } 9099 9100 ip6h = (ip6_t *)mp->b_rptr; 9101 9102 if (IPVER(ip6h) != IPV6_VERSION) { 9103 mibptr = &ipst->ips_ip6_mib; 9104 goto notv6; 9105 } 9106 9107 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9108 (connp == NULL || !connp->conn_ulp_labeled)) { 9109 cred_t *cr; 9110 pid_t pid; 9111 9112 if (connp != NULL) { 9113 ASSERT(CONN_CRED(connp) != NULL); 9114 cr = BEST_CRED(mp, connp, &pid); 9115 err = tsol_check_label_v6(cr, &mp, 9116 connp->conn_mac_exempt, ipst, pid); 9117 } else if ((cr = msg_getcred(mp, &pid)) != NULL) { 9118 err = tsol_check_label_v6(cr, &mp, B_FALSE, ipst, pid); 9119 } 9120 if (mctl_present) 9121 first_mp->b_cont = mp; 9122 else 9123 first_mp = mp; 9124 if (err != 0) { 9125 DTRACE_PROBE3( 9126 tsol_ip_log_drop_checklabel_ip6, char *, 9127 "conn(1), failed to check/update mp(2)", 9128 conn_t, 
connp, mblk_t, mp); 9129 freemsg(first_mp); 9130 return; 9131 } 9132 ip6h = (ip6_t *)mp->b_rptr; 9133 } 9134 if (q->q_next != NULL) { 9135 /* 9136 * We don't know if this ill will be used for IPv6 9137 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9138 * ipif_set_values() sets the ill_isv6 flag to true if 9139 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9140 * just drop the packet. 9141 */ 9142 if (!ill->ill_isv6) { 9143 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9144 "ILLF_IPV6 was set\n")); 9145 freemsg(first_mp); 9146 return; 9147 } 9148 /* For uniformity do a refhold */ 9149 mutex_enter(&ill->ill_lock); 9150 if (!ILL_CAN_LOOKUP(ill)) { 9151 mutex_exit(&ill->ill_lock); 9152 freemsg(first_mp); 9153 return; 9154 } 9155 ill_refhold_locked(ill); 9156 mutex_exit(&ill->ill_lock); 9157 mibptr = ill->ill_ip_mib; 9158 9159 ASSERT(mibptr != NULL); 9160 unspec_src = 0; 9161 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9162 do_outrequests = B_FALSE; 9163 zoneid = (zoneid_t)(uintptr_t)arg; 9164 } else { 9165 ASSERT(connp != NULL); 9166 zoneid = connp->conn_zoneid; 9167 9168 /* is queue flow controlled? */ 9169 if ((q->q_first || connp->conn_draining) && 9170 (caller == IP_WPUT)) { 9171 /* 9172 * 1) TCP sends down M_CTL for detached connections. 9173 * 2) AH/ESP sends down M_CTL. 9174 * 9175 * We don't flow control either of the above. Only 9176 * UDP and others are flow controlled for which we 9177 * can't have a M_CTL. 9178 */ 9179 ASSERT(first_mp == mp); 9180 (void) putq(q, mp); 9181 return; 9182 } 9183 mibptr = &ipst->ips_ip6_mib; 9184 unspec_src = connp->conn_unspec_src; 9185 do_outrequests = B_TRUE; 9186 if (mp->b_flag & MSGHASREF) { 9187 mp->b_flag &= ~MSGHASREF; 9188 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9189 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9190 need_decref = B_TRUE; 9191 } 9192 9193 /* 9194 * If there is a policy, try to attach an ipsec_out in 9195 * the front. 
At the end, first_mp either points to a 9196 * M_DATA message or IPSEC_OUT message linked to a 9197 * M_DATA message. We have to do it now as we might 9198 * lose the "conn" if we go through ip_newroute. 9199 */ 9200 if (!mctl_present && 9201 (connp->conn_out_enforce_policy || 9202 connp->conn_latch != NULL)) { 9203 ASSERT(first_mp == mp); 9204 /* XXX Any better way to get the protocol fast ? */ 9205 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9206 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9207 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9208 if (need_decref) 9209 CONN_DEC_REF(connp); 9210 return; 9211 } else { 9212 ASSERT(mp->b_datap->db_type == M_CTL); 9213 first_mp = mp; 9214 mp = mp->b_cont; 9215 mctl_present = B_TRUE; 9216 io = (ipsec_out_t *)first_mp->b_rptr; 9217 } 9218 } 9219 } 9220 9221 /* check for alignment and full IPv6 header */ 9222 if (!OK_32PTR((uchar_t *)ip6h) || 9223 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9224 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9225 if (do_outrequests) 9226 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9227 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9228 freemsg(first_mp); 9229 if (ill != NULL) 9230 ill_refrele(ill); 9231 if (need_decref) 9232 CONN_DEC_REF(connp); 9233 return; 9234 } 9235 v6dstp = &ip6h->ip6_dst; 9236 cksum_request = -1; 9237 ip6i = NULL; 9238 9239 /* 9240 * Once neighbor discovery has completed, ndp_process() will provide 9241 * locally generated packets for which processing can be reattempted. 9242 * In these cases, connp is NULL and the original zone is part of a 9243 * prepended ipsec_out_t. 9244 */ 9245 if (io != NULL) { 9246 /* 9247 * When coming from icmp_input_v6, the zoneid might not match 9248 * for the loopback case, because inside icmp_input_v6 the 9249 * queue_t is a conn queue from the sending side. 
9250 */ 9251 zoneid = io->ipsec_out_zoneid; 9252 ASSERT(zoneid != ALL_ZONES); 9253 } 9254 9255 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9256 /* 9257 * This is an ip6i_t header followed by an ip6_hdr. 9258 * Check which fields are set. 9259 * 9260 * When the packet comes from a transport we should have 9261 * all needed headers in the first mblk. However, when 9262 * going through ip_newroute*_v6 the ip6i might be in 9263 * a separate mblk when we return here. In that case 9264 * we pullup everything to ensure that extension and transport 9265 * headers "stay" in the first mblk. 9266 */ 9267 ip6i = (ip6i_t *)ip6h; 9268 ip6i_flags = ip6i->ip6i_flags; 9269 9270 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9271 ((mp->b_wptr - (uchar_t *)ip6i) >= 9272 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9273 9274 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9275 if (!pullupmsg(mp, -1)) { 9276 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9277 if (do_outrequests) { 9278 BUMP_MIB(mibptr, 9279 ipIfStatsHCOutRequests); 9280 } 9281 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9282 freemsg(first_mp); 9283 if (ill != NULL) 9284 ill_refrele(ill); 9285 if (need_decref) 9286 CONN_DEC_REF(connp); 9287 return; 9288 } 9289 ip6h = (ip6_t *)mp->b_rptr; 9290 v6dstp = &ip6h->ip6_dst; 9291 ip6i = (ip6i_t *)ip6h; 9292 } 9293 ip6h = (ip6_t *)&ip6i[1]; 9294 9295 /* 9296 * Advance rptr past the ip6i_t to get ready for 9297 * transmitting the packet. However, if the packet gets 9298 * passed to ip_newroute*_v6 then rptr is moved back so 9299 * that the ip6i_t header can be inspected when the 9300 * packet comes back here after passing through 9301 * ire_add_then_send. 
9302 */ 9303 mp->b_rptr = (uchar_t *)ip6h; 9304 9305 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9306 ASSERT(ip6i->ip6i_ifindex != 0); 9307 if (ill != NULL) 9308 ill_refrele(ill); 9309 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9310 NULL, NULL, NULL, NULL, ipst); 9311 if (ill == NULL) { 9312 if (do_outrequests) { 9313 BUMP_MIB(mibptr, 9314 ipIfStatsHCOutRequests); 9315 } 9316 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9317 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9318 ip6i->ip6i_ifindex)); 9319 if (need_decref) 9320 CONN_DEC_REF(connp); 9321 freemsg(first_mp); 9322 return; 9323 } 9324 mibptr = ill->ill_ip_mib; 9325 /* 9326 * Preserve the index so that when we return from 9327 * IPSEC processing, we know where to send the packet. 9328 */ 9329 if (mctl_present) { 9330 ASSERT(io != NULL); 9331 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9332 } 9333 } 9334 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9335 cred_t *cr = msg_getcred(mp, NULL); 9336 9337 /* rpcmod doesn't send down db_credp for UDP packets */ 9338 if (cr == NULL) { 9339 if (connp != NULL) 9340 cr = connp->conn_cred; 9341 else 9342 cr = ill->ill_credp; 9343 } 9344 9345 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9346 if (secpolicy_net_rawaccess(cr) != 0) { 9347 /* 9348 * Use IPCL_ZONEID to honor SO_ALLZONES. 9349 */ 9350 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9351 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9352 NULL, connp != NULL ? 
9353 IPCL_ZONEID(connp) : zoneid, NULL, 9354 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9355 if (ire == NULL) { 9356 if (do_outrequests) 9357 BUMP_MIB(mibptr, 9358 ipIfStatsHCOutRequests); 9359 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9360 ip1dbg(("ip_wput_v6: bad source " 9361 "addr\n")); 9362 freemsg(first_mp); 9363 if (ill != NULL) 9364 ill_refrele(ill); 9365 if (need_decref) 9366 CONN_DEC_REF(connp); 9367 return; 9368 } 9369 ire_refrele(ire); 9370 } 9371 /* No need to verify again when using ip_newroute */ 9372 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9373 } 9374 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9375 /* 9376 * Make sure they match since ip_newroute*_v6 etc might 9377 * (unknown to them) inspect ip6i_nexthop when 9378 * they think they access ip6_dst. 9379 */ 9380 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9381 } 9382 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9383 cksum_request = 1; 9384 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9385 cksum_request = ip6i->ip6i_checksum_off; 9386 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9387 unspec_src = 1; 9388 9389 if (do_outrequests && ill != NULL) { 9390 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9391 do_outrequests = B_FALSE; 9392 } 9393 /* 9394 * Store ip6i_t info that we need after we come back 9395 * from IPSEC processing. 9396 */ 9397 if (mctl_present) { 9398 ASSERT(io != NULL); 9399 io->ipsec_out_unspec_src = unspec_src; 9400 } 9401 } 9402 if (connp != NULL && connp->conn_dontroute) 9403 ip6h->ip6_hops = 1; 9404 9405 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9406 goto ipv6multicast; 9407 9408 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9409 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9410 ASSERT(ill != NULL); 9411 goto send_from_ill; 9412 } 9413 9414 /* 9415 * 2. If q is an ill queue and there's a link-local destination 9416 * then use that ill. 9417 */ 9418 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9419 goto send_from_ill; 9420 9421 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9422 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9423 ill_t *conn_outgoing_ill; 9424 9425 conn_outgoing_ill = conn_get_held_ill(connp, 9426 &connp->conn_outgoing_ill, &err); 9427 if (err == ILL_LOOKUP_FAILED) { 9428 if (ill != NULL) 9429 ill_refrele(ill); 9430 if (need_decref) 9431 CONN_DEC_REF(connp); 9432 freemsg(first_mp); 9433 return; 9434 } 9435 if (ill != NULL) 9436 ill_refrele(ill); 9437 ill = conn_outgoing_ill; 9438 mibptr = ill->ill_ip_mib; 9439 goto send_from_ill; 9440 } 9441 9442 /* 9443 * 4. For unicast: Just do an IRE lookup for the best match. 9444 * If we get here for a link-local address it is rather random 9445 * what interface we pick on a multihomed host. 9446 * *If* there is an IRE_CACHE (and the link-local address 9447 * isn't duplicated on multi links) this will find the IRE_CACHE. 9448 * Otherwise it will use one of the matching IRE_INTERFACE routes 9449 * for the link-local prefix. Hence, applications 9450 * *should* be encouraged to specify an outgoing interface when sending 9451 * to a link local address. 9452 */ 9453 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9454 !connp->conn_fully_bound)) { 9455 /* 9456 * We cache IRE_CACHEs to avoid lookups. We don't do 9457 * this for the tcp global queue and listen end point 9458 * as it does not really have a real destination to 9459 * talk to. 9460 */ 9461 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9462 ipst); 9463 } else { 9464 /* 9465 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9466 * grab a lock here to check for CONDEMNED as it is okay 9467 * to send a packet or two with the IRE_CACHE that is going 9468 * away. 9469 */ 9470 mutex_enter(&connp->conn_lock); 9471 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9472 if (ire != NULL && 9473 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9474 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9475 9476 IRE_REFHOLD(ire); 9477 mutex_exit(&connp->conn_lock); 9478 9479 } else { 9480 boolean_t cached = B_FALSE; 9481 9482 connp->conn_ire_cache = NULL; 9483 mutex_exit(&connp->conn_lock); 9484 /* Release the old ire */ 9485 if (ire != NULL && sctp_ire == NULL) 9486 IRE_REFRELE_NOTR(ire); 9487 9488 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9489 msg_getlabel(mp), ipst); 9490 if (ire != NULL) { 9491 IRE_REFHOLD_NOTR(ire); 9492 9493 mutex_enter(&connp->conn_lock); 9494 if (CONN_CACHE_IRE(connp) && 9495 (connp->conn_ire_cache == NULL)) { 9496 rw_enter(&ire->ire_bucket->irb_lock, 9497 RW_READER); 9498 if (!(ire->ire_marks & 9499 IRE_MARK_CONDEMNED)) { 9500 connp->conn_ire_cache = ire; 9501 cached = B_TRUE; 9502 } 9503 rw_exit(&ire->ire_bucket->irb_lock); 9504 } 9505 mutex_exit(&connp->conn_lock); 9506 9507 /* 9508 * We can continue to use the ire but since it 9509 * was not cached, we should drop the extra 9510 * reference. 9511 */ 9512 if (!cached) 9513 IRE_REFRELE_NOTR(ire); 9514 } 9515 } 9516 } 9517 9518 if (ire != NULL) { 9519 if (do_outrequests) { 9520 /* Handle IRE_LOCAL's that might appear here */ 9521 if (ire->ire_type == IRE_CACHE) { 9522 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9523 ill_ip_mib; 9524 } else { 9525 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9526 } 9527 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9528 } 9529 9530 /* 9531 * Check if the ire has the RTF_MULTIRT flag, inherited 9532 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9533 */ 9534 if (ire->ire_flags & RTF_MULTIRT) { 9535 /* 9536 * Force hop limit of multirouted packets if required. 9537 * The hop limit of such packets is bounded by the 9538 * ip_multirt_ttl ndd variable. 9539 * NDP packets must have a hop limit of 255; don't 9540 * change the hop limit in that case. 
9541 */ 9542 if ((ipst->ips_ip_multirt_ttl > 0) && 9543 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9544 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9545 if (ip_debug > 3) { 9546 ip2dbg(("ip_wput_v6: forcing multirt " 9547 "hop limit to %d (was %d) ", 9548 ipst->ips_ip_multirt_ttl, 9549 ip6h->ip6_hops)); 9550 pr_addr_dbg("v6dst %s\n", AF_INET6, 9551 &ire->ire_addr_v6); 9552 } 9553 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9554 } 9555 9556 /* 9557 * We look at this point if there are pending 9558 * unresolved routes. ire_multirt_need_resolve_v6() 9559 * checks in O(n) that all IRE_OFFSUBNET ire 9560 * entries for the packet's destination and 9561 * flagged RTF_MULTIRT are currently resolved. 9562 * If some remain unresolved, we do a copy 9563 * of the current message. It will be used 9564 * to initiate additional route resolutions. 9565 */ 9566 multirt_need_resolve = 9567 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9568 msg_getlabel(first_mp), ipst); 9569 ip2dbg(("ip_wput_v6: ire %p, " 9570 "multirt_need_resolve %d, first_mp %p\n", 9571 (void *)ire, multirt_need_resolve, 9572 (void *)first_mp)); 9573 if (multirt_need_resolve) { 9574 copy_mp = copymsg(first_mp); 9575 if (copy_mp != NULL) { 9576 MULTIRT_DEBUG_TAG(copy_mp); 9577 } 9578 } 9579 } 9580 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9581 connp, caller, ip6i_flags, zoneid); 9582 if (need_decref) { 9583 CONN_DEC_REF(connp); 9584 connp = NULL; 9585 } 9586 IRE_REFRELE(ire); 9587 9588 /* 9589 * Try to resolve another multiroute if 9590 * ire_multirt_need_resolve_v6() deemed it necessary. 9591 * copy_mp will be consumed (sent or freed) by 9592 * ip_newroute_v6(). 
9593 */ 9594 if (copy_mp != NULL) { 9595 if (mctl_present) { 9596 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9597 } else { 9598 ip6h = (ip6_t *)copy_mp->b_rptr; 9599 } 9600 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9601 &ip6h->ip6_src, NULL, zoneid, ipst); 9602 } 9603 if (ill != NULL) 9604 ill_refrele(ill); 9605 return; 9606 } 9607 9608 /* 9609 * No full IRE for this destination. Send it to 9610 * ip_newroute_v6 to see if anything else matches. 9611 * Mark this packet as having originated on this 9612 * machine. 9613 * Update rptr if there was an ip6i_t header. 9614 */ 9615 mp->b_prev = NULL; 9616 mp->b_next = NULL; 9617 if (ip6i != NULL) 9618 mp->b_rptr -= sizeof (ip6i_t); 9619 9620 if (unspec_src) { 9621 if (ip6i == NULL) { 9622 /* 9623 * Add ip6i_t header to carry unspec_src 9624 * until the packet comes back in ip_wput_v6. 9625 */ 9626 mp = ip_add_info_v6(mp, NULL, v6dstp); 9627 if (mp == NULL) { 9628 if (do_outrequests) 9629 BUMP_MIB(mibptr, 9630 ipIfStatsHCOutRequests); 9631 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9632 if (mctl_present) 9633 freeb(first_mp); 9634 if (ill != NULL) 9635 ill_refrele(ill); 9636 if (need_decref) 9637 CONN_DEC_REF(connp); 9638 return; 9639 } 9640 ip6i = (ip6i_t *)mp->b_rptr; 9641 9642 if (mctl_present) { 9643 ASSERT(first_mp != mp); 9644 first_mp->b_cont = mp; 9645 } else { 9646 first_mp = mp; 9647 } 9648 9649 if ((mp->b_wptr - (uchar_t *)ip6i) == 9650 sizeof (ip6i_t)) { 9651 /* 9652 * ndp_resolver called from ip_newroute_v6 9653 * expects pulled up message. 
9654 */ 9655 if (!pullupmsg(mp, -1)) { 9656 ip1dbg(("ip_wput_v6: pullupmsg" 9657 " failed\n")); 9658 if (do_outrequests) { 9659 BUMP_MIB(mibptr, 9660 ipIfStatsHCOutRequests); 9661 } 9662 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9663 freemsg(first_mp); 9664 if (ill != NULL) 9665 ill_refrele(ill); 9666 if (need_decref) 9667 CONN_DEC_REF(connp); 9668 return; 9669 } 9670 ip6i = (ip6i_t *)mp->b_rptr; 9671 } 9672 ip6h = (ip6_t *)&ip6i[1]; 9673 v6dstp = &ip6h->ip6_dst; 9674 } 9675 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9676 if (mctl_present) { 9677 ASSERT(io != NULL); 9678 io->ipsec_out_unspec_src = unspec_src; 9679 } 9680 } 9681 if (do_outrequests) 9682 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9683 if (need_decref) 9684 CONN_DEC_REF(connp); 9685 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9686 if (ill != NULL) 9687 ill_refrele(ill); 9688 return; 9689 9690 9691 /* 9692 * Handle multicast packets with or without an conn. 9693 * Assumes that the transports set ip6_hops taking 9694 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9695 * into account. 9696 */ 9697 ipv6multicast: 9698 ip2dbg(("ip_wput_v6: multicast\n")); 9699 9700 /* 9701 * Hold the conn_lock till we refhold the ill of interest that is 9702 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9703 * while holding any locks, postpone the refrele until after the 9704 * conn_lock is dropped. 9705 */ 9706 if (connp != NULL) { 9707 mutex_enter(&connp->conn_lock); 9708 conn_lock_held = B_TRUE; 9709 } else { 9710 conn_lock_held = B_FALSE; 9711 } 9712 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9713 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9714 ASSERT(ill != NULL); 9715 } else if (ill != NULL) { 9716 /* 9717 * 2. If q is an ill queue and (link local or multicast 9718 * destination) then use that ill. 9719 * We don't need the ipif initialization here. 
9720 * This useless assert below is just to prevent lint from 9721 * reporting a null body if statement. 9722 */ 9723 ASSERT(ill != NULL); 9724 } else if (connp != NULL) { 9725 /* 9726 * 3. If IPV6_BOUND_IF has been set use that ill. 9727 * 9728 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9729 * Otherwise look for the best IRE match for the unspecified 9730 * group to determine the ill. 9731 * 9732 * conn_multicast_ill is used for only IPv6 packets. 9733 * conn_multicast_ipif is used for only IPv4 packets. 9734 * Thus a PF_INET6 socket send both IPv4 and IPv6 9735 * multicast packets using different IP*_MULTICAST_IF 9736 * interfaces. 9737 */ 9738 if (connp->conn_outgoing_ill != NULL) { 9739 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9740 if (err == ILL_LOOKUP_FAILED) { 9741 ip1dbg(("ip_output_v6: multicast" 9742 " conn_outgoing_ill no ipif\n")); 9743 multicast_discard: 9744 ASSERT(saved_ill == NULL); 9745 if (conn_lock_held) 9746 mutex_exit(&connp->conn_lock); 9747 if (ill != NULL) 9748 ill_refrele(ill); 9749 freemsg(first_mp); 9750 if (do_outrequests) 9751 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9752 if (need_decref) 9753 CONN_DEC_REF(connp); 9754 return; 9755 } 9756 ill = connp->conn_outgoing_ill; 9757 } else if (connp->conn_multicast_ill != NULL) { 9758 err = ill_check_and_refhold(connp->conn_multicast_ill); 9759 if (err == ILL_LOOKUP_FAILED) { 9760 ip1dbg(("ip_output_v6: multicast" 9761 " conn_multicast_ill no ipif\n")); 9762 goto multicast_discard; 9763 } 9764 ill = connp->conn_multicast_ill; 9765 } else { 9766 mutex_exit(&connp->conn_lock); 9767 conn_lock_held = B_FALSE; 9768 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9769 if (ipif == NULL) { 9770 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9771 goto multicast_discard; 9772 } 9773 /* 9774 * We have a ref to this ipif, so we can safely 9775 * access ipif_ill. 
9776 */ 9777 ill = ipif->ipif_ill; 9778 mutex_enter(&ill->ill_lock); 9779 if (!ILL_CAN_LOOKUP(ill)) { 9780 mutex_exit(&ill->ill_lock); 9781 ipif_refrele(ipif); 9782 ill = NULL; 9783 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9784 goto multicast_discard; 9785 } 9786 ill_refhold_locked(ill); 9787 mutex_exit(&ill->ill_lock); 9788 ipif_refrele(ipif); 9789 /* 9790 * Save binding until IPV6_MULTICAST_IF 9791 * changes it 9792 */ 9793 mutex_enter(&connp->conn_lock); 9794 connp->conn_multicast_ill = ill; 9795 mutex_exit(&connp->conn_lock); 9796 } 9797 } 9798 if (conn_lock_held) 9799 mutex_exit(&connp->conn_lock); 9800 9801 if (saved_ill != NULL) 9802 ill_refrele(saved_ill); 9803 9804 ASSERT(ill != NULL); 9805 /* 9806 * For multicast loopback interfaces replace the multicast address 9807 * with a unicast address for the ire lookup. 9808 */ 9809 if (IS_LOOPBACK(ill)) 9810 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9811 9812 mibptr = ill->ill_ip_mib; 9813 if (do_outrequests) { 9814 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9815 do_outrequests = B_FALSE; 9816 } 9817 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9818 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9819 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9820 9821 /* 9822 * As we may lose the conn by the time we reach ip_wput_ire_v6 9823 * we copy conn_multicast_loop and conn_dontroute on to an 9824 * ipsec_out. In case if this datagram goes out secure, 9825 * we need the ill_index also. Copy that also into the 9826 * ipsec_out. 
9827 */ 9828 if (mctl_present) { 9829 io = (ipsec_out_t *)first_mp->b_rptr; 9830 ASSERT(first_mp->b_datap->db_type == M_CTL); 9831 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9832 } else { 9833 ASSERT(mp == first_mp); 9834 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9835 NULL) { 9836 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9837 freemsg(mp); 9838 if (ill != NULL) 9839 ill_refrele(ill); 9840 if (need_decref) 9841 CONN_DEC_REF(connp); 9842 return; 9843 } 9844 io = (ipsec_out_t *)first_mp->b_rptr; 9845 /* This is not a secure packet */ 9846 io->ipsec_out_secure = B_FALSE; 9847 io->ipsec_out_use_global_policy = B_TRUE; 9848 io->ipsec_out_zoneid = 9849 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9850 first_mp->b_cont = mp; 9851 mctl_present = B_TRUE; 9852 } 9853 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9854 io->ipsec_out_unspec_src = unspec_src; 9855 if (connp != NULL) 9856 io->ipsec_out_dontroute = connp->conn_dontroute; 9857 9858 send_from_ill: 9859 ASSERT(ill != NULL); 9860 ASSERT(mibptr == ill->ill_ip_mib); 9861 9862 if (do_outrequests) { 9863 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9864 do_outrequests = B_FALSE; 9865 } 9866 9867 /* 9868 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 9869 * an underlying interface, IS_UNDER_IPMP() may be true even when 9870 * building IREs that will be used for data traffic. As such, use the 9871 * packet's source address to determine whether the traffic is test 9872 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 9873 * 9874 * Separately, we also need to mark probe packets so that ND can 9875 * process them specially; see the comments in nce_queue_mp_common(). 
9876 */ 9877 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 9878 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 9879 if (ip6i == NULL) { 9880 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 9881 if (mctl_present) 9882 freeb(first_mp); 9883 goto discard; 9884 } 9885 9886 if (mctl_present) 9887 first_mp->b_cont = mp; 9888 else 9889 first_mp = mp; 9890 9891 /* ndp_resolver() expects a pulled-up message */ 9892 if (MBLKL(mp) == sizeof (ip6i_t) && 9893 pullupmsg(mp, -1) == 0) { 9894 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 9895 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9896 ill_refrele(ill); 9897 if (need_decref) 9898 CONN_DEC_REF(connp); 9899 return; 9900 } 9901 ip6i = (ip6i_t *)mp->b_rptr; 9902 ip6h = (ip6_t *)&ip6i[1]; 9903 v6dstp = &ip6h->ip6_dst; 9904 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 9905 } 9906 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 9907 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 9908 } 9909 9910 if (io != NULL) 9911 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9912 9913 /* 9914 * When a specific ill is specified (using IPV6_PKTINFO, 9915 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9916 * on routing entries (ftable and ctable) that have a matching 9917 * ire->ire_ipif->ipif_ill. Thus this can only be used 9918 * for destinations that are on-link for the specific ill 9919 * and that can appear on multiple links. Thus it is useful 9920 * for multicast destinations, link-local destinations, and 9921 * at some point perhaps for site-local destinations (if the 9922 * node sits at a site boundary). 9923 * We create the cache entries in the regular ctable since 9924 * it can not "confuse" things for other destinations. 9925 * table. 9926 * 9927 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9928 * It is used only when ire_cache_lookup is used above. 
9929 */ 9930 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9931 zoneid, msg_getlabel(mp), match_flags, ipst); 9932 if (ire != NULL) { 9933 /* 9934 * Check if the ire has the RTF_MULTIRT flag, inherited 9935 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9936 */ 9937 if (ire->ire_flags & RTF_MULTIRT) { 9938 /* 9939 * Force hop limit of multirouted packets if required. 9940 * The hop limit of such packets is bounded by the 9941 * ip_multirt_ttl ndd variable. 9942 * NDP packets must have a hop limit of 255; don't 9943 * change the hop limit in that case. 9944 */ 9945 if ((ipst->ips_ip_multirt_ttl > 0) && 9946 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9947 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9948 if (ip_debug > 3) { 9949 ip2dbg(("ip_wput_v6: forcing multirt " 9950 "hop limit to %d (was %d) ", 9951 ipst->ips_ip_multirt_ttl, 9952 ip6h->ip6_hops)); 9953 pr_addr_dbg("v6dst %s\n", AF_INET6, 9954 &ire->ire_addr_v6); 9955 } 9956 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9957 } 9958 9959 /* 9960 * We look at this point if there are pending 9961 * unresolved routes. ire_multirt_need_resolve_v6() 9962 * checks in O(n) that all IRE_OFFSUBNET ire 9963 * entries for the packet's destination and 9964 * flagged RTF_MULTIRT are currently resolved. 9965 * If some remain unresolved, we make a copy 9966 * of the current message. It will be used 9967 * to initiate additional route resolutions. 
9968 */ 9969 multirt_need_resolve = 9970 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9971 msg_getlabel(first_mp), ipst); 9972 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9973 "multirt_need_resolve %d, first_mp %p\n", 9974 (void *)ire, multirt_need_resolve, 9975 (void *)first_mp)); 9976 if (multirt_need_resolve) { 9977 copy_mp = copymsg(first_mp); 9978 if (copy_mp != NULL) { 9979 MULTIRT_DEBUG_TAG(copy_mp); 9980 } 9981 } 9982 } 9983 9984 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9985 ill->ill_name, (void *)ire, 9986 ill->ill_phyint->phyint_ifindex)); 9987 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9988 connp, caller, ip6i_flags, zoneid); 9989 ire_refrele(ire); 9990 if (need_decref) { 9991 CONN_DEC_REF(connp); 9992 connp = NULL; 9993 } 9994 9995 /* 9996 * Try to resolve another multiroute if 9997 * ire_multirt_need_resolve_v6() deemed it necessary. 9998 * copy_mp will be consumed (sent or freed) by 9999 * ip_newroute_[ipif_]v6(). 10000 */ 10001 if (copy_mp != NULL) { 10002 if (mctl_present) { 10003 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 10004 } else { 10005 ip6h = (ip6_t *)copy_mp->b_rptr; 10006 } 10007 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10008 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10009 zoneid, ipst); 10010 if (ipif == NULL) { 10011 ip1dbg(("ip_wput_v6: No ipif for " 10012 "multicast\n")); 10013 MULTIRT_DEBUG_UNTAG(copy_mp); 10014 freemsg(copy_mp); 10015 return; 10016 } 10017 ip_newroute_ipif_v6(q, copy_mp, ipif, 10018 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10019 zoneid); 10020 ipif_refrele(ipif); 10021 } else { 10022 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10023 &ip6h->ip6_src, ill, zoneid, ipst); 10024 } 10025 } 10026 ill_refrele(ill); 10027 return; 10028 } 10029 if (need_decref) { 10030 CONN_DEC_REF(connp); 10031 connp = NULL; 10032 } 10033 10034 /* Update rptr if there was an ip6i_t header. 
*/ 10035 if (ip6i != NULL) 10036 mp->b_rptr -= sizeof (ip6i_t); 10037 if (unspec_src) { 10038 if (ip6i == NULL) { 10039 /* 10040 * Add ip6i_t header to carry unspec_src 10041 * until the packet comes back in ip_wput_v6. 10042 */ 10043 if (mctl_present) { 10044 first_mp->b_cont = 10045 ip_add_info_v6(mp, NULL, v6dstp); 10046 mp = first_mp->b_cont; 10047 if (mp == NULL) 10048 freeb(first_mp); 10049 } else { 10050 first_mp = mp = ip_add_info_v6(mp, NULL, 10051 v6dstp); 10052 } 10053 if (mp == NULL) { 10054 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10055 ill_refrele(ill); 10056 return; 10057 } 10058 ip6i = (ip6i_t *)mp->b_rptr; 10059 if ((mp->b_wptr - (uchar_t *)ip6i) == 10060 sizeof (ip6i_t)) { 10061 /* 10062 * ndp_resolver called from ip_newroute_v6 10063 * expects a pulled up message. 10064 */ 10065 if (!pullupmsg(mp, -1)) { 10066 ip1dbg(("ip_wput_v6: pullupmsg" 10067 " failed\n")); 10068 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10069 freemsg(first_mp); 10070 return; 10071 } 10072 ip6i = (ip6i_t *)mp->b_rptr; 10073 } 10074 ip6h = (ip6_t *)&ip6i[1]; 10075 v6dstp = &ip6h->ip6_dst; 10076 } 10077 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10078 if (mctl_present) { 10079 ASSERT(io != NULL); 10080 io->ipsec_out_unspec_src = unspec_src; 10081 } 10082 } 10083 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10084 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10085 &ip6h->ip6_src, unspec_src, zoneid); 10086 } else { 10087 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10088 zoneid, ipst); 10089 } 10090 ill_refrele(ill); 10091 return; 10092 10093 notv6: 10094 /* FIXME?: assume the caller calls the right version of ip_output? */ 10095 if (q->q_next == NULL) { 10096 connp = Q_TO_CONN(q); 10097 10098 /* 10099 * We can change conn_send for all types of conn, even 10100 * though only TCP uses it right now. 10101 * FIXME: sctp could use conn_send but doesn't currently. 
10102 */ 10103 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10104 } 10105 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10106 (void) ip_output(arg, first_mp, arg2, caller); 10107 if (ill != NULL) 10108 ill_refrele(ill); 10109 } 10110 10111 /* 10112 * If this is a conn_t queue, then we pass in the conn. This includes the 10113 * zoneid. 10114 * Otherwise, this is a message for an ill_t queue, 10115 * in which case we use the global zoneid since those are all part of 10116 * the global zone. 10117 */ 10118 void 10119 ip_wput_v6(queue_t *q, mblk_t *mp) 10120 { 10121 if (CONN_Q(q)) 10122 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10123 else 10124 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10125 } 10126 10127 /* 10128 * NULL send-to queue - packet is to be delivered locally. 10129 */ 10130 void 10131 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10132 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10133 { 10134 uint32_t ports; 10135 mblk_t *mp = first_mp, *first_mp1; 10136 boolean_t mctl_present; 10137 uint8_t nexthdr; 10138 uint16_t hdr_length; 10139 ipsec_out_t *io; 10140 mib2_ipIfStatsEntry_t *mibptr; 10141 ilm_t *ilm; 10142 uint_t nexthdr_offset; 10143 ip_stack_t *ipst = ill->ill_ipst; 10144 10145 if (DB_TYPE(mp) == M_CTL) { 10146 io = (ipsec_out_t *)mp->b_rptr; 10147 if (!io->ipsec_out_secure) { 10148 mp = mp->b_cont; 10149 freeb(first_mp); 10150 first_mp = mp; 10151 mctl_present = B_FALSE; 10152 } else { 10153 mctl_present = B_TRUE; 10154 mp = first_mp->b_cont; 10155 ipsec_out_to_in(first_mp); 10156 } 10157 } else { 10158 mctl_present = B_FALSE; 10159 } 10160 10161 /* 10162 * Remove reachability confirmation bit from version field 10163 * before passing the packet on to any firewall hooks or 10164 * looping back the packet. 
10165 */ 10166 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10167 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10168 10169 DTRACE_PROBE4(ip6__loopback__in__start, 10170 ill_t *, ill, ill_t *, NULL, 10171 ip6_t *, ip6h, mblk_t *, first_mp); 10172 10173 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10174 ipst->ips_ipv6firewall_loopback_in, 10175 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10176 10177 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10178 10179 if (first_mp == NULL) 10180 return; 10181 10182 if (ipst->ips_ipobs_enabled) { 10183 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10184 zoneid_t stackzoneid = netstackid_to_zoneid( 10185 ipst->ips_netstack->netstack_stackid); 10186 10187 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10188 /* 10189 * ::1 is special, as we cannot lookup its zoneid by 10190 * address. For this case, restrict the lookup to the 10191 * source zone. 10192 */ 10193 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10194 lookup_zoneid = zoneid; 10195 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10196 lookup_zoneid); 10197 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 10198 IPV6_VERSION, 0, ipst); 10199 } 10200 10201 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10202 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10203 int, 1); 10204 10205 nexthdr = ip6h->ip6_nxt; 10206 mibptr = ill->ill_ip_mib; 10207 10208 /* Fastpath */ 10209 switch (nexthdr) { 10210 case IPPROTO_TCP: 10211 case IPPROTO_UDP: 10212 case IPPROTO_ICMPV6: 10213 case IPPROTO_SCTP: 10214 hdr_length = IPV6_HDR_LEN; 10215 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10216 (uchar_t *)ip6h); 10217 break; 10218 default: { 10219 uint8_t *nexthdrp; 10220 10221 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10222 &hdr_length, &nexthdrp)) { 10223 /* Malformed packet */ 10224 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10225 freemsg(first_mp); 10226 return; 10227 } 10228 nexthdr = *nexthdrp; 10229 nexthdr_offset = nexthdrp - 
(uint8_t *)ip6h; 10230 break; 10231 } 10232 } 10233 10234 UPDATE_OB_PKT_COUNT(ire); 10235 ire->ire_last_used_time = lbolt; 10236 10237 switch (nexthdr) { 10238 case IPPROTO_TCP: 10239 if (DB_TYPE(mp) == M_DATA) { 10240 /* 10241 * M_DATA mblk, so init mblk (chain) for 10242 * no struio(). 10243 */ 10244 mblk_t *mp1 = mp; 10245 10246 do { 10247 mp1->b_datap->db_struioflag = 0; 10248 } while ((mp1 = mp1->b_cont) != NULL); 10249 } 10250 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10251 TCP_PORTS_OFFSET); 10252 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10253 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10254 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10255 hdr_length, mctl_present, ire->ire_zoneid); 10256 return; 10257 10258 case IPPROTO_UDP: 10259 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10260 UDP_PORTS_OFFSET); 10261 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10262 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10263 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10264 return; 10265 10266 case IPPROTO_SCTP: 10267 { 10268 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10269 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10270 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10271 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10272 return; 10273 } 10274 case IPPROTO_ICMPV6: { 10275 icmp6_t *icmp6; 10276 10277 /* check for full IPv6+ICMPv6 header */ 10278 if ((mp->b_wptr - mp->b_rptr) < 10279 (hdr_length + ICMP6_MINLEN)) { 10280 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10281 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10282 " failed\n")); 10283 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10284 freemsg(first_mp); 10285 return; 10286 } 10287 ip6h = (ip6_t *)mp->b_rptr; 10288 } 10289 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10290 10291 /* Update output mib stats */ 10292 icmp_update_out_mib_v6(ill, icmp6); 10293 10294 /* Check variable for testing applications */ 10295 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10296 
freemsg(first_mp); 10297 return; 10298 } 10299 /* 10300 * Assume that there is always at least one conn for 10301 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10302 * where there is no conn. 10303 */ 10304 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10305 !IS_LOOPBACK(ill)) { 10306 ilm_walker_t ilw; 10307 10308 /* 10309 * In the multicast case, applications may have 10310 * joined the group from different zones, so we 10311 * need to deliver the packet to each of them. 10312 * Loop through the multicast memberships 10313 * structures (ilm) on the receive ill and send 10314 * a copy of the packet up each matching one. 10315 * However, we don't do this for multicasts sent 10316 * on the loopback interface (PHYI_LOOPBACK flag 10317 * set) as they must stay in the sender's zone. 10318 */ 10319 ilm = ilm_walker_start(&ilw, ill); 10320 for (; ilm != NULL; 10321 ilm = ilm_walker_step(&ilw, ilm)) { 10322 if (!IN6_ARE_ADDR_EQUAL( 10323 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10324 continue; 10325 if ((fanout_flags & 10326 IP_FF_NO_MCAST_LOOP) && 10327 ilm->ilm_zoneid == ire->ire_zoneid) 10328 continue; 10329 if (!ipif_lookup_zoneid( 10330 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10331 IPIF_UP, NULL)) 10332 continue; 10333 10334 first_mp1 = ip_copymsg(first_mp); 10335 if (first_mp1 == NULL) 10336 continue; 10337 icmp_inbound_v6(q, first_mp1, 10338 ilw.ilw_walk_ill, ill, hdr_length, 10339 mctl_present, IP6_NO_IPPOLICY, 10340 ilm->ilm_zoneid, NULL); 10341 } 10342 ilm_walker_finish(&ilw); 10343 } else { 10344 first_mp1 = ip_copymsg(first_mp); 10345 if (first_mp1 != NULL) 10346 icmp_inbound_v6(q, first_mp1, ill, ill, 10347 hdr_length, mctl_present, 10348 IP6_NO_IPPOLICY, ire->ire_zoneid, 10349 NULL); 10350 } 10351 } 10352 /* FALLTHRU */ 10353 default: { 10354 /* 10355 * Handle protocols with which IPv6 is less intimate. 10356 */ 10357 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10358 10359 /* 10360 * Enable sending ICMP for "Unknown" nexthdr 10361 * case. i.e. 
where we did not FALLTHRU from 10362 * IPPROTO_ICMPV6 processing case above. 10363 */ 10364 if (nexthdr != IPPROTO_ICMPV6) 10365 fanout_flags |= IP_FF_SEND_ICMP; 10366 /* 10367 * Note: There can be more than one stream bound 10368 * to a particular protocol. When this is the case, 10369 * each one gets a copy of any incoming packets. 10370 */ 10371 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10372 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10373 mctl_present, ire->ire_zoneid); 10374 return; 10375 } 10376 } 10377 } 10378 10379 /* 10380 * Send packet using IRE. 10381 * Checksumming is controlled by cksum_request: 10382 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10383 * 1 => Skip TCP/UDP/SCTP checksum 10384 * Otherwise => checksum_request contains insert offset for checksum 10385 * 10386 * Assumes that the following set of headers appear in the first 10387 * mblk: 10388 * ip6_t 10389 * Any extension headers 10390 * TCP/UDP/SCTP header (if present) 10391 * The routine can handle an ICMPv6 header that is not in the first mblk. 10392 * 10393 * NOTE : This function does not ire_refrele the ire passed in as the 10394 * argument unlike ip_wput_ire where the REFRELE is done. 10395 * Refer to ip_wput_ire for more on this. 10396 */ 10397 static void 10398 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10399 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10400 { 10401 ip6_t *ip6h; 10402 uint8_t nexthdr; 10403 uint16_t hdr_length; 10404 uint_t reachable = 0x0; 10405 ill_t *ill; 10406 mib2_ipIfStatsEntry_t *mibptr; 10407 mblk_t *first_mp; 10408 boolean_t mctl_present; 10409 ipsec_out_t *io; 10410 boolean_t conn_dontroute; /* conn value for multicast */ 10411 boolean_t conn_multicast_loop; /* conn value for multicast */ 10412 boolean_t multicast_forward; /* Should we forward ? 
*/ 10413 int max_frag; 10414 ip_stack_t *ipst = ire->ire_ipst; 10415 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10416 10417 ill = ire_to_ill(ire); 10418 first_mp = mp; 10419 multicast_forward = B_FALSE; 10420 10421 if (mp->b_datap->db_type != M_CTL) { 10422 ip6h = (ip6_t *)first_mp->b_rptr; 10423 } else { 10424 io = (ipsec_out_t *)first_mp->b_rptr; 10425 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10426 /* 10427 * Grab the zone id now because the M_CTL can be discarded by 10428 * ip_wput_ire_parse_ipsec_out() below. 10429 */ 10430 ASSERT(zoneid == io->ipsec_out_zoneid); 10431 ASSERT(zoneid != ALL_ZONES); 10432 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10433 /* 10434 * For the multicast case, ipsec_out carries conn_dontroute and 10435 * conn_multicast_loop as conn may not be available here. We 10436 * need this for multicast loopback and forwarding which is done 10437 * later in the code. 10438 */ 10439 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10440 conn_dontroute = io->ipsec_out_dontroute; 10441 conn_multicast_loop = io->ipsec_out_multicast_loop; 10442 /* 10443 * If conn_dontroute is not set or conn_multicast_loop 10444 * is set, we need to do forwarding/loopback. For 10445 * datagrams from ip_wput_multicast, conn_dontroute is 10446 * set to B_TRUE and conn_multicast_loop is set to 10447 * B_FALSE so that we neither do forwarding nor 10448 * loopback. 10449 */ 10450 if (!conn_dontroute || conn_multicast_loop) 10451 multicast_forward = B_TRUE; 10452 } 10453 } 10454 10455 /* 10456 * If the sender didn't supply the hop limit and there is a default 10457 * unicast hop limit associated with the output interface, we use 10458 * that if the packet is unicast. Interface specific unicast hop 10459 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10460 */ 10461 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10462 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10463 ip6h->ip6_hops = ill->ill_max_hops; 10464 } 10465 10466 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10467 ire->ire_zoneid != ALL_ZONES) { 10468 /* 10469 * When a zone sends a packet to another zone, we try to deliver 10470 * the packet under the same conditions as if the destination 10471 * was a real node on the network. To do so, we look for a 10472 * matching route in the forwarding table. 10473 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10474 * ip_newroute_v6() does. 10475 * Note that IRE_LOCAL are special, since they are used 10476 * when the zoneid doesn't match in some cases. This means that 10477 * we need to handle ipha_src differently since ire_src_addr 10478 * belongs to the receiving zone instead of the sending zone. 10479 * When ip_restrict_interzone_loopback is set, then 10480 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10481 * for loopback between zones when the logical "Ethernet" would 10482 * have looped them back. 
10483 */ 10484 ire_t *src_ire; 10485 10486 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10487 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10488 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10489 if (src_ire != NULL && 10490 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10491 (!ipst->ips_ip_restrict_interzone_loopback || 10492 ire_local_same_lan(ire, src_ire))) { 10493 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10494 !unspec_src) { 10495 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10496 } 10497 ire_refrele(src_ire); 10498 } else { 10499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10500 if (src_ire != NULL) { 10501 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10502 ire_refrele(src_ire); 10503 freemsg(first_mp); 10504 return; 10505 } 10506 ire_refrele(src_ire); 10507 } 10508 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10509 /* Failed */ 10510 freemsg(first_mp); 10511 return; 10512 } 10513 icmp_unreachable_v6(q, first_mp, 10514 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10515 zoneid, ipst); 10516 return; 10517 } 10518 } 10519 10520 if (mp->b_datap->db_type == M_CTL || 10521 ipss->ipsec_outbound_v6_policy_present) { 10522 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10523 connp, unspec_src, zoneid); 10524 if (mp == NULL) { 10525 return; 10526 } 10527 } 10528 10529 first_mp = mp; 10530 if (mp->b_datap->db_type == M_CTL) { 10531 io = (ipsec_out_t *)mp->b_rptr; 10532 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10533 mp = mp->b_cont; 10534 mctl_present = B_TRUE; 10535 } else { 10536 mctl_present = B_FALSE; 10537 } 10538 10539 ip6h = (ip6_t *)mp->b_rptr; 10540 nexthdr = ip6h->ip6_nxt; 10541 mibptr = ill->ill_ip_mib; 10542 10543 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10544 ipif_t *ipif; 10545 10546 /* 10547 * Select the source address using ipif_select_source_v6. 
10548 */ 10549 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10550 IPV6_PREFER_SRC_DEFAULT, zoneid); 10551 if (ipif == NULL) { 10552 if (ip_debug > 2) { 10553 /* ip1dbg */ 10554 pr_addr_dbg("ip_wput_ire_v6: no src for " 10555 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10556 printf("through interface %s\n", ill->ill_name); 10557 } 10558 freemsg(first_mp); 10559 return; 10560 } 10561 ip6h->ip6_src = ipif->ipif_v6src_addr; 10562 ipif_refrele(ipif); 10563 } 10564 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10565 if ((connp != NULL && connp->conn_multicast_loop) || 10566 !IS_LOOPBACK(ill)) { 10567 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10568 ALL_ZONES) != NULL) { 10569 mblk_t *nmp; 10570 int fanout_flags = 0; 10571 10572 if (connp != NULL && 10573 !connp->conn_multicast_loop) { 10574 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10575 } 10576 ip1dbg(("ip_wput_ire_v6: " 10577 "Loopback multicast\n")); 10578 nmp = ip_copymsg(first_mp); 10579 if (nmp != NULL) { 10580 ip6_t *nip6h; 10581 mblk_t *mp_ip6h; 10582 10583 if (mctl_present) { 10584 nip6h = (ip6_t *) 10585 nmp->b_cont->b_rptr; 10586 mp_ip6h = nmp->b_cont; 10587 } else { 10588 nip6h = (ip6_t *)nmp->b_rptr; 10589 mp_ip6h = nmp; 10590 } 10591 10592 DTRACE_PROBE4( 10593 ip6__loopback__out__start, 10594 ill_t *, NULL, 10595 ill_t *, ill, 10596 ip6_t *, nip6h, 10597 mblk_t *, nmp); 10598 10599 FW_HOOKS6( 10600 ipst->ips_ip6_loopback_out_event, 10601 ipst->ips_ipv6firewall_loopback_out, 10602 NULL, ill, nip6h, nmp, mp_ip6h, 10603 0, ipst); 10604 10605 DTRACE_PROBE1( 10606 ip6__loopback__out__end, 10607 mblk_t *, nmp); 10608 10609 /* 10610 * DTrace this as ip:::send. A blocked 10611 * packet will fire the send probe, but 10612 * not the receive probe. 
10613 */ 10614 DTRACE_IP7(send, mblk_t *, nmp, 10615 conn_t *, NULL, void_ip_t *, nip6h, 10616 __dtrace_ipsr_ill_t *, ill, 10617 ipha_t *, NULL, ip6_t *, nip6h, 10618 int, 1); 10619 10620 if (nmp != NULL) { 10621 /* 10622 * Deliver locally and to 10623 * every local zone, except 10624 * the sending zone when 10625 * IPV6_MULTICAST_LOOP is 10626 * disabled. 10627 */ 10628 ip_wput_local_v6(RD(q), ill, 10629 nip6h, nmp, ire, 10630 fanout_flags, zoneid); 10631 } 10632 } else { 10633 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10634 ip1dbg(("ip_wput_ire_v6: " 10635 "copymsg failed\n")); 10636 } 10637 } 10638 } 10639 if (ip6h->ip6_hops == 0 || 10640 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10641 IS_LOOPBACK(ill)) { 10642 /* 10643 * Local multicast or just loopback on loopback 10644 * interface. 10645 */ 10646 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10647 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10648 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10649 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10650 freemsg(first_mp); 10651 return; 10652 } 10653 } 10654 10655 if (ire->ire_stq != NULL) { 10656 uint32_t sum; 10657 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10658 ill_phyint->phyint_ifindex; 10659 queue_t *dev_q = ire->ire_stq->q_next; 10660 10661 /* 10662 * non-NULL send-to queue - packet is to be sent 10663 * out an interface. 10664 */ 10665 10666 /* Driver is flow-controlling? */ 10667 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10668 DEV_Q_FLOW_BLOCKED(dev_q)) { 10669 /* 10670 * Queue packet if we have an conn to give back 10671 * pressure. We can't queue packets intended for 10672 * hardware acceleration since we've tossed that 10673 * state already. If the packet is being fed back 10674 * from ire_send_v6, we don't know the position in 10675 * the queue to enqueue the packet and we discard 10676 * the packet. 
10677 */ 10678 if (ipst->ips_ip_output_queue && connp != NULL && 10679 !mctl_present && caller != IRE_SEND) { 10680 if (caller == IP_WSRV) { 10681 idl_tx_list_t *idl_txl; 10682 10683 idl_txl = &ipst->ips_idl_tx_list[0]; 10684 connp->conn_did_putbq = 1; 10685 (void) putbq(connp->conn_wq, mp); 10686 conn_drain_insert(connp, idl_txl); 10687 /* 10688 * caller == IP_WSRV implies we are 10689 * the service thread, and the 10690 * queue is already noenabled. 10691 * The check for canput and 10692 * the putbq is not atomic. 10693 * So we need to check again. 10694 */ 10695 if (canput(dev_q)) 10696 connp->conn_did_putbq = 0; 10697 } else { 10698 (void) putq(connp->conn_wq, mp); 10699 } 10700 return; 10701 } 10702 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10703 freemsg(first_mp); 10704 return; 10705 } 10706 10707 /* 10708 * Look for reachability confirmations from the transport. 10709 */ 10710 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10711 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10712 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10713 if (mctl_present) 10714 io->ipsec_out_reachable = B_TRUE; 10715 } 10716 /* Fastpath */ 10717 switch (nexthdr) { 10718 case IPPROTO_TCP: 10719 case IPPROTO_UDP: 10720 case IPPROTO_ICMPV6: 10721 case IPPROTO_SCTP: 10722 hdr_length = IPV6_HDR_LEN; 10723 break; 10724 default: { 10725 uint8_t *nexthdrp; 10726 10727 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10728 &hdr_length, &nexthdrp)) { 10729 /* Malformed packet */ 10730 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10731 freemsg(first_mp); 10732 return; 10733 } 10734 nexthdr = *nexthdrp; 10735 break; 10736 } 10737 } 10738 10739 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10740 uint16_t *up; 10741 uint16_t *insp; 10742 10743 /* 10744 * The packet header is processed once for all, even 10745 * in the multirouting case. We disable hardware 10746 * checksum if the packet is multirouted, as it will be 10747 * replicated via several interfaces, and not all of 10748 * them may have this capability. 
10749 */ 10750 if (cksum_request == 1 && 10751 !(ire->ire_flags & RTF_MULTIRT)) { 10752 /* Skip the transport checksum */ 10753 goto cksum_done; 10754 } 10755 /* 10756 * Do user-configured raw checksum. 10757 * Compute checksum and insert at offset "cksum_request" 10758 */ 10759 10760 /* check for enough headers for checksum */ 10761 cksum_request += hdr_length; /* offset from rptr */ 10762 if ((mp->b_wptr - mp->b_rptr) < 10763 (cksum_request + sizeof (int16_t))) { 10764 if (!pullupmsg(mp, 10765 cksum_request + sizeof (int16_t))) { 10766 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10767 " failed\n")); 10768 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10769 freemsg(first_mp); 10770 return; 10771 } 10772 ip6h = (ip6_t *)mp->b_rptr; 10773 } 10774 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10775 ASSERT(((uintptr_t)insp & 0x1) == 0); 10776 up = (uint16_t *)&ip6h->ip6_src; 10777 /* 10778 * icmp has placed length and routing 10779 * header adjustment in *insp. 10780 */ 10781 sum = htons(nexthdr) + 10782 up[0] + up[1] + up[2] + up[3] + 10783 up[4] + up[5] + up[6] + up[7] + 10784 up[8] + up[9] + up[10] + up[11] + 10785 up[12] + up[13] + up[14] + up[15]; 10786 sum = (sum & 0xffff) + (sum >> 16); 10787 *insp = IP_CSUM(mp, hdr_length, sum); 10788 } else if (nexthdr == IPPROTO_TCP) { 10789 uint16_t *up; 10790 10791 /* 10792 * Check for full IPv6 header + enough TCP header 10793 * to get at the checksum field. 
10794 */ 10795 if ((mp->b_wptr - mp->b_rptr) < 10796 (hdr_length + TCP_CHECKSUM_OFFSET + 10797 TCP_CHECKSUM_SIZE)) { 10798 if (!pullupmsg(mp, hdr_length + 10799 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10800 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10801 " failed\n")); 10802 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10803 freemsg(first_mp); 10804 return; 10805 } 10806 ip6h = (ip6_t *)mp->b_rptr; 10807 } 10808 10809 up = (uint16_t *)&ip6h->ip6_src; 10810 /* 10811 * Note: The TCP module has stored the length value 10812 * into the tcp checksum field, so we don't 10813 * need to explicitly sum it in here. 10814 */ 10815 sum = up[0] + up[1] + up[2] + up[3] + 10816 up[4] + up[5] + up[6] + up[7] + 10817 up[8] + up[9] + up[10] + up[11] + 10818 up[12] + up[13] + up[14] + up[15]; 10819 10820 /* Fold the initial sum */ 10821 sum = (sum & 0xffff) + (sum >> 16); 10822 10823 up = (uint16_t *)(((uchar_t *)ip6h) + 10824 hdr_length + TCP_CHECKSUM_OFFSET); 10825 10826 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10827 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10828 ire->ire_max_frag, mctl_present, sum); 10829 10830 /* Software checksum? 
*/ 10831 if (DB_CKSUMFLAGS(mp) == 0) { 10832 IP6_STAT(ipst, ip6_out_sw_cksum); 10833 IP6_STAT_UPDATE(ipst, 10834 ip6_tcp_out_sw_cksum_bytes, 10835 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10836 hdr_length); 10837 } 10838 } else if (nexthdr == IPPROTO_UDP) { 10839 uint16_t *up; 10840 10841 /* 10842 * check for full IPv6 header + enough UDP header 10843 * to get at the UDP checksum field 10844 */ 10845 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10846 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10847 if (!pullupmsg(mp, hdr_length + 10848 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10849 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10850 " failed\n")); 10851 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10852 freemsg(first_mp); 10853 return; 10854 } 10855 ip6h = (ip6_t *)mp->b_rptr; 10856 } 10857 up = (uint16_t *)&ip6h->ip6_src; 10858 /* 10859 * Note: The UDP module has stored the length value 10860 * into the udp checksum field, so we don't 10861 * need to explicitly sum it in here. 10862 */ 10863 sum = up[0] + up[1] + up[2] + up[3] + 10864 up[4] + up[5] + up[6] + up[7] + 10865 up[8] + up[9] + up[10] + up[11] + 10866 up[12] + up[13] + up[14] + up[15]; 10867 10868 /* Fold the initial sum */ 10869 sum = (sum & 0xffff) + (sum >> 16); 10870 10871 up = (uint16_t *)(((uchar_t *)ip6h) + 10872 hdr_length + UDP_CHECKSUM_OFFSET); 10873 10874 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10875 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10876 ire->ire_max_frag, mctl_present, sum); 10877 10878 /* Software checksum? 
*/ 10879 if (DB_CKSUMFLAGS(mp) == 0) { 10880 IP6_STAT(ipst, ip6_out_sw_cksum); 10881 IP6_STAT_UPDATE(ipst, 10882 ip6_udp_out_sw_cksum_bytes, 10883 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10884 hdr_length); 10885 } 10886 } else if (nexthdr == IPPROTO_ICMPV6) { 10887 uint16_t *up; 10888 icmp6_t *icmp6; 10889 10890 /* check for full IPv6+ICMPv6 header */ 10891 if ((mp->b_wptr - mp->b_rptr) < 10892 (hdr_length + ICMP6_MINLEN)) { 10893 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10894 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10895 " failed\n")); 10896 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10897 freemsg(first_mp); 10898 return; 10899 } 10900 ip6h = (ip6_t *)mp->b_rptr; 10901 } 10902 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10903 up = (uint16_t *)&ip6h->ip6_src; 10904 /* 10905 * icmp has placed length and routing 10906 * header adjustment in icmp6_cksum. 10907 */ 10908 sum = htons(IPPROTO_ICMPV6) + 10909 up[0] + up[1] + up[2] + up[3] + 10910 up[4] + up[5] + up[6] + up[7] + 10911 up[8] + up[9] + up[10] + up[11] + 10912 up[12] + up[13] + up[14] + up[15]; 10913 sum = (sum & 0xffff) + (sum >> 16); 10914 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10915 10916 /* Update output mib stats */ 10917 icmp_update_out_mib_v6(ill, icmp6); 10918 } else if (nexthdr == IPPROTO_SCTP) { 10919 sctp_hdr_t *sctph; 10920 10921 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10922 if (!pullupmsg(mp, hdr_length + 10923 sizeof (*sctph))) { 10924 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10925 " failed\n")); 10926 BUMP_MIB(ill->ill_ip_mib, 10927 ipIfStatsOutDiscards); 10928 freemsg(mp); 10929 return; 10930 } 10931 ip6h = (ip6_t *)mp->b_rptr; 10932 } 10933 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10934 sctph->sh_chksum = 0; 10935 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10936 } 10937 10938 cksum_done: 10939 /* 10940 * We force the insertion of a fragment header using the 10941 * IPH_FRAG_HDR flag in two cases: 10942 * - after reception of an ICMPv6 "packet too 
big" message 10943 * with a MTU < 1280 (cf. RFC 2460 section 5) 10944 * - for multirouted IPv6 packets, so that the receiver can 10945 * discard duplicates according to their fragment identifier 10946 * 10947 * Two flags modifed from the API can modify this behavior. 10948 * The first is IPV6_USE_MIN_MTU. With this API the user 10949 * can specify how to manage PMTUD for unicast and multicast. 10950 * 10951 * IPV6_DONTFRAG disallows fragmentation. 10952 */ 10953 max_frag = ire->ire_max_frag; 10954 switch (IP6I_USE_MIN_MTU_API(flags)) { 10955 case IPV6_USE_MIN_MTU_DEFAULT: 10956 case IPV6_USE_MIN_MTU_UNICAST: 10957 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10958 max_frag = IPV6_MIN_MTU; 10959 } 10960 break; 10961 10962 case IPV6_USE_MIN_MTU_NEVER: 10963 max_frag = IPV6_MIN_MTU; 10964 break; 10965 } 10966 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10967 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10968 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10969 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10970 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 10971 return; 10972 } 10973 10974 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10975 (mp->b_cont ? 
msgdsize(mp) : 10976 mp->b_wptr - (uchar_t *)ip6h)) { 10977 ip0dbg(("Packet length mismatch: %d, %ld\n", 10978 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10979 msgdsize(mp))); 10980 freemsg(first_mp); 10981 return; 10982 } 10983 /* Do IPSEC processing first */ 10984 if (mctl_present) { 10985 ipsec_out_process(q, first_mp, ire, ill_index); 10986 return; 10987 } 10988 ASSERT(mp->b_prev == NULL); 10989 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10990 ntohs(ip6h->ip6_plen) + 10991 IPV6_HDR_LEN, max_frag)); 10992 ASSERT(mp == first_mp); 10993 /* Initiate IPPF processing */ 10994 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 10995 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10996 if (mp == NULL) { 10997 return; 10998 } 10999 } 11000 ip_wput_frag_v6(mp, ire, reachable, connp, 11001 caller, max_frag); 11002 return; 11003 } 11004 /* Do IPSEC processing first */ 11005 if (mctl_present) { 11006 int extra_len = ipsec_out_extra_length(first_mp); 11007 11008 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11009 max_frag && connp != NULL && 11010 (flags & IP6I_DONTFRAG)) { 11011 /* 11012 * IPsec headers will push the packet over the 11013 * MTU limit. Issue an ICMPv6 Packet Too Big 11014 * message for this packet if the upper-layer 11015 * that issued this packet will be able to 11016 * react to the icmp_pkt2big_v6() that we'll 11017 * generate. 11018 */ 11019 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11020 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11021 return; 11022 } 11023 ipsec_out_process(q, first_mp, ire, ill_index); 11024 return; 11025 } 11026 /* 11027 * XXX multicast: add ip_mforward_v6() here. 11028 * Check conn_dontroute 11029 */ 11030 #ifdef lint 11031 /* 11032 * XXX The only purpose of this statement is to avoid lint 11033 * errors. See the above "XXX multicast". When that gets 11034 * fixed, remove this whole #ifdef lint section. 11035 */ 11036 ip3dbg(("multicast forward is %s.\n", 11037 (multicast_forward ? 
"TRUE" : "FALSE"))); 11038 #endif 11039 11040 UPDATE_OB_PKT_COUNT(ire); 11041 ire->ire_last_used_time = lbolt; 11042 ASSERT(mp == first_mp); 11043 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11044 } else { 11045 /* 11046 * DTrace this as ip:::send. A blocked packet will fire the 11047 * send probe, but not the receive probe. 11048 */ 11049 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11050 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11051 NULL, ip6_t *, ip6h, int, 1); 11052 DTRACE_PROBE4(ip6__loopback__out__start, 11053 ill_t *, NULL, ill_t *, ill, 11054 ip6_t *, ip6h, mblk_t *, first_mp); 11055 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11056 ipst->ips_ipv6firewall_loopback_out, 11057 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11058 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11059 if (first_mp != NULL) { 11060 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11061 zoneid); 11062 } 11063 } 11064 } 11065 11066 /* 11067 * Outbound IPv6 fragmentation routine using MDT. 11068 */ 11069 static void 11070 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11071 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11072 { 11073 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11074 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11075 mblk_t *hdr_mp, *md_mp = NULL; 11076 int i1; 11077 multidata_t *mmd; 11078 unsigned char *hdr_ptr, *pld_ptr; 11079 ip_pdescinfo_t pdi; 11080 uint32_t ident; 11081 size_t len; 11082 uint16_t offset; 11083 queue_t *stq = ire->ire_stq; 11084 ill_t *ill = (ill_t *)stq->q_ptr; 11085 ip_stack_t *ipst = ill->ill_ipst; 11086 11087 ASSERT(DB_TYPE(mp) == M_DATA); 11088 ASSERT(MBLKL(mp) > unfragmentable_len); 11089 11090 /* 11091 * Move read ptr past unfragmentable portion, we don't want this part 11092 * of the data in our fragments. 11093 */ 11094 mp->b_rptr += unfragmentable_len; 11095 11096 /* Calculate how many packets we will send out */ 11097 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11098 pkts = (i1 + max_chunk - 1) / max_chunk; 11099 ASSERT(pkts > 1); 11100 11101 /* Allocate a message block which will hold all the IP Headers. */ 11102 wroff = ipst->ips_ip_wroff_extra; 11103 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11104 11105 i1 = pkts * hdr_chunk_len; 11106 /* 11107 * Create the header buffer, Multidata and destination address 11108 * and SAP attribute that should be associated with it. 11109 */ 11110 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11111 ((hdr_mp->b_wptr += i1), 11112 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11113 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11114 freemsg(mp); 11115 if (md_mp == NULL) { 11116 freemsg(hdr_mp); 11117 } else { 11118 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11119 freemsg(md_mp); 11120 } 11121 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11123 return; 11124 } 11125 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11126 11127 /* 11128 * Add a payload buffer to the Multidata; this operation must not 11129 * fail, or otherwise our logic in this routine is broken. There 11130 * is no memory allocation done by the routine, so any returned 11131 * failure simply tells us that we've done something wrong. 11132 * 11133 * A failure tells us that either we're adding the same payload 11134 * buffer more than once, or we're trying to add more buffers than 11135 * allowed. None of the above cases should happen, and we panic 11136 * because either there's horrible heap corruption, and/or 11137 * programming mistake. 11138 */ 11139 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11140 goto pbuf_panic; 11141 } 11142 11143 hdr_ptr = hdr_mp->b_rptr; 11144 pld_ptr = mp->b_rptr; 11145 11146 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11147 11148 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11149 11150 /* 11151 * len is the total length of the fragmentable data in this 11152 * datagram. 
For each fragment sent, we will decrement len 11153 * by the amount of fragmentable data sent in that fragment 11154 * until len reaches zero. 11155 */ 11156 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11157 11158 offset = 0; 11159 prev_nexthdr_offset += wroff; 11160 11161 while (len != 0) { 11162 size_t mlen; 11163 ip6_t *fip6h; 11164 ip6_frag_t *fraghdr; 11165 int error; 11166 11167 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11168 mlen = MIN(len, max_chunk); 11169 len -= mlen; 11170 11171 fip6h = (ip6_t *)(hdr_ptr + wroff); 11172 ASSERT(OK_32PTR(fip6h)); 11173 bcopy(ip6h, fip6h, unfragmentable_len); 11174 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11175 11176 fip6h->ip6_plen = htons((uint16_t)(mlen + 11177 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11178 11179 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11180 unfragmentable_len); 11181 fraghdr->ip6f_nxt = nexthdr; 11182 fraghdr->ip6f_reserved = 0; 11183 fraghdr->ip6f_offlg = htons(offset) | 11184 ((len != 0) ? IP6F_MORE_FRAG : 0); 11185 fraghdr->ip6f_ident = ident; 11186 11187 /* 11188 * Record offset and size of header and data of the next packet 11189 * in the multidata message. 11190 */ 11191 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11192 unfragmentable_len + sizeof (ip6_frag_t), 0); 11193 PDESC_PLD_INIT(&pdi); 11194 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11195 ASSERT(i1 > 0); 11196 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11197 if (i1 == mlen) { 11198 pld_ptr += mlen; 11199 } else { 11200 i1 = mlen - i1; 11201 mp = mp->b_cont; 11202 ASSERT(mp != NULL); 11203 ASSERT(MBLKL(mp) >= i1); 11204 /* 11205 * Attach the next payload message block to the 11206 * multidata message. 
11207 */ 11208 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11209 goto pbuf_panic; 11210 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11211 pld_ptr = mp->b_rptr + i1; 11212 } 11213 11214 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11215 KM_NOSLEEP)) == NULL) { 11216 /* 11217 * Any failure other than ENOMEM indicates that we 11218 * have passed in invalid pdesc info or parameters 11219 * to mmd_addpdesc, which must not happen. 11220 * 11221 * EINVAL is a result of failure on boundary checks 11222 * against the pdesc info contents. It should not 11223 * happen, and we panic because either there's 11224 * horrible heap corruption, and/or programming 11225 * mistake. 11226 */ 11227 if (error != ENOMEM) { 11228 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11229 "pdesc logic error detected for " 11230 "mmd %p pinfo %p (%d)\n", 11231 (void *)mmd, (void *)&pdi, error); 11232 /* NOTREACHED */ 11233 } 11234 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11235 /* Free unattached payload message blocks as well */ 11236 md_mp->b_cont = mp->b_cont; 11237 goto free_mmd; 11238 } 11239 11240 /* Advance fragment offset. */ 11241 offset += mlen; 11242 11243 /* Advance to location for next header in the buffer. */ 11244 hdr_ptr += hdr_chunk_len; 11245 11246 /* Did we reach the next payload message block? */ 11247 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11248 mp = mp->b_cont; 11249 /* 11250 * Attach the next message block with payload 11251 * data to the multidata message. 
11252 */ 11253 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11254 goto pbuf_panic; 11255 pld_ptr = mp->b_rptr; 11256 } 11257 } 11258 11259 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11260 ASSERT(mp->b_wptr == pld_ptr); 11261 11262 /* Update IP statistics */ 11263 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11264 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11265 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11266 /* 11267 * The ipv6 header len is accounted for in unfragmentable_len so 11268 * when calculating the fragmentation overhead just add the frag 11269 * header len. 11270 */ 11271 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11272 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11273 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11274 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11275 11276 ire->ire_ob_pkt_count += pkts; 11277 if (ire->ire_ipif != NULL) 11278 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11279 11280 ire->ire_last_used_time = lbolt; 11281 /* Send it down */ 11282 putnext(stq, md_mp); 11283 return; 11284 11285 pbuf_panic: 11286 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11287 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11288 pbuf_idx); 11289 /* NOTREACHED */ 11290 } 11291 11292 /* 11293 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11294 * We have not optimized this in terms of number of mblks 11295 * allocated. For instance, for each fragment sent we always allocate a 11296 * mblk to hold the IPv6 header and fragment header. 11297 * 11298 * Assumes that all the extension headers are contained in the first mblk. 11299 * 11300 * The fragment header is inserted after an hop-by-hop options header 11301 * and after [an optional destinations header followed by] a routing header. 11302 * 11303 * NOTE : This function does not ire_refrele the ire passed in as 11304 * the argument. 
11305 */ 11306 void 11307 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11308 int caller, int max_frag) 11309 { 11310 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11311 ip6_t *fip6h; 11312 mblk_t *hmp; 11313 mblk_t *hmp0; 11314 mblk_t *dmp; 11315 ip6_frag_t *fraghdr; 11316 size_t unfragmentable_len; 11317 size_t len; 11318 size_t mlen; 11319 size_t max_chunk; 11320 uint32_t ident; 11321 uint16_t off_flags; 11322 uint16_t offset = 0; 11323 ill_t *ill; 11324 uint8_t nexthdr; 11325 uint_t prev_nexthdr_offset; 11326 uint8_t *ptr; 11327 ip_stack_t *ipst = ire->ire_ipst; 11328 11329 ASSERT(ire->ire_type == IRE_CACHE); 11330 ill = (ill_t *)ire->ire_stq->q_ptr; 11331 11332 if (max_frag <= 0) { 11333 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11334 freemsg(mp); 11335 return; 11336 } 11337 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11338 11339 /* 11340 * Determine the length of the unfragmentable portion of this 11341 * datagram. This consists of the IPv6 header, a potential 11342 * hop-by-hop options header, a potential pre-routing-header 11343 * destination options header, and a potential routing header. 
11344 */ 11345 nexthdr = ip6h->ip6_nxt; 11346 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11347 ptr = (uint8_t *)&ip6h[1]; 11348 11349 if (nexthdr == IPPROTO_HOPOPTS) { 11350 ip6_hbh_t *hbh_hdr; 11351 uint_t hdr_len; 11352 11353 hbh_hdr = (ip6_hbh_t *)ptr; 11354 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11355 nexthdr = hbh_hdr->ip6h_nxt; 11356 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11357 - (uint8_t *)ip6h; 11358 ptr += hdr_len; 11359 } 11360 if (nexthdr == IPPROTO_DSTOPTS) { 11361 ip6_dest_t *dest_hdr; 11362 uint_t hdr_len; 11363 11364 dest_hdr = (ip6_dest_t *)ptr; 11365 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11366 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11367 nexthdr = dest_hdr->ip6d_nxt; 11368 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11369 - (uint8_t *)ip6h; 11370 ptr += hdr_len; 11371 } 11372 } 11373 if (nexthdr == IPPROTO_ROUTING) { 11374 ip6_rthdr_t *rthdr; 11375 uint_t hdr_len; 11376 11377 rthdr = (ip6_rthdr_t *)ptr; 11378 nexthdr = rthdr->ip6r_nxt; 11379 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11380 - (uint8_t *)ip6h; 11381 hdr_len = 8 * (rthdr->ip6r_len + 1); 11382 ptr += hdr_len; 11383 } 11384 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11385 11386 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11387 sizeof (ip6_frag_t)) & ~7; 11388 11389 /* Check if we can use MDT to send out the frags. */ 11390 ASSERT(!IRE_IS_LOCAL(ire)); 11391 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11392 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11393 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11394 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11395 nexthdr, prev_nexthdr_offset); 11396 return; 11397 } 11398 11399 /* 11400 * Allocate an mblk with enough room for the link-layer 11401 * header, the unfragmentable part of the datagram, and the 11402 * fragment header. 
This (or a copy) will be used as the 11403 * first mblk for each fragment we send. 11404 */ 11405 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11406 ipst->ips_ip_wroff_extra, mp); 11407 if (hmp == NULL) { 11408 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11409 freemsg(mp); 11410 return; 11411 } 11412 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11413 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11414 11415 fip6h = (ip6_t *)hmp->b_rptr; 11416 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11417 11418 bcopy(ip6h, fip6h, unfragmentable_len); 11419 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11420 11421 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11422 11423 fraghdr->ip6f_nxt = nexthdr; 11424 fraghdr->ip6f_reserved = 0; 11425 fraghdr->ip6f_offlg = 0; 11426 fraghdr->ip6f_ident = htonl(ident); 11427 11428 /* 11429 * len is the total length of the fragmentable data in this 11430 * datagram. For each fragment sent, we will decrement len 11431 * by the amount of fragmentable data sent in that fragment 11432 * until len reaches zero. 11433 */ 11434 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11435 11436 /* 11437 * Move read ptr past unfragmentable portion, we don't want this part 11438 * of the data in our fragments. 
11439 */ 11440 mp->b_rptr += unfragmentable_len; 11441 11442 while (len != 0) { 11443 mlen = MIN(len, max_chunk); 11444 len -= mlen; 11445 if (len != 0) { 11446 /* Not last */ 11447 hmp0 = copyb(hmp); 11448 if (hmp0 == NULL) { 11449 freeb(hmp); 11450 freemsg(mp); 11451 BUMP_MIB(ill->ill_ip_mib, 11452 ipIfStatsOutFragFails); 11453 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11454 return; 11455 } 11456 off_flags = IP6F_MORE_FRAG; 11457 } else { 11458 /* Last fragment */ 11459 hmp0 = hmp; 11460 hmp = NULL; 11461 off_flags = 0; 11462 } 11463 fip6h = (ip6_t *)(hmp0->b_rptr); 11464 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11465 11466 fip6h->ip6_plen = htons((uint16_t)(mlen + 11467 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11468 /* 11469 * Note: Optimization alert. 11470 * In IPv6 (and IPv4) protocol header, Fragment Offset 11471 * ("offset") is 13 bits wide and in 8-octet units. 11472 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11473 * it occupies the most significant 13 bits. 11474 * (least significant 13 bits in IPv4). 11475 * We do not do any shifts here. Not shifting is same effect 11476 * as taking offset value in octet units, dividing by 8 and 11477 * then shifting 3 bits left to line it up in place in proper 11478 * place protocol header. 
11479 */ 11480 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11481 11482 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11483 /* mp has already been freed by ip_carve_mp() */ 11484 if (hmp != NULL) 11485 freeb(hmp); 11486 freeb(hmp0); 11487 ip1dbg(("ip_carve_mp: failed\n")); 11488 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11489 return; 11490 } 11491 hmp0->b_cont = dmp; 11492 /* Get the priority marking, if any */ 11493 hmp0->b_band = dmp->b_band; 11494 UPDATE_OB_PKT_COUNT(ire); 11495 ire->ire_last_used_time = lbolt; 11496 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11497 caller, NULL); 11498 reachable = 0; /* No need to redo state machine in loop */ 11499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11500 offset += mlen; 11501 } 11502 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11503 } 11504 11505 /* 11506 * Determine if the ill and multicast aspects of that packets 11507 * "matches" the conn. 11508 */ 11509 boolean_t 11510 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11511 zoneid_t zoneid) 11512 { 11513 ill_t *bound_ill; 11514 boolean_t wantpacket; 11515 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11516 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11517 11518 /* 11519 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11520 * unicast and multicast reception to conn_incoming_ill. 11521 * conn_wantpacket_v6 is called both for unicast and 11522 * multicast. 11523 */ 11524 bound_ill = connp->conn_incoming_ill; 11525 if (bound_ill != NULL) { 11526 if (IS_IPMP(bound_ill)) { 11527 if (bound_ill->ill_grp != ill->ill_grp) 11528 return (B_FALSE); 11529 } else { 11530 if (bound_ill != ill) 11531 return (B_FALSE); 11532 } 11533 } 11534 11535 if (connp->conn_multi_router) 11536 return (B_TRUE); 11537 11538 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11539 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11540 /* 11541 * Unicast case: we match the conn only if it's in the specified 11542 * zone. 
11543 */ 11544 return (IPCL_ZONE_MATCH(connp, zoneid)); 11545 } 11546 11547 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11548 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11549 /* 11550 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11551 * disabled, therefore we don't dispatch the multicast packet to 11552 * the sending zone. 11553 */ 11554 return (B_FALSE); 11555 } 11556 11557 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11558 zoneid != ALL_ZONES) { 11559 /* 11560 * Multicast packet on the loopback interface: we only match 11561 * conns who joined the group in the specified zone. 11562 */ 11563 return (B_FALSE); 11564 } 11565 11566 mutex_enter(&connp->conn_lock); 11567 wantpacket = 11568 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11569 mutex_exit(&connp->conn_lock); 11570 11571 return (wantpacket); 11572 } 11573 11574 11575 /* 11576 * Transmit a packet and update any NUD state based on the flags 11577 * XXX need to "recover" any ip6i_t when doing putq! 11578 * 11579 * NOTE : This function does not ire_refrele the ire passed in as the 11580 * argument. 
11581 */ 11582 void 11583 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11584 int caller, ipsec_out_t *io) 11585 { 11586 mblk_t *mp1; 11587 nce_t *nce = ire->ire_nce; 11588 ill_t *ill; 11589 ill_t *out_ill; 11590 uint64_t delta; 11591 ip6_t *ip6h; 11592 queue_t *stq = ire->ire_stq; 11593 ire_t *ire1 = NULL; 11594 ire_t *save_ire = ire; 11595 boolean_t multirt_send = B_FALSE; 11596 mblk_t *next_mp = NULL; 11597 ip_stack_t *ipst = ire->ire_ipst; 11598 boolean_t fp_prepend = B_FALSE; 11599 uint32_t hlen; 11600 11601 ip6h = (ip6_t *)mp->b_rptr; 11602 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11603 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11604 ASSERT(nce != NULL); 11605 ASSERT(mp->b_datap->db_type == M_DATA); 11606 ASSERT(stq != NULL); 11607 11608 ill = ire_to_ill(ire); 11609 if (!ill) { 11610 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11611 freemsg(mp); 11612 return; 11613 } 11614 11615 /* Flow-control check has been done in ip_wput_ire_v6 */ 11616 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11617 caller == IP_WSRV || canput(stq->q_next)) { 11618 uint32_t ill_index; 11619 11620 /* 11621 * In most cases, the emission loop below is entered only 11622 * once. Only in the case where the ire holds the 11623 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11624 * flagged ires in the bucket, and send the packet 11625 * through all crossed RTF_MULTIRT routes. 11626 */ 11627 if (ire->ire_flags & RTF_MULTIRT) { 11628 /* 11629 * Multirouting case. The bucket where ire is stored 11630 * probably holds other RTF_MULTIRT flagged ires 11631 * to the destination. In this call to ip_xmit_v6, 11632 * we attempt to send the packet through all 11633 * those ires. Thus, we first ensure that ire is the 11634 * first RTF_MULTIRT ire in the bucket, 11635 * before walking the ire list. 
11636 */ 11637 ire_t *first_ire; 11638 irb_t *irb = ire->ire_bucket; 11639 ASSERT(irb != NULL); 11640 multirt_send = B_TRUE; 11641 11642 /* Make sure we do not omit any multiroute ire. */ 11643 IRB_REFHOLD(irb); 11644 for (first_ire = irb->irb_ire; 11645 first_ire != NULL; 11646 first_ire = first_ire->ire_next) { 11647 if ((first_ire->ire_flags & RTF_MULTIRT) && 11648 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11649 &ire->ire_addr_v6)) && 11650 !(first_ire->ire_marks & 11651 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11652 break; 11653 } 11654 11655 if ((first_ire != NULL) && (first_ire != ire)) { 11656 IRE_REFHOLD(first_ire); 11657 /* ire will be released by the caller */ 11658 ire = first_ire; 11659 nce = ire->ire_nce; 11660 stq = ire->ire_stq; 11661 ill = ire_to_ill(ire); 11662 } 11663 IRB_REFRELE(irb); 11664 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11665 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11666 ILL_MDT_USABLE(ill)) { 11667 /* 11668 * This tcp connection was marked as MDT-capable, but 11669 * it has been turned off due changes in the interface. 11670 * Now that the interface support is back, turn it on 11671 * by notifying tcp. We don't directly modify tcp_mdt, 11672 * since we leave all the details to the tcp code that 11673 * knows better. 11674 */ 11675 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11676 11677 if (mdimp == NULL) { 11678 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11679 "connp %p (ENOMEM)\n", (void *)connp)); 11680 } else { 11681 CONN_INC_REF(connp); 11682 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11683 tcp_input, connp, SQ_FILL, 11684 SQTAG_TCP_INPUT_MCTL); 11685 } 11686 } 11687 11688 do { 11689 mblk_t *mp_ip6h; 11690 11691 if (multirt_send) { 11692 irb_t *irb; 11693 /* 11694 * We are in a multiple send case, need to get 11695 * the next ire and make a duplicate of the 11696 * packet. ire1 holds here the next ire to 11697 * process in the bucket. 
If multirouting is 11698 * expected, any non-RTF_MULTIRT ire that has 11699 * the right destination address is ignored. 11700 */ 11701 irb = ire->ire_bucket; 11702 ASSERT(irb != NULL); 11703 11704 IRB_REFHOLD(irb); 11705 for (ire1 = ire->ire_next; 11706 ire1 != NULL; 11707 ire1 = ire1->ire_next) { 11708 if (!(ire1->ire_flags & RTF_MULTIRT)) 11709 continue; 11710 if (!IN6_ARE_ADDR_EQUAL( 11711 &ire1->ire_addr_v6, 11712 &ire->ire_addr_v6)) 11713 continue; 11714 if (ire1->ire_marks & 11715 IRE_MARK_CONDEMNED) 11716 continue; 11717 11718 /* Got one */ 11719 if (ire1 != save_ire) { 11720 IRE_REFHOLD(ire1); 11721 } 11722 break; 11723 } 11724 IRB_REFRELE(irb); 11725 11726 if (ire1 != NULL) { 11727 next_mp = copyb(mp); 11728 if ((next_mp == NULL) || 11729 ((mp->b_cont != NULL) && 11730 ((next_mp->b_cont = 11731 dupmsg(mp->b_cont)) == NULL))) { 11732 freemsg(next_mp); 11733 next_mp = NULL; 11734 ire_refrele(ire1); 11735 ire1 = NULL; 11736 } 11737 } 11738 11739 /* Last multiroute ire; don't loop anymore. */ 11740 if (ire1 == NULL) { 11741 multirt_send = B_FALSE; 11742 } 11743 } 11744 11745 ill_index = 11746 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11747 11748 /* Initiate IPPF processing */ 11749 if (IP6_OUT_IPP(flags, ipst)) { 11750 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11751 if (mp == NULL) { 11752 BUMP_MIB(ill->ill_ip_mib, 11753 ipIfStatsOutDiscards); 11754 if (next_mp != NULL) 11755 freemsg(next_mp); 11756 if (ire != save_ire) { 11757 ire_refrele(ire); 11758 } 11759 return; 11760 } 11761 ip6h = (ip6_t *)mp->b_rptr; 11762 } 11763 mp_ip6h = mp; 11764 11765 /* 11766 * Check for fastpath, we need to hold nce_lock to 11767 * prevent fastpath update from chaining nce_fp_mp. 
11768 */ 11769 11770 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11771 mutex_enter(&nce->nce_lock); 11772 if ((mp1 = nce->nce_fp_mp) != NULL) { 11773 uchar_t *rptr; 11774 11775 hlen = MBLKL(mp1); 11776 rptr = mp->b_rptr - hlen; 11777 /* 11778 * make sure there is room for the fastpath 11779 * datalink header 11780 */ 11781 if (rptr < mp->b_datap->db_base) { 11782 mp1 = copyb(mp1); 11783 mutex_exit(&nce->nce_lock); 11784 if (mp1 == NULL) { 11785 BUMP_MIB(ill->ill_ip_mib, 11786 ipIfStatsOutDiscards); 11787 freemsg(mp); 11788 if (next_mp != NULL) 11789 freemsg(next_mp); 11790 if (ire != save_ire) { 11791 ire_refrele(ire); 11792 } 11793 return; 11794 } 11795 mp1->b_cont = mp; 11796 11797 /* Get the priority marking, if any */ 11798 mp1->b_band = mp->b_band; 11799 mp = mp1; 11800 } else { 11801 mp->b_rptr = rptr; 11802 /* 11803 * fastpath - pre-pend datalink 11804 * header 11805 */ 11806 bcopy(mp1->b_rptr, rptr, hlen); 11807 mutex_exit(&nce->nce_lock); 11808 fp_prepend = B_TRUE; 11809 } 11810 } else { 11811 /* 11812 * Get the DL_UNITDATA_REQ. 11813 */ 11814 mp1 = nce->nce_res_mp; 11815 if (mp1 == NULL) { 11816 mutex_exit(&nce->nce_lock); 11817 ip1dbg(("ip_xmit_v6: No resolution " 11818 "block ire = %p\n", (void *)ire)); 11819 freemsg(mp); 11820 if (next_mp != NULL) 11821 freemsg(next_mp); 11822 if (ire != save_ire) { 11823 ire_refrele(ire); 11824 } 11825 return; 11826 } 11827 /* 11828 * Prepend the DL_UNITDATA_REQ. 
11829 */ 11830 mp1 = copyb(mp1); 11831 mutex_exit(&nce->nce_lock); 11832 if (mp1 == NULL) { 11833 BUMP_MIB(ill->ill_ip_mib, 11834 ipIfStatsOutDiscards); 11835 freemsg(mp); 11836 if (next_mp != NULL) 11837 freemsg(next_mp); 11838 if (ire != save_ire) { 11839 ire_refrele(ire); 11840 } 11841 return; 11842 } 11843 mp1->b_cont = mp; 11844 11845 /* Get the priority marking, if any */ 11846 mp1->b_band = mp->b_band; 11847 mp = mp1; 11848 } 11849 11850 out_ill = (ill_t *)stq->q_ptr; 11851 11852 DTRACE_PROBE4(ip6__physical__out__start, 11853 ill_t *, NULL, ill_t *, out_ill, 11854 ip6_t *, ip6h, mblk_t *, mp); 11855 11856 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 11857 ipst->ips_ipv6firewall_physical_out, 11858 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 11859 11860 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 11861 11862 if (mp == NULL) { 11863 if (multirt_send) { 11864 ASSERT(ire1 != NULL); 11865 if (ire != save_ire) { 11866 ire_refrele(ire); 11867 } 11868 /* 11869 * Proceed with the next RTF_MULTIRT 11870 * ire, also set up the send-to queue 11871 * accordingly. 11872 */ 11873 ire = ire1; 11874 ire1 = NULL; 11875 stq = ire->ire_stq; 11876 nce = ire->ire_nce; 11877 ill = ire_to_ill(ire); 11878 mp = next_mp; 11879 next_mp = NULL; 11880 continue; 11881 } else { 11882 ASSERT(next_mp == NULL); 11883 ASSERT(ire1 == NULL); 11884 break; 11885 } 11886 } 11887 11888 if (ipst->ips_ipobs_enabled) { 11889 zoneid_t szone; 11890 11891 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 11892 mp_ip6h, out_ill, ipst, ALL_ZONES); 11893 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 11894 ALL_ZONES, out_ill, IPV6_VERSION, 11895 fp_prepend ? hlen : 0, ipst); 11896 } 11897 11898 /* 11899 * Update ire and MIB counters; for save_ire, this has 11900 * been done by the caller. 
11901 */ 11902 if (ire != save_ire) { 11903 UPDATE_OB_PKT_COUNT(ire); 11904 ire->ire_last_used_time = lbolt; 11905 11906 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11907 BUMP_MIB(ill->ill_ip_mib, 11908 ipIfStatsHCOutMcastPkts); 11909 UPDATE_MIB(ill->ill_ip_mib, 11910 ipIfStatsHCOutMcastOctets, 11911 ntohs(ip6h->ip6_plen) + 11912 IPV6_HDR_LEN); 11913 } 11914 } 11915 11916 /* 11917 * Send it down. XXX Do we want to flow control AH/ESP 11918 * packets that carry TCP payloads? We don't flow 11919 * control TCP packets, but we should also not 11920 * flow-control TCP packets that have been protected. 11921 * We don't have an easy way to find out if an AH/ESP 11922 * packet was originally TCP or not currently. 11923 */ 11924 if (io == NULL) { 11925 BUMP_MIB(ill->ill_ip_mib, 11926 ipIfStatsHCOutTransmits); 11927 UPDATE_MIB(ill->ill_ip_mib, 11928 ipIfStatsHCOutOctets, 11929 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11930 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 11931 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 11932 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 11933 int, 0); 11934 11935 putnext(stq, mp); 11936 } else { 11937 /* 11938 * Safety Pup says: make sure this is 11939 * going to the right interface! 
11940 */ 11941 if (io->ipsec_out_capab_ill_index != 11942 ill_index) { 11943 /* IPsec kstats: bump lose counter */ 11944 freemsg(mp1); 11945 } else { 11946 BUMP_MIB(ill->ill_ip_mib, 11947 ipIfStatsHCOutTransmits); 11948 UPDATE_MIB(ill->ill_ip_mib, 11949 ipIfStatsHCOutOctets, 11950 ntohs(ip6h->ip6_plen) + 11951 IPV6_HDR_LEN); 11952 DTRACE_IP7(send, mblk_t *, mp, 11953 conn_t *, NULL, void_ip_t *, ip6h, 11954 __dtrace_ipsr_ill_t *, out_ill, 11955 ipha_t *, NULL, ip6_t *, ip6h, int, 11956 0); 11957 ipsec_hw_putnext(stq, mp); 11958 } 11959 } 11960 11961 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11962 if (ire != save_ire) { 11963 ire_refrele(ire); 11964 } 11965 if (multirt_send) { 11966 ASSERT(ire1 != NULL); 11967 /* 11968 * Proceed with the next RTF_MULTIRT 11969 * ire, also set up the send-to queue 11970 * accordingly. 11971 */ 11972 ire = ire1; 11973 ire1 = NULL; 11974 stq = ire->ire_stq; 11975 nce = ire->ire_nce; 11976 ill = ire_to_ill(ire); 11977 mp = next_mp; 11978 next_mp = NULL; 11979 continue; 11980 } 11981 ASSERT(next_mp == NULL); 11982 ASSERT(ire1 == NULL); 11983 return; 11984 } 11985 11986 ASSERT(nce->nce_state != ND_INCOMPLETE); 11987 11988 /* 11989 * Check for upper layer advice 11990 */ 11991 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 11992 /* 11993 * It should be o.k. to check the state without 11994 * a lock here, at most we lose an advice. 
11995 */ 11996 nce->nce_last = TICK_TO_MSEC(lbolt64); 11997 if (nce->nce_state != ND_REACHABLE) { 11998 11999 mutex_enter(&nce->nce_lock); 12000 nce->nce_state = ND_REACHABLE; 12001 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12002 mutex_exit(&nce->nce_lock); 12003 (void) untimeout(nce->nce_timeout_id); 12004 if (ip_debug > 2) { 12005 /* ip1dbg */ 12006 pr_addr_dbg("ip_xmit_v6: state" 12007 " for %s changed to" 12008 " REACHABLE\n", AF_INET6, 12009 &ire->ire_addr_v6); 12010 } 12011 } 12012 if (ire != save_ire) { 12013 ire_refrele(ire); 12014 } 12015 if (multirt_send) { 12016 ASSERT(ire1 != NULL); 12017 /* 12018 * Proceed with the next RTF_MULTIRT 12019 * ire, also set up the send-to queue 12020 * accordingly. 12021 */ 12022 ire = ire1; 12023 ire1 = NULL; 12024 stq = ire->ire_stq; 12025 nce = ire->ire_nce; 12026 ill = ire_to_ill(ire); 12027 mp = next_mp; 12028 next_mp = NULL; 12029 continue; 12030 } 12031 ASSERT(next_mp == NULL); 12032 ASSERT(ire1 == NULL); 12033 return; 12034 } 12035 12036 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12037 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12038 " ill_reachable_time = %d \n", delta, 12039 ill->ill_reachable_time)); 12040 if (delta > (uint64_t)ill->ill_reachable_time) { 12041 nce = ire->ire_nce; 12042 mutex_enter(&nce->nce_lock); 12043 switch (nce->nce_state) { 12044 case ND_REACHABLE: 12045 case ND_STALE: 12046 /* 12047 * ND_REACHABLE is identical to 12048 * ND_STALE in this specific case. If 12049 * reachable time has expired for this 12050 * neighbor (delta is greater than 12051 * reachable time), conceptually, the 12052 * neighbor cache is no longer in 12053 * REACHABLE state, but already in 12054 * STALE state. So the correct 12055 * transition here is to ND_DELAY. 
12056 */ 12057 nce->nce_state = ND_DELAY; 12058 mutex_exit(&nce->nce_lock); 12059 NDP_RESTART_TIMER(nce, 12060 ipst->ips_delay_first_probe_time); 12061 if (ip_debug > 3) { 12062 /* ip2dbg */ 12063 pr_addr_dbg("ip_xmit_v6: state" 12064 " for %s changed to" 12065 " DELAY\n", AF_INET6, 12066 &ire->ire_addr_v6); 12067 } 12068 break; 12069 case ND_DELAY: 12070 case ND_PROBE: 12071 mutex_exit(&nce->nce_lock); 12072 /* Timers have already started */ 12073 break; 12074 case ND_UNREACHABLE: 12075 /* 12076 * ndp timer has detected that this nce 12077 * is unreachable and initiated deleting 12078 * this nce and all its associated IREs. 12079 * This is a race where we found the 12080 * ire before it was deleted and have 12081 * just sent out a packet using this 12082 * unreachable nce. 12083 */ 12084 mutex_exit(&nce->nce_lock); 12085 break; 12086 default: 12087 ASSERT(0); 12088 } 12089 } 12090 12091 if (multirt_send) { 12092 ASSERT(ire1 != NULL); 12093 /* 12094 * Proceed with the next RTF_MULTIRT ire, 12095 * Also set up the send-to queue accordingly. 12096 */ 12097 if (ire != save_ire) { 12098 ire_refrele(ire); 12099 } 12100 ire = ire1; 12101 ire1 = NULL; 12102 stq = ire->ire_stq; 12103 nce = ire->ire_nce; 12104 ill = ire_to_ill(ire); 12105 mp = next_mp; 12106 next_mp = NULL; 12107 } 12108 } while (multirt_send); 12109 /* 12110 * In the multirouting case, release the last ire used for 12111 * emission. save_ire will be released by the caller. 12112 */ 12113 if (ire != save_ire) { 12114 ire_refrele(ire); 12115 } 12116 } else { 12117 /* 12118 * Can't apply backpressure, just discard the packet. 12119 */ 12120 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12121 freemsg(mp); 12122 return; 12123 } 12124 } 12125 12126 /* 12127 * pr_addr_dbg function provides the needed buffer space to call 12128 * inet_ntop() function's 3rd argument. This function should be 12129 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12130 * stack buffer space in it's own stack frame. 
This function uses 12131 * a buffer from it's own stack and prints the information. 12132 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12133 * 12134 * Note: This function can call inet_ntop() once. 12135 */ 12136 void 12137 pr_addr_dbg(char *fmt1, int af, const void *addr) 12138 { 12139 char buf[INET6_ADDRSTRLEN]; 12140 12141 if (fmt1 == NULL) { 12142 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12143 return; 12144 } 12145 12146 /* 12147 * This does not compare debug level and just prints 12148 * out. Thus it is the responsibility of the caller 12149 * to check the appropriate debug-level before calling 12150 * this function. 12151 */ 12152 if (ip_debug > 0) { 12153 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12154 } 12155 12156 12157 } 12158 12159 12160 /* 12161 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12162 * if needed and extension headers) that will be needed based on the 12163 * ip6_pkt_t structure passed by the caller. 12164 * 12165 * The returned length does not include the length of the upper level 12166 * protocol (ULP) header. 
12167 */ 12168 int 12169 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12170 { 12171 int len; 12172 12173 len = IPV6_HDR_LEN; 12174 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12175 len += sizeof (ip6i_t); 12176 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12177 ASSERT(ipp->ipp_hopoptslen != 0); 12178 len += ipp->ipp_hopoptslen; 12179 } 12180 if (ipp->ipp_fields & IPPF_RTHDR) { 12181 ASSERT(ipp->ipp_rthdrlen != 0); 12182 len += ipp->ipp_rthdrlen; 12183 } 12184 /* 12185 * En-route destination options 12186 * Only do them if there's a routing header as well 12187 */ 12188 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12189 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12190 ASSERT(ipp->ipp_rtdstoptslen != 0); 12191 len += ipp->ipp_rtdstoptslen; 12192 } 12193 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12194 ASSERT(ipp->ipp_dstoptslen != 0); 12195 len += ipp->ipp_dstoptslen; 12196 } 12197 return (len); 12198 } 12199 12200 /* 12201 * All-purpose routine to build a header chain of an IPv6 header 12202 * followed by any required extension headers and a proto header, 12203 * preceeded (where necessary) by an ip6i_t private header. 12204 * 12205 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12206 * will be filled in appropriately. 12207 * Thus the caller must fill in the rest of the IPv6 header, such as 12208 * traffic class/flowid, source address (if not set here), hoplimit (if not 12209 * set here) and destination address. 12210 * 12211 * The extension headers and ip6i_t header will all be fully filled in. 12212 */ 12213 void 12214 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12215 ip6_pkt_t *ipp, uint8_t protocol) 12216 { 12217 uint8_t *nxthdr_ptr; 12218 uint8_t *cp; 12219 ip6i_t *ip6i; 12220 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12221 12222 /* 12223 * If sending private ip6i_t header down (checksum info, nexthop, 12224 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12225 * then fill it in. (The checksum info will be filled in by icmp). 
12226 */ 12227 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12228 ip6i = (ip6i_t *)ip6h; 12229 ip6h = (ip6_t *)&ip6i[1]; 12230 12231 ip6i->ip6i_flags = 0; 12232 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12233 if (ipp->ipp_fields & IPPF_IFINDEX || 12234 ipp->ipp_fields & IPPF_SCOPE_ID) { 12235 ASSERT(ipp->ipp_ifindex != 0); 12236 ip6i->ip6i_flags |= IP6I_IFINDEX; 12237 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12238 } 12239 if (ipp->ipp_fields & IPPF_ADDR) { 12240 /* 12241 * Enable per-packet source address verification if 12242 * IPV6_PKTINFO specified the source address. 12243 * ip6_src is set in the transport's _wput function. 12244 */ 12245 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12246 &ipp->ipp_addr)); 12247 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12248 } 12249 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12250 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12251 /* 12252 * We need to set this flag so that IP doesn't 12253 * rewrite the IPv6 header's hoplimit with the 12254 * current default value. 12255 */ 12256 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12257 } 12258 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12259 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12260 &ipp->ipp_nexthop)); 12261 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12262 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12263 } 12264 /* 12265 * tell IP this is an ip6i_t private header 12266 */ 12267 ip6i->ip6i_nxt = IPPROTO_RAW; 12268 } 12269 /* Initialize IPv6 header */ 12270 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12271 if (ipp->ipp_fields & IPPF_TCLASS) { 12272 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12273 (ipp->ipp_tclass << 20); 12274 } 12275 if (ipp->ipp_fields & IPPF_ADDR) 12276 ip6h->ip6_src = ipp->ipp_addr; 12277 12278 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12279 cp = (uint8_t *)&ip6h[1]; 12280 /* 12281 * Here's where we have to start stringing together 12282 * any extension headers in the right order: 12283 * Hop-by-hop, destination, routing, and final destination opts. 
12284 */ 12285 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12286 /* Hop-by-hop options */ 12287 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12288 12289 *nxthdr_ptr = IPPROTO_HOPOPTS; 12290 nxthdr_ptr = &hbh->ip6h_nxt; 12291 12292 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12293 cp += ipp->ipp_hopoptslen; 12294 } 12295 /* 12296 * En-route destination options 12297 * Only do them if there's a routing header as well 12298 */ 12299 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12300 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12301 ip6_dest_t *dst = (ip6_dest_t *)cp; 12302 12303 *nxthdr_ptr = IPPROTO_DSTOPTS; 12304 nxthdr_ptr = &dst->ip6d_nxt; 12305 12306 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12307 cp += ipp->ipp_rtdstoptslen; 12308 } 12309 /* 12310 * Routing header next 12311 */ 12312 if (ipp->ipp_fields & IPPF_RTHDR) { 12313 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12314 12315 *nxthdr_ptr = IPPROTO_ROUTING; 12316 nxthdr_ptr = &rt->ip6r_nxt; 12317 12318 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12319 cp += ipp->ipp_rthdrlen; 12320 } 12321 /* 12322 * Do ultimate destination options 12323 */ 12324 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12325 ip6_dest_t *dest = (ip6_dest_t *)cp; 12326 12327 *nxthdr_ptr = IPPROTO_DSTOPTS; 12328 nxthdr_ptr = &dest->ip6d_nxt; 12329 12330 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12331 cp += ipp->ipp_dstoptslen; 12332 } 12333 /* 12334 * Now set the last header pointer to the proto passed in 12335 */ 12336 *nxthdr_ptr = protocol; 12337 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12338 } 12339 12340 /* 12341 * Return a pointer to the routing header extension header 12342 * in the IPv6 header(s) chain passed in. 
12343 * If none found, return NULL 12344 * Assumes that all extension headers are in same mblk as the v6 header 12345 */ 12346 ip6_rthdr_t * 12347 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12348 { 12349 ip6_dest_t *desthdr; 12350 ip6_frag_t *fraghdr; 12351 uint_t hdrlen; 12352 uint8_t nexthdr; 12353 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12354 12355 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12356 return ((ip6_rthdr_t *)ptr); 12357 12358 /* 12359 * The routing header will precede all extension headers 12360 * other than the hop-by-hop and destination options 12361 * extension headers, so if we see anything other than those, 12362 * we're done and didn't find it. 12363 * We could see a destination options header alone but no 12364 * routing header, in which case we'll return NULL as soon as 12365 * we see anything after that. 12366 * Hop-by-hop and destination option headers are identical, 12367 * so we can use either one we want as a template. 12368 */ 12369 nexthdr = ip6h->ip6_nxt; 12370 while (ptr < endptr) { 12371 /* Is there enough left for len + nexthdr? */ 12372 if (ptr + MIN_EHDR_LEN > endptr) 12373 return (NULL); 12374 12375 switch (nexthdr) { 12376 case IPPROTO_HOPOPTS: 12377 case IPPROTO_DSTOPTS: 12378 /* Assumes the headers are identical for hbh and dst */ 12379 desthdr = (ip6_dest_t *)ptr; 12380 hdrlen = 8 * (desthdr->ip6d_len + 1); 12381 nexthdr = desthdr->ip6d_nxt; 12382 break; 12383 12384 case IPPROTO_ROUTING: 12385 return ((ip6_rthdr_t *)ptr); 12386 12387 case IPPROTO_FRAGMENT: 12388 fraghdr = (ip6_frag_t *)ptr; 12389 hdrlen = sizeof (ip6_frag_t); 12390 nexthdr = fraghdr->ip6f_nxt; 12391 break; 12392 12393 default: 12394 return (NULL); 12395 } 12396 ptr += hdrlen; 12397 } 12398 return (NULL); 12399 } 12400 12401 /* 12402 * Called for source-routed packets originating on this node. 
12403 * Manipulates the original routing header by moving every entry up 12404 * one slot, placing the first entry in the v6 header's v6_dst field, 12405 * and placing the ultimate destination in the routing header's last 12406 * slot. 12407 * 12408 * Returns the checksum diference between the ultimate destination 12409 * (last hop in the routing header when the packet is sent) and 12410 * the first hop (ip6_dst when the packet is sent) 12411 */ 12412 /* ARGSUSED2 */ 12413 uint32_t 12414 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12415 { 12416 uint_t numaddr; 12417 uint_t i; 12418 in6_addr_t *addrptr; 12419 in6_addr_t tmp; 12420 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12421 uint32_t cksm; 12422 uint32_t addrsum = 0; 12423 uint16_t *ptr; 12424 12425 /* 12426 * Perform any processing needed for source routing. 12427 * We know that all extension headers will be in the same mblk 12428 * as the IPv6 header. 12429 */ 12430 12431 /* 12432 * If no segments left in header, or the header length field is zero, 12433 * don't move hop addresses around; 12434 * Checksum difference is zero. 12435 */ 12436 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12437 return (0); 12438 12439 ptr = (uint16_t *)&ip6h->ip6_dst; 12440 cksm = 0; 12441 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12442 cksm += ptr[i]; 12443 } 12444 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12445 12446 /* 12447 * Here's where the fun begins - we have to 12448 * move all addresses up one spot, take the 12449 * first hop and make it our first ip6_dst, 12450 * and place the ultimate destination in the 12451 * newly-opened last slot. 
12452 */ 12453 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12454 numaddr = rthdr->ip6r0_len / 2; 12455 tmp = *addrptr; 12456 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12457 *addrptr = addrptr[1]; 12458 } 12459 *addrptr = ip6h->ip6_dst; 12460 ip6h->ip6_dst = tmp; 12461 12462 /* 12463 * From the checksummed ultimate destination subtract the checksummed 12464 * current ip6_dst (the first hop address). Return that number. 12465 * (In the v4 case, the second part of this is done in each routine 12466 * that calls ip_massage_options(). We do it all in this one place 12467 * for v6). 12468 */ 12469 ptr = (uint16_t *)&ip6h->ip6_dst; 12470 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12471 addrsum += ptr[i]; 12472 } 12473 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12474 if ((int)cksm < 0) 12475 cksm--; 12476 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12477 12478 return (cksm); 12479 } 12480 12481 /* 12482 * Propagate a multicast group membership operation (join/leave) (*fn) on 12483 * all interfaces crossed by the related multirt routes. 12484 * The call is considered successful if the operation succeeds 12485 * on at least one interface. 12486 * The function is called if the destination address in the packet to send 12487 * is multirouted. 
12488 */ 12489 int 12490 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12491 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12492 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12493 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12494 { 12495 ire_t *ire_gw; 12496 irb_t *irb; 12497 int index, error = 0; 12498 opt_restart_t *or; 12499 ip_stack_t *ipst = ire->ire_ipst; 12500 12501 irb = ire->ire_bucket; 12502 ASSERT(irb != NULL); 12503 12504 ASSERT(DB_TYPE(first_mp) == M_CTL); 12505 or = (opt_restart_t *)first_mp->b_rptr; 12506 12507 IRB_REFHOLD(irb); 12508 for (; ire != NULL; ire = ire->ire_next) { 12509 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12510 continue; 12511 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12512 continue; 12513 12514 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12515 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12516 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12517 /* No resolver exists for the gateway; skip this ire. */ 12518 if (ire_gw == NULL) 12519 continue; 12520 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12521 /* 12522 * A resolver exists: we can get the interface on which we have 12523 * to apply the operation. 12524 */ 12525 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12526 first_mp); 12527 if (error == 0) 12528 or->or_private = CGTP_MCAST_SUCCESS; 12529 12530 if (ip_debug > 0) { 12531 ulong_t off; 12532 char *ksym; 12533 12534 ksym = kobj_getsymname((uintptr_t)fn, &off); 12535 ip2dbg(("ip_multirt_apply_membership_v6: " 12536 "called %s, multirt group 0x%08x via itf 0x%08x, " 12537 "error %d [success %u]\n", 12538 ksym ? 
ksym : "?", 12539 ntohl(V4_PART_OF_V6((*v6grp))), 12540 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12541 error, or->or_private)); 12542 } 12543 12544 ire_refrele(ire_gw); 12545 if (error == EINPROGRESS) { 12546 IRB_REFRELE(irb); 12547 return (error); 12548 } 12549 } 12550 IRB_REFRELE(irb); 12551 /* 12552 * Consider the call as successful if we succeeded on at least 12553 * one interface. Otherwise, return the last encountered error. 12554 */ 12555 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12556 } 12557 12558 void 12559 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12560 { 12561 kstat_t *ksp; 12562 12563 ip6_stat_t template = { 12564 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12565 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12566 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12567 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12568 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12569 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12570 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12571 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12572 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12573 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12574 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12575 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12576 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12577 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12578 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12579 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12580 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12581 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12582 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12583 }; 12584 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12585 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12586 KSTAT_FLAG_VIRTUAL, stackid); 12587 12588 if (ksp == NULL) 12589 return (NULL); 12590 12591 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12592 ksp->ks_data = (void *)ip6_statisticsp; 12593 ksp->ks_private = (void *)(uintptr_t)stackid; 12594 12595 kstat_install(ksp); 12596 return (ksp); 12597 } 12598 12599 void 12600 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12601 { 12602 if (ksp != NULL) { 12603 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12604 kstat_delete_netstack(ksp, stackid); 12605 } 12606 } 12607 12608 /* 12609 * The following two functions set and get the value for the 12610 * IPV6_SRC_PREFERENCES socket option. 12611 */ 12612 int 12613 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12614 { 12615 /* 12616 * We only support preferences that are covered by 12617 * IPV6_PREFER_SRC_MASK. 12618 */ 12619 if (prefs & ~IPV6_PREFER_SRC_MASK) 12620 return (EINVAL); 12621 12622 /* 12623 * Look for conflicting preferences or default preferences. If 12624 * both bits of a related pair are clear, the application wants the 12625 * system's default value for that pair. Both bits in a pair can't 12626 * be set. 
12627 */ 12628 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12629 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12630 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12631 IPV6_PREFER_SRC_MIPMASK) { 12632 return (EINVAL); 12633 } 12634 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12635 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12636 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12637 IPV6_PREFER_SRC_TMPMASK) { 12638 return (EINVAL); 12639 } 12640 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12641 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12642 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12643 IPV6_PREFER_SRC_CGAMASK) { 12644 return (EINVAL); 12645 } 12646 12647 connp->conn_src_preferences = prefs; 12648 return (0); 12649 } 12650 12651 size_t 12652 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12653 { 12654 *val = connp->conn_src_preferences; 12655 return (sizeof (connp->conn_src_preferences)); 12656 } 12657 12658 int 12659 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12660 { 12661 ire_t *ire; 12662 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12663 12664 /* 12665 * Verify the source address and ifindex. Privileged users can use 12666 * any source address. For ancillary data the source address is 12667 * checked in ip_wput_v6. 
12668 */ 12669 if (pkti->ipi6_ifindex != 0) { 12670 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12671 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12672 rw_exit(&ipst->ips_ill_g_lock); 12673 return (ENXIO); 12674 } 12675 rw_exit(&ipst->ips_ill_g_lock); 12676 } 12677 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12678 secpolicy_net_rawaccess(cr) != 0) { 12679 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12680 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12681 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12682 if (ire != NULL) 12683 ire_refrele(ire); 12684 else 12685 return (ENXIO); 12686 } 12687 return (0); 12688 } 12689 12690 /* 12691 * Get the size of the IP options (including the IP headers size) 12692 * without including the AH header's size. If till_ah is B_FALSE, 12693 * and if AH header is present, dest options beyond AH header will 12694 * also be included in the returned size. 12695 */ 12696 int 12697 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12698 { 12699 ip6_t *ip6h; 12700 uint8_t nexthdr; 12701 uint8_t *whereptr; 12702 ip6_hbh_t *hbhhdr; 12703 ip6_dest_t *dsthdr; 12704 ip6_rthdr_t *rthdr; 12705 int ehdrlen; 12706 int size; 12707 ah_t *ah; 12708 12709 ip6h = (ip6_t *)mp->b_rptr; 12710 size = IPV6_HDR_LEN; 12711 nexthdr = ip6h->ip6_nxt; 12712 whereptr = (uint8_t *)&ip6h[1]; 12713 for (;;) { 12714 /* Assume IP has already stripped it */ 12715 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12716 switch (nexthdr) { 12717 case IPPROTO_HOPOPTS: 12718 hbhhdr = (ip6_hbh_t *)whereptr; 12719 nexthdr = hbhhdr->ip6h_nxt; 12720 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12721 break; 12722 case IPPROTO_DSTOPTS: 12723 dsthdr = (ip6_dest_t *)whereptr; 12724 nexthdr = dsthdr->ip6d_nxt; 12725 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12726 break; 12727 case IPPROTO_ROUTING: 12728 rthdr = (ip6_rthdr_t *)whereptr; 12729 nexthdr = rthdr->ip6r_nxt; 12730 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12731 break; 12732 default : 12733 if (till_ah) { 12734 
ASSERT(nexthdr == IPPROTO_AH); 12735 return (size); 12736 } 12737 /* 12738 * If we don't have a AH header to traverse, 12739 * return now. This happens normally for 12740 * outbound datagrams where we have not inserted 12741 * the AH header. 12742 */ 12743 if (nexthdr != IPPROTO_AH) { 12744 return (size); 12745 } 12746 12747 /* 12748 * We don't include the AH header's size 12749 * to be symmetrical with other cases where 12750 * we either don't have a AH header (outbound) 12751 * or peek into the AH header yet (inbound and 12752 * not pulled up yet). 12753 */ 12754 ah = (ah_t *)whereptr; 12755 nexthdr = ah->ah_nexthdr; 12756 ehdrlen = (ah->ah_length << 2) + 8; 12757 12758 if (nexthdr == IPPROTO_DSTOPTS) { 12759 if (whereptr + ehdrlen >= mp->b_wptr) { 12760 /* 12761 * The destination options header 12762 * is not part of the first mblk. 12763 */ 12764 whereptr = mp->b_cont->b_rptr; 12765 } else { 12766 whereptr += ehdrlen; 12767 } 12768 12769 dsthdr = (ip6_dest_t *)whereptr; 12770 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12771 size += ehdrlen; 12772 } 12773 return (size); 12774 } 12775 whereptr += ehdrlen; 12776 size += ehdrlen; 12777 } 12778 } 12779 12780 /* 12781 * Utility routine that checks if `v6srcp' is a valid address on underlying 12782 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12783 * associated with `v6srcp' on success. NOTE: if this is not called from 12784 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12785 * group during or after this lookup. 
12786 */ 12787 static boolean_t 12788 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12789 { 12790 ipif_t *ipif; 12791 12792 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12793 if (ipif != NULL) { 12794 if (ipifp != NULL) 12795 *ipifp = ipif; 12796 else 12797 ipif_refrele(ipif); 12798 return (B_TRUE); 12799 } 12800 12801 if (ip_debug > 2) { 12802 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12803 "src %s\n", AF_INET6, v6srcp); 12804 } 12805 return (B_FALSE); 12806 } 12807