1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/iphada.h> 57 #include <sys/policy.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/sctp/sctp_impl.h> 83 #include <inet/ipp_common.h> 84 85 #include <inet/ip_multi.h> 86 #include <inet/ip_if.h> 87 #include <inet/ip_ire.h> 88 #include <inet/ip_rts.h> 89 #include <inet/ip_ndp.h> 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 #include <inet/sadb.h> 93 #include <inet/ipsec_impl.h> 94 #include <inet/iptun/iptun_impl.h> 95 #include <inet/sctp_ip.h> 96 #include <sys/pattr.h> 97 #include <inet/ipclassifier.h> 98 #include <inet/ipsecah.h> 99 #include <inet/rawip_impl.h> 100 #include <inet/rts_impl.h> 101 #include <sys/squeue_impl.h> 102 #include <sys/squeue.h> 103 104 #include <sys/tsol/label.h> 
105 #include <sys/tsol/tnet.h> 106 107 #include <rpc/pmap_prot.h> 108 109 /* Temporary; for CR 6451644 work-around */ 110 #include <sys/ethernet.h> 111 112 extern int ip_squeue_flag; 113 114 /* 115 * Naming conventions: 116 * These rules should be judiciously applied 117 * if there is a need to identify something as IPv6 versus IPv4 118 * IPv6 functions will end with _v6 in the ip module. 119 * IPv6 functions will end with _ipv6 in the transport modules. 120 * IPv6 macros: 121 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 122 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 123 * And then there are ..V4_PART_OF_V6. 124 * The intent is that macros in the ip module end with _V6. 125 * IPv6 global variables will start with ipv6_ 126 * IPv6 structures will start with ipv6 127 * IPv6 defined constants should start with IPV6_ 128 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 129 */ 130 131 /* 132 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 133 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 134 * from IANA. This mechanism will remain in effect until an official 135 * number is obtained. 
136 */ 137 uchar_t ip6opt_ls; 138 139 const in6_addr_t ipv6_all_ones = 140 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 141 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 142 143 #ifdef _BIG_ENDIAN 144 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 145 #else /* _BIG_ENDIAN */ 146 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 147 #endif /* _BIG_ENDIAN */ 148 149 #ifdef _BIG_ENDIAN 150 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 151 #else /* _BIG_ENDIAN */ 152 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 153 #endif /* _BIG_ENDIAN */ 154 155 #ifdef _BIG_ENDIAN 156 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 157 #else /* _BIG_ENDIAN */ 158 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 159 #endif /* _BIG_ENDIAN */ 160 161 #ifdef _BIG_ENDIAN 162 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 163 #else /* _BIG_ENDIAN */ 164 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 165 #endif /* _BIG_ENDIAN */ 166 167 #ifdef _BIG_ENDIAN 168 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 169 #else /* _BIG_ENDIAN */ 170 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 171 #endif /* _BIG_ENDIAN */ 172 173 #ifdef _BIG_ENDIAN 174 const in6_addr_t ipv6_solicited_node_mcast = 175 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 176 #else /* _BIG_ENDIAN */ 177 const in6_addr_t ipv6_solicited_node_mcast = 178 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 179 #endif /* _BIG_ENDIAN */ 180 181 /* Leave room for ip_newroute to tack on the src and target addresses */ 182 #define OK_RESOLVER_MP_V6(mp) \ 183 ((mp) && ((mp)->b_wptr - (mp)->b_rptr) >= (2 * IPV6_ADDR_LEN)) 184 185 #define IP6_MBLK_OK 0 186 #define IP6_MBLK_HDR_ERR 1 187 #define IP6_MBLK_LEN_ERR 2 188 189 static void icmp_inbound_too_big_v6(queue_t *, mblk_t *, ill_t *, ill_t *, 190 
boolean_t, zoneid_t); 191 static void icmp_pkt_v6(queue_t *, mblk_t *, void *, size_t, 192 const in6_addr_t *, boolean_t, zoneid_t, ip_stack_t *); 193 static void icmp_redirect_v6(queue_t *, mblk_t *, ill_t *ill); 194 static int ip_bind_connected_v6(conn_t *, mblk_t **, uint8_t, in6_addr_t *, 195 uint16_t, const in6_addr_t *, ip6_pkt_t *, uint16_t, 196 boolean_t, boolean_t, cred_t *); 197 static boolean_t ip_bind_get_ire_v6(mblk_t **, ire_t *, const in6_addr_t *, 198 iulp_t *, ip_stack_t *); 199 static int ip_bind_laddr_v6(conn_t *, mblk_t **, uint8_t, 200 const in6_addr_t *, uint16_t, boolean_t); 201 static void ip_fanout_proto_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 202 ill_t *, uint8_t, uint_t, uint_t, boolean_t, zoneid_t); 203 static void ip_fanout_tcp_v6(queue_t *, mblk_t *, ip6_t *, ill_t *, 204 ill_t *, uint_t, uint_t, boolean_t, zoneid_t); 205 static void ip_fanout_udp_v6(queue_t *, mblk_t *, ip6_t *, uint32_t, 206 ill_t *, ill_t *, uint_t, boolean_t, zoneid_t); 207 static int ip_process_options_v6(queue_t *, mblk_t *, ip6_t *, 208 uint8_t *, uint_t, uint8_t, ip_stack_t *); 209 static mblk_t *ip_rput_frag_v6(ill_t *, ill_t *, mblk_t *, ip6_t *, 210 ip6_frag_t *, uint_t, uint_t *, uint32_t *, uint16_t *); 211 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 212 static void ip_wput_ire_v6(queue_t *, mblk_t *, ire_t *, int, int, 213 conn_t *, int, int, zoneid_t); 214 static boolean_t ipif_lookup_testaddr_v6(ill_t *, const in6_addr_t *, 215 ipif_t **); 216 217 /* 218 * A template for an IPv6 AR_ENTRY_QUERY 219 */ 220 static areq_t ipv6_areq_template = { 221 AR_ENTRY_QUERY, /* cmd */ 222 sizeof (areq_t)+(2*IPV6_ADDR_LEN), /* name offset */ 223 sizeof (areq_t), /* name len (filled by ill_arp_alloc) */ 224 ETHERTYPE_IPV6, /* protocol, from arps perspective */ 225 sizeof (areq_t), /* target addr offset */ 226 IPV6_ADDR_LEN, /* target addr_length */ 227 0, /* flags */ 228 sizeof (areq_t) + IPV6_ADDR_LEN, /* sender addr offset */ 229 
IPV6_ADDR_LEN, /* sender addr length */ 230 6, /* xmit_count */ 231 1000, /* (re)xmit_interval in milliseconds */ 232 4 /* max # of requests to buffer */ 233 /* anything else filled in by the code */ 234 }; 235 236 /* 237 * Handle IPv6 ICMP packets sent to us. Consume the mblk passed in. 238 * The message has already been checksummed and if needed, 239 * a copy has been made to be sent any interested ICMP client (conn) 240 * Note that this is different than icmp_inbound() which does the fanout 241 * to conn's as well as local processing of the ICMP packets. 242 * 243 * All error messages are passed to the matching transport stream. 244 * 245 * Zones notes: 246 * The packet is only processed in the context of the specified zone: typically 247 * only this zone will reply to an echo request. This means that the caller must 248 * call icmp_inbound_v6() for each relevant zone. 249 */ 250 static void 251 icmp_inbound_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 252 uint_t hdr_length, boolean_t mctl_present, uint_t flags, zoneid_t zoneid, 253 mblk_t *dl_mp) 254 { 255 icmp6_t *icmp6; 256 ip6_t *ip6h; 257 boolean_t interested; 258 in6_addr_t origsrc; 259 mblk_t *first_mp; 260 ipsec_in_t *ii; 261 ip_stack_t *ipst = ill->ill_ipst; 262 263 ASSERT(ill != NULL); 264 first_mp = mp; 265 if (mctl_present) { 266 mp = first_mp->b_cont; 267 ASSERT(mp != NULL); 268 269 ii = (ipsec_in_t *)first_mp->b_rptr; 270 ASSERT(ii->ipsec_in_type == IPSEC_IN); 271 } 272 273 ip6h = (ip6_t *)mp->b_rptr; 274 275 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 276 277 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + ICMP6_MINLEN)) { 278 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 279 ip1dbg(("icmp_inbound_v6: pullupmsg failed\n")); 280 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 281 freemsg(first_mp); 282 return; 283 } 284 ip6h = (ip6_t *)mp->b_rptr; 285 } 286 if (ipst->ips_icmp_accept_clear_messages == 0) { 287 first_mp = ipsec_check_global_policy(first_mp, NULL, 288 NULL, ip6h, 
mctl_present, ipst->ips_netstack); 289 if (first_mp == NULL) 290 return; 291 } 292 293 /* 294 * On a labeled system, we have to check whether the zone itself is 295 * permitted to receive raw traffic. 296 */ 297 if (is_system_labeled()) { 298 if (zoneid == ALL_ZONES) 299 zoneid = tsol_packet_to_zoneid(mp); 300 if (!tsol_can_accept_raw(mp, B_FALSE)) { 301 ip1dbg(("icmp_inbound_v6: zone %d can't receive raw", 302 zoneid)); 303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 304 freemsg(first_mp); 305 return; 306 } 307 } 308 309 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 310 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 311 icmp6->icmp6_code)); 312 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 313 314 /* Initiate IPPF processing here */ 315 if (IP6_IN_IPP(flags, ipst)) { 316 317 /* 318 * If the ifindex changes due to SIOCSLIFINDEX 319 * packet may return to IP on the wrong ill. 320 */ 321 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 322 if (mp == NULL) { 323 if (mctl_present) { 324 freeb(first_mp); 325 } 326 return; 327 } 328 } 329 330 switch (icmp6->icmp6_type) { 331 case ICMP6_DST_UNREACH: 332 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 333 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 334 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 335 break; 336 337 case ICMP6_TIME_EXCEEDED: 338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 339 break; 340 341 case ICMP6_PARAM_PROB: 342 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 343 break; 344 345 case ICMP6_PACKET_TOO_BIG: 346 icmp_inbound_too_big_v6(q, first_mp, ill, inill, mctl_present, 347 zoneid); 348 return; 349 case ICMP6_ECHO_REQUEST: 350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 351 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 352 !ipst->ips_ipv6_resp_echo_mcast) 353 break; 354 355 /* 356 * We must have exclusive use of the mblk to convert it to 357 * a response. 358 * If not, we copy it. 
359 */ 360 if (mp->b_datap->db_ref > 1) { 361 mblk_t *mp1; 362 363 mp1 = copymsg(mp); 364 freemsg(mp); 365 if (mp1 == NULL) { 366 BUMP_MIB(ill->ill_icmp6_mib, 367 ipv6IfIcmpInErrors); 368 if (mctl_present) 369 freeb(first_mp); 370 return; 371 } 372 mp = mp1; 373 ip6h = (ip6_t *)mp->b_rptr; 374 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 375 if (mctl_present) 376 first_mp->b_cont = mp; 377 else 378 first_mp = mp; 379 } 380 381 /* 382 * Turn the echo into an echo reply. 383 * Remove any extension headers (do not reverse a source route) 384 * and clear the flow id (keep traffic class for now). 385 */ 386 if (hdr_length != IPV6_HDR_LEN) { 387 int i; 388 389 for (i = 0; i < IPV6_HDR_LEN; i++) 390 mp->b_rptr[hdr_length - i - 1] = 391 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 392 mp->b_rptr += (hdr_length - IPV6_HDR_LEN); 393 ip6h = (ip6_t *)mp->b_rptr; 394 ip6h->ip6_nxt = IPPROTO_ICMPV6; 395 hdr_length = IPV6_HDR_LEN; 396 } 397 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 398 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 399 400 ip6h->ip6_plen = 401 htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 402 origsrc = ip6h->ip6_src; 403 /* 404 * Reverse the source and destination addresses. 405 * If the return address is a multicast, zero out the source 406 * (ip_wput_v6 will set an address). 407 */ 408 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 409 ip6h->ip6_src = ipv6_all_zeros; 410 ip6h->ip6_dst = origsrc; 411 } else { 412 ip6h->ip6_src = ip6h->ip6_dst; 413 ip6h->ip6_dst = origsrc; 414 } 415 416 /* set the hop limit */ 417 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 418 419 /* 420 * Prepare for checksum by putting icmp length in the icmp 421 * checksum field. The checksum is calculated in ip_wput_v6. 422 */ 423 icmp6->icmp6_cksum = ip6h->ip6_plen; 424 425 if (!mctl_present) { 426 /* 427 * This packet should go out the same way as it 428 * came in i.e in clear. 
To make sure that global 429 * policy will not be applied to this in ip_wput, 430 * we attach a IPSEC_IN mp and clear ipsec_in_secure. 431 */ 432 ASSERT(first_mp == mp); 433 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 434 if (first_mp == NULL) { 435 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 436 freemsg(mp); 437 return; 438 } 439 ii = (ipsec_in_t *)first_mp->b_rptr; 440 441 /* This is not a secure packet */ 442 ii->ipsec_in_secure = B_FALSE; 443 first_mp->b_cont = mp; 444 } 445 if (!ipsec_in_to_out(first_mp, NULL, ip6h, zoneid)) { 446 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 447 return; 448 } 449 put(WR(q), first_mp); 450 return; 451 452 case ICMP6_ECHO_REPLY: 453 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 454 break; 455 456 case ND_ROUTER_SOLICIT: 457 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 458 break; 459 460 case ND_ROUTER_ADVERT: 461 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 462 break; 463 464 case ND_NEIGHBOR_SOLICIT: 465 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 466 if (mctl_present) 467 freeb(first_mp); 468 /* XXX may wish to pass first_mp up to ndp_input someday. */ 469 ndp_input(inill, mp, dl_mp); 470 return; 471 472 case ND_NEIGHBOR_ADVERT: 473 BUMP_MIB(ill->ill_icmp6_mib, 474 ipv6IfIcmpInNeighborAdvertisements); 475 if (mctl_present) 476 freeb(first_mp); 477 /* XXX may wish to pass first_mp up to ndp_input someday. */ 478 ndp_input(inill, mp, dl_mp); 479 return; 480 481 case ND_REDIRECT: { 482 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 483 484 if (ipst->ips_ipv6_ignore_redirect) 485 break; 486 487 /* 488 * As there is no upper client to deliver, we don't 489 * need the first_mp any more. 
490 */ 491 if (mctl_present) 492 freeb(first_mp); 493 if (!pullupmsg(mp, -1)) { 494 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 495 break; 496 } 497 icmp_redirect_v6(q, mp, ill); 498 return; 499 } 500 501 /* 502 * The next three icmp messages will be handled by MLD. 503 * Pass all valid MLD packets up to any process(es) 504 * listening on a raw ICMP socket. MLD messages are 505 * freed by mld_input function. 506 */ 507 case MLD_LISTENER_QUERY: 508 case MLD_LISTENER_REPORT: 509 case MLD_LISTENER_REDUCTION: 510 if (mctl_present) 511 freeb(first_mp); 512 mld_input(q, mp, ill); 513 return; 514 default: 515 break; 516 } 517 if (interested) { 518 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, 519 inill, mctl_present, zoneid); 520 } else { 521 freemsg(first_mp); 522 } 523 } 524 525 /* 526 * Process received IPv6 ICMP Packet too big. 527 * After updating any IRE it does the fanout to any matching transport streams. 528 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 529 */ 530 /* ARGSUSED */ 531 static void 532 icmp_inbound_too_big_v6(queue_t *q, mblk_t *mp, ill_t *ill, ill_t *inill, 533 boolean_t mctl_present, zoneid_t zoneid) 534 { 535 ip6_t *ip6h; 536 ip6_t *inner_ip6h; 537 icmp6_t *icmp6; 538 uint16_t hdr_length; 539 uint32_t mtu; 540 ire_t *ire, *first_ire; 541 mblk_t *first_mp; 542 ip_stack_t *ipst = ill->ill_ipst; 543 544 first_mp = mp; 545 if (mctl_present) 546 mp = first_mp->b_cont; 547 /* 548 * We must have exclusive use of the mblk to update the MTU 549 * in the packet. 550 * If not, we copy it. 551 * 552 * If there's an M_CTL present, we know that allocated first_mp 553 * earlier in this function, so we know first_mp has refcnt of one. 
554 */ 555 ASSERT(!mctl_present || first_mp->b_datap->db_ref == 1); 556 if (mp->b_datap->db_ref > 1) { 557 mblk_t *mp1; 558 559 mp1 = copymsg(mp); 560 freemsg(mp); 561 if (mp1 == NULL) { 562 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 563 if (mctl_present) 564 freeb(first_mp); 565 return; 566 } 567 mp = mp1; 568 if (mctl_present) 569 first_mp->b_cont = mp; 570 else 571 first_mp = mp; 572 } 573 ip6h = (ip6_t *)mp->b_rptr; 574 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 575 hdr_length = ip_hdr_length_v6(mp, ip6h); 576 else 577 hdr_length = IPV6_HDR_LEN; 578 579 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 580 ASSERT((size_t)(mp->b_wptr - mp->b_rptr) >= hdr_length + ICMP6_MINLEN); 581 inner_ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 582 if ((uchar_t *)&inner_ip6h[1] > mp->b_wptr) { 583 if (!pullupmsg(mp, (uchar_t *)&inner_ip6h[1] - mp->b_rptr)) { 584 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 585 freemsg(first_mp); 586 return; 587 } 588 ip6h = (ip6_t *)mp->b_rptr; 589 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 590 inner_ip6h = (ip6_t *)&icmp6[1]; 591 } 592 593 /* 594 * For link local destinations matching simply on IRE type is not 595 * sufficient. Same link local addresses for different ILL's is 596 * possible. 
597 */ 598 if (IN6_IS_ADDR_LINKLOCAL(&inner_ip6h->ip6_dst)) { 599 first_ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 600 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, 601 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 602 603 if (first_ire == NULL) { 604 if (ip_debug > 2) { 605 /* ip1dbg */ 606 pr_addr_dbg("icmp_inbound_too_big_v6:" 607 "no ire for dst %s\n", AF_INET6, 608 &inner_ip6h->ip6_dst); 609 } 610 freemsg(first_mp); 611 return; 612 } 613 614 mtu = ntohl(icmp6->icmp6_mtu); 615 rw_enter(&first_ire->ire_bucket->irb_lock, RW_READER); 616 for (ire = first_ire; ire != NULL && 617 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &inner_ip6h->ip6_dst); 618 ire = ire->ire_next) { 619 mutex_enter(&ire->ire_lock); 620 if (mtu < IPV6_MIN_MTU) { 621 ip1dbg(("Received mtu less than IPv6 " 622 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 623 mtu = IPV6_MIN_MTU; 624 /* 625 * If an mtu less than IPv6 min mtu is received, 626 * we must include a fragment header in 627 * subsequent packets. 628 */ 629 ire->ire_frag_flag |= IPH_FRAG_HDR; 630 } 631 ip1dbg(("Received mtu from router: %d\n", mtu)); 632 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 633 if (ire->ire_max_frag == mtu) { 634 /* Decreased it */ 635 ire->ire_marks |= IRE_MARK_PMTU; 636 } 637 /* Record the new max frag size for the ULP. */ 638 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 639 /* 640 * If we need a fragment header in every packet 641 * (above case or multirouting), make sure the 642 * ULP takes it into account when computing the 643 * payload size. 
644 */ 645 icmp6->icmp6_mtu = htonl(ire->ire_max_frag - 646 sizeof (ip6_frag_t)); 647 } else { 648 icmp6->icmp6_mtu = htonl(ire->ire_max_frag); 649 } 650 mutex_exit(&ire->ire_lock); 651 } 652 rw_exit(&first_ire->ire_bucket->irb_lock); 653 ire_refrele(first_ire); 654 } else { 655 irb_t *irb = NULL; 656 /* 657 * for non-link local destinations we match only on the IRE type 658 */ 659 ire = ire_ctable_lookup_v6(&inner_ip6h->ip6_dst, NULL, 660 IRE_CACHE, ill->ill_ipif, ALL_ZONES, NULL, MATCH_IRE_TYPE, 661 ipst); 662 if (ire == NULL) { 663 if (ip_debug > 2) { 664 /* ip1dbg */ 665 pr_addr_dbg("icmp_inbound_too_big_v6:" 666 "no ire for dst %s\n", 667 AF_INET6, &inner_ip6h->ip6_dst); 668 } 669 freemsg(first_mp); 670 return; 671 } 672 irb = ire->ire_bucket; 673 ire_refrele(ire); 674 rw_enter(&irb->irb_lock, RW_READER); 675 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 676 if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, 677 &inner_ip6h->ip6_dst)) { 678 mtu = ntohl(icmp6->icmp6_mtu); 679 mutex_enter(&ire->ire_lock); 680 if (mtu < IPV6_MIN_MTU) { 681 ip1dbg(("Received mtu less than IPv6" 682 "min mtu %d: %d\n", 683 IPV6_MIN_MTU, mtu)); 684 mtu = IPV6_MIN_MTU; 685 /* 686 * If an mtu less than IPv6 min mtu is 687 * received, we must include a fragment 688 * header in subsequent packets. 689 */ 690 ire->ire_frag_flag |= IPH_FRAG_HDR; 691 } 692 693 ip1dbg(("Received mtu from router: %d\n", mtu)); 694 ire->ire_max_frag = MIN(ire->ire_max_frag, mtu); 695 if (ire->ire_max_frag == mtu) { 696 /* Decreased it */ 697 ire->ire_marks |= IRE_MARK_PMTU; 698 } 699 /* Record the new max frag size for the ULP. */ 700 if (ire->ire_frag_flag & IPH_FRAG_HDR) { 701 /* 702 * If we need a fragment header in 703 * every packet (above case or 704 * multirouting), make sure the ULP 705 * takes it into account when computing 706 * the payload size. 
707 */ 708 icmp6->icmp6_mtu = 709 htonl(ire->ire_max_frag - 710 sizeof (ip6_frag_t)); 711 } else { 712 icmp6->icmp6_mtu = 713 htonl(ire->ire_max_frag); 714 } 715 mutex_exit(&ire->ire_lock); 716 } 717 } 718 rw_exit(&irb->irb_lock); 719 } 720 icmp_inbound_error_fanout_v6(q, first_mp, ip6h, icmp6, ill, inill, 721 mctl_present, zoneid); 722 } 723 724 /* 725 * Fanout for ICMPv6 errors containing IP-in-IPv6 packets. Returns B_TRUE if a 726 * tunnel consumed the message, and B_FALSE otherwise. 727 */ 728 static boolean_t 729 icmp_inbound_iptun_fanout_v6(mblk_t *first_mp, ip6_t *rip6h, ill_t *ill, 730 ip_stack_t *ipst) 731 { 732 conn_t *connp; 733 734 if ((connp = ipcl_iptun_classify_v6(&rip6h->ip6_src, &rip6h->ip6_dst, 735 ipst)) == NULL) 736 return (B_FALSE); 737 738 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 739 connp->conn_recv(connp, first_mp, NULL); 740 CONN_DEC_REF(connp); 741 return (B_TRUE); 742 } 743 744 /* 745 * Fanout received ICMPv6 error packets to the transports. 746 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 
747 */ 748 void 749 icmp_inbound_error_fanout_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, 750 icmp6_t *icmp6, ill_t *ill, ill_t *inill, boolean_t mctl_present, 751 zoneid_t zoneid) 752 { 753 uint16_t *up; /* Pointer to ports in ULP header */ 754 uint32_t ports; /* reversed ports for fanout */ 755 ip6_t rip6h; /* With reversed addresses */ 756 uint16_t hdr_length; 757 uint8_t *nexthdrp; 758 uint8_t nexthdr; 759 mblk_t *first_mp; 760 ipsec_in_t *ii; 761 tcpha_t *tcpha; 762 conn_t *connp; 763 ip_stack_t *ipst = ill->ill_ipst; 764 765 first_mp = mp; 766 if (mctl_present) { 767 mp = first_mp->b_cont; 768 ASSERT(mp != NULL); 769 770 ii = (ipsec_in_t *)first_mp->b_rptr; 771 ASSERT(ii->ipsec_in_type == IPSEC_IN); 772 } else { 773 ii = NULL; 774 } 775 776 hdr_length = (uint16_t)((uchar_t *)icmp6 - (uchar_t *)ip6h); 777 ASSERT((size_t)(mp->b_wptr - (uchar_t *)icmp6) >= ICMP6_MINLEN); 778 779 /* 780 * Need to pullup everything in order to use 781 * ip_hdr_length_nexthdr_v6() 782 */ 783 if (mp->b_cont != NULL) { 784 if (!pullupmsg(mp, -1)) { 785 ip1dbg(("icmp_inbound_error_fanout_v6: " 786 "pullupmsg failed\n")); 787 goto drop_pkt; 788 } 789 ip6h = (ip6_t *)mp->b_rptr; 790 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 791 } 792 793 ip6h = (ip6_t *)&icmp6[1]; /* Packet in error */ 794 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 795 goto drop_pkt; 796 797 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 798 goto drop_pkt; 799 nexthdr = *nexthdrp; 800 801 /* Set message type, must be done after pullups */ 802 mp->b_datap->db_type = M_CTL; 803 804 /* 805 * We need a separate IP header with the source and destination 806 * addresses reversed to do fanout/classification because the ip6h in 807 * the ICMPv6 error is in the form we sent it out. 
808 */ 809 rip6h.ip6_src = ip6h->ip6_dst; 810 rip6h.ip6_dst = ip6h->ip6_src; 811 rip6h.ip6_nxt = nexthdr; 812 813 /* Try to pass the ICMP message to clients who need it */ 814 switch (nexthdr) { 815 case IPPROTO_UDP: { 816 /* 817 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 818 * UDP header to get the port information. 819 */ 820 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 821 mp->b_wptr) { 822 break; 823 } 824 /* Attempt to find a client stream based on port. */ 825 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 826 ((uint16_t *)&ports)[0] = up[1]; 827 ((uint16_t *)&ports)[1] = up[0]; 828 829 ip_fanout_udp_v6(q, first_mp, &rip6h, ports, ill, inill, 830 IP6_NO_IPPOLICY, mctl_present, zoneid); 831 return; 832 } 833 case IPPROTO_TCP: { 834 /* 835 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 836 * the TCP header to get the port information. 837 */ 838 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 839 mp->b_wptr) { 840 break; 841 } 842 843 /* 844 * Attempt to find a client stream based on port. 845 * Note that we do a reverse lookup since the header is 846 * in the form we sent it out. 847 */ 848 tcpha = (tcpha_t *)((char *)ip6h + hdr_length); 849 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 850 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 851 if (connp == NULL) { 852 goto drop_pkt; 853 } 854 855 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, tcp_input, connp, 856 SQ_FILL, SQTAG_TCP6_INPUT_ICMP_ERR); 857 return; 858 859 } 860 case IPPROTO_SCTP: 861 /* 862 * Verify we have at least ICMP_MIN_SCTP_HDR_LEN bytes of 863 * transport header to get the port information. 
864 */ 865 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_SCTP_HDR_LEN > 866 mp->b_wptr) { 867 if (!pullupmsg(mp, (uchar_t *)ip6h + hdr_length + 868 ICMP_MIN_SCTP_HDR_LEN - mp->b_rptr)) { 869 goto drop_pkt; 870 } 871 } 872 873 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 874 ((uint16_t *)&ports)[0] = up[1]; 875 ((uint16_t *)&ports)[1] = up[0]; 876 ip_fanout_sctp(first_mp, inill, (ipha_t *)ip6h, ports, 0, 877 mctl_present, IP6_NO_IPPOLICY, zoneid); 878 return; 879 case IPPROTO_ESP: 880 case IPPROTO_AH: { 881 int ipsec_rc; 882 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 883 884 /* 885 * We need a IPSEC_IN in the front to fanout to AH/ESP. 886 * We will re-use the IPSEC_IN if it is already present as 887 * AH/ESP will not affect any fields in the IPSEC_IN for 888 * ICMP errors. If there is no IPSEC_IN, allocate a new 889 * one and attach it in the front. 890 */ 891 if (ii != NULL) { 892 /* 893 * ip_fanout_proto_again converts the ICMP errors 894 * that come back from AH/ESP to M_DATA so that 895 * if it is non-AH/ESP and we do a pullupmsg in 896 * this function, it would work. Convert it back 897 * to M_CTL before we send up as this is a ICMP 898 * error. This could have been generated locally or 899 * by some router. Validate the inner IPSEC 900 * headers. 901 * 902 * NOTE : ill_index is used by ip_fanout_proto_again 903 * to locate the ill. 904 */ 905 ASSERT(ill != NULL); 906 ii->ipsec_in_ill_index = 907 ill->ill_phyint->phyint_ifindex; 908 ii->ipsec_in_rill_index = 909 inill->ill_phyint->phyint_ifindex; 910 first_mp->b_cont->b_datap->db_type = M_CTL; 911 } else { 912 /* 913 * IPSEC_IN is not present. We attach a ipsec_in 914 * message and send up to IPSEC for validating 915 * and removing the IPSEC headers. Clear 916 * ipsec_in_secure so that when we return 917 * from IPSEC, we don't mistakenly think that this 918 * is a secure packet came from the network. 919 * 920 * NOTE : ill_index is used by ip_fanout_proto_again 921 * to locate the ill. 
922 */ 923 ASSERT(first_mp == mp); 924 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 925 ASSERT(ill != NULL); 926 if (first_mp == NULL) { 927 freemsg(mp); 928 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 929 return; 930 } 931 ii = (ipsec_in_t *)first_mp->b_rptr; 932 933 /* This is not a secure packet */ 934 ii->ipsec_in_secure = B_FALSE; 935 first_mp->b_cont = mp; 936 mp->b_datap->db_type = M_CTL; 937 ii->ipsec_in_ill_index = 938 ill->ill_phyint->phyint_ifindex; 939 ii->ipsec_in_rill_index = 940 inill->ill_phyint->phyint_ifindex; 941 } 942 943 if (!ipsec_loaded(ipss)) { 944 ip_proto_not_sup(q, first_mp, 0, zoneid, ipst); 945 return; 946 } 947 948 if (nexthdr == IPPROTO_ESP) 949 ipsec_rc = ipsecesp_icmp_error(first_mp); 950 else 951 ipsec_rc = ipsecah_icmp_error(first_mp); 952 if (ipsec_rc == IPSEC_STATUS_FAILED) 953 return; 954 955 ip_fanout_proto_again(first_mp, ill, inill, NULL); 956 return; 957 } 958 case IPPROTO_ENCAP: 959 case IPPROTO_IPV6: 960 if ((uint8_t *)ip6h + hdr_length + 961 (nexthdr == IPPROTO_ENCAP ? sizeof (ipha_t) : 962 sizeof (ip6_t)) > mp->b_wptr) { 963 goto drop_pkt; 964 } 965 966 if (nexthdr == IPPROTO_ENCAP || 967 !IN6_ARE_ADDR_EQUAL( 968 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_src, 969 &ip6h->ip6_src) || 970 !IN6_ARE_ADDR_EQUAL( 971 &((ip6_t *)(((uint8_t *)ip6h) + hdr_length))->ip6_dst, 972 &ip6h->ip6_dst)) { 973 /* 974 * For tunnels that have used IPsec protection, 975 * we need to adjust the MTU to take into account 976 * the IPsec overhead. 977 */ 978 if (ii != NULL) { 979 icmp6->icmp6_mtu = htonl( 980 ntohl(icmp6->icmp6_mtu) - 981 ipsec_in_extra_length(first_mp)); 982 } 983 } else { 984 /* 985 * Self-encapsulated case. As in the ipv4 case, 986 * we need to strip the 2nd IP header. Since mp 987 * is already pulled-up, we can simply bcopy 988 * the 3rd header + data over the 2nd header. 
989 */ 990 uint16_t unused_len; 991 ip6_t *inner_ip6h = (ip6_t *) 992 ((uchar_t *)ip6h + hdr_length); 993 994 /* 995 * Make sure we don't do recursion more than once. 996 */ 997 if (!ip_hdr_length_nexthdr_v6(mp, inner_ip6h, 998 &unused_len, &nexthdrp) || 999 *nexthdrp == IPPROTO_IPV6) { 1000 goto drop_pkt; 1001 } 1002 1003 /* 1004 * We are about to modify the packet. Make a copy if 1005 * someone else has a reference to it. 1006 */ 1007 if (DB_REF(mp) > 1) { 1008 mblk_t *mp1; 1009 uint16_t icmp6_offset; 1010 1011 mp1 = copymsg(mp); 1012 if (mp1 == NULL) { 1013 goto drop_pkt; 1014 } 1015 icmp6_offset = (uint16_t) 1016 ((uchar_t *)icmp6 - mp->b_rptr); 1017 freemsg(mp); 1018 mp = mp1; 1019 1020 icmp6 = (icmp6_t *)(mp->b_rptr + icmp6_offset); 1021 ip6h = (ip6_t *)&icmp6[1]; 1022 inner_ip6h = (ip6_t *) 1023 ((uchar_t *)ip6h + hdr_length); 1024 1025 if (mctl_present) 1026 first_mp->b_cont = mp; 1027 else 1028 first_mp = mp; 1029 } 1030 1031 /* 1032 * Need to set db_type back to M_DATA before 1033 * refeeding mp into this function. 1034 */ 1035 DB_TYPE(mp) = M_DATA; 1036 1037 /* 1038 * Copy the 3rd header + remaining data on top 1039 * of the 2nd header. 1040 */ 1041 bcopy(inner_ip6h, ip6h, 1042 mp->b_wptr - (uchar_t *)inner_ip6h); 1043 1044 /* 1045 * Subtract length of the 2nd header. 1046 */ 1047 mp->b_wptr -= hdr_length; 1048 1049 /* 1050 * Now recurse, and see what I _really_ should be 1051 * doing here. 1052 */ 1053 icmp_inbound_error_fanout_v6(q, first_mp, 1054 (ip6_t *)mp->b_rptr, icmp6, ill, inill, 1055 mctl_present, zoneid); 1056 return; 1057 } 1058 if (icmp_inbound_iptun_fanout_v6(first_mp, &rip6h, ill, ipst)) 1059 return; 1060 /* 1061 * No IP tunnel is associated with this error. Perhaps a raw 1062 * socket will want it. 
1063 */ 1064 /* FALLTHRU */ 1065 default: 1066 ip_fanout_proto_v6(q, first_mp, &rip6h, ill, inill, nexthdr, 0, 1067 IP6_NO_IPPOLICY, mctl_present, zoneid); 1068 return; 1069 } 1070 /* NOTREACHED */ 1071 drop_pkt: 1072 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1073 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1074 freemsg(first_mp); 1075 } 1076 1077 /* 1078 * Process received IPv6 ICMP Redirect messages. 1079 */ 1080 /* ARGSUSED */ 1081 static void 1082 icmp_redirect_v6(queue_t *q, mblk_t *mp, ill_t *ill) 1083 { 1084 ip6_t *ip6h; 1085 uint16_t hdr_length; 1086 nd_redirect_t *rd; 1087 ire_t *ire; 1088 ire_t *prev_ire; 1089 ire_t *redir_ire; 1090 in6_addr_t *src, *dst, *gateway; 1091 nd_opt_hdr_t *opt; 1092 nce_t *nce; 1093 int nce_flags = 0; 1094 int err = 0; 1095 boolean_t redirect_to_router = B_FALSE; 1096 int len; 1097 int optlen; 1098 iulp_t ulp_info = { 0 }; 1099 ill_t *prev_ire_ill; 1100 ipif_t *ipif; 1101 ip_stack_t *ipst = ill->ill_ipst; 1102 1103 ip6h = (ip6_t *)mp->b_rptr; 1104 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 1105 hdr_length = ip_hdr_length_v6(mp, ip6h); 1106 else 1107 hdr_length = IPV6_HDR_LEN; 1108 1109 rd = (nd_redirect_t *)&mp->b_rptr[hdr_length]; 1110 len = mp->b_wptr - mp->b_rptr - hdr_length; 1111 src = &ip6h->ip6_src; 1112 dst = &rd->nd_rd_dst; 1113 gateway = &rd->nd_rd_target; 1114 1115 /* Verify if it is a valid redirect */ 1116 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1117 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1118 (rd->nd_rd_code != 0) || 1119 (len < sizeof (nd_redirect_t)) || 1120 (IN6_IS_ADDR_V4MAPPED(dst)) || 1121 (IN6_IS_ADDR_MULTICAST(dst))) { 1122 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1123 freemsg(mp); 1124 return; 1125 } 1126 1127 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1128 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1129 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1130 freemsg(mp); 1131 return; 1132 } 1133 1134 if (len > sizeof (nd_redirect_t)) { 1135 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], 
1136 len - sizeof (nd_redirect_t))) { 1137 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1138 freemsg(mp); 1139 return; 1140 } 1141 } 1142 1143 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1144 redirect_to_router = B_TRUE; 1145 nce_flags |= NCE_F_ISROUTER; 1146 } 1147 1148 /* ipif will be refreleased afterwards */ 1149 ipif = ipif_get_next_ipif(NULL, ill); 1150 if (ipif == NULL) { 1151 freemsg(mp); 1152 return; 1153 } 1154 1155 /* 1156 * Verify that the IP source address of the redirect is 1157 * the same as the current first-hop router for the specified 1158 * ICMP destination address. 1159 * Also, Make sure we had a route for the dest in question and 1160 * that route was pointing to the old gateway (the source of the 1161 * redirect packet.) 1162 */ 1163 1164 prev_ire = ire_route_lookup_v6(dst, 0, src, 0, ipif, NULL, ALL_ZONES, 1165 NULL, MATCH_IRE_GW | MATCH_IRE_ILL | MATCH_IRE_DEFAULT, ipst); 1166 1167 /* 1168 * Check that 1169 * the redirect was not from ourselves 1170 * old gateway is still directly reachable 1171 */ 1172 if (prev_ire == NULL || 1173 prev_ire->ire_type == IRE_LOCAL) { 1174 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1175 ipif_refrele(ipif); 1176 goto fail_redirect; 1177 } 1178 prev_ire_ill = ire_to_ill(prev_ire); 1179 ASSERT(prev_ire_ill != NULL); 1180 if (prev_ire_ill->ill_flags & ILLF_NONUD) 1181 nce_flags |= NCE_F_NONUD; 1182 1183 /* 1184 * Should we use the old ULP info to create the new gateway? From 1185 * a user's perspective, we should inherit the info so that it 1186 * is a "smooth" transition. If we do not do that, then new 1187 * connections going thru the new gateway will have no route metrics, 1188 * which is counter-intuitive to user. From a network point of 1189 * view, this may or may not make sense even though the new gateway 1190 * is still directly connected to us so the route metrics should not 1191 * change much. 
1192 * 1193 * But if the old ire_uinfo is not initialized, we do another 1194 * recursive lookup on the dest using the new gateway. There may 1195 * be a route to that. If so, use it to initialize the redirect 1196 * route. 1197 */ 1198 if (prev_ire->ire_uinfo.iulp_set) { 1199 bcopy(&prev_ire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1200 } else if (redirect_to_router) { 1201 /* 1202 * Only do the following if the redirection is really to 1203 * a router. 1204 */ 1205 ire_t *tmp_ire; 1206 ire_t *sire; 1207 1208 tmp_ire = ire_ftable_lookup_v6(dst, 0, gateway, 0, NULL, &sire, 1209 ALL_ZONES, 0, NULL, 1210 (MATCH_IRE_RECURSIVE | MATCH_IRE_GW | MATCH_IRE_DEFAULT), 1211 ipst); 1212 if (sire != NULL) { 1213 bcopy(&sire->ire_uinfo, &ulp_info, sizeof (iulp_t)); 1214 ASSERT(tmp_ire != NULL); 1215 ire_refrele(tmp_ire); 1216 ire_refrele(sire); 1217 } else if (tmp_ire != NULL) { 1218 bcopy(&tmp_ire->ire_uinfo, &ulp_info, 1219 sizeof (iulp_t)); 1220 ire_refrele(tmp_ire); 1221 } 1222 } 1223 1224 optlen = mp->b_wptr - mp->b_rptr - hdr_length - sizeof (nd_redirect_t); 1225 opt = (nd_opt_hdr_t *)&rd[1]; 1226 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1227 if (opt != NULL) { 1228 err = ndp_lookup_then_add_v6(ill, 1229 B_FALSE, /* don't match across illgrp */ 1230 (uchar_t *)&opt[1], /* Link layer address */ 1231 gateway, 1232 &ipv6_all_ones, /* prefix mask */ 1233 &ipv6_all_zeros, /* Mapping mask */ 1234 0, 1235 nce_flags, 1236 ND_STALE, 1237 &nce); 1238 switch (err) { 1239 case 0: 1240 NCE_REFRELE(nce); 1241 break; 1242 case EEXIST: 1243 /* 1244 * Check to see if link layer address has changed and 1245 * process the nce_state accordingly. 
1246 */ 1247 ndp_process(nce, (uchar_t *)&opt[1], 0, B_FALSE); 1248 NCE_REFRELE(nce); 1249 break; 1250 default: 1251 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1252 err)); 1253 ipif_refrele(ipif); 1254 goto fail_redirect; 1255 } 1256 } 1257 if (redirect_to_router) { 1258 /* icmp_redirect_ok_v6() must have already verified this */ 1259 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1260 1261 /* 1262 * Create a Route Association. This will allow us to remember 1263 * a router told us to use the particular gateway. 1264 */ 1265 ire = ire_create_v6( 1266 dst, 1267 &ipv6_all_ones, /* mask */ 1268 &prev_ire->ire_src_addr_v6, /* source addr */ 1269 gateway, /* gateway addr */ 1270 &prev_ire->ire_max_frag, /* max frag */ 1271 NULL, /* no src nce */ 1272 NULL, /* no rfq */ 1273 NULL, /* no stq */ 1274 IRE_HOST, 1275 prev_ire->ire_ipif, 1276 NULL, 1277 0, 1278 0, 1279 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1280 &ulp_info, 1281 NULL, 1282 NULL, 1283 ipst); 1284 } else { 1285 queue_t *stq; 1286 1287 stq = (ipif->ipif_net_type == IRE_IF_RESOLVER) 1288 ? ipif->ipif_rq : ipif->ipif_wq; 1289 1290 /* 1291 * Just create an on link entry, i.e. interface route. 
1292 */ 1293 ire = ire_create_v6( 1294 dst, /* gateway == dst */ 1295 &ipv6_all_ones, /* mask */ 1296 &prev_ire->ire_src_addr_v6, /* source addr */ 1297 &ipv6_all_zeros, /* gateway addr */ 1298 &prev_ire->ire_max_frag, /* max frag */ 1299 NULL, /* no src nce */ 1300 NULL, /* ire rfq */ 1301 stq, /* ire stq */ 1302 ipif->ipif_net_type, /* IF_[NO]RESOLVER */ 1303 prev_ire->ire_ipif, 1304 &ipv6_all_ones, 1305 0, 1306 0, 1307 (RTF_DYNAMIC | RTF_HOST), 1308 &ulp_info, 1309 NULL, 1310 NULL, 1311 ipst); 1312 } 1313 1314 /* Release reference from earlier ipif_get_next_ipif() */ 1315 ipif_refrele(ipif); 1316 1317 if (ire == NULL) 1318 goto fail_redirect; 1319 1320 if (ire_add(&ire, NULL, NULL, NULL, B_FALSE) == 0) { 1321 1322 /* tell routing sockets that we received a redirect */ 1323 ip_rts_change_v6(RTM_REDIRECT, 1324 &rd->nd_rd_dst, 1325 &rd->nd_rd_target, 1326 &ipv6_all_ones, 0, &ire->ire_src_addr_v6, 1327 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1328 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1329 1330 /* 1331 * Delete any existing IRE_HOST type ires for this destination. 1332 * This together with the added IRE has the effect of 1333 * modifying an existing redirect. 
1334 */ 1335 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1336 ire->ire_ipif, NULL, ALL_ZONES, 0, NULL, 1337 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), ipst); 1338 1339 ire_refrele(ire); /* Held in ire_add_v6 */ 1340 1341 if (redir_ire != NULL) { 1342 if (redir_ire->ire_flags & RTF_DYNAMIC) 1343 ire_delete(redir_ire); 1344 ire_refrele(redir_ire); 1345 } 1346 } 1347 1348 if (prev_ire->ire_type == IRE_CACHE) 1349 ire_delete(prev_ire); 1350 ire_refrele(prev_ire); 1351 prev_ire = NULL; 1352 1353 fail_redirect: 1354 if (prev_ire != NULL) 1355 ire_refrele(prev_ire); 1356 freemsg(mp); 1357 } 1358 1359 static ill_t * 1360 ip_queue_to_ill_v6(queue_t *q, ip_stack_t *ipst) 1361 { 1362 ill_t *ill; 1363 1364 ASSERT(WR(q) == q); 1365 1366 if (q->q_next != NULL) { 1367 ill = (ill_t *)q->q_ptr; 1368 if (ILL_CAN_LOOKUP(ill)) 1369 ill_refhold(ill); 1370 else 1371 ill = NULL; 1372 } else { 1373 ill = ill_lookup_on_name(ipif_loopback_name, B_FALSE, B_TRUE, 1374 NULL, NULL, NULL, NULL, NULL, ipst); 1375 } 1376 if (ill == NULL) 1377 ip0dbg(("ip_queue_to_ill_v6: no ill\n")); 1378 return (ill); 1379 } 1380 1381 /* 1382 * Assigns an appropriate source address to the packet. 1383 * If origdst is one of our IP addresses that use it as the source. 1384 * If the queue is an ill queue then select a source from that ill. 1385 * Otherwise pick a source based on a route lookup back to the origsrc. 1386 * 1387 * src is the return parameter. Returns a pointer to src or NULL if failure. 
1388 */ 1389 static in6_addr_t * 1390 icmp_pick_source_v6(queue_t *wq, in6_addr_t *origsrc, in6_addr_t *origdst, 1391 in6_addr_t *src, zoneid_t zoneid, ip_stack_t *ipst) 1392 { 1393 ill_t *ill; 1394 ire_t *ire; 1395 ipif_t *ipif; 1396 1397 ASSERT(!(wq->q_flag & QREADR)); 1398 if (wq->q_next != NULL) { 1399 ill = (ill_t *)wq->q_ptr; 1400 } else { 1401 ill = NULL; 1402 } 1403 1404 ire = ire_route_lookup_v6(origdst, 0, 0, (IRE_LOCAL|IRE_LOOPBACK), 1405 NULL, NULL, zoneid, NULL, (MATCH_IRE_TYPE|MATCH_IRE_ZONEONLY), 1406 ipst); 1407 if (ire != NULL) { 1408 /* Destined to one of our addresses */ 1409 *src = *origdst; 1410 ire_refrele(ire); 1411 return (src); 1412 } 1413 if (ire != NULL) { 1414 ire_refrele(ire); 1415 ire = NULL; 1416 } 1417 if (ill == NULL) { 1418 /* What is the route back to the original source? */ 1419 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1420 NULL, NULL, zoneid, NULL, 1421 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1422 if (ire == NULL) { 1423 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1424 return (NULL); 1425 } 1426 ASSERT(ire->ire_ipif != NULL); 1427 ill = ire->ire_ipif->ipif_ill; 1428 ire_refrele(ire); 1429 } 1430 ipif = ipif_select_source_v6(ill, origsrc, B_FALSE, 1431 IPV6_PREFER_SRC_DEFAULT, zoneid); 1432 if (ipif != NULL) { 1433 *src = ipif->ipif_v6src_addr; 1434 ipif_refrele(ipif); 1435 return (src); 1436 } 1437 /* 1438 * Unusual case - can't find a usable source address to reach the 1439 * original source. Use what in the route to the source. 
1440 */ 1441 ire = ire_route_lookup_v6(origsrc, 0, 0, 0, 1442 NULL, NULL, zoneid, NULL, 1443 (MATCH_IRE_DEFAULT|MATCH_IRE_RECURSIVE), ipst); 1444 if (ire == NULL) { 1445 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 1446 return (NULL); 1447 } 1448 ASSERT(ire != NULL); 1449 *src = ire->ire_src_addr_v6; 1450 ire_refrele(ire); 1451 return (src); 1452 } 1453 1454 /* 1455 * Build and ship an IPv6 ICMP message using the packet data in mp, 1456 * and the ICMP header pointed to by "stuff". (May be called as 1457 * writer.) 1458 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1459 * verify that an icmp error packet can be sent. 1460 * 1461 * If q is an ill write side queue (which is the case when packets 1462 * arrive from ip_rput) then ip_wput code will ensure that packets to 1463 * link-local destinations are sent out that ill. 1464 * 1465 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1466 * source address (see above function). 1467 */ 1468 static void 1469 icmp_pkt_v6(queue_t *q, mblk_t *mp, void *stuff, size_t len, 1470 const in6_addr_t *v6src_ptr, boolean_t mctl_present, zoneid_t zoneid, 1471 ip_stack_t *ipst) 1472 { 1473 ip6_t *ip6h; 1474 in6_addr_t v6dst; 1475 size_t len_needed; 1476 size_t msg_len; 1477 mblk_t *mp1; 1478 icmp6_t *icmp6; 1479 ill_t *ill; 1480 in6_addr_t v6src; 1481 mblk_t *ipsec_mp; 1482 ipsec_out_t *io; 1483 1484 ill = ip_queue_to_ill_v6(q, ipst); 1485 if (ill == NULL) { 1486 freemsg(mp); 1487 return; 1488 } 1489 1490 if (mctl_present) { 1491 /* 1492 * If it is : 1493 * 1494 * 1) a IPSEC_OUT, then this is caused by outbound 1495 * datagram originating on this host. IPSEC processing 1496 * may or may not have been done. Refer to comments above 1497 * icmp_inbound_error_fanout for details. 1498 * 1499 * 2) a IPSEC_IN if we are generating a icmp_message 1500 * for an incoming datagram destined for us i.e called 1501 * from ip_fanout_send_icmp. 
1502 */ 1503 ipsec_info_t *in; 1504 1505 ipsec_mp = mp; 1506 mp = ipsec_mp->b_cont; 1507 1508 in = (ipsec_info_t *)ipsec_mp->b_rptr; 1509 ip6h = (ip6_t *)mp->b_rptr; 1510 1511 ASSERT(in->ipsec_info_type == IPSEC_OUT || 1512 in->ipsec_info_type == IPSEC_IN); 1513 1514 if (in->ipsec_info_type == IPSEC_IN) { 1515 /* 1516 * Convert the IPSEC_IN to IPSEC_OUT. 1517 */ 1518 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h, zoneid)) { 1519 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1520 ill_refrele(ill); 1521 return; 1522 } 1523 } else { 1524 ASSERT(in->ipsec_info_type == IPSEC_OUT); 1525 io = (ipsec_out_t *)in; 1526 /* 1527 * Clear out ipsec_out_proc_begin, so we do a fresh 1528 * ire lookup. 1529 */ 1530 io->ipsec_out_proc_begin = B_FALSE; 1531 } 1532 } else { 1533 /* 1534 * This is in clear. The icmp message we are building 1535 * here should go out in clear. 1536 */ 1537 ipsec_in_t *ii; 1538 ASSERT(mp->b_datap->db_type == M_DATA); 1539 ipsec_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 1540 if (ipsec_mp == NULL) { 1541 freemsg(mp); 1542 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1543 ill_refrele(ill); 1544 return; 1545 } 1546 ii = (ipsec_in_t *)ipsec_mp->b_rptr; 1547 1548 /* This is not a secure packet */ 1549 ii->ipsec_in_secure = B_FALSE; 1550 ipsec_mp->b_cont = mp; 1551 ip6h = (ip6_t *)mp->b_rptr; 1552 /* 1553 * Convert the IPSEC_IN to IPSEC_OUT. 
1554 */ 1555 if (!ipsec_in_to_out(ipsec_mp, NULL, ip6h, zoneid)) { 1556 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1557 ill_refrele(ill); 1558 return; 1559 } 1560 } 1561 io = (ipsec_out_t *)ipsec_mp->b_rptr; 1562 1563 if (v6src_ptr != NULL) { 1564 v6src = *v6src_ptr; 1565 } else { 1566 if (icmp_pick_source_v6(q, &ip6h->ip6_src, &ip6h->ip6_dst, 1567 &v6src, zoneid, ipst) == NULL) { 1568 freemsg(ipsec_mp); 1569 ill_refrele(ill); 1570 return; 1571 } 1572 } 1573 v6dst = ip6h->ip6_src; 1574 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1575 msg_len = msgdsize(mp); 1576 if (msg_len > len_needed) { 1577 if (!adjmsg(mp, len_needed - msg_len)) { 1578 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1579 freemsg(ipsec_mp); 1580 ill_refrele(ill); 1581 return; 1582 } 1583 msg_len = len_needed; 1584 } 1585 mp1 = allocb_tmpl(IPV6_HDR_LEN + len, mp); 1586 if (mp1 == NULL) { 1587 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1588 freemsg(ipsec_mp); 1589 ill_refrele(ill); 1590 return; 1591 } 1592 ill_refrele(ill); 1593 mp1->b_cont = mp; 1594 mp = mp1; 1595 ASSERT(ipsec_mp->b_datap->db_type == M_CTL && 1596 io->ipsec_out_type == IPSEC_OUT); 1597 ipsec_mp->b_cont = mp; 1598 1599 /* 1600 * Set ipsec_out_icmp_loopback so we can let the ICMP messages this 1601 * node generates be accepted in peace by all on-host destinations. 1602 * If we do NOT assume that all on-host destinations trust 1603 * self-generated ICMP messages, then rework here, ip.c, and spd.c. 1604 * (Look for ipsec_out_icmp_loopback). 
1605 */ 1606 io->ipsec_out_icmp_loopback = B_TRUE; 1607 1608 ip6h = (ip6_t *)mp->b_rptr; 1609 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1610 1611 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1612 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1613 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1614 ip6h->ip6_dst = v6dst; 1615 ip6h->ip6_src = v6src; 1616 msg_len += IPV6_HDR_LEN + len; 1617 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1618 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1619 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1620 } 1621 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1622 icmp6 = (icmp6_t *)&ip6h[1]; 1623 bcopy(stuff, (char *)icmp6, len); 1624 /* 1625 * Prepare for checksum by putting icmp length in the icmp 1626 * checksum field. The checksum is calculated in ip_wput_v6. 1627 */ 1628 icmp6->icmp6_cksum = ip6h->ip6_plen; 1629 if (icmp6->icmp6_type == ND_REDIRECT) { 1630 ip6h->ip6_hops = IPV6_MAX_HOPS; 1631 } 1632 /* Send to V6 writeside put routine */ 1633 put(q, ipsec_mp); 1634 } 1635 1636 /* 1637 * Update the output mib when ICMPv6 packets are sent. 
1638 */ 1639 static void 1640 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1641 { 1642 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1643 1644 switch (icmp6->icmp6_type) { 1645 case ICMP6_DST_UNREACH: 1646 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1647 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1648 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1649 break; 1650 1651 case ICMP6_TIME_EXCEEDED: 1652 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1653 break; 1654 1655 case ICMP6_PARAM_PROB: 1656 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1657 break; 1658 1659 case ICMP6_PACKET_TOO_BIG: 1660 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1661 break; 1662 1663 case ICMP6_ECHO_REQUEST: 1664 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1665 break; 1666 1667 case ICMP6_ECHO_REPLY: 1668 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1669 break; 1670 1671 case ND_ROUTER_SOLICIT: 1672 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1673 break; 1674 1675 case ND_ROUTER_ADVERT: 1676 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1677 break; 1678 1679 case ND_NEIGHBOR_SOLICIT: 1680 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1681 break; 1682 1683 case ND_NEIGHBOR_ADVERT: 1684 BUMP_MIB(ill->ill_icmp6_mib, 1685 ipv6IfIcmpOutNeighborAdvertisements); 1686 break; 1687 1688 case ND_REDIRECT: 1689 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1690 break; 1691 1692 case MLD_LISTENER_QUERY: 1693 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1694 break; 1695 1696 case MLD_LISTENER_REPORT: 1697 case MLD_V2_LISTENER_REPORT: 1698 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1699 break; 1700 1701 case MLD_LISTENER_REDUCTION: 1702 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1703 break; 1704 } 1705 } 1706 1707 /* 1708 * Check if it is ok to send an ICMPv6 error packet in 1709 * response to the IP packet in 
mp. 1710 * Free the message and return null if no 1711 * ICMP error packet should be sent. 1712 */ 1713 static mblk_t * 1714 icmp_pkt_err_ok_v6(queue_t *q, mblk_t *mp, 1715 boolean_t llbcast, boolean_t mcast_ok, ip_stack_t *ipst) 1716 { 1717 ip6_t *ip6h; 1718 1719 if (!mp) 1720 return (NULL); 1721 1722 ip6h = (ip6_t *)mp->b_rptr; 1723 1724 /* Check if source address uniquely identifies the host */ 1725 1726 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1727 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1728 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1729 freemsg(mp); 1730 return (NULL); 1731 } 1732 1733 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1734 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1735 icmp6_t *icmp6; 1736 1737 if (mp->b_wptr - mp->b_rptr < len_needed) { 1738 if (!pullupmsg(mp, len_needed)) { 1739 ill_t *ill; 1740 1741 ill = ip_queue_to_ill_v6(q, ipst); 1742 if (ill == NULL) { 1743 BUMP_MIB(&ipst->ips_icmp6_mib, 1744 ipv6IfIcmpInErrors); 1745 } else { 1746 BUMP_MIB(ill->ill_icmp6_mib, 1747 ipv6IfIcmpInErrors); 1748 ill_refrele(ill); 1749 } 1750 freemsg(mp); 1751 return (NULL); 1752 } 1753 ip6h = (ip6_t *)mp->b_rptr; 1754 } 1755 icmp6 = (icmp6_t *)&ip6h[1]; 1756 /* Explicitly do not generate errors in response to redirects */ 1757 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1758 icmp6->icmp6_type == ND_REDIRECT) { 1759 freemsg(mp); 1760 return (NULL); 1761 } 1762 } 1763 /* 1764 * Check that the destination is not multicast and that the packet 1765 * was not sent on link layer broadcast or multicast. (Exception 1766 * is Packet too big message as per the draft - when mcast_ok is set.) 1767 */ 1768 if (!mcast_ok && 1769 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1770 freemsg(mp); 1771 return (NULL); 1772 } 1773 if (icmp_err_rate_limit(ipst)) { 1774 /* 1775 * Only send ICMP error packets every so often. 1776 * This should be done on a per port/source basis, 1777 * but for now this will suffice. 
1778 */ 1779 freemsg(mp); 1780 return (NULL); 1781 } 1782 return (mp); 1783 } 1784 1785 /* 1786 * Generate an ICMPv6 redirect message. 1787 * Include target link layer address option if it exits. 1788 * Always include redirect header. 1789 */ 1790 static void 1791 icmp_send_redirect_v6(queue_t *q, mblk_t *mp, in6_addr_t *targetp, 1792 in6_addr_t *dest, ill_t *ill, boolean_t llbcast) 1793 { 1794 nd_redirect_t *rd; 1795 nd_opt_rd_hdr_t *rdh; 1796 uchar_t *buf; 1797 nce_t *nce = NULL; 1798 nd_opt_hdr_t *opt; 1799 int len; 1800 int ll_opt_len = 0; 1801 int max_redir_hdr_data_len; 1802 int pkt_len; 1803 in6_addr_t *srcp; 1804 ip_stack_t *ipst = ill->ill_ipst; 1805 1806 /* 1807 * We are called from ip_rput where we could 1808 * not have attached an IPSEC_IN. 1809 */ 1810 ASSERT(mp->b_datap->db_type == M_DATA); 1811 1812 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, B_FALSE, ipst); 1813 if (mp == NULL) 1814 return; 1815 nce = ndp_lookup_v6(ill, B_TRUE, targetp, B_FALSE); 1816 if (nce != NULL && nce->nce_state != ND_INCOMPLETE) { 1817 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1818 ill->ill_phys_addr_length + 7)/8 * 8; 1819 } 1820 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1821 ASSERT(len % 4 == 0); 1822 buf = kmem_alloc(len, KM_NOSLEEP); 1823 if (buf == NULL) { 1824 if (nce != NULL) 1825 NCE_REFRELE(nce); 1826 freemsg(mp); 1827 return; 1828 } 1829 1830 rd = (nd_redirect_t *)buf; 1831 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1832 rd->nd_rd_code = 0; 1833 rd->nd_rd_reserved = 0; 1834 rd->nd_rd_target = *targetp; 1835 rd->nd_rd_dst = *dest; 1836 1837 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1838 if (nce != NULL && ll_opt_len != 0) { 1839 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1840 opt->nd_opt_len = ll_opt_len/8; 1841 bcopy((char *)nce->nce_res_mp->b_rptr + 1842 NCE_LL_ADDR_OFFSET(ill), &opt[1], 1843 ill->ill_phys_addr_length); 1844 } 1845 if (nce != NULL) 1846 NCE_REFRELE(nce); 1847 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + 
ll_opt_len); 1848 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1849 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1850 max_redir_hdr_data_len = 1851 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1852 pkt_len = msgdsize(mp); 1853 /* Make sure mp is 8 byte aligned */ 1854 if (pkt_len > max_redir_hdr_data_len) { 1855 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1856 sizeof (nd_opt_rd_hdr_t))/8; 1857 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1858 } else { 1859 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1860 (void) adjmsg(mp, -(pkt_len % 8)); 1861 } 1862 rdh->nd_opt_rh_reserved1 = 0; 1863 rdh->nd_opt_rh_reserved2 = 0; 1864 /* ipif_v6src_addr contains the link-local source address */ 1865 srcp = &ill->ill_ipif->ipif_v6src_addr; 1866 1867 /* Redirects sent by router, and router is global zone */ 1868 icmp_pkt_v6(q, mp, buf, len, srcp, B_FALSE, GLOBAL_ZONEID, ipst); 1869 kmem_free(buf, len); 1870 } 1871 1872 1873 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1874 void 1875 icmp_time_exceeded_v6(queue_t *q, mblk_t *mp, uint8_t code, 1876 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1877 ip_stack_t *ipst) 1878 { 1879 icmp6_t icmp6; 1880 boolean_t mctl_present; 1881 mblk_t *first_mp; 1882 1883 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1884 1885 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1886 if (mp == NULL) { 1887 if (mctl_present) 1888 freeb(first_mp); 1889 return; 1890 } 1891 bzero(&icmp6, sizeof (icmp6_t)); 1892 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1893 icmp6.icmp6_code = code; 1894 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1895 zoneid, ipst); 1896 } 1897 1898 /* 1899 * Generate an ICMP unreachable message. 
1900 */ 1901 void 1902 icmp_unreachable_v6(queue_t *q, mblk_t *mp, uint8_t code, 1903 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1904 ip_stack_t *ipst) 1905 { 1906 icmp6_t icmp6; 1907 boolean_t mctl_present; 1908 mblk_t *first_mp; 1909 1910 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1911 1912 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1913 if (mp == NULL) { 1914 if (mctl_present) 1915 freeb(first_mp); 1916 return; 1917 } 1918 bzero(&icmp6, sizeof (icmp6_t)); 1919 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1920 icmp6.icmp6_code = code; 1921 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1922 zoneid, ipst); 1923 } 1924 1925 /* 1926 * Generate an ICMP pkt too big message. 1927 */ 1928 static void 1929 icmp_pkt2big_v6(queue_t *q, mblk_t *mp, uint32_t mtu, 1930 boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, ip_stack_t *ipst) 1931 { 1932 icmp6_t icmp6; 1933 mblk_t *first_mp; 1934 boolean_t mctl_present; 1935 1936 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1937 1938 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1939 if (mp == NULL) { 1940 if (mctl_present) 1941 freeb(first_mp); 1942 return; 1943 } 1944 bzero(&icmp6, sizeof (icmp6_t)); 1945 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1946 icmp6.icmp6_code = 0; 1947 icmp6.icmp6_mtu = htonl(mtu); 1948 1949 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1950 zoneid, ipst); 1951 } 1952 1953 /* 1954 * Generate an ICMP parameter problem message. (May be called as writer.) 1955 * 'offset' is the offset from the beginning of the packet in error. 
1956 */ 1957 static void 1958 icmp_param_problem_v6(queue_t *q, mblk_t *mp, uint8_t code, 1959 uint32_t offset, boolean_t llbcast, boolean_t mcast_ok, zoneid_t zoneid, 1960 ip_stack_t *ipst) 1961 { 1962 icmp6_t icmp6; 1963 boolean_t mctl_present; 1964 mblk_t *first_mp; 1965 1966 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 1967 1968 mp = icmp_pkt_err_ok_v6(q, mp, llbcast, mcast_ok, ipst); 1969 if (mp == NULL) { 1970 if (mctl_present) 1971 freeb(first_mp); 1972 return; 1973 } 1974 bzero((char *)&icmp6, sizeof (icmp6_t)); 1975 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1976 icmp6.icmp6_code = code; 1977 icmp6.icmp6_pptr = htonl(offset); 1978 icmp_pkt_v6(q, first_mp, &icmp6, sizeof (icmp6_t), NULL, mctl_present, 1979 zoneid, ipst); 1980 } 1981 1982 /* 1983 * This code will need to take into account the possibility of binding 1984 * to a link local address on a multi-homed host, in which case the 1985 * outgoing interface (from the conn) will need to be used when getting 1986 * an ire for the dst. Going through proper outgoing interface and 1987 * choosing the source address corresponding to the outgoing interface 1988 * is necessary when the destination address is a link-local address and 1989 * IPV6_BOUND_IF or IPV6_PKTINFO or scope_id has been set. 1990 * This can happen when active connection is setup; thus ipp pointer 1991 * is passed here from tcp_connect_*() routines, in non-TCP cases NULL 1992 * pointer is passed as ipp pointer. 
1993 */ 1994 mblk_t * 1995 ip_bind_v6(queue_t *q, mblk_t *mp, conn_t *connp, ip6_pkt_t *ipp) 1996 { 1997 ssize_t len; 1998 int protocol; 1999 struct T_bind_req *tbr; 2000 sin6_t *sin6; 2001 ipa6_conn_t *ac6; 2002 in6_addr_t *v6srcp; 2003 in6_addr_t *v6dstp; 2004 uint16_t lport; 2005 uint16_t fport; 2006 uchar_t *ucp; 2007 int error = 0; 2008 boolean_t local_bind; 2009 ipa6_conn_x_t *acx6; 2010 boolean_t verify_dst; 2011 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2012 cred_t *cr; 2013 2014 /* 2015 * All Solaris components should pass a db_credp 2016 * for this TPI message, hence we ASSERT. 2017 * But in case there is some other M_PROTO that looks 2018 * like a TPI message sent by some other kernel 2019 * component, we check and return an error. 2020 */ 2021 cr = msg_getcred(mp, NULL); 2022 ASSERT(cr != NULL); 2023 if (cr == NULL) { 2024 error = EINVAL; 2025 goto bad_addr; 2026 } 2027 2028 ASSERT(connp->conn_af_isv6); 2029 len = mp->b_wptr - mp->b_rptr; 2030 if (len < (sizeof (*tbr) + 1)) { 2031 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2032 "ip_bind_v6: bogus msg, len %ld", len); 2033 goto bad_addr; 2034 } 2035 /* Back up and extract the protocol identifier. */ 2036 mp->b_wptr--; 2037 tbr = (struct T_bind_req *)mp->b_rptr; 2038 /* Reset the message type in preparation for shipping it back. */ 2039 mp->b_datap->db_type = M_PCPROTO; 2040 2041 protocol = *mp->b_wptr & 0xFF; 2042 connp->conn_ulp = (uint8_t)protocol; 2043 2044 /* 2045 * Check for a zero length address. This is from a protocol that 2046 * wants to register to receive all packets of its type. 2047 */ 2048 if (tbr->ADDR_length == 0) { 2049 if ((protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP || 2050 protocol == IPPROTO_ESP || protocol == IPPROTO_AH) && 2051 ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head != 2052 NULL) { 2053 /* 2054 * TCP, SCTP, AH, and ESP have single protocol fanouts. 2055 * Do not allow others to bind to these. 
2056 */ 2057 goto bad_addr; 2058 } 2059 2060 /* 2061 * 2062 * The udp module never sends down a zero-length address, 2063 * and allowing this on a labeled system will break MLP 2064 * functionality. 2065 */ 2066 if (is_system_labeled() && protocol == IPPROTO_UDP) 2067 goto bad_addr; 2068 2069 /* Allow ipsec plumbing */ 2070 if ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 2071 (protocol != IPPROTO_AH) && (protocol != IPPROTO_ESP)) 2072 goto bad_addr; 2073 2074 connp->conn_srcv6 = ipv6_all_zeros; 2075 ipcl_proto_insert_v6(connp, protocol); 2076 2077 tbr->PRIM_type = T_BIND_ACK; 2078 return (mp); 2079 } 2080 2081 /* Extract the address pointer from the message. */ 2082 ucp = (uchar_t *)mi_offset_param(mp, tbr->ADDR_offset, 2083 tbr->ADDR_length); 2084 if (ucp == NULL) { 2085 ip1dbg(("ip_bind_v6: no address\n")); 2086 goto bad_addr; 2087 } 2088 if (!OK_32PTR(ucp)) { 2089 ip1dbg(("ip_bind_v6: unaligned address\n")); 2090 goto bad_addr; 2091 } 2092 2093 switch (tbr->ADDR_length) { 2094 default: 2095 ip1dbg(("ip_bind_v6: bad address length %d\n", 2096 (int)tbr->ADDR_length)); 2097 goto bad_addr; 2098 2099 case IPV6_ADDR_LEN: 2100 /* Verification of local address only */ 2101 v6srcp = (in6_addr_t *)ucp; 2102 lport = 0; 2103 local_bind = B_TRUE; 2104 break; 2105 2106 case sizeof (sin6_t): 2107 sin6 = (sin6_t *)ucp; 2108 v6srcp = &sin6->sin6_addr; 2109 lport = sin6->sin6_port; 2110 local_bind = B_TRUE; 2111 break; 2112 2113 case sizeof (ipa6_conn_t): 2114 /* 2115 * Verify that both the source and destination addresses 2116 * are valid. 2117 */ 2118 ac6 = (ipa6_conn_t *)ucp; 2119 v6srcp = &ac6->ac6_laddr; 2120 v6dstp = &ac6->ac6_faddr; 2121 fport = ac6->ac6_fport; 2122 /* For raw socket, the local port is not set. */ 2123 lport = ac6->ac6_lport != 0 ? ac6->ac6_lport : 2124 connp->conn_lport; 2125 local_bind = B_FALSE; 2126 /* Always verify destination reachability. 
*/ 2127 verify_dst = B_TRUE; 2128 break; 2129 2130 case sizeof (ipa6_conn_x_t): 2131 /* 2132 * Verify that the source address is valid. 2133 */ 2134 acx6 = (ipa6_conn_x_t *)ucp; 2135 ac6 = &acx6->ac6x_conn; 2136 v6srcp = &ac6->ac6_laddr; 2137 v6dstp = &ac6->ac6_faddr; 2138 fport = ac6->ac6_fport; 2139 lport = ac6->ac6_lport; 2140 local_bind = B_FALSE; 2141 /* 2142 * Client that passed ipa6_conn_x_t to us specifies whether to 2143 * verify destination reachability. 2144 */ 2145 verify_dst = (acx6->ac6x_flags & ACX_VERIFY_DST) != 0; 2146 break; 2147 } 2148 if (local_bind) { 2149 error = ip_proto_bind_laddr_v6(connp, &mp->b_cont, protocol, 2150 v6srcp, lport, tbr->ADDR_length != IPV6_ADDR_LEN); 2151 } else { 2152 error = ip_proto_bind_connected_v6(connp, &mp->b_cont, protocol, 2153 v6srcp, lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2154 } 2155 2156 if (error == 0) { 2157 /* Send it home. */ 2158 mp->b_datap->db_type = M_PCPROTO; 2159 tbr->PRIM_type = T_BIND_ACK; 2160 return (mp); 2161 } 2162 2163 bad_addr: 2164 ASSERT(error != EINPROGRESS); 2165 if (error > 0) 2166 mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error); 2167 else 2168 mp = mi_tpi_err_ack_alloc(mp, TBADADDR, 0); 2169 return (mp); 2170 } 2171 2172 /* 2173 * Here address is verified to be a valid local address. 2174 * If the IRE_DB_REQ_TYPE mp is present, a multicast 2175 * address is also considered a valid local address. 2176 * In the case of a multicast address, however, the 2177 * upper protocol is expected to reset the src address 2178 * to 0 if it sees an ire with IN6_IS_ADDR_MULTICAST returned so that 2179 * no packets are emitted with multicast address as 2180 * source address. 2181 * The addresses valid for bind are: 2182 * (1) - in6addr_any 2183 * (2) - IP address of an UP interface 2184 * (3) - IP address of a DOWN interface 2185 * (4) - a multicast address. In this case 2186 * the conn will only receive packets destined to 2187 * the specified multicast address. 
Note: the 2188 * application still has to issue an 2189 * IPV6_JOIN_GROUP socket option. 2190 * 2191 * In all the above cases, the bound address must be valid in the current zone. 2192 * When the address is loopback or multicast, there might be many matching IREs 2193 * so bind has to look up based on the zone. 2194 */ 2195 /* 2196 * Verify the local IP address. Does not change the conn_t except 2197 * conn_fully_bound and conn_policy_cached. 2198 */ 2199 static int 2200 ip_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2201 const in6_addr_t *v6src, uint16_t lport, boolean_t fanout_insert) 2202 { 2203 int error = 0; 2204 ire_t *src_ire = NULL; 2205 zoneid_t zoneid; 2206 mblk_t *mp = NULL; 2207 boolean_t ire_requested; 2208 boolean_t ipsec_policy_set; 2209 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2210 2211 if (mpp) 2212 mp = *mpp; 2213 2214 ire_requested = (mp != NULL && DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2215 ipsec_policy_set = (mp != NULL && DB_TYPE(mp) == IPSEC_POLICY_SET); 2216 2217 /* 2218 * If it was previously connected, conn_fully_bound would have 2219 * been set. 2220 */ 2221 connp->conn_fully_bound = B_FALSE; 2222 2223 zoneid = IPCL_ZONEID(connp); 2224 2225 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2226 src_ire = ire_route_lookup_v6(v6src, 0, 0, 2227 0, NULL, NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2228 /* 2229 * If an address other than in6addr_any is requested, 2230 * we verify that it is a valid address for bind 2231 * Note: Following code is in if-else-if form for 2232 * readability compared to a condition check. 2233 */ 2234 ASSERT(src_ire == NULL || !(src_ire->ire_type & IRE_BROADCAST)); 2235 /* LINTED - statement has no consequent */ 2236 if (IRE_IS_LOCAL(src_ire)) { 2237 /* 2238 * (2) Bind to address of local UP interface 2239 */ 2240 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 2241 ipif_t *multi_ipif = NULL; 2242 ire_t *save_ire; 2243 /* 2244 * (4) bind to multicast address. 
2245 * Fake out the IRE returned to upper 2246 * layer to be a broadcast IRE in 2247 * ip_bind_insert_ire_v6(). 2248 * Pass other information that matches 2249 * the ipif (e.g. the source address). 2250 * conn_multicast_ill is only used for 2251 * IPv6 packets 2252 */ 2253 mutex_enter(&connp->conn_lock); 2254 if (connp->conn_multicast_ill != NULL) { 2255 (void) ipif_lookup_zoneid( 2256 connp->conn_multicast_ill, zoneid, 0, 2257 &multi_ipif); 2258 } else { 2259 /* 2260 * Look for default like 2261 * ip_wput_v6 2262 */ 2263 multi_ipif = ipif_lookup_group_v6( 2264 &ipv6_unspecified_group, zoneid, ipst); 2265 } 2266 mutex_exit(&connp->conn_lock); 2267 save_ire = src_ire; 2268 src_ire = NULL; 2269 if (multi_ipif == NULL || !ire_requested || 2270 (src_ire = ipif_to_ire_v6(multi_ipif)) == NULL) { 2271 src_ire = save_ire; 2272 error = EADDRNOTAVAIL; 2273 } else { 2274 ASSERT(src_ire != NULL); 2275 if (save_ire != NULL) 2276 ire_refrele(save_ire); 2277 } 2278 if (multi_ipif != NULL) 2279 ipif_refrele(multi_ipif); 2280 } else { 2281 if (!ip_addr_exists_v6(v6src, zoneid, ipst)) { 2282 /* 2283 * Not a valid address for bind 2284 */ 2285 error = EADDRNOTAVAIL; 2286 } 2287 } 2288 2289 if (error != 0) { 2290 /* Red Alert! Attempting to be a bogon! */ 2291 if (ip_debug > 2) { 2292 /* ip1dbg */ 2293 pr_addr_dbg("ip_bind_laddr_v6: bad src" 2294 " address %s\n", AF_INET6, v6src); 2295 } 2296 goto bad_addr; 2297 } 2298 } 2299 2300 /* 2301 * Allow setting new policies. For example, disconnects come 2302 * down as ipa_t bind. As we would have set conn_policy_cached 2303 * to B_TRUE before, we should set it to B_FALSE, so that policy 2304 * can change after the disconnect. 2305 */ 2306 connp->conn_policy_cached = B_FALSE; 2307 2308 /* If not fanout_insert this was just an address verification */ 2309 if (fanout_insert) { 2310 /* 2311 * The addresses have been verified. Time to insert in 2312 * the correct fanout list. 
2313 */ 2314 connp->conn_srcv6 = *v6src; 2315 connp->conn_remv6 = ipv6_all_zeros; 2316 connp->conn_lport = lport; 2317 connp->conn_fport = 0; 2318 error = ipcl_bind_insert_v6(connp, protocol, v6src, lport); 2319 } 2320 if (error == 0) { 2321 if (ire_requested) { 2322 if (!ip_bind_get_ire_v6(mpp, src_ire, v6src, NULL, 2323 ipst)) { 2324 error = -1; 2325 goto bad_addr; 2326 } 2327 mp = *mpp; 2328 } else if (ipsec_policy_set) { 2329 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2330 error = -1; 2331 goto bad_addr; 2332 } 2333 } 2334 } 2335 bad_addr: 2336 if (error != 0) { 2337 if (connp->conn_anon_port) { 2338 (void) tsol_mlp_anon(crgetzone(connp->conn_cred), 2339 connp->conn_mlp_type, connp->conn_ulp, ntohs(lport), 2340 B_FALSE); 2341 } 2342 connp->conn_mlp_type = mlptSingle; 2343 } 2344 2345 if (src_ire != NULL) 2346 ire_refrele(src_ire); 2347 2348 if (ipsec_policy_set) { 2349 ASSERT(mp != NULL); 2350 freeb(mp); 2351 /* 2352 * As of now assume that nothing else accompanies 2353 * IPSEC_POLICY_SET. 
2354 */ 2355 *mpp = NULL; 2356 } 2357 2358 return (error); 2359 } 2360 int 2361 ip_proto_bind_laddr_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2362 const in6_addr_t *v6srcp, uint16_t lport, boolean_t fanout_insert) 2363 { 2364 int error; 2365 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2366 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2367 2368 ASSERT(connp->conn_af_isv6); 2369 connp->conn_ulp = protocol; 2370 2371 if (IN6_IS_ADDR_V4MAPPED(v6srcp) && !connp->conn_ipv6_v6only) { 2372 /* Bind to IPv4 address */ 2373 ipaddr_t v4src; 2374 2375 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2376 2377 error = ip_bind_laddr_v4(connp, mpp, protocol, v4src, lport, 2378 fanout_insert); 2379 if (error != 0) 2380 goto bad_addr; 2381 connp->conn_pkt_isv6 = B_FALSE; 2382 } else { 2383 if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2384 error = 0; 2385 goto bad_addr; 2386 } 2387 error = ip_bind_laddr_v6(connp, mpp, protocol, v6srcp, 2388 lport, fanout_insert); 2389 if (error != 0) 2390 goto bad_addr; 2391 connp->conn_pkt_isv6 = B_TRUE; 2392 } 2393 2394 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2395 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2396 return (0); 2397 2398 bad_addr: 2399 if (error < 0) 2400 error = -TBADADDR; 2401 return (error); 2402 } 2403 2404 /* 2405 * Verify that both the source and destination addresses 2406 * are valid. If verify_dst, then destination address must also be reachable, 2407 * i.e. have a route. Protocols like TCP want this. Tunnels do not. 2408 * It takes ip6_pkt_t * as one of the arguments to determine correct 2409 * source address when IPV6_PKTINFO or scope_id is set along with a link-local 2410 * destination address. Note that parameter ipp is only useful for TCP connect 2411 * when scope_id is set or IPV6_PKTINFO option is set with an ifindex. For all 2412 * non-TCP cases, it is NULL and for all other tcp cases it is not useful. 
2413 * 2414 */ 2415 int 2416 ip_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2417 in6_addr_t *v6src, uint16_t lport, const in6_addr_t *v6dst, 2418 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2419 boolean_t verify_dst, cred_t *cr) 2420 { 2421 ire_t *src_ire; 2422 ire_t *dst_ire; 2423 int error = 0; 2424 ire_t *sire = NULL; 2425 ire_t *md_dst_ire = NULL; 2426 ill_t *md_ill = NULL; 2427 ill_t *dst_ill = NULL; 2428 ipif_t *src_ipif = NULL; 2429 zoneid_t zoneid; 2430 boolean_t ill_held = B_FALSE; 2431 mblk_t *mp = NULL; 2432 boolean_t ire_requested = B_FALSE; 2433 boolean_t ipsec_policy_set = B_FALSE; 2434 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2435 ts_label_t *tsl = NULL; 2436 cred_t *effective_cred = NULL; 2437 2438 if (mpp) 2439 mp = *mpp; 2440 2441 if (mp != NULL) { 2442 ire_requested = (DB_TYPE(mp) == IRE_DB_REQ_TYPE); 2443 ipsec_policy_set = (DB_TYPE(mp) == IPSEC_POLICY_SET); 2444 } 2445 2446 src_ire = dst_ire = NULL; 2447 /* 2448 * If we never got a disconnect before, clear it now. 2449 */ 2450 connp->conn_fully_bound = B_FALSE; 2451 2452 zoneid = connp->conn_zoneid; 2453 2454 /* 2455 * Check whether Trusted Solaris policy allows communication with this 2456 * host, and pretend that the destination is unreachable if not. 2457 * 2458 * This is never a problem for TCP, since that transport is known to 2459 * compute the label properly as part of the tcp_rput_other T_BIND_ACK 2460 * handling. If the remote is unreachable, it will be detected at that 2461 * point, so there's no reason to check it here. 2462 * 2463 * Note that for sendto (and other datagram-oriented friends), this 2464 * check is done as part of the data path label computation instead. 2465 * The check here is just to make non-TCP connect() report the right 2466 * error. 
2467 */ 2468 if (is_system_labeled() && !IPCL_IS_TCP(connp)) { 2469 if ((error = tsol_check_dest(cr, v6dst, IPV6_VERSION, 2470 connp->conn_mac_mode, &effective_cred)) != 0) { 2471 if (ip_debug > 2) { 2472 pr_addr_dbg( 2473 "ip_bind_connected: no label for dst %s\n", 2474 AF_INET6, v6dst); 2475 } 2476 goto bad_addr; 2477 } 2478 2479 /* 2480 * tsol_check_dest() may have created a new cred with 2481 * a modified security label. Use that cred if it exists 2482 * for ire lookups. 2483 */ 2484 if (effective_cred == NULL) { 2485 tsl = crgetlabel(cr); 2486 } else { 2487 tsl = crgetlabel(effective_cred); 2488 } 2489 } 2490 2491 if (IN6_IS_ADDR_MULTICAST(v6dst)) { 2492 ipif_t *ipif; 2493 2494 /* 2495 * Use an "emulated" IRE_BROADCAST to tell the transport it 2496 * is a multicast. 2497 * Pass other information that matches 2498 * the ipif (e.g. the source address). 2499 * 2500 * conn_multicast_ill is only used for IPv6 packets 2501 */ 2502 mutex_enter(&connp->conn_lock); 2503 if (connp->conn_multicast_ill != NULL) { 2504 (void) ipif_lookup_zoneid(connp->conn_multicast_ill, 2505 zoneid, 0, &ipif); 2506 } else { 2507 /* Look for default like ip_wput_v6 */ 2508 ipif = ipif_lookup_group_v6(v6dst, zoneid, ipst); 2509 } 2510 mutex_exit(&connp->conn_lock); 2511 if (ipif == NULL || ire_requested || 2512 (dst_ire = ipif_to_ire_v6(ipif)) == NULL) { 2513 if (ipif != NULL) 2514 ipif_refrele(ipif); 2515 if (ip_debug > 2) { 2516 /* ip1dbg */ 2517 pr_addr_dbg("ip_bind_connected_v6: bad " 2518 "connected multicast %s\n", AF_INET6, 2519 v6dst); 2520 } 2521 error = ENETUNREACH; 2522 goto bad_addr; 2523 } 2524 if (ipif != NULL) 2525 ipif_refrele(ipif); 2526 } else { 2527 dst_ire = ire_route_lookup_v6(v6dst, NULL, NULL, 0, 2528 NULL, &sire, zoneid, tsl, 2529 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2530 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR, 2531 ipst); 2532 /* 2533 * We also prevent ire's with src address INADDR_ANY to 2534 * be used, which are created temporarily for 2535 
* sending out packets from endpoints that have 2536 * conn_unspec_src set. 2537 */ 2538 if (dst_ire == NULL || 2539 (dst_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2540 IN6_IS_ADDR_UNSPECIFIED(&dst_ire->ire_src_addr_v6)) { 2541 /* 2542 * When verifying destination reachability, we always 2543 * complain. 2544 * 2545 * When not verifying destination reachability but we 2546 * found an IRE, i.e. the destination is reachable, 2547 * then the other tests still apply and we complain. 2548 */ 2549 if (verify_dst || (dst_ire != NULL)) { 2550 if (ip_debug > 2) { 2551 /* ip1dbg */ 2552 pr_addr_dbg("ip_bind_connected_v6: bad" 2553 " connected dst %s\n", AF_INET6, 2554 v6dst); 2555 } 2556 if (dst_ire == NULL || 2557 !(dst_ire->ire_type & IRE_HOST)) { 2558 error = ENETUNREACH; 2559 } else { 2560 error = EHOSTUNREACH; 2561 } 2562 goto bad_addr; 2563 } 2564 } 2565 } 2566 2567 /* 2568 * If the app does a connect(), it means that it will most likely 2569 * send more than 1 packet to the destination. It makes sense 2570 * to clear the temporary flag. 2571 */ 2572 if (dst_ire != NULL && dst_ire->ire_type == IRE_CACHE && 2573 (dst_ire->ire_marks & IRE_MARK_TEMPORARY)) { 2574 irb_t *irb = dst_ire->ire_bucket; 2575 2576 rw_enter(&irb->irb_lock, RW_WRITER); 2577 /* 2578 * We need to recheck for IRE_MARK_TEMPORARY after acquiring 2579 * the lock in order to guarantee irb_tmp_ire_cnt. 2580 */ 2581 if (dst_ire->ire_marks & IRE_MARK_TEMPORARY) { 2582 dst_ire->ire_marks &= ~IRE_MARK_TEMPORARY; 2583 irb->irb_tmp_ire_cnt--; 2584 } 2585 rw_exit(&irb->irb_lock); 2586 } 2587 2588 ASSERT(dst_ire == NULL || dst_ire->ire_ipversion == IPV6_VERSION); 2589 2590 /* 2591 * See if we should notify ULP about MDT; we do this whether or not 2592 * ire_requested is TRUE, in order to handle active connects; MDT 2593 * eligibility tests for passive connects are handled separately 2594 * through tcp_adapt_ire(). 
We do this before the source address 2595 * selection, because dst_ire may change after a call to 2596 * ipif_select_source_v6(). This is a best-effort check, as the 2597 * packet for this connection may not actually go through 2598 * dst_ire->ire_stq, and the exact IRE can only be known after 2599 * calling ip_newroute_v6(). This is why we further check on the 2600 * IRE during Multidata packet transmission in tcp_multisend(). 2601 */ 2602 if (ipst->ips_ip_multidata_outbound && !ipsec_policy_set && 2603 dst_ire != NULL && 2604 !(dst_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST)) && 2605 (md_ill = ire_to_ill(dst_ire), md_ill != NULL) && 2606 ILL_MDT_CAPABLE(md_ill)) { 2607 md_dst_ire = dst_ire; 2608 IRE_REFHOLD(md_dst_ire); 2609 } 2610 2611 if (dst_ire != NULL && 2612 dst_ire->ire_type == IRE_LOCAL && 2613 dst_ire->ire_zoneid != zoneid && 2614 dst_ire->ire_zoneid != ALL_ZONES) { 2615 src_ire = ire_ftable_lookup_v6(v6dst, 0, 0, 0, NULL, NULL, 2616 zoneid, 0, NULL, 2617 MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 2618 MATCH_IRE_RJ_BHOLE, ipst); 2619 if (src_ire == NULL) { 2620 error = EHOSTUNREACH; 2621 goto bad_addr; 2622 } else if (src_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2623 if (!(src_ire->ire_type & IRE_HOST)) 2624 error = ENETUNREACH; 2625 else 2626 error = EHOSTUNREACH; 2627 goto bad_addr; 2628 } 2629 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2630 src_ipif = src_ire->ire_ipif; 2631 ipif_refhold(src_ipif); 2632 *v6src = src_ipif->ipif_v6lcl_addr; 2633 } 2634 ire_refrele(src_ire); 2635 src_ire = NULL; 2636 } else if (IN6_IS_ADDR_UNSPECIFIED(v6src) && dst_ire != NULL) { 2637 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 2638 *v6src = sire->ire_src_addr_v6; 2639 ire_refrele(dst_ire); 2640 dst_ire = sire; 2641 sire = NULL; 2642 } else if (dst_ire->ire_type == IRE_CACHE && 2643 (dst_ire->ire_flags & RTF_SETSRC)) { 2644 ASSERT(dst_ire->ire_zoneid == zoneid || 2645 dst_ire->ire_zoneid == ALL_ZONES); 2646 *v6src = dst_ire->ire_src_addr_v6; 
2647 } else { 2648 /* 2649 * Pick a source address so that a proper inbound load 2650 * spreading would happen. Use dst_ill specified by the 2651 * app. when socket option or scopeid is set. 2652 */ 2653 int err; 2654 2655 if (ipp != NULL && ipp->ipp_ifindex != 0) { 2656 uint_t if_index; 2657 2658 /* 2659 * Scope id or IPV6_PKTINFO 2660 */ 2661 2662 if_index = ipp->ipp_ifindex; 2663 dst_ill = ill_lookup_on_ifindex( 2664 if_index, B_TRUE, NULL, NULL, NULL, NULL, 2665 ipst); 2666 if (dst_ill == NULL) { 2667 ip1dbg(("ip_bind_connected_v6:" 2668 " bad ifindex %d\n", if_index)); 2669 error = EADDRNOTAVAIL; 2670 goto bad_addr; 2671 } 2672 ill_held = B_TRUE; 2673 } else if (connp->conn_outgoing_ill != NULL) { 2674 /* 2675 * For IPV6_BOUND_IF socket option, 2676 * conn_outgoing_ill should be set 2677 * already in TCP or UDP/ICMP. 2678 */ 2679 dst_ill = conn_get_held_ill(connp, 2680 &connp->conn_outgoing_ill, &err); 2681 if (err == ILL_LOOKUP_FAILED) { 2682 ip1dbg(("ip_bind_connected_v6:" 2683 "no ill for bound_if\n")); 2684 error = EADDRNOTAVAIL; 2685 goto bad_addr; 2686 } 2687 ill_held = B_TRUE; 2688 } else if (dst_ire->ire_stq != NULL) { 2689 /* No need to hold ill here */ 2690 dst_ill = (ill_t *)dst_ire->ire_stq->q_ptr; 2691 } else { 2692 /* No need to hold ill here */ 2693 dst_ill = dst_ire->ire_ipif->ipif_ill; 2694 } 2695 if (ip6_asp_can_lookup(ipst)) { 2696 src_ipif = ipif_select_source_v6(dst_ill, 2697 v6dst, B_FALSE, connp->conn_src_preferences, 2698 zoneid); 2699 ip6_asp_table_refrele(ipst); 2700 if (src_ipif == NULL) { 2701 pr_addr_dbg("ip_bind_connected_v6: " 2702 "no usable source address for " 2703 "connection to %s\n", 2704 AF_INET6, v6dst); 2705 error = EADDRNOTAVAIL; 2706 goto bad_addr; 2707 } 2708 *v6src = src_ipif->ipif_v6lcl_addr; 2709 } else { 2710 error = EADDRNOTAVAIL; 2711 goto bad_addr; 2712 } 2713 } 2714 } 2715 2716 /* 2717 * We do ire_route_lookup_v6() here (and not an interface lookup) 2718 * as we assert that v6src should only come from an 2719 
* UP interface for hard binding. 2720 */ 2721 src_ire = ire_route_lookup_v6(v6src, 0, 0, 0, NULL, 2722 NULL, zoneid, NULL, MATCH_IRE_ZONEONLY, ipst); 2723 2724 /* src_ire must be a local|loopback */ 2725 if (!IRE_IS_LOCAL(src_ire)) { 2726 if (ip_debug > 2) { 2727 /* ip1dbg */ 2728 pr_addr_dbg("ip_bind_connected_v6: bad " 2729 "connected src %s\n", AF_INET6, v6src); 2730 } 2731 error = EADDRNOTAVAIL; 2732 goto bad_addr; 2733 } 2734 2735 /* 2736 * If the source address is a loopback address, the 2737 * destination had best be local or multicast. 2738 * The transports that can't handle multicast will reject 2739 * those addresses. 2740 */ 2741 if (src_ire->ire_type == IRE_LOOPBACK && 2742 !(IRE_IS_LOCAL(dst_ire) || IN6_IS_ADDR_MULTICAST(v6dst) || 2743 IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst))) { 2744 ip1dbg(("ip_bind_connected_v6: bad connected loopback\n")); 2745 error = -1; 2746 goto bad_addr; 2747 } 2748 /* 2749 * Allow setting new policies. For example, disconnects come 2750 * down as ipa_t bind. As we would have set conn_policy_cached 2751 * to B_TRUE before, we should set it to B_FALSE, so that policy 2752 * can change after the disconnect. 2753 */ 2754 connp->conn_policy_cached = B_FALSE; 2755 2756 /* 2757 * The addresses have been verified. Initialize the conn 2758 * before calling the policy as they expect the conns 2759 * initialized. 2760 */ 2761 connp->conn_srcv6 = *v6src; 2762 connp->conn_remv6 = *v6dst; 2763 connp->conn_lport = lport; 2764 connp->conn_fport = fport; 2765 2766 ASSERT(!(ipsec_policy_set && ire_requested)); 2767 if (ire_requested) { 2768 iulp_t *ulp_info = NULL; 2769 2770 /* 2771 * Note that sire will not be NULL if this is an off-link 2772 * connection and there is not cache for that dest yet. 2773 * 2774 * XXX Because of an existing bug, if there are multiple 2775 * default routes, the IRE returned now may not be the actual 2776 * default route used (default routes are chosen in a 2777 * round robin fashion). 
So if the metrics for different 2778 * default routes are different, we may return the wrong 2779 * metrics. This will not be a problem if the existing 2780 * bug is fixed. 2781 */ 2782 if (sire != NULL) 2783 ulp_info = &(sire->ire_uinfo); 2784 2785 if (!ip_bind_get_ire_v6(mpp, dst_ire, v6dst, ulp_info, 2786 ipst)) { 2787 error = -1; 2788 goto bad_addr; 2789 } 2790 } else if (ipsec_policy_set) { 2791 if (!ip_bind_ipsec_policy_set(connp, mp)) { 2792 error = -1; 2793 goto bad_addr; 2794 } 2795 } 2796 2797 /* 2798 * Cache IPsec policy in this conn. If we have per-socket policy, 2799 * we'll cache that. If we don't, we'll inherit global policy. 2800 * 2801 * We can't insert until the conn reflects the policy. Note that 2802 * conn_policy_cached is set by ipsec_conn_cache_policy() even for 2803 * connections where we don't have a policy. This is to prevent 2804 * global policy lookups in the inbound path. 2805 * 2806 * If we insert before we set conn_policy_cached, 2807 * CONN_INBOUND_POLICY_PRESENT_V6() check can still evaluate true 2808 * because global policy cound be non-empty. We normally call 2809 * ipsec_check_policy() for conn_policy_cached connections only if 2810 * conn_in_enforce_policy is set. But in this case, 2811 * conn_policy_cached can get set anytime since we made the 2812 * CONN_INBOUND_POLICY_PRESENT_V6() check and ipsec_check_policy() 2813 * is called, which will make the above assumption false. Thus, we 2814 * need to insert after we set conn_policy_cached. 2815 */ 2816 if ((error = ipsec_conn_cache_policy(connp, B_FALSE)) != 0) 2817 goto bad_addr; 2818 2819 /* If not fanout_insert this was just an address verification */ 2820 if (fanout_insert) { 2821 /* 2822 * The addresses have been verified. Time to insert in 2823 * the correct fanout list. 2824 */ 2825 error = ipcl_conn_insert_v6(connp, protocol, v6src, v6dst, 2826 connp->conn_ports, 2827 IPCL_IS_TCP(connp) ? 
connp->conn_tcp->tcp_bound_if : 0); 2828 } 2829 if (error == 0) { 2830 connp->conn_fully_bound = B_TRUE; 2831 /* 2832 * Our initial checks for MDT have passed; the IRE is not 2833 * LOCAL/LOOPBACK/BROADCAST, and the link layer seems to 2834 * be supporting MDT. Pass the IRE, IPC and ILL into 2835 * ip_mdinfo_return(), which performs further checks 2836 * against them and upon success, returns the MDT info 2837 * mblk which we will attach to the bind acknowledgment. 2838 */ 2839 if (md_dst_ire != NULL) { 2840 mblk_t *mdinfo_mp; 2841 2842 ASSERT(md_ill != NULL); 2843 ASSERT(md_ill->ill_mdt_capab != NULL); 2844 if ((mdinfo_mp = ip_mdinfo_return(md_dst_ire, connp, 2845 md_ill->ill_name, md_ill->ill_mdt_capab)) != NULL) { 2846 if (mp == NULL) { 2847 *mpp = mdinfo_mp; 2848 } else { 2849 linkb(mp, mdinfo_mp); 2850 } 2851 } 2852 } 2853 } 2854 bad_addr: 2855 if (ipsec_policy_set) { 2856 ASSERT(mp != NULL); 2857 freeb(mp); 2858 /* 2859 * As of now assume that nothing else accompanies 2860 * IPSEC_POLICY_SET. 
2861 */ 2862 *mpp = NULL; 2863 } 2864 refrele_and_quit: 2865 if (src_ire != NULL) 2866 IRE_REFRELE(src_ire); 2867 if (dst_ire != NULL) 2868 IRE_REFRELE(dst_ire); 2869 if (sire != NULL) 2870 IRE_REFRELE(sire); 2871 if (src_ipif != NULL) 2872 ipif_refrele(src_ipif); 2873 if (md_dst_ire != NULL) 2874 IRE_REFRELE(md_dst_ire); 2875 if (ill_held && dst_ill != NULL) 2876 ill_refrele(dst_ill); 2877 if (effective_cred != NULL) 2878 crfree(effective_cred); 2879 return (error); 2880 } 2881 2882 /* ARGSUSED */ 2883 int 2884 ip_proto_bind_connected_v6(conn_t *connp, mblk_t **mpp, uint8_t protocol, 2885 in6_addr_t *v6srcp, uint16_t lport, const in6_addr_t *v6dstp, 2886 ip6_pkt_t *ipp, uint16_t fport, boolean_t fanout_insert, 2887 boolean_t verify_dst, cred_t *cr) 2888 { 2889 int error = 0; 2890 boolean_t orig_pkt_isv6 = connp->conn_pkt_isv6; 2891 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2892 2893 ASSERT(connp->conn_af_isv6); 2894 connp->conn_ulp = protocol; 2895 2896 /* For raw socket, the local port is not set. */ 2897 lport = lport != 0 ? lport : connp->conn_lport; 2898 2899 /* 2900 * Bind to local and remote address. Local might be 2901 * unspecified in which case it will be extracted from 2902 * ire_src_addr_v6 2903 */ 2904 if (IN6_IS_ADDR_V4MAPPED(v6dstp) && !connp->conn_ipv6_v6only) { 2905 /* Connect to IPv4 address */ 2906 ipaddr_t v4src; 2907 ipaddr_t v4dst; 2908 2909 /* Is the source unspecified or mapped? */ 2910 if (!IN6_IS_ADDR_V4MAPPED(v6srcp) && 2911 !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 2912 ip1dbg(("ip_proto_bind_connected_v6: " 2913 "dst is mapped, but not the src\n")); 2914 goto bad_addr; 2915 } 2916 IN6_V4MAPPED_TO_IPADDR(v6srcp, v4src); 2917 IN6_V4MAPPED_TO_IPADDR(v6dstp, v4dst); 2918 2919 /* Always verify destination reachability. 
*/ 2920 error = ip_bind_connected_v4(connp, mpp, protocol, &v4src, 2921 lport, v4dst, fport, B_TRUE, B_TRUE, cr); 2922 if (error != 0) 2923 goto bad_addr; 2924 IN6_IPADDR_TO_V4MAPPED(v4src, v6srcp); 2925 connp->conn_pkt_isv6 = B_FALSE; 2926 } else if (IN6_IS_ADDR_V4MAPPED(v6srcp)) { 2927 ip1dbg(("ip_proto_bind_connected_v6: " 2928 "src is mapped, but not the dst\n")); 2929 goto bad_addr; 2930 } else { 2931 error = ip_bind_connected_v6(connp, mpp, protocol, v6srcp, 2932 lport, v6dstp, ipp, fport, B_TRUE, verify_dst, cr); 2933 if (error != 0) 2934 goto bad_addr; 2935 connp->conn_pkt_isv6 = B_TRUE; 2936 } 2937 2938 if (orig_pkt_isv6 != connp->conn_pkt_isv6) 2939 ip_setpktversion(connp, connp->conn_pkt_isv6, B_TRUE, ipst); 2940 2941 /* Send it home. */ 2942 return (0); 2943 2944 bad_addr: 2945 if (error == 0) 2946 error = -TBADADDR; 2947 return (error); 2948 } 2949 2950 /* 2951 * Get the ire in *mpp. Returns false if it fails (due to lack of space). 2952 * Makes the IRE be IRE_BROADCAST if dst is a multicast address. 2953 */ 2954 /* ARGSUSED4 */ 2955 static boolean_t 2956 ip_bind_get_ire_v6(mblk_t **mpp, ire_t *ire, const in6_addr_t *dst, 2957 iulp_t *ulp_info, ip_stack_t *ipst) 2958 { 2959 mblk_t *mp = *mpp; 2960 ire_t *ret_ire; 2961 2962 ASSERT(mp != NULL); 2963 2964 if (ire != NULL) { 2965 /* 2966 * mp initialized above to IRE_DB_REQ_TYPE 2967 * appended mblk. Its <upper protocol>'s 2968 * job to make sure there is room. 
2969 */ 2970 if ((mp->b_datap->db_lim - mp->b_rptr) < sizeof (ire_t)) 2971 return (B_FALSE); 2972 2973 mp->b_datap->db_type = IRE_DB_TYPE; 2974 mp->b_wptr = mp->b_rptr + sizeof (ire_t); 2975 bcopy(ire, mp->b_rptr, sizeof (ire_t)); 2976 ret_ire = (ire_t *)mp->b_rptr; 2977 if (IN6_IS_ADDR_MULTICAST(dst) || 2978 IN6_IS_ADDR_V4MAPPED_CLASSD(dst)) { 2979 ret_ire->ire_type = IRE_BROADCAST; 2980 ret_ire->ire_addr_v6 = *dst; 2981 } 2982 if (ulp_info != NULL) { 2983 bcopy(ulp_info, &(ret_ire->ire_uinfo), 2984 sizeof (iulp_t)); 2985 } 2986 ret_ire->ire_mp = mp; 2987 } else { 2988 /* 2989 * No IRE was found. Remove IRE mblk. 2990 */ 2991 *mpp = mp->b_cont; 2992 freeb(mp); 2993 } 2994 return (B_TRUE); 2995 } 2996 2997 /* 2998 * Add an ip6i_t header to the front of the mblk. 2999 * Inline if possible else allocate a separate mblk containing only the ip6i_t. 3000 * Returns NULL if allocation fails (and frees original message). 3001 * Used in outgoing path when going through ip_newroute_*v6(). 3002 * Used in incoming path to pass ifindex to transports. 3003 */ 3004 mblk_t * 3005 ip_add_info_v6(mblk_t *mp, ill_t *ill, const in6_addr_t *dst) 3006 { 3007 mblk_t *mp1; 3008 ip6i_t *ip6i; 3009 ip6_t *ip6h; 3010 3011 ip6h = (ip6_t *)mp->b_rptr; 3012 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3013 if ((uchar_t *)ip6i < mp->b_datap->db_base || 3014 mp->b_datap->db_ref > 1) { 3015 mp1 = allocb(sizeof (ip6i_t), BPRI_MED); 3016 if (mp1 == NULL) { 3017 freemsg(mp); 3018 return (NULL); 3019 } 3020 mp1->b_wptr = mp1->b_rptr = mp1->b_datap->db_lim; 3021 mp1->b_cont = mp; 3022 mp = mp1; 3023 ip6i = (ip6i_t *)(mp->b_rptr - sizeof (ip6i_t)); 3024 } 3025 mp->b_rptr = (uchar_t *)ip6i; 3026 ip6i->ip6i_vcf = ip6h->ip6_vcf; 3027 ip6i->ip6i_nxt = IPPROTO_RAW; 3028 if (ill != NULL) { 3029 ip6i->ip6i_flags = IP6I_IFINDEX; 3030 /* 3031 * If `ill' is in an IPMP group, make sure we use the IPMP 3032 * interface index so that e.g. 
IPV6_RECVPKTINFO will get the 3033 * IPMP interface index and not an underlying interface index. 3034 */ 3035 if (IS_UNDER_IPMP(ill)) 3036 ip6i->ip6i_ifindex = ipmp_ill_get_ipmp_ifindex(ill); 3037 else 3038 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 3039 } else { 3040 ip6i->ip6i_flags = 0; 3041 } 3042 ip6i->ip6i_nexthop = *dst; 3043 return (mp); 3044 } 3045 3046 /* 3047 * Handle protocols with which IP is less intimate. There 3048 * can be more than one stream bound to a particular 3049 * protocol. When this is the case, normally each one gets a copy 3050 * of any incoming packets. 3051 * 3052 * Zones notes: 3053 * Packets will be distributed to streams in all zones. This is really only 3054 * useful for ICMPv6 as only applications in the global zone can create raw 3055 * sockets for other protocols. 3056 */ 3057 static void 3058 ip_fanout_proto_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, 3059 ill_t *inill, uint8_t nexthdr, uint_t nexthdr_offset, uint_t flags, 3060 boolean_t mctl_present, zoneid_t zoneid) 3061 { 3062 queue_t *rq; 3063 mblk_t *mp1, *first_mp1; 3064 in6_addr_t dst = ip6h->ip6_dst; 3065 in6_addr_t src = ip6h->ip6_src; 3066 mblk_t *first_mp = mp; 3067 boolean_t secure, shared_addr; 3068 conn_t *connp, *first_connp, *next_connp; 3069 connf_t *connfp; 3070 ip_stack_t *ipst = inill->ill_ipst; 3071 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3072 3073 if (mctl_present) { 3074 mp = first_mp->b_cont; 3075 secure = ipsec_in_is_secure(first_mp); 3076 ASSERT(mp != NULL); 3077 } else { 3078 secure = B_FALSE; 3079 } 3080 3081 shared_addr = (zoneid == ALL_ZONES); 3082 if (shared_addr) { 3083 /* 3084 * We don't allow multilevel ports for raw IP, so no need to 3085 * check for that here. 
3086 */ 3087 zoneid = tsol_packet_to_zoneid(mp); 3088 } 3089 3090 connfp = &ipst->ips_ipcl_proto_fanout_v6[nexthdr]; 3091 mutex_enter(&connfp->connf_lock); 3092 connp = connfp->connf_head; 3093 for (connp = connfp->connf_head; connp != NULL; 3094 connp = connp->conn_next) { 3095 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, flags, 3096 zoneid) && 3097 (!is_system_labeled() || 3098 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3099 connp))) 3100 break; 3101 } 3102 3103 if (connp == NULL) { 3104 /* 3105 * No one bound to this port. Is 3106 * there a client that wants all 3107 * unclaimed datagrams? 3108 */ 3109 mutex_exit(&connfp->connf_lock); 3110 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3111 ICMP6_PARAM_PROB, ICMP6_PARAMPROB_NEXTHEADER, 3112 nexthdr_offset, mctl_present, zoneid, ipst)) { 3113 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 3114 } 3115 3116 return; 3117 } 3118 3119 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_upq != NULL); 3120 3121 CONN_INC_REF(connp); 3122 first_connp = connp; 3123 3124 /* 3125 * XXX: Fix the multiple protocol listeners case. We should not 3126 * be walking the conn->next list here. 3127 */ 3128 connp = connp->conn_next; 3129 for (;;) { 3130 while (connp != NULL) { 3131 if (IPCL_PROTO_MATCH_V6(connp, nexthdr, ip6h, ill, 3132 flags, zoneid) && 3133 (!is_system_labeled() || 3134 tsol_receive_local(mp, &dst, IPV6_VERSION, 3135 shared_addr, connp))) 3136 break; 3137 connp = connp->conn_next; 3138 } 3139 3140 /* 3141 * Just copy the data part alone. The mctl part is 3142 * needed just for verifying policy and it is never 3143 * sent up. 3144 */ 3145 if (connp == NULL || 3146 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3147 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3148 /* 3149 * No more intested clients or memory 3150 * allocation failed 3151 */ 3152 connp = first_connp; 3153 break; 3154 } 3155 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 3156 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3157 CONN_INC_REF(connp); 3158 mutex_exit(&connfp->connf_lock); 3159 rq = connp->conn_rq; 3160 /* 3161 * For link-local always add ifindex so that transport can set 3162 * sin6_scope_id. Avoid it for ICMP error fanout. 3163 */ 3164 if ((connp->conn_ip_recvpktinfo || 3165 IN6_IS_ADDR_LINKLOCAL(&src)) && 3166 (flags & IP_FF_IPINFO)) { 3167 /* Add header */ 3168 mp1 = ip_add_info_v6(mp1, inill, &dst); 3169 } 3170 if (mp1 == NULL) { 3171 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3172 } else if ( 3173 (IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3174 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3175 if (flags & IP_FF_RAWIP) { 3176 BUMP_MIB(ill->ill_ip_mib, 3177 rawipIfStatsInOverflows); 3178 } else { 3179 BUMP_MIB(ill->ill_icmp6_mib, 3180 ipv6IfIcmpInOverflows); 3181 } 3182 3183 freemsg(mp1); 3184 } else { 3185 ASSERT(!IPCL_IS_IPTUN(connp)); 3186 3187 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 3188 secure) { 3189 first_mp1 = ipsec_check_inbound_policy( 3190 first_mp1, connp, NULL, ip6h, mctl_present); 3191 } 3192 if (first_mp1 != NULL) { 3193 if (mctl_present) 3194 freeb(first_mp1); 3195 BUMP_MIB(ill->ill_ip_mib, 3196 ipIfStatsHCInDelivers); 3197 (connp->conn_recv)(connp, mp1, NULL); 3198 } 3199 } 3200 mutex_enter(&connfp->connf_lock); 3201 /* Follow the next pointer before releasing the conn. */ 3202 next_connp = connp->conn_next; 3203 CONN_DEC_REF(connp); 3204 connp = next_connp; 3205 } 3206 3207 /* Last one. Send it upstream. 
*/ 3208 mutex_exit(&connfp->connf_lock); 3209 3210 /* Initiate IPPF processing */ 3211 if (IP6_IN_IPP(flags, ipst)) { 3212 uint_t ifindex; 3213 3214 mutex_enter(&ill->ill_lock); 3215 ifindex = ill->ill_phyint->phyint_ifindex; 3216 mutex_exit(&ill->ill_lock); 3217 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3218 if (mp == NULL) { 3219 CONN_DEC_REF(connp); 3220 if (mctl_present) 3221 freeb(first_mp); 3222 return; 3223 } 3224 } 3225 3226 /* 3227 * For link-local always add ifindex so that transport can set 3228 * sin6_scope_id. Avoid it for ICMP error fanout. 3229 */ 3230 if ((connp->conn_ip_recvpktinfo || IN6_IS_ADDR_LINKLOCAL(&src)) && 3231 (flags & IP_FF_IPINFO)) { 3232 /* Add header */ 3233 mp = ip_add_info_v6(mp, inill, &dst); 3234 if (mp == NULL) { 3235 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3236 CONN_DEC_REF(connp); 3237 if (mctl_present) 3238 freeb(first_mp); 3239 return; 3240 } else if (mctl_present) { 3241 first_mp->b_cont = mp; 3242 } else { 3243 first_mp = mp; 3244 } 3245 } 3246 3247 rq = connp->conn_rq; 3248 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3249 (!IPCL_IS_NONSTR(connp) && !canputnext(rq))) { 3250 3251 if (flags & IP_FF_RAWIP) { 3252 BUMP_MIB(ill->ill_ip_mib, rawipIfStatsInOverflows); 3253 } else { 3254 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInOverflows); 3255 } 3256 3257 freemsg(first_mp); 3258 } else { 3259 ASSERT(!IPCL_IS_IPTUN(connp)); 3260 3261 if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || secure) { 3262 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3263 NULL, ip6h, mctl_present); 3264 if (first_mp == NULL) { 3265 CONN_DEC_REF(connp); 3266 return; 3267 } 3268 } 3269 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3270 (connp->conn_recv)(connp, mp, NULL); 3271 if (mctl_present) 3272 freeb(first_mp); 3273 } 3274 CONN_DEC_REF(connp); 3275 } 3276 3277 /* 3278 * Send an ICMP error after patching up the packet appropriately. Returns 3279 * non-zero if the appropriate MIB should be bumped; zero otherwise. 
3280 */ 3281 int 3282 ip_fanout_send_icmp_v6(queue_t *q, mblk_t *mp, uint_t flags, 3283 uint_t icmp_type, uint8_t icmp_code, uint_t nexthdr_offset, 3284 boolean_t mctl_present, zoneid_t zoneid, ip_stack_t *ipst) 3285 { 3286 ip6_t *ip6h; 3287 mblk_t *first_mp; 3288 boolean_t secure; 3289 unsigned char db_type; 3290 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3291 3292 first_mp = mp; 3293 if (mctl_present) { 3294 mp = mp->b_cont; 3295 secure = ipsec_in_is_secure(first_mp); 3296 ASSERT(mp != NULL); 3297 } else { 3298 /* 3299 * If this is an ICMP error being reported - which goes 3300 * up as M_CTLs, we need to convert them to M_DATA till 3301 * we finish checking with global policy because 3302 * ipsec_check_global_policy() assumes M_DATA as clear 3303 * and M_CTL as secure. 3304 */ 3305 db_type = mp->b_datap->db_type; 3306 mp->b_datap->db_type = M_DATA; 3307 secure = B_FALSE; 3308 } 3309 /* 3310 * We are generating an icmp error for some inbound packet. 3311 * Called from all ip_fanout_(udp, tcp, proto) functions. 3312 * Before we generate an error, check with global policy 3313 * to see whether this is allowed to enter the system. As 3314 * there is no "conn", we are checking with global policy. 
3315 */ 3316 ip6h = (ip6_t *)mp->b_rptr; 3317 if (secure || ipss->ipsec_inbound_v6_policy_present) { 3318 first_mp = ipsec_check_global_policy(first_mp, NULL, 3319 NULL, ip6h, mctl_present, ipst->ips_netstack); 3320 if (first_mp == NULL) 3321 return (0); 3322 } 3323 3324 if (!mctl_present) 3325 mp->b_datap->db_type = db_type; 3326 3327 if (flags & IP_FF_SEND_ICMP) { 3328 if (flags & IP_FF_HDR_COMPLETE) { 3329 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 3330 freemsg(first_mp); 3331 return (1); 3332 } 3333 } 3334 switch (icmp_type) { 3335 case ICMP6_DST_UNREACH: 3336 icmp_unreachable_v6(WR(q), first_mp, icmp_code, 3337 B_FALSE, B_FALSE, zoneid, ipst); 3338 break; 3339 case ICMP6_PARAM_PROB: 3340 icmp_param_problem_v6(WR(q), first_mp, icmp_code, 3341 nexthdr_offset, B_FALSE, B_FALSE, zoneid, ipst); 3342 break; 3343 default: 3344 #ifdef DEBUG 3345 panic("ip_fanout_send_icmp_v6: wrong type"); 3346 /*NOTREACHED*/ 3347 #else 3348 freemsg(first_mp); 3349 break; 3350 #endif 3351 } 3352 } else { 3353 freemsg(first_mp); 3354 return (0); 3355 } 3356 3357 return (1); 3358 } 3359 3360 /* 3361 * Fanout for TCP packets 3362 * The caller puts <fport, lport> in the ports parameter. 3363 */ 3364 static void 3365 ip_fanout_tcp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, ill_t *ill, ill_t *inill, 3366 uint_t flags, uint_t hdr_len, boolean_t mctl_present, zoneid_t zoneid) 3367 { 3368 mblk_t *first_mp; 3369 boolean_t secure; 3370 conn_t *connp; 3371 tcph_t *tcph; 3372 boolean_t syn_present = B_FALSE; 3373 ip_stack_t *ipst = inill->ill_ipst; 3374 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3375 3376 first_mp = mp; 3377 if (mctl_present) { 3378 mp = first_mp->b_cont; 3379 secure = ipsec_in_is_secure(first_mp); 3380 ASSERT(mp != NULL); 3381 } else { 3382 secure = B_FALSE; 3383 } 3384 3385 connp = ipcl_classify_v6(mp, IPPROTO_TCP, hdr_len, zoneid, ipst); 3386 3387 if (connp == NULL || 3388 !conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid)) { 3389 /* 3390 * No hard-bound match. 
Send Reset. 3391 */ 3392 dblk_t *dp = mp->b_datap; 3393 uint32_t ill_index; 3394 3395 ASSERT((dp->db_struioflag & STRUIO_IP) == 0); 3396 3397 /* Initiate IPPf processing, if needed. */ 3398 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 3399 (flags & IP6_NO_IPPOLICY)) { 3400 ill_index = ill->ill_phyint->phyint_ifindex; 3401 ip_process(IPP_LOCAL_IN, &first_mp, ill_index); 3402 if (first_mp == NULL) { 3403 if (connp != NULL) 3404 CONN_DEC_REF(connp); 3405 return; 3406 } 3407 } 3408 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3409 if (connp != NULL) { 3410 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3411 ipst->ips_netstack->netstack_tcp, connp); 3412 CONN_DEC_REF(connp); 3413 } else { 3414 tcp_xmit_listeners_reset(first_mp, hdr_len, zoneid, 3415 ipst->ips_netstack->netstack_tcp, NULL); 3416 } 3417 3418 return; 3419 } 3420 3421 tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3422 if ((tcph->th_flags[0] & (TH_SYN|TH_ACK|TH_RST|TH_URG)) == TH_SYN) { 3423 if (IPCL_IS_TCP(connp)) { 3424 squeue_t *sqp; 3425 3426 /* 3427 * If the queue belongs to a conn, and fused tcp 3428 * loopback is enabled, assign the eager's squeue 3429 * to be that of the active connect's. 3430 */ 3431 if ((flags & IP_FF_LOOPBACK) && do_tcp_fusion && 3432 CONN_Q(q) && IPCL_IS_TCP(Q_TO_CONN(q)) && 3433 !CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) && 3434 !secure && 3435 !IP6_IN_IPP(flags, ipst)) { 3436 ASSERT(Q_TO_CONN(q)->conn_sqp != NULL); 3437 sqp = Q_TO_CONN(q)->conn_sqp; 3438 } else { 3439 sqp = IP_SQUEUE_GET(lbolt); 3440 } 3441 3442 mp->b_datap->db_struioflag |= STRUIO_EAGER; 3443 DB_CKSUMSTART(mp) = (intptr_t)sqp; 3444 3445 /* 3446 * db_cksumstuff is unused in the incoming 3447 * path; Thus store the ifindex here. It will 3448 * be cleared in tcp_conn_create_v6(). 
3449 */ 3450 DB_CKSUMSTUFF(mp) = 3451 (intptr_t)ill->ill_phyint->phyint_ifindex; 3452 syn_present = B_TRUE; 3453 } 3454 } 3455 3456 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp) && !syn_present) { 3457 uint_t flags = (unsigned int)tcph->th_flags[0] & 0xFF; 3458 if ((flags & TH_RST) || (flags & TH_URG)) { 3459 CONN_DEC_REF(connp); 3460 freemsg(first_mp); 3461 return; 3462 } 3463 if (flags & TH_ACK) { 3464 ip_xmit_reset_serialize(first_mp, hdr_len, zoneid, 3465 ipst->ips_netstack->netstack_tcp, connp); 3466 CONN_DEC_REF(connp); 3467 return; 3468 } 3469 3470 CONN_DEC_REF(connp); 3471 freemsg(first_mp); 3472 return; 3473 } 3474 3475 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3476 first_mp = ipsec_check_inbound_policy(first_mp, connp, 3477 NULL, ip6h, mctl_present); 3478 if (first_mp == NULL) { 3479 CONN_DEC_REF(connp); 3480 return; 3481 } 3482 if (IPCL_IS_TCP(connp) && IPCL_IS_BOUND(connp)) { 3483 ASSERT(syn_present); 3484 if (mctl_present) { 3485 ASSERT(first_mp != mp); 3486 first_mp->b_datap->db_struioflag |= 3487 STRUIO_POLICY; 3488 } else { 3489 ASSERT(first_mp == mp); 3490 mp->b_datap->db_struioflag &= 3491 ~STRUIO_EAGER; 3492 mp->b_datap->db_struioflag |= 3493 STRUIO_POLICY; 3494 } 3495 } else { 3496 /* 3497 * Discard first_mp early since we're dealing with a 3498 * fully-connected conn_t and tcp doesn't do policy in 3499 * this case. Also, if someone is bound to IPPROTO_TCP 3500 * over raw IP, they don't expect to see a M_CTL. 
3501 */ 3502 if (mctl_present) { 3503 freeb(first_mp); 3504 mctl_present = B_FALSE; 3505 } 3506 first_mp = mp; 3507 } 3508 } 3509 3510 /* Initiate IPPF processing */ 3511 if (IP6_IN_IPP(flags, ipst)) { 3512 uint_t ifindex; 3513 3514 mutex_enter(&ill->ill_lock); 3515 ifindex = ill->ill_phyint->phyint_ifindex; 3516 mutex_exit(&ill->ill_lock); 3517 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3518 if (mp == NULL) { 3519 CONN_DEC_REF(connp); 3520 if (mctl_present) { 3521 freeb(first_mp); 3522 } 3523 return; 3524 } else if (mctl_present) { 3525 /* 3526 * ip_add_info_v6 might return a new mp. 3527 */ 3528 ASSERT(first_mp != mp); 3529 first_mp->b_cont = mp; 3530 } else { 3531 first_mp = mp; 3532 } 3533 } 3534 3535 /* 3536 * For link-local always add ifindex so that TCP can bind to that 3537 * interface. Avoid it for ICMP error fanout. 3538 */ 3539 if (!syn_present && ((connp->conn_ip_recvpktinfo || 3540 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) && 3541 (flags & IP_FF_IPINFO))) { 3542 /* Add header */ 3543 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 3544 if (mp == NULL) { 3545 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3546 CONN_DEC_REF(connp); 3547 if (mctl_present) 3548 freeb(first_mp); 3549 return; 3550 } else if (mctl_present) { 3551 ASSERT(first_mp != mp); 3552 first_mp->b_cont = mp; 3553 } else { 3554 first_mp = mp; 3555 } 3556 } 3557 3558 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3559 if (IPCL_IS_TCP(connp)) { 3560 SQUEUE_ENTER_ONE(connp->conn_sqp, first_mp, connp->conn_recv, 3561 connp, ip_squeue_flag, SQTAG_IP6_TCP_INPUT); 3562 } else { 3563 /* SOCK_RAW, IPPROTO_TCP case */ 3564 (connp->conn_recv)(connp, first_mp, NULL); 3565 CONN_DEC_REF(connp); 3566 } 3567 } 3568 3569 /* 3570 * Fanout for UDP packets. 3571 * The caller puts <fport, lport> in the ports parameter. 3572 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 
3573 * 3574 * If SO_REUSEADDR is set all multicast and broadcast packets 3575 * will be delivered to all streams bound to the same port. 3576 * 3577 * Zones notes: 3578 * Multicast packets will be distributed to streams in all zones. 3579 */ 3580 static void 3581 ip_fanout_udp_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h, uint32_t ports, 3582 ill_t *ill, ill_t *inill, uint_t flags, boolean_t mctl_present, 3583 zoneid_t zoneid) 3584 { 3585 uint32_t dstport, srcport; 3586 in6_addr_t dst; 3587 mblk_t *first_mp; 3588 boolean_t secure; 3589 conn_t *connp; 3590 connf_t *connfp; 3591 conn_t *first_conn; 3592 conn_t *next_conn; 3593 mblk_t *mp1, *first_mp1; 3594 in6_addr_t src; 3595 boolean_t shared_addr; 3596 ip_stack_t *ipst = inill->ill_ipst; 3597 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3598 3599 first_mp = mp; 3600 if (mctl_present) { 3601 mp = first_mp->b_cont; 3602 secure = ipsec_in_is_secure(first_mp); 3603 ASSERT(mp != NULL); 3604 } else { 3605 secure = B_FALSE; 3606 } 3607 3608 /* Extract ports in net byte order */ 3609 dstport = htons(ntohl(ports) & 0xFFFF); 3610 srcport = htons(ntohl(ports) >> 16); 3611 dst = ip6h->ip6_dst; 3612 src = ip6h->ip6_src; 3613 3614 shared_addr = (zoneid == ALL_ZONES); 3615 if (shared_addr) { 3616 /* 3617 * No need to handle exclusive-stack zones since ALL_ZONES 3618 * only applies to the shared stack. 3619 */ 3620 zoneid = tsol_mlp_findzone(IPPROTO_UDP, dstport); 3621 /* 3622 * If no shared MLP is found, tsol_mlp_findzone returns 3623 * ALL_ZONES. In that case, we assume it's SLP, and 3624 * search for the zone based on the packet label. 3625 * That will also return ALL_ZONES on failure, but 3626 * we never allow conn_zoneid to be set to ALL_ZONES. 3627 */ 3628 if (zoneid == ALL_ZONES) 3629 zoneid = tsol_packet_to_zoneid(mp); 3630 } 3631 3632 /* Attempt to find a client stream based on destination port. 
*/ 3633 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(dstport, ipst)]; 3634 mutex_enter(&connfp->connf_lock); 3635 connp = connfp->connf_head; 3636 if (!IN6_IS_ADDR_MULTICAST(&dst)) { 3637 /* 3638 * Not multicast. Send to the one (first) client we find. 3639 */ 3640 while (connp != NULL) { 3641 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3642 src) && IPCL_ZONE_MATCH(connp, zoneid) && 3643 conn_wantpacket_v6(connp, ill, ip6h, 3644 flags, zoneid)) { 3645 break; 3646 } 3647 connp = connp->conn_next; 3648 } 3649 if (connp == NULL || connp->conn_upq == NULL) 3650 goto notfound; 3651 3652 if (is_system_labeled() && 3653 !tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3654 connp)) 3655 goto notfound; 3656 3657 /* Found a client */ 3658 CONN_INC_REF(connp); 3659 mutex_exit(&connfp->connf_lock); 3660 3661 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3662 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3663 freemsg(first_mp); 3664 CONN_DEC_REF(connp); 3665 return; 3666 } 3667 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3668 first_mp = ipsec_check_inbound_policy(first_mp, 3669 connp, NULL, ip6h, mctl_present); 3670 if (first_mp == NULL) { 3671 CONN_DEC_REF(connp); 3672 return; 3673 } 3674 } 3675 /* Initiate IPPF processing */ 3676 if (IP6_IN_IPP(flags, ipst)) { 3677 uint_t ifindex; 3678 3679 mutex_enter(&ill->ill_lock); 3680 ifindex = ill->ill_phyint->phyint_ifindex; 3681 mutex_exit(&ill->ill_lock); 3682 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3683 if (mp == NULL) { 3684 CONN_DEC_REF(connp); 3685 if (mctl_present) 3686 freeb(first_mp); 3687 return; 3688 } 3689 } 3690 /* 3691 * For link-local always add ifindex so that 3692 * transport can set sin6_scope_id. Avoid it for 3693 * ICMP error fanout. 
3694 */ 3695 if ((connp->conn_ip_recvpktinfo || 3696 IN6_IS_ADDR_LINKLOCAL(&src)) && 3697 (flags & IP_FF_IPINFO)) { 3698 /* Add header */ 3699 mp = ip_add_info_v6(mp, inill, &dst); 3700 if (mp == NULL) { 3701 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3702 CONN_DEC_REF(connp); 3703 if (mctl_present) 3704 freeb(first_mp); 3705 return; 3706 } else if (mctl_present) { 3707 first_mp->b_cont = mp; 3708 } else { 3709 first_mp = mp; 3710 } 3711 } 3712 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3713 3714 /* Send it upstream */ 3715 (connp->conn_recv)(connp, mp, NULL); 3716 3717 IP6_STAT(ipst, ip6_udp_fannorm); 3718 CONN_DEC_REF(connp); 3719 if (mctl_present) 3720 freeb(first_mp); 3721 return; 3722 } 3723 3724 while (connp != NULL) { 3725 if ((IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, src)) && 3726 conn_wantpacket_v6(connp, ill, ip6h, flags, zoneid) && 3727 (!is_system_labeled() || 3728 tsol_receive_local(mp, &dst, IPV6_VERSION, shared_addr, 3729 connp))) 3730 break; 3731 connp = connp->conn_next; 3732 } 3733 3734 if (connp == NULL || connp->conn_upq == NULL) 3735 goto notfound; 3736 3737 first_conn = connp; 3738 3739 CONN_INC_REF(connp); 3740 connp = connp->conn_next; 3741 for (;;) { 3742 while (connp != NULL) { 3743 if (IPCL_UDP_MATCH_V6(connp, dstport, dst, srcport, 3744 src) && conn_wantpacket_v6(connp, ill, ip6h, 3745 flags, zoneid) && 3746 (!is_system_labeled() || 3747 tsol_receive_local(mp, &dst, IPV6_VERSION, 3748 shared_addr, connp))) 3749 break; 3750 connp = connp->conn_next; 3751 } 3752 /* 3753 * Just copy the data part alone. The mctl part is 3754 * needed just for verifying policy and it is never 3755 * sent up. 3756 */ 3757 if (connp == NULL || 3758 (((first_mp1 = dupmsg(first_mp)) == NULL) && 3759 ((first_mp1 = ip_copymsg(first_mp)) == NULL))) { 3760 /* 3761 * No more interested clients or memory 3762 * allocation failed 3763 */ 3764 connp = first_conn; 3765 break; 3766 } 3767 mp1 = mctl_present ? 
first_mp1->b_cont : first_mp1; 3768 CONN_INC_REF(connp); 3769 mutex_exit(&connfp->connf_lock); 3770 /* 3771 * For link-local always add ifindex so that transport 3772 * can set sin6_scope_id. Avoid it for ICMP error 3773 * fanout. 3774 */ 3775 if ((connp->conn_ip_recvpktinfo || 3776 IN6_IS_ADDR_LINKLOCAL(&src)) && 3777 (flags & IP_FF_IPINFO)) { 3778 /* Add header */ 3779 mp1 = ip_add_info_v6(mp1, inill, &dst); 3780 } 3781 /* mp1 could have changed */ 3782 if (mctl_present) 3783 first_mp1->b_cont = mp1; 3784 else 3785 first_mp1 = mp1; 3786 if (mp1 == NULL) { 3787 if (mctl_present) 3788 freeb(first_mp1); 3789 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3790 goto next_one; 3791 } 3792 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3793 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3794 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3795 freemsg(first_mp1); 3796 goto next_one; 3797 } 3798 3799 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3800 first_mp1 = ipsec_check_inbound_policy 3801 (first_mp1, connp, NULL, ip6h, 3802 mctl_present); 3803 } 3804 if (first_mp1 != NULL) { 3805 if (mctl_present) 3806 freeb(first_mp1); 3807 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3808 3809 /* Send it upstream */ 3810 (connp->conn_recv)(connp, mp1, NULL); 3811 } 3812 next_one: 3813 mutex_enter(&connfp->connf_lock); 3814 /* Follow the next pointer before releasing the conn. */ 3815 next_conn = connp->conn_next; 3816 IP6_STAT(ipst, ip6_udp_fanmb); 3817 CONN_DEC_REF(connp); 3818 connp = next_conn; 3819 } 3820 3821 /* Last one. Send it upstream. 
*/ 3822 mutex_exit(&connfp->connf_lock); 3823 3824 /* Initiate IPPF processing */ 3825 if (IP6_IN_IPP(flags, ipst)) { 3826 uint_t ifindex; 3827 3828 mutex_enter(&ill->ill_lock); 3829 ifindex = ill->ill_phyint->phyint_ifindex; 3830 mutex_exit(&ill->ill_lock); 3831 ip_process(IPP_LOCAL_IN, &mp, ifindex); 3832 if (mp == NULL) { 3833 CONN_DEC_REF(connp); 3834 if (mctl_present) { 3835 freeb(first_mp); 3836 } 3837 return; 3838 } 3839 } 3840 3841 /* 3842 * For link-local always add ifindex so that transport can set 3843 * sin6_scope_id. Avoid it for ICMP error fanout. 3844 */ 3845 if ((connp->conn_ip_recvpktinfo || 3846 IN6_IS_ADDR_LINKLOCAL(&src)) && (flags & IP_FF_IPINFO)) { 3847 /* Add header */ 3848 mp = ip_add_info_v6(mp, inill, &dst); 3849 if (mp == NULL) { 3850 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3851 CONN_DEC_REF(connp); 3852 if (mctl_present) 3853 freeb(first_mp); 3854 return; 3855 } else if (mctl_present) { 3856 first_mp->b_cont = mp; 3857 } else { 3858 first_mp = mp; 3859 } 3860 } 3861 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 3862 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 3863 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 3864 freemsg(mp); 3865 } else { 3866 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || secure) { 3867 first_mp = ipsec_check_inbound_policy(first_mp, 3868 connp, NULL, ip6h, mctl_present); 3869 if (first_mp == NULL) { 3870 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3871 CONN_DEC_REF(connp); 3872 return; 3873 } 3874 } 3875 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3876 3877 /* Send it upstream */ 3878 (connp->conn_recv)(connp, mp, NULL); 3879 } 3880 IP6_STAT(ipst, ip6_udp_fanmb); 3881 CONN_DEC_REF(connp); 3882 if (mctl_present) 3883 freeb(first_mp); 3884 return; 3885 3886 notfound: 3887 mutex_exit(&connfp->connf_lock); 3888 /* 3889 * No one bound to this port. Is 3890 * there a client that wants all 3891 * unclaimed datagrams? 
3892 */ 3893 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 3894 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, IPPROTO_UDP, 3895 0, flags | IP_FF_RAWIP | IP_FF_IPINFO, mctl_present, 3896 zoneid); 3897 } else { 3898 if (ip_fanout_send_icmp_v6(q, first_mp, flags, 3899 ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0, 3900 mctl_present, zoneid, ipst)) { 3901 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 3902 } 3903 } 3904 } 3905 3906 /* 3907 * int ip_find_hdr_v6() 3908 * 3909 * This routine is used by the upper layer protocols and the IP tunnel 3910 * module to: 3911 * - Set extension header pointers to appropriate locations 3912 * - Determine IPv6 header length and return it 3913 * - Return a pointer to the last nexthdr value 3914 * 3915 * The caller must initialize ipp_fields. 3916 * 3917 * NOTE: If multiple extension headers of the same type are present, 3918 * ip_find_hdr_v6() will set the respective extension header pointers 3919 * to the first one that it encounters in the IPv6 header. It also 3920 * skips fragment headers. This routine deals with malformed packets 3921 * of various sorts in which case the returned length is up to the 3922 * malformed part. 3923 */ 3924 int 3925 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip6_pkt_t *ipp, uint8_t *nexthdrp) 3926 { 3927 uint_t length, ehdrlen; 3928 uint8_t nexthdr; 3929 uint8_t *whereptr, *endptr; 3930 ip6_dest_t *tmpdstopts; 3931 ip6_rthdr_t *tmprthdr; 3932 ip6_hbh_t *tmphopopts; 3933 ip6_frag_t *tmpfraghdr; 3934 3935 length = IPV6_HDR_LEN; 3936 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3937 endptr = mp->b_wptr; 3938 3939 nexthdr = ip6h->ip6_nxt; 3940 while (whereptr < endptr) { 3941 /* Is there enough left for len + nexthdr? 
*/ 3942 if (whereptr + MIN_EHDR_LEN > endptr) 3943 goto done; 3944 3945 switch (nexthdr) { 3946 case IPPROTO_HOPOPTS: 3947 tmphopopts = (ip6_hbh_t *)whereptr; 3948 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 3949 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 3950 goto done; 3951 nexthdr = tmphopopts->ip6h_nxt; 3952 /* return only 1st hbh */ 3953 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) { 3954 ipp->ipp_fields |= IPPF_HOPOPTS; 3955 ipp->ipp_hopopts = tmphopopts; 3956 ipp->ipp_hopoptslen = ehdrlen; 3957 } 3958 break; 3959 case IPPROTO_DSTOPTS: 3960 tmpdstopts = (ip6_dest_t *)whereptr; 3961 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 3962 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 3963 goto done; 3964 nexthdr = tmpdstopts->ip6d_nxt; 3965 /* 3966 * ipp_dstopts is set to the destination header after a 3967 * routing header. 3968 * Assume it is a post-rthdr destination header 3969 * and adjust when we find an rthdr. 3970 */ 3971 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 3972 ipp->ipp_fields |= IPPF_DSTOPTS; 3973 ipp->ipp_dstopts = tmpdstopts; 3974 ipp->ipp_dstoptslen = ehdrlen; 3975 } 3976 break; 3977 case IPPROTO_ROUTING: 3978 tmprthdr = (ip6_rthdr_t *)whereptr; 3979 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 3980 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 3981 goto done; 3982 nexthdr = tmprthdr->ip6r_nxt; 3983 /* return only 1st rthdr */ 3984 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 3985 ipp->ipp_fields |= IPPF_RTHDR; 3986 ipp->ipp_rthdr = tmprthdr; 3987 ipp->ipp_rthdrlen = ehdrlen; 3988 } 3989 /* 3990 * Make any destination header we've seen be a 3991 * pre-rthdr destination header. 
3992 */ 3993 if (ipp->ipp_fields & IPPF_DSTOPTS) { 3994 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3995 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3996 ipp->ipp_rtdstopts = ipp->ipp_dstopts; 3997 ipp->ipp_dstopts = NULL; 3998 ipp->ipp_rtdstoptslen = ipp->ipp_dstoptslen; 3999 ipp->ipp_dstoptslen = 0; 4000 } 4001 break; 4002 case IPPROTO_FRAGMENT: 4003 tmpfraghdr = (ip6_frag_t *)whereptr; 4004 ehdrlen = sizeof (ip6_frag_t); 4005 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 4006 goto done; 4007 nexthdr = tmpfraghdr->ip6f_nxt; 4008 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 4009 ipp->ipp_fields |= IPPF_FRAGHDR; 4010 ipp->ipp_fraghdr = tmpfraghdr; 4011 ipp->ipp_fraghdrlen = ehdrlen; 4012 } 4013 break; 4014 case IPPROTO_NONE: 4015 default: 4016 goto done; 4017 } 4018 length += ehdrlen; 4019 whereptr += ehdrlen; 4020 } 4021 done: 4022 if (nexthdrp != NULL) 4023 *nexthdrp = nexthdr; 4024 return (length); 4025 } 4026 4027 int 4028 ip_hdr_complete_v6(ip6_t *ip6h, zoneid_t zoneid, ip_stack_t *ipst) 4029 { 4030 ire_t *ire; 4031 4032 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4033 ire = ire_lookup_local_v6(zoneid, ipst); 4034 if (ire == NULL) { 4035 ip1dbg(("ip_hdr_complete_v6: no source IRE\n")); 4036 return (1); 4037 } 4038 ip6h->ip6_src = ire->ire_addr_v6; 4039 ire_refrele(ire); 4040 } 4041 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4042 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 4043 return (0); 4044 } 4045 4046 /* 4047 * Try to determine where and what are the IPv6 header length and 4048 * pointer to nexthdr value for the upper layer protocol (or an 4049 * unknown next hdr). 4050 * 4051 * Parameters returns a pointer to the nexthdr value; 4052 * Must handle malformed packets of various sorts. 4053 * Function returns failure for malformed cases. 
4054 */ 4055 boolean_t 4056 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 4057 uint8_t **nexthdrpp) 4058 { 4059 uint16_t length; 4060 uint_t ehdrlen; 4061 uint8_t *nexthdrp; 4062 uint8_t *whereptr; 4063 uint8_t *endptr; 4064 ip6_dest_t *desthdr; 4065 ip6_rthdr_t *rthdr; 4066 ip6_frag_t *fraghdr; 4067 4068 ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION); 4069 length = IPV6_HDR_LEN; 4070 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 4071 endptr = mp->b_wptr; 4072 4073 nexthdrp = &ip6h->ip6_nxt; 4074 while (whereptr < endptr) { 4075 /* Is there enough left for len + nexthdr? */ 4076 if (whereptr + MIN_EHDR_LEN > endptr) 4077 break; 4078 4079 switch (*nexthdrp) { 4080 case IPPROTO_HOPOPTS: 4081 case IPPROTO_DSTOPTS: 4082 /* Assumes the headers are identical for hbh and dst */ 4083 desthdr = (ip6_dest_t *)whereptr; 4084 ehdrlen = 8 * (desthdr->ip6d_len + 1); 4085 if ((uchar_t *)desthdr + ehdrlen > endptr) 4086 return (B_FALSE); 4087 nexthdrp = &desthdr->ip6d_nxt; 4088 break; 4089 case IPPROTO_ROUTING: 4090 rthdr = (ip6_rthdr_t *)whereptr; 4091 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4092 if ((uchar_t *)rthdr + ehdrlen > endptr) 4093 return (B_FALSE); 4094 nexthdrp = &rthdr->ip6r_nxt; 4095 break; 4096 case IPPROTO_FRAGMENT: 4097 fraghdr = (ip6_frag_t *)whereptr; 4098 ehdrlen = sizeof (ip6_frag_t); 4099 if ((uchar_t *)&fraghdr[1] > endptr) 4100 return (B_FALSE); 4101 nexthdrp = &fraghdr->ip6f_nxt; 4102 break; 4103 case IPPROTO_NONE: 4104 /* No next header means we're finished */ 4105 default: 4106 *hdr_length_ptr = length; 4107 *nexthdrpp = nexthdrp; 4108 return (B_TRUE); 4109 } 4110 length += ehdrlen; 4111 whereptr += ehdrlen; 4112 *hdr_length_ptr = length; 4113 *nexthdrpp = nexthdrp; 4114 } 4115 switch (*nexthdrp) { 4116 case IPPROTO_HOPOPTS: 4117 case IPPROTO_DSTOPTS: 4118 case IPPROTO_ROUTING: 4119 case IPPROTO_FRAGMENT: 4120 /* 4121 * If any know extension headers are still to be processed, 4122 * the 
packet's malformed (or at least all the IP header(s) are 4123 * not in the same mblk - and that should never happen. 4124 */ 4125 return (B_FALSE); 4126 4127 default: 4128 /* 4129 * If we get here, we know that all of the IP headers were in 4130 * the same mblk, even if the ULP header is in the next mblk. 4131 */ 4132 *hdr_length_ptr = length; 4133 *nexthdrpp = nexthdrp; 4134 return (B_TRUE); 4135 } 4136 } 4137 4138 /* 4139 * Return the length of the IPv6 related headers (including extension headers) 4140 * Returns a length even if the packet is malformed. 4141 */ 4142 int 4143 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 4144 { 4145 uint16_t hdr_len; 4146 uint8_t *nexthdrp; 4147 4148 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 4149 return (hdr_len); 4150 } 4151 4152 /* 4153 * IPv6 - 4154 * ip_newroute_v6 is called by ip_rput_data_v6 or ip_wput_v6 whenever we need 4155 * to send out a packet to a destination address for which we do not have 4156 * specific routing information. 4157 * 4158 * Handle non-multicast packets. If ill is non-NULL the match is done 4159 * for that ill. 4160 * 4161 * When a specific ill is specified (using IPV6_PKTINFO, 4162 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 4163 * on routing entries (ftable and ctable) that have a matching 4164 * ire->ire_ipif->ipif_ill. Thus this can only be used 4165 * for destinations that are on-link for the specific ill 4166 * and that can appear on multiple links. Thus it is useful 4167 * for multicast destinations, link-local destinations, and 4168 * at some point perhaps for site-local destinations (if the 4169 * node sits at a site boundary). 4170 * We create the cache entries in the regular ctable since 4171 * it can not "confuse" things for other destinations. 4172 * 4173 * NOTE : These are the scopes of some of the variables that point at IRE, 4174 * which needs to be followed while making any future modifications 4175 * to avoid memory leaks. 
4176 * 4177 * - ire and sire are the entries looked up initially by 4178 * ire_ftable_lookup_v6. 4179 * - ipif_ire is used to hold the interface ire associated with 4180 * the new cache ire. But it's scope is limited, so we always REFRELE 4181 * it before branching out to error paths. 4182 * - save_ire is initialized before ire_create, so that ire returned 4183 * by ire_create will not over-write the ire. We REFRELE save_ire 4184 * before breaking out of the switch. 4185 * 4186 * Thus on failures, we have to REFRELE only ire and sire, if they 4187 * are not NULL. 4188 */ 4189 /* ARGSUSED */ 4190 void 4191 ip_newroute_v6(queue_t *q, mblk_t *mp, const in6_addr_t *v6dstp, 4192 const in6_addr_t *v6srcp, ill_t *ill, zoneid_t zoneid, ip_stack_t *ipst) 4193 { 4194 in6_addr_t v6gw; 4195 in6_addr_t dst; 4196 ire_t *ire = NULL; 4197 ipif_t *src_ipif = NULL; 4198 ill_t *dst_ill = NULL; 4199 ire_t *sire = NULL; 4200 ire_t *save_ire; 4201 ip6_t *ip6h; 4202 int err = 0; 4203 mblk_t *first_mp; 4204 ipsec_out_t *io; 4205 ushort_t ire_marks = 0; 4206 int match_flags; 4207 ire_t *first_sire = NULL; 4208 mblk_t *copy_mp = NULL; 4209 mblk_t *xmit_mp = NULL; 4210 in6_addr_t save_dst; 4211 uint32_t multirt_flags = 4212 MULTIRT_CACHEGW | MULTIRT_USESTAMP | MULTIRT_SETSTAMP; 4213 boolean_t multirt_is_resolvable; 4214 boolean_t multirt_resolve_next; 4215 boolean_t need_rele = B_FALSE; 4216 boolean_t ip6_asp_table_held = B_FALSE; 4217 tsol_ire_gw_secattr_t *attrp = NULL; 4218 tsol_gcgrp_t *gcgrp = NULL; 4219 tsol_gcgrp_addr_t ga; 4220 4221 ASSERT(!IN6_IS_ADDR_MULTICAST(v6dstp)); 4222 4223 first_mp = mp; 4224 if (mp->b_datap->db_type == M_CTL) { 4225 mp = mp->b_cont; 4226 io = (ipsec_out_t *)first_mp->b_rptr; 4227 ASSERT(io->ipsec_out_type == IPSEC_OUT); 4228 } else { 4229 io = NULL; 4230 } 4231 4232 ip6h = (ip6_t *)mp->b_rptr; 4233 4234 if (IN6_IS_ADDR_LOOPBACK(v6dstp)) { 4235 ip1dbg(("ip_newroute_v6: dst with loopback addr\n")); 4236 goto icmp_err_ret; 4237 } else if 
(IN6_IS_ADDR_LOOPBACK(v6srcp)) { 4238 ip1dbg(("ip_newroute_v6: src with loopback addr\n")); 4239 goto icmp_err_ret; 4240 } 4241 4242 /* 4243 * If this IRE is created for forwarding or it is not for 4244 * TCP traffic, mark it as temporary. 4245 * 4246 * Is it sufficient just to check the next header?? 4247 */ 4248 if (mp->b_prev != NULL || !IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt)) 4249 ire_marks |= IRE_MARK_TEMPORARY; 4250 4251 /* 4252 * Get what we can from ire_ftable_lookup_v6 which will follow an IRE 4253 * chain until it gets the most specific information available. 4254 * For example, we know that there is no IRE_CACHE for this dest, 4255 * but there may be an IRE_OFFSUBNET which specifies a gateway. 4256 * ire_ftable_lookup_v6 will look up the gateway, etc. 4257 */ 4258 4259 if (ill == NULL) { 4260 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4261 MATCH_IRE_PARENT | MATCH_IRE_RJ_BHOLE | MATCH_IRE_SECATTR; 4262 ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, 4263 NULL, &sire, zoneid, 0, msg_getlabel(mp), 4264 match_flags, ipst); 4265 } else { 4266 match_flags = MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT | 4267 MATCH_IRE_RJ_BHOLE | MATCH_IRE_ILL; 4268 match_flags |= MATCH_IRE_PARENT | MATCH_IRE_SECATTR; 4269 4270 /* 4271 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4272 * tied to an underlying interface, IS_UNDER_IPMP() may be 4273 * true even when building IREs that will be used for data 4274 * traffic. As such, use the packet's source address to 4275 * determine whether the traffic is test traffic, and set 4276 * MATCH_IRE_MARK_TESTHIDDEN if so. 
4277 */ 4278 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(v6srcp)) { 4279 if (ipif_lookup_testaddr_v6(ill, v6srcp, NULL)) 4280 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 4281 } 4282 4283 ire = ire_ftable_lookup_v6(v6dstp, NULL, NULL, 0, ill->ill_ipif, 4284 &sire, zoneid, 0, msg_getlabel(mp), match_flags, ipst); 4285 } 4286 4287 ip3dbg(("ip_newroute_v6: ire_ftable_lookup_v6() " 4288 "returned ire %p, sire %p\n", (void *)ire, (void *)sire)); 4289 4290 /* 4291 * We enter a loop that will be run only once in most cases. 4292 * The loop is re-entered in the case where the destination 4293 * can be reached through multiple RTF_MULTIRT-flagged routes. 4294 * The intention is to compute multiple routes to a single 4295 * destination in a single ip_newroute_v6 call. 4296 * The information is contained in sire->ire_flags. 4297 */ 4298 do { 4299 multirt_resolve_next = B_FALSE; 4300 4301 if (dst_ill != NULL) { 4302 ill_refrele(dst_ill); 4303 dst_ill = NULL; 4304 } 4305 if (src_ipif != NULL) { 4306 ipif_refrele(src_ipif); 4307 src_ipif = NULL; 4308 } 4309 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4310 ip3dbg(("ip_newroute_v6: starting new resolution " 4311 "with first_mp %p, tag %d\n", 4312 (void *)first_mp, MULTIRT_DEBUG_TAGGED(first_mp))); 4313 4314 /* 4315 * We check if there are trailing unresolved routes for 4316 * the destination contained in sire. 4317 */ 4318 multirt_is_resolvable = ire_multirt_lookup_v6(&ire, 4319 &sire, multirt_flags, msg_getlabel(mp), ipst); 4320 4321 ip3dbg(("ip_newroute_v6: multirt_is_resolvable %d, " 4322 "ire %p, sire %p\n", 4323 multirt_is_resolvable, (void *)ire, (void *)sire)); 4324 4325 if (!multirt_is_resolvable) { 4326 /* 4327 * No more multirt routes to resolve; give up 4328 * (all routes resolved or no more resolvable 4329 * routes). 
4330 */ 4331 if (ire != NULL) { 4332 ire_refrele(ire); 4333 ire = NULL; 4334 } 4335 } else { 4336 ASSERT(sire != NULL); 4337 ASSERT(ire != NULL); 4338 /* 4339 * We simply use first_sire as a flag that 4340 * indicates if a resolvable multirt route has 4341 * already been found during the preceding 4342 * loops. If it is not the case, we may have 4343 * to send an ICMP error to report that the 4344 * destination is unreachable. We do not 4345 * IRE_REFHOLD first_sire. 4346 */ 4347 if (first_sire == NULL) { 4348 first_sire = sire; 4349 } 4350 } 4351 } 4352 if ((ire == NULL) || (ire == sire)) { 4353 /* 4354 * either ire == NULL (the destination cannot be 4355 * resolved) or ire == sire (the gateway cannot be 4356 * resolved). At this point, there are no more routes 4357 * to resolve for the destination, thus we exit. 4358 */ 4359 if (ip_debug > 3) { 4360 /* ip2dbg */ 4361 pr_addr_dbg("ip_newroute_v6: " 4362 "can't resolve %s\n", AF_INET6, v6dstp); 4363 } 4364 ip3dbg(("ip_newroute_v6: " 4365 "ire %p, sire %p, first_sire %p\n", 4366 (void *)ire, (void *)sire, (void *)first_sire)); 4367 4368 if (sire != NULL) { 4369 ire_refrele(sire); 4370 sire = NULL; 4371 } 4372 4373 if (first_sire != NULL) { 4374 /* 4375 * At least one multirt route has been found 4376 * in the same ip_newroute() call; there is no 4377 * need to report an ICMP error. 4378 * first_sire was not IRE_REFHOLDed. 4379 */ 4380 MULTIRT_DEBUG_UNTAG(first_mp); 4381 freemsg(first_mp); 4382 return; 4383 } 4384 ip_rts_change_v6(RTM_MISS, v6dstp, 0, 0, 0, 0, 0, 0, 4385 RTA_DST, ipst); 4386 goto icmp_err_ret; 4387 } 4388 4389 ASSERT(ire->ire_ipversion == IPV6_VERSION); 4390 4391 /* 4392 * Verify that the returned IRE does not have either the 4393 * RTF_REJECT or RTF_BLACKHOLE flags set and that the IRE is 4394 * either an IRE_CACHE, IRE_IF_NORESOLVER or IRE_IF_RESOLVER. 
4395 */ 4396 if ((ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) || 4397 (ire->ire_type & (IRE_CACHE | IRE_INTERFACE)) == 0) 4398 goto icmp_err_ret; 4399 4400 /* 4401 * Increment the ire_ob_pkt_count field for ire if it is an 4402 * INTERFACE (IF_RESOLVER or IF_NORESOLVER) IRE type, and 4403 * increment the same for the parent IRE, sire, if it is some 4404 * sort of prefix IRE (which includes DEFAULT, PREFIX, and HOST) 4405 */ 4406 if ((ire->ire_type & IRE_INTERFACE) != 0) { 4407 UPDATE_OB_PKT_COUNT(ire); 4408 ire->ire_last_used_time = lbolt; 4409 } 4410 4411 if (sire != NULL) { 4412 mutex_enter(&sire->ire_lock); 4413 v6gw = sire->ire_gateway_addr_v6; 4414 mutex_exit(&sire->ire_lock); 4415 ASSERT((sire->ire_type & (IRE_CACHETABLE | 4416 IRE_INTERFACE)) == 0); 4417 UPDATE_OB_PKT_COUNT(sire); 4418 sire->ire_last_used_time = lbolt; 4419 } else { 4420 v6gw = ipv6_all_zeros; 4421 } 4422 4423 /* 4424 * We have a route to reach the destination. Find the 4425 * appropriate ill, then get a source address that matches the 4426 * right scope via ipif_select_source_v6(). 4427 * 4428 * If we are here trying to create an IRE_CACHE for an offlink 4429 * destination and have an IRE_CACHE entry for VNI, then use 4430 * ire_stq instead since VNI's queue is a black hole. 4431 * 4432 * Note: While we pick a dst_ill we are really only interested 4433 * in the ill for load spreading. The source ipif is 4434 * determined by source address selection below. 
4435 */ 4436 if ((ire->ire_type == IRE_CACHE) && 4437 IS_VNI(ire->ire_ipif->ipif_ill)) { 4438 dst_ill = ire->ire_stq->q_ptr; 4439 ill_refhold(dst_ill); 4440 } else { 4441 ill_t *ill = ire->ire_ipif->ipif_ill; 4442 4443 if (IS_IPMP(ill)) { 4444 dst_ill = 4445 ipmp_illgrp_hold_next_ill(ill->ill_grp); 4446 } else { 4447 dst_ill = ill; 4448 ill_refhold(dst_ill); 4449 } 4450 } 4451 4452 if (dst_ill == NULL) { 4453 if (ip_debug > 2) { 4454 pr_addr_dbg("ip_newroute_v6 : no dst " 4455 "ill for dst %s\n", AF_INET6, v6dstp); 4456 } 4457 goto icmp_err_ret; 4458 } 4459 4460 if (ill != NULL && dst_ill != ill && 4461 !IS_IN_SAME_ILLGRP(dst_ill, ill)) { 4462 /* 4463 * We should have found a route matching "ill" 4464 * as we called ire_ftable_lookup_v6 with 4465 * MATCH_IRE_ILL. Rather than asserting when 4466 * there is a mismatch, we just drop the packet. 4467 */ 4468 ip0dbg(("ip_newroute_v6: BOUND_IF failed: " 4469 "dst_ill %s ill %s\n", dst_ill->ill_name, 4470 ill->ill_name)); 4471 goto icmp_err_ret; 4472 } 4473 4474 /* 4475 * Pick a source address which matches the scope of the 4476 * destination address. 4477 * For RTF_SETSRC routes, the source address is imposed by the 4478 * parent ire (sire). 4479 */ 4480 ASSERT(src_ipif == NULL); 4481 4482 /* 4483 * Because nce_xmit() calls ip_output_v6() and NCEs are always 4484 * tied to the underlying interface, IS_UNDER_IPMP() may be 4485 * true even when building IREs that will be used for data 4486 * traffic. As such, see if the packet's source address is a 4487 * test address, and if so use that test address's ipif for 4488 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 4489 * ire_add_v6() can work properly. 
4490 */ 4491 if (ill != NULL && IS_UNDER_IPMP(ill)) 4492 (void) ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 4493 4494 if (src_ipif == NULL && ire->ire_type == IRE_IF_RESOLVER && 4495 !IN6_IS_ADDR_UNSPECIFIED(&v6gw) && 4496 ip6_asp_can_lookup(ipst)) { 4497 /* 4498 * The ire cache entry we're adding is for the 4499 * gateway itself. The source address in this case 4500 * is relative to the gateway's address. 4501 */ 4502 ip6_asp_table_held = B_TRUE; 4503 src_ipif = ipif_select_source_v6(dst_ill, &v6gw, 4504 B_TRUE, IPV6_PREFER_SRC_DEFAULT, zoneid); 4505 if (src_ipif != NULL) 4506 ire_marks |= IRE_MARK_USESRC_CHECK; 4507 } else if (src_ipif == NULL) { 4508 if ((sire != NULL) && (sire->ire_flags & RTF_SETSRC)) { 4509 /* 4510 * Check that the ipif matching the requested 4511 * source address still exists. 4512 */ 4513 src_ipif = ipif_lookup_addr_v6( 4514 &sire->ire_src_addr_v6, NULL, zoneid, 4515 NULL, NULL, NULL, NULL, ipst); 4516 } 4517 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 4518 ip6_asp_table_held = B_TRUE; 4519 src_ipif = ipif_select_source_v6(dst_ill, 4520 v6dstp, B_FALSE, 4521 IPV6_PREFER_SRC_DEFAULT, zoneid); 4522 if (src_ipif != NULL) 4523 ire_marks |= IRE_MARK_USESRC_CHECK; 4524 } 4525 } 4526 4527 if (src_ipif == NULL) { 4528 if (ip_debug > 2) { 4529 /* ip1dbg */ 4530 pr_addr_dbg("ip_newroute_v6: no src for " 4531 "dst %s\n", AF_INET6, v6dstp); 4532 printf("ip_newroute_v6: interface name %s\n", 4533 dst_ill->ill_name); 4534 } 4535 goto icmp_err_ret; 4536 } 4537 4538 if (ip_debug > 3) { 4539 /* ip2dbg */ 4540 pr_addr_dbg("ip_newroute_v6: first hop %s\n", 4541 AF_INET6, &v6gw); 4542 } 4543 ip2dbg(("\tire type %s (%d)\n", 4544 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 4545 4546 /* 4547 * At this point in ip_newroute_v6(), ire is either the 4548 * IRE_CACHE of the next-hop gateway for an off-subnet 4549 * destination or an IRE_INTERFACE type that should be used 4550 * to resolve an on-subnet destination or an on-subnet 4551 * 
next-hop gateway. 4552 * 4553 * In the IRE_CACHE case, we have the following : 4554 * 4555 * 1) src_ipif - used for getting a source address. 4556 * 4557 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4558 * means packets using this IRE_CACHE will go out on dst_ill. 4559 * 4560 * 3) The IRE sire will point to the prefix that is the longest 4561 * matching route for the destination. These prefix types 4562 * include IRE_DEFAULT, IRE_PREFIX, IRE_HOST. 4563 * 4564 * The newly created IRE_CACHE entry for the off-subnet 4565 * destination is tied to both the prefix route and the 4566 * interface route used to resolve the next-hop gateway 4567 * via the ire_phandle and ire_ihandle fields, respectively. 4568 * 4569 * In the IRE_INTERFACE case, we have the following : 4570 * 4571 * 1) src_ipif - used for getting a source address. 4572 * 4573 * 2) dst_ill - from which we derive ire_stq/ire_rfq. This 4574 * means packets using the IRE_CACHE that we will build 4575 * here will go out on dst_ill. 4576 * 4577 * 3) sire may or may not be NULL. But, the IRE_CACHE that is 4578 * to be created will only be tied to the IRE_INTERFACE that 4579 * was derived from the ire_ihandle field. 4580 * 4581 * If sire is non-NULL, it means the destination is off-link 4582 * and we will first create the IRE_CACHE for the gateway. 4583 * Next time through ip_newroute_v6, we will create the 4584 * IRE_CACHE for the final destination as described above. 4585 */ 4586 save_ire = ire; 4587 switch (ire->ire_type) { 4588 case IRE_CACHE: { 4589 ire_t *ipif_ire; 4590 4591 ASSERT(sire != NULL); 4592 if (IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4593 mutex_enter(&ire->ire_lock); 4594 v6gw = ire->ire_gateway_addr_v6; 4595 mutex_exit(&ire->ire_lock); 4596 } 4597 /* 4598 * We need 3 ire's to create a new cache ire for an 4599 * off-link destination from the cache ire of the 4600 * gateway. 4601 * 4602 * 1. The prefix ire 'sire' 4603 * 2. The cache ire of the gateway 'ire' 4604 * 3. 
The interface ire 'ipif_ire' 4605 * 4606 * We have (1) and (2). We lookup (3) below. 4607 * 4608 * If there is no interface route to the gateway, 4609 * it is a race condition, where we found the cache 4610 * but the inteface route has been deleted. 4611 */ 4612 ipif_ire = ire_ihandle_lookup_offlink_v6(ire, sire); 4613 if (ipif_ire == NULL) { 4614 ip1dbg(("ip_newroute_v6:" 4615 "ire_ihandle_lookup_offlink_v6 failed\n")); 4616 goto icmp_err_ret; 4617 } 4618 4619 /* 4620 * Note: the new ire inherits RTF_SETSRC 4621 * and RTF_MULTIRT to propagate these flags from prefix 4622 * to cache. 4623 */ 4624 4625 /* 4626 * Check cached gateway IRE for any security 4627 * attributes; if found, associate the gateway 4628 * credentials group to the destination IRE. 4629 */ 4630 if ((attrp = save_ire->ire_gw_secattr) != NULL) { 4631 mutex_enter(&attrp->igsa_lock); 4632 if ((gcgrp = attrp->igsa_gcgrp) != NULL) 4633 GCGRP_REFHOLD(gcgrp); 4634 mutex_exit(&attrp->igsa_lock); 4635 } 4636 4637 ire = ire_create_v6( 4638 v6dstp, /* dest address */ 4639 &ipv6_all_ones, /* mask */ 4640 &src_ipif->ipif_v6src_addr, /* source address */ 4641 &v6gw, /* gateway address */ 4642 &save_ire->ire_max_frag, 4643 NULL, /* src nce */ 4644 dst_ill->ill_rq, /* recv-from queue */ 4645 dst_ill->ill_wq, /* send-to queue */ 4646 IRE_CACHE, 4647 src_ipif, 4648 &sire->ire_mask_v6, /* Parent mask */ 4649 sire->ire_phandle, /* Parent handle */ 4650 ipif_ire->ire_ihandle, /* Interface handle */ 4651 sire->ire_flags & /* flags if any */ 4652 (RTF_SETSRC | RTF_MULTIRT), 4653 &(sire->ire_uinfo), 4654 NULL, 4655 gcgrp, 4656 ipst); 4657 4658 if (ire == NULL) { 4659 if (gcgrp != NULL) { 4660 GCGRP_REFRELE(gcgrp); 4661 gcgrp = NULL; 4662 } 4663 ire_refrele(save_ire); 4664 ire_refrele(ipif_ire); 4665 break; 4666 } 4667 4668 /* reference now held by IRE */ 4669 gcgrp = NULL; 4670 4671 ire->ire_marks |= ire_marks; 4672 4673 /* 4674 * Prevent sire and ipif_ire from getting deleted. 
The 4675 * newly created ire is tied to both of them via the 4676 * phandle and ihandle respectively. 4677 */ 4678 IRB_REFHOLD(sire->ire_bucket); 4679 /* Has it been removed already ? */ 4680 if (sire->ire_marks & IRE_MARK_CONDEMNED) { 4681 IRB_REFRELE(sire->ire_bucket); 4682 ire_refrele(ipif_ire); 4683 ire_refrele(save_ire); 4684 break; 4685 } 4686 4687 IRB_REFHOLD(ipif_ire->ire_bucket); 4688 /* Has it been removed already ? */ 4689 if (ipif_ire->ire_marks & IRE_MARK_CONDEMNED) { 4690 IRB_REFRELE(ipif_ire->ire_bucket); 4691 IRB_REFRELE(sire->ire_bucket); 4692 ire_refrele(ipif_ire); 4693 ire_refrele(save_ire); 4694 break; 4695 } 4696 4697 xmit_mp = first_mp; 4698 if (ire->ire_flags & RTF_MULTIRT) { 4699 copy_mp = copymsg(first_mp); 4700 if (copy_mp != NULL) { 4701 xmit_mp = copy_mp; 4702 MULTIRT_DEBUG_TAG(first_mp); 4703 } 4704 } 4705 ire_add_then_send(q, ire, xmit_mp); 4706 if (ip6_asp_table_held) { 4707 ip6_asp_table_refrele(ipst); 4708 ip6_asp_table_held = B_FALSE; 4709 } 4710 ire_refrele(save_ire); 4711 4712 /* Assert that sire is not deleted yet. */ 4713 ASSERT(sire->ire_ptpn != NULL); 4714 IRB_REFRELE(sire->ire_bucket); 4715 4716 /* Assert that ipif_ire is not deleted yet. */ 4717 ASSERT(ipif_ire->ire_ptpn != NULL); 4718 IRB_REFRELE(ipif_ire->ire_bucket); 4719 ire_refrele(ipif_ire); 4720 4721 if (copy_mp != NULL) { 4722 /* 4723 * Search for the next unresolved 4724 * multirt route. 4725 */ 4726 copy_mp = NULL; 4727 ipif_ire = NULL; 4728 ire = NULL; 4729 /* re-enter the loop */ 4730 multirt_resolve_next = B_TRUE; 4731 continue; 4732 } 4733 ire_refrele(sire); 4734 ill_refrele(dst_ill); 4735 ipif_refrele(src_ipif); 4736 return; 4737 } 4738 case IRE_IF_NORESOLVER: 4739 /* 4740 * We have what we need to build an IRE_CACHE. 4741 * 4742 * handle the Gated case, where we create 4743 * a NORESOLVER route for loopback. 
4744 */ 4745 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 4746 break; 4747 /* 4748 * TSol note: We are creating the ire cache for the 4749 * destination 'dst'. If 'dst' is offlink, going 4750 * through the first hop 'gw', the security attributes 4751 * of 'dst' must be set to point to the gateway 4752 * credentials of gateway 'gw'. If 'dst' is onlink, it 4753 * is possible that 'dst' is a potential gateway that is 4754 * referenced by some route that has some security 4755 * attributes. Thus in the former case, we need to do a 4756 * gcgrp_lookup of 'gw' while in the latter case we 4757 * need to do gcgrp_lookup of 'dst' itself. 4758 */ 4759 ga.ga_af = AF_INET6; 4760 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4761 ga.ga_addr = v6gw; 4762 else 4763 ga.ga_addr = *v6dstp; 4764 gcgrp = gcgrp_lookup(&ga, B_FALSE); 4765 4766 /* 4767 * Note: the new ire inherits sire flags RTF_SETSRC 4768 * and RTF_MULTIRT to propagate those rules from prefix 4769 * to cache. 4770 */ 4771 ire = ire_create_v6( 4772 v6dstp, /* dest address */ 4773 &ipv6_all_ones, /* mask */ 4774 &src_ipif->ipif_v6src_addr, /* source address */ 4775 &v6gw, /* gateway address */ 4776 &save_ire->ire_max_frag, 4777 NULL, /* no src nce */ 4778 dst_ill->ill_rq, /* recv-from queue */ 4779 dst_ill->ill_wq, /* send-to queue */ 4780 IRE_CACHE, 4781 src_ipif, 4782 &save_ire->ire_mask_v6, /* Parent mask */ 4783 (sire != NULL) ? /* Parent handle */ 4784 sire->ire_phandle : 0, 4785 save_ire->ire_ihandle, /* Interface handle */ 4786 (sire != NULL) ? 
/* flags if any */ 4787 sire->ire_flags & 4788 (RTF_SETSRC | RTF_MULTIRT) : 0, 4789 &(save_ire->ire_uinfo), 4790 NULL, 4791 gcgrp, 4792 ipst); 4793 4794 if (ire == NULL) { 4795 if (gcgrp != NULL) { 4796 GCGRP_REFRELE(gcgrp); 4797 gcgrp = NULL; 4798 } 4799 ire_refrele(save_ire); 4800 break; 4801 } 4802 4803 /* reference now held by IRE */ 4804 gcgrp = NULL; 4805 4806 ire->ire_marks |= ire_marks; 4807 4808 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) 4809 dst = v6gw; 4810 else 4811 dst = *v6dstp; 4812 err = ndp_noresolver(dst_ill, &dst); 4813 if (err != 0) { 4814 ire_refrele(save_ire); 4815 break; 4816 } 4817 4818 /* Prevent save_ire from getting deleted */ 4819 IRB_REFHOLD(save_ire->ire_bucket); 4820 /* Has it been removed already ? */ 4821 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 4822 IRB_REFRELE(save_ire->ire_bucket); 4823 ire_refrele(save_ire); 4824 break; 4825 } 4826 4827 xmit_mp = first_mp; 4828 /* 4829 * In case of MULTIRT, a copy of the current packet 4830 * to send is made to further re-enter the 4831 * loop and attempt another route resolution 4832 */ 4833 if ((sire != NULL) && sire->ire_flags & RTF_MULTIRT) { 4834 copy_mp = copymsg(first_mp); 4835 if (copy_mp != NULL) { 4836 xmit_mp = copy_mp; 4837 MULTIRT_DEBUG_TAG(first_mp); 4838 } 4839 } 4840 ire_add_then_send(q, ire, xmit_mp); 4841 if (ip6_asp_table_held) { 4842 ip6_asp_table_refrele(ipst); 4843 ip6_asp_table_held = B_FALSE; 4844 } 4845 4846 /* Assert that it is not deleted yet. */ 4847 ASSERT(save_ire->ire_ptpn != NULL); 4848 IRB_REFRELE(save_ire->ire_bucket); 4849 ire_refrele(save_ire); 4850 4851 if (copy_mp != NULL) { 4852 /* 4853 * If we found a (no)resolver, we ignore any 4854 * trailing top priority IRE_CACHE in 4855 * further loops. This ensures that we do not 4856 * omit any (no)resolver despite the priority 4857 * in this call. 4858 * IRE_CACHE, if any, will be processed 4859 * by another thread entering ip_newroute(), 4860 * (on resolver response, for example). 
4861 * We use this to force multiple parallel 4862 * resolution as soon as a packet needs to be 4863 * sent. The result is, after one packet 4864 * emission all reachable routes are generally 4865 * resolved. 4866 * Otherwise, complete resolution of MULTIRT 4867 * routes would require several emissions as 4868 * side effect. 4869 */ 4870 multirt_flags &= ~MULTIRT_CACHEGW; 4871 4872 /* 4873 * Search for the next unresolved multirt 4874 * route. 4875 */ 4876 copy_mp = NULL; 4877 save_ire = NULL; 4878 ire = NULL; 4879 /* re-enter the loop */ 4880 multirt_resolve_next = B_TRUE; 4881 continue; 4882 } 4883 4884 /* Don't need sire anymore */ 4885 if (sire != NULL) 4886 ire_refrele(sire); 4887 ill_refrele(dst_ill); 4888 ipif_refrele(src_ipif); 4889 return; 4890 4891 case IRE_IF_RESOLVER: 4892 /* 4893 * We can't build an IRE_CACHE yet, but at least we 4894 * found a resolver that can help. 4895 */ 4896 dst = *v6dstp; 4897 4898 /* 4899 * To be at this point in the code with a non-zero gw 4900 * means that dst is reachable through a gateway that 4901 * we have never resolved. By changing dst to the gw 4902 * addr we resolve the gateway first. When 4903 * ire_add_then_send() tries to put the IP dg to dst, 4904 * it will reenter ip_newroute() at which time we will 4905 * find the IRE_CACHE for the gw and create another 4906 * IRE_CACHE above (for dst itself). 4907 */ 4908 if (!IN6_IS_ADDR_UNSPECIFIED(&v6gw)) { 4909 save_dst = dst; 4910 dst = v6gw; 4911 v6gw = ipv6_all_zeros; 4912 } 4913 if (dst_ill->ill_flags & ILLF_XRESOLV) { 4914 /* 4915 * Ask the external resolver to do its thing. 
4916 * Make an mblk chain in the following form: 4917 * ARQ_REQ_MBLK-->IRE_MBLK-->packet 4918 */ 4919 mblk_t *ire_mp; 4920 mblk_t *areq_mp; 4921 areq_t *areq; 4922 in6_addr_t *addrp; 4923 4924 ip1dbg(("ip_newroute_v6:ILLF_XRESOLV\n")); 4925 if (ip6_asp_table_held) { 4926 ip6_asp_table_refrele(ipst); 4927 ip6_asp_table_held = B_FALSE; 4928 } 4929 ire = ire_create_mp_v6( 4930 &dst, /* dest address */ 4931 &ipv6_all_ones, /* mask */ 4932 &src_ipif->ipif_v6src_addr, 4933 /* source address */ 4934 &v6gw, /* gateway address */ 4935 NULL, /* no src nce */ 4936 dst_ill->ill_rq, /* recv-from queue */ 4937 dst_ill->ill_wq, /* send-to queue */ 4938 IRE_CACHE, 4939 src_ipif, 4940 &save_ire->ire_mask_v6, /* Parent mask */ 4941 0, 4942 save_ire->ire_ihandle, 4943 /* Interface handle */ 4944 0, /* flags if any */ 4945 &(save_ire->ire_uinfo), 4946 NULL, 4947 NULL, 4948 ipst); 4949 4950 ire_refrele(save_ire); 4951 if (ire == NULL) { 4952 ip1dbg(("ip_newroute_v6:" 4953 "ire is NULL\n")); 4954 break; 4955 } 4956 4957 if ((sire != NULL) && 4958 (sire->ire_flags & RTF_MULTIRT)) { 4959 /* 4960 * processing a copy of the packet to 4961 * send for further resolution loops 4962 */ 4963 copy_mp = copymsg(first_mp); 4964 if (copy_mp != NULL) 4965 MULTIRT_DEBUG_TAG(copy_mp); 4966 } 4967 ire->ire_marks |= ire_marks; 4968 ire_mp = ire->ire_mp; 4969 /* 4970 * Now create or find an nce for this interface. 4971 * The hw addr will need to to be set from 4972 * the reply to the AR_ENTRY_QUERY that 4973 * we're about to send. This will be done in 4974 * ire_add_v6(). 4975 */ 4976 err = ndp_resolver(dst_ill, &dst, mp, zoneid); 4977 switch (err) { 4978 case 0: 4979 /* 4980 * New cache entry created. 4981 * Break, then ask the external 4982 * resolver. 4983 */ 4984 break; 4985 case EINPROGRESS: 4986 /* 4987 * Resolution in progress; 4988 * packet has been queued by 4989 * ndp_resolver(). 4990 */ 4991 ire_delete(ire); 4992 ire = NULL; 4993 /* 4994 * Check if another multirt 4995 * route must be resolved. 
4996 */ 4997 if (copy_mp != NULL) { 4998 /* 4999 * If we found a resolver, we 5000 * ignore any trailing top 5001 * priority IRE_CACHE in 5002 * further loops. The reason is 5003 * the same as for noresolver. 5004 */ 5005 multirt_flags &= 5006 ~MULTIRT_CACHEGW; 5007 /* 5008 * Search for the next 5009 * unresolved multirt route. 5010 */ 5011 first_mp = copy_mp; 5012 copy_mp = NULL; 5013 mp = first_mp; 5014 if (mp->b_datap->db_type == 5015 M_CTL) { 5016 mp = mp->b_cont; 5017 } 5018 ASSERT(sire != NULL); 5019 dst = save_dst; 5020 /* 5021 * re-enter the loop 5022 */ 5023 multirt_resolve_next = 5024 B_TRUE; 5025 continue; 5026 } 5027 5028 if (sire != NULL) 5029 ire_refrele(sire); 5030 ill_refrele(dst_ill); 5031 ipif_refrele(src_ipif); 5032 return; 5033 default: 5034 /* 5035 * Transient error; packet will be 5036 * freed. 5037 */ 5038 ire_delete(ire); 5039 ire = NULL; 5040 break; 5041 } 5042 if (err != 0) 5043 break; 5044 /* 5045 * Now set up the AR_ENTRY_QUERY and send it. 5046 */ 5047 areq_mp = ill_arp_alloc(dst_ill, 5048 (uchar_t *)&ipv6_areq_template, 5049 (caddr_t)&dst); 5050 if (areq_mp == NULL) { 5051 ip1dbg(("ip_newroute_v6:" 5052 "areq_mp is NULL\n")); 5053 freemsg(ire_mp); 5054 break; 5055 } 5056 areq = (areq_t *)areq_mp->b_rptr; 5057 addrp = (in6_addr_t *)((char *)areq + 5058 areq->areq_target_addr_offset); 5059 *addrp = dst; 5060 addrp = (in6_addr_t *)((char *)areq + 5061 areq->areq_sender_addr_offset); 5062 *addrp = src_ipif->ipif_v6src_addr; 5063 /* 5064 * link the chain, then send up to the resolver. 5065 */ 5066 linkb(areq_mp, ire_mp); 5067 linkb(areq_mp, mp); 5068 ip1dbg(("ip_newroute_v6:" 5069 "putnext to resolver\n")); 5070 putnext(dst_ill->ill_rq, areq_mp); 5071 /* 5072 * Check if another multirt route 5073 * must be resolved. 5074 */ 5075 ire = NULL; 5076 if (copy_mp != NULL) { 5077 /* 5078 * If we find a resolver, we ignore any 5079 * trailing top priority IRE_CACHE in 5080 * further loops. The reason is the 5081 * same as for noresolver. 
5082 */ 5083 multirt_flags &= ~MULTIRT_CACHEGW; 5084 /* 5085 * Search for the next unresolved 5086 * multirt route. 5087 */ 5088 first_mp = copy_mp; 5089 copy_mp = NULL; 5090 mp = first_mp; 5091 if (mp->b_datap->db_type == M_CTL) { 5092 mp = mp->b_cont; 5093 } 5094 ASSERT(sire != NULL); 5095 dst = save_dst; 5096 /* 5097 * re-enter the loop 5098 */ 5099 multirt_resolve_next = B_TRUE; 5100 continue; 5101 } 5102 5103 if (sire != NULL) 5104 ire_refrele(sire); 5105 ill_refrele(dst_ill); 5106 ipif_refrele(src_ipif); 5107 return; 5108 } 5109 /* 5110 * Non-external resolver case. 5111 * 5112 * TSol note: Please see the note above the 5113 * IRE_IF_NORESOLVER case. 5114 */ 5115 ga.ga_af = AF_INET6; 5116 ga.ga_addr = dst; 5117 gcgrp = gcgrp_lookup(&ga, B_FALSE); 5118 5119 ire = ire_create_v6( 5120 &dst, /* dest address */ 5121 &ipv6_all_ones, /* mask */ 5122 &src_ipif->ipif_v6src_addr, /* source address */ 5123 &v6gw, /* gateway address */ 5124 &save_ire->ire_max_frag, 5125 NULL, /* no src nce */ 5126 dst_ill->ill_rq, /* recv-from queue */ 5127 dst_ill->ill_wq, /* send-to queue */ 5128 IRE_CACHE, 5129 src_ipif, 5130 &save_ire->ire_mask_v6, /* Parent mask */ 5131 0, 5132 save_ire->ire_ihandle, /* Interface handle */ 5133 0, /* flags if any */ 5134 &(save_ire->ire_uinfo), 5135 NULL, 5136 gcgrp, 5137 ipst); 5138 5139 if (ire == NULL) { 5140 if (gcgrp != NULL) { 5141 GCGRP_REFRELE(gcgrp); 5142 gcgrp = NULL; 5143 } 5144 ire_refrele(save_ire); 5145 break; 5146 } 5147 5148 /* reference now held by IRE */ 5149 gcgrp = NULL; 5150 5151 if ((sire != NULL) && 5152 (sire->ire_flags & RTF_MULTIRT)) { 5153 copy_mp = copymsg(first_mp); 5154 if (copy_mp != NULL) 5155 MULTIRT_DEBUG_TAG(copy_mp); 5156 } 5157 5158 ire->ire_marks |= ire_marks; 5159 err = ndp_resolver(dst_ill, &dst, first_mp, zoneid); 5160 switch (err) { 5161 case 0: 5162 /* Prevent save_ire from getting deleted */ 5163 IRB_REFHOLD(save_ire->ire_bucket); 5164 /* Has it been removed already ? 
*/ 5165 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5166 IRB_REFRELE(save_ire->ire_bucket); 5167 ire_refrele(save_ire); 5168 break; 5169 } 5170 5171 /* 5172 * We have a resolved cache entry, 5173 * add in the IRE. 5174 */ 5175 ire_add_then_send(q, ire, first_mp); 5176 if (ip6_asp_table_held) { 5177 ip6_asp_table_refrele(ipst); 5178 ip6_asp_table_held = B_FALSE; 5179 } 5180 5181 /* Assert that it is not deleted yet. */ 5182 ASSERT(save_ire->ire_ptpn != NULL); 5183 IRB_REFRELE(save_ire->ire_bucket); 5184 ire_refrele(save_ire); 5185 /* 5186 * Check if another multirt route 5187 * must be resolved. 5188 */ 5189 ire = NULL; 5190 if (copy_mp != NULL) { 5191 /* 5192 * If we find a resolver, we ignore any 5193 * trailing top priority IRE_CACHE in 5194 * further loops. The reason is the 5195 * same as for noresolver. 5196 */ 5197 multirt_flags &= ~MULTIRT_CACHEGW; 5198 /* 5199 * Search for the next unresolved 5200 * multirt route. 5201 */ 5202 first_mp = copy_mp; 5203 copy_mp = NULL; 5204 mp = first_mp; 5205 if (mp->b_datap->db_type == M_CTL) { 5206 mp = mp->b_cont; 5207 } 5208 ASSERT(sire != NULL); 5209 dst = save_dst; 5210 /* 5211 * re-enter the loop 5212 */ 5213 multirt_resolve_next = B_TRUE; 5214 continue; 5215 } 5216 5217 if (sire != NULL) 5218 ire_refrele(sire); 5219 ill_refrele(dst_ill); 5220 ipif_refrele(src_ipif); 5221 return; 5222 5223 case EINPROGRESS: 5224 /* 5225 * mp was consumed - presumably queued. 5226 * No need for ire, presumably resolution is 5227 * in progress, and ire will be added when the 5228 * address is resolved. 5229 */ 5230 if (ip6_asp_table_held) { 5231 ip6_asp_table_refrele(ipst); 5232 ip6_asp_table_held = B_FALSE; 5233 } 5234 ASSERT(ire->ire_nce == NULL); 5235 ire_delete(ire); 5236 ire_refrele(save_ire); 5237 /* 5238 * Check if another multirt route 5239 * must be resolved. 
5240 */ 5241 ire = NULL; 5242 if (copy_mp != NULL) { 5243 /* 5244 * If we find a resolver, we ignore any 5245 * trailing top priority IRE_CACHE in 5246 * further loops. The reason is the 5247 * same as for noresolver. 5248 */ 5249 multirt_flags &= ~MULTIRT_CACHEGW; 5250 /* 5251 * Search for the next unresolved 5252 * multirt route. 5253 */ 5254 first_mp = copy_mp; 5255 copy_mp = NULL; 5256 mp = first_mp; 5257 if (mp->b_datap->db_type == M_CTL) { 5258 mp = mp->b_cont; 5259 } 5260 ASSERT(sire != NULL); 5261 dst = save_dst; 5262 /* 5263 * re-enter the loop 5264 */ 5265 multirt_resolve_next = B_TRUE; 5266 continue; 5267 } 5268 if (sire != NULL) 5269 ire_refrele(sire); 5270 ill_refrele(dst_ill); 5271 ipif_refrele(src_ipif); 5272 return; 5273 default: 5274 /* Some transient error */ 5275 ASSERT(ire->ire_nce == NULL); 5276 ire_refrele(save_ire); 5277 break; 5278 } 5279 break; 5280 default: 5281 break; 5282 } 5283 if (ip6_asp_table_held) { 5284 ip6_asp_table_refrele(ipst); 5285 ip6_asp_table_held = B_FALSE; 5286 } 5287 } while (multirt_resolve_next); 5288 5289 err_ret: 5290 ip1dbg(("ip_newroute_v6: dropped\n")); 5291 if (src_ipif != NULL) 5292 ipif_refrele(src_ipif); 5293 if (dst_ill != NULL) { 5294 need_rele = B_TRUE; 5295 ill = dst_ill; 5296 } 5297 if (ill != NULL) { 5298 if (mp->b_prev != NULL) { 5299 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 5300 } else { 5301 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 5302 } 5303 5304 if (need_rele) 5305 ill_refrele(ill); 5306 } else { 5307 if (mp->b_prev != NULL) { 5308 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 5309 } else { 5310 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 5311 } 5312 } 5313 /* Did this packet originate externally? 
*/ 5314 if (mp->b_prev) { 5315 mp->b_next = NULL; 5316 mp->b_prev = NULL; 5317 } 5318 if (copy_mp != NULL) { 5319 MULTIRT_DEBUG_UNTAG(copy_mp); 5320 freemsg(copy_mp); 5321 } 5322 MULTIRT_DEBUG_UNTAG(first_mp); 5323 freemsg(first_mp); 5324 if (ire != NULL) 5325 ire_refrele(ire); 5326 if (sire != NULL) 5327 ire_refrele(sire); 5328 return; 5329 5330 icmp_err_ret: 5331 if (ip6_asp_table_held) 5332 ip6_asp_table_refrele(ipst); 5333 if (src_ipif != NULL) 5334 ipif_refrele(src_ipif); 5335 if (dst_ill != NULL) { 5336 need_rele = B_TRUE; 5337 ill = dst_ill; 5338 } 5339 ip1dbg(("ip_newroute_v6: no route\n")); 5340 if (sire != NULL) 5341 ire_refrele(sire); 5342 /* 5343 * We need to set sire to NULL to avoid double freeing if we 5344 * ever goto err_ret from below. 5345 */ 5346 sire = NULL; 5347 ip6h = (ip6_t *)mp->b_rptr; 5348 /* Skip ip6i_t header if present */ 5349 if (ip6h->ip6_nxt == IPPROTO_RAW) { 5350 /* Make sure the IPv6 header is present */ 5351 if ((mp->b_wptr - (uchar_t *)ip6h) < 5352 sizeof (ip6i_t) + IPV6_HDR_LEN) { 5353 if (!pullupmsg(mp, sizeof (ip6i_t) + IPV6_HDR_LEN)) { 5354 ip1dbg(("ip_newroute_v6: pullupmsg failed\n")); 5355 goto err_ret; 5356 } 5357 } 5358 mp->b_rptr += sizeof (ip6i_t); 5359 ip6h = (ip6_t *)mp->b_rptr; 5360 } 5361 /* Did this packet originate externally? 
*/ 5362 if (mp->b_prev) { 5363 if (ill != NULL) { 5364 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 5365 } else { 5366 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInNoRoutes); 5367 } 5368 mp->b_next = NULL; 5369 mp->b_prev = NULL; 5370 q = WR(q); 5371 } else { 5372 if (ill != NULL) { 5373 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 5374 } else { 5375 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutNoRoutes); 5376 } 5377 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 5378 /* Failed */ 5379 if (copy_mp != NULL) { 5380 MULTIRT_DEBUG_UNTAG(copy_mp); 5381 freemsg(copy_mp); 5382 } 5383 MULTIRT_DEBUG_UNTAG(first_mp); 5384 freemsg(first_mp); 5385 if (ire != NULL) 5386 ire_refrele(ire); 5387 if (need_rele) 5388 ill_refrele(ill); 5389 return; 5390 } 5391 } 5392 5393 if (need_rele) 5394 ill_refrele(ill); 5395 5396 /* 5397 * At this point we will have ire only if RTF_BLACKHOLE 5398 * or RTF_REJECT flags are set on the IRE. It will not 5399 * generate ICMP6_DST_UNREACH_NOROUTE if RTF_BLACKHOLE is set. 5400 */ 5401 if (ire != NULL) { 5402 if (ire->ire_flags & RTF_BLACKHOLE) { 5403 ire_refrele(ire); 5404 if (copy_mp != NULL) { 5405 MULTIRT_DEBUG_UNTAG(copy_mp); 5406 freemsg(copy_mp); 5407 } 5408 MULTIRT_DEBUG_UNTAG(first_mp); 5409 freemsg(first_mp); 5410 return; 5411 } 5412 ire_refrele(ire); 5413 } 5414 if (ip_debug > 3) { 5415 /* ip2dbg */ 5416 pr_addr_dbg("ip_newroute_v6: no route to %s\n", 5417 AF_INET6, v6dstp); 5418 } 5419 icmp_unreachable_v6(WR(q), first_mp, ICMP6_DST_UNREACH_NOROUTE, 5420 B_FALSE, B_FALSE, zoneid, ipst); 5421 } 5422 5423 /* 5424 * ip_newroute_ipif_v6 is called by ip_wput_v6 and ip_wput_ipsec_out_v6 whenever 5425 * we need to send out a packet to a destination address for which we do not 5426 * have specific routing information. It is only used for multicast packets. 5427 * 5428 * If unspec_src we allow creating an IRE with source address zero. 5429 * ire_send_v6() will delete it after the packet is sent. 
5430 */ 5431 void 5432 ip_newroute_ipif_v6(queue_t *q, mblk_t *mp, ipif_t *ipif, 5433 const in6_addr_t *v6dstp, const in6_addr_t *v6srcp, int unspec_src, 5434 zoneid_t zoneid) 5435 { 5436 ire_t *ire = NULL; 5437 ipif_t *src_ipif = NULL; 5438 int err = 0; 5439 ill_t *dst_ill = NULL; 5440 ire_t *save_ire; 5441 ipsec_out_t *io; 5442 ill_t *ill; 5443 mblk_t *first_mp; 5444 ire_t *fire = NULL; 5445 mblk_t *copy_mp = NULL; 5446 const in6_addr_t *ire_v6srcp; 5447 boolean_t probe = B_FALSE; 5448 boolean_t multirt_resolve_next; 5449 boolean_t ipif_held = B_FALSE; 5450 boolean_t ill_held = B_FALSE; 5451 boolean_t ip6_asp_table_held = B_FALSE; 5452 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 5453 5454 /* 5455 * This loop is run only once in most cases. 5456 * We loop to resolve further routes only when the destination 5457 * can be reached through multiple RTF_MULTIRT-flagged ires. 5458 */ 5459 do { 5460 multirt_resolve_next = B_FALSE; 5461 if (dst_ill != NULL) { 5462 ill_refrele(dst_ill); 5463 dst_ill = NULL; 5464 } 5465 5466 if (src_ipif != NULL) { 5467 ipif_refrele(src_ipif); 5468 src_ipif = NULL; 5469 } 5470 ASSERT(ipif != NULL); 5471 ill = ipif->ipif_ill; 5472 5473 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6dstp)); 5474 if (ip_debug > 2) { 5475 /* ip1dbg */ 5476 pr_addr_dbg("ip_newroute_ipif_v6: v6dst %s\n", 5477 AF_INET6, v6dstp); 5478 printf("ip_newroute_ipif_v6: if %s, v6 %d\n", 5479 ill->ill_name, ipif->ipif_isv6); 5480 } 5481 5482 first_mp = mp; 5483 if (mp->b_datap->db_type == M_CTL) { 5484 mp = mp->b_cont; 5485 io = (ipsec_out_t *)first_mp->b_rptr; 5486 ASSERT(io->ipsec_out_type == IPSEC_OUT); 5487 } else { 5488 io = NULL; 5489 } 5490 5491 /* 5492 * If the interface is a pt-pt interface we look for an 5493 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER that matches both the 5494 * local_address and the pt-pt destination address. 5495 * Otherwise we just match the local address. 
5496 */ 5497 if (!(ill->ill_flags & ILLF_MULTICAST)) { 5498 goto err_ret; 5499 } 5500 5501 /* 5502 * We check if an IRE_OFFSUBNET for the addr that goes through 5503 * ipif exists. We need it to determine if the RTF_SETSRC and/or 5504 * RTF_MULTIRT flags must be honored. 5505 */ 5506 fire = ipif_lookup_multi_ire_v6(ipif, v6dstp); 5507 ip2dbg(("ip_newroute_ipif_v6: " 5508 "ipif_lookup_multi_ire_v6(" 5509 "ipif %p, dst %08x) = fire %p\n", 5510 (void *)ipif, ntohl(V4_PART_OF_V6((*v6dstp))), 5511 (void *)fire)); 5512 5513 ASSERT(src_ipif == NULL); 5514 5515 /* 5516 * Because nce_xmit() calls ip_output_v6() and NCEs are always 5517 * tied to the underlying interface, IS_UNDER_IPMP() may be 5518 * true even when building IREs that will be used for data 5519 * traffic. As such, see if the packet's source address is a 5520 * test address, and if so use that test address's ipif for 5521 * the IRE so that the logic that sets IRE_MARK_TESTHIDDEN in 5522 * ire_add_v6() can work properly. 5523 */ 5524 if (IS_UNDER_IPMP(ill)) 5525 probe = ipif_lookup_testaddr_v6(ill, v6srcp, &src_ipif); 5526 5527 /* 5528 * Determine the outbound (destination) ill for this route. 5529 * If IPMP is not in use, that's the same as our ill. If IPMP 5530 * is in-use and we're on the IPMP interface, or we're on an 5531 * underlying ill but sending data traffic, use a suitable 5532 * destination ill from the group. The latter case covers a 5533 * subtle edge condition with multicast: when we bring up an 5534 * IPv6 data address, we will create an NCE on an underlying 5535 * interface, and send solitications to ff02::1, which would 5536 * take us through here, and cause us to create an IRE for 5537 * ff02::1. To meet our defined semantics for multicast (and 5538 * ensure there aren't unexpected echoes), that IRE needs to 5539 * use the IPMP group's nominated multicast interface. 5540 * 5541 * Note: the source ipif is determined by source address 5542 * selection later. 
5543 */ 5544 if (IS_IPMP(ill) || (IS_UNDER_IPMP(ill) && !probe)) { 5545 ill_t *ipmp_ill; 5546 ipmp_illgrp_t *illg; 5547 5548 if (IS_UNDER_IPMP(ill)) { 5549 ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 5550 } else { 5551 ipmp_ill = ill; 5552 ill_refhold(ipmp_ill); /* for symmetry */ 5553 } 5554 5555 if (ipmp_ill == NULL) 5556 goto err_ret; 5557 5558 illg = ipmp_ill->ill_grp; 5559 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 5560 dst_ill = ipmp_illgrp_hold_cast_ill(illg); 5561 else 5562 dst_ill = ipmp_illgrp_hold_next_ill(illg); 5563 5564 ill_refrele(ipmp_ill); 5565 } else { 5566 dst_ill = ill; 5567 ill_refhold(dst_ill); /* for symmetry */ 5568 } 5569 5570 if (dst_ill == NULL) { 5571 if (ip_debug > 2) { 5572 pr_addr_dbg("ip_newroute_ipif_v6: " 5573 "no dst ill for dst %s\n", 5574 AF_INET6, v6dstp); 5575 } 5576 goto err_ret; 5577 } 5578 5579 /* 5580 * Pick a source address which matches the scope of the 5581 * destination address. 5582 * For RTF_SETSRC routes, the source address is imposed by the 5583 * parent ire (fire). 5584 */ 5585 5586 if (src_ipif == NULL && fire != NULL && 5587 (fire->ire_flags & RTF_SETSRC)) { 5588 /* 5589 * Check that the ipif matching the requested source 5590 * address still exists. 
5591 */ 5592 src_ipif = ipif_lookup_addr_v6(&fire->ire_src_addr_v6, 5593 NULL, zoneid, NULL, NULL, NULL, NULL, ipst); 5594 } 5595 5596 if (src_ipif == NULL && ip6_asp_can_lookup(ipst)) { 5597 ip6_asp_table_held = B_TRUE; 5598 src_ipif = ipif_select_source_v6(dst_ill, v6dstp, 5599 B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid); 5600 } 5601 5602 if (src_ipif == NULL) { 5603 if (!unspec_src) { 5604 if (ip_debug > 2) { 5605 /* ip1dbg */ 5606 pr_addr_dbg("ip_newroute_ipif_v6: " 5607 "no src for dst %s\n", 5608 AF_INET6, v6dstp); 5609 printf(" through interface %s\n", 5610 dst_ill->ill_name); 5611 } 5612 goto err_ret; 5613 } 5614 ire_v6srcp = &ipv6_all_zeros; 5615 src_ipif = ipif; 5616 ipif_refhold(src_ipif); 5617 } else { 5618 ire_v6srcp = &src_ipif->ipif_v6src_addr; 5619 } 5620 5621 ire = ipif_to_ire_v6(ipif); 5622 if (ire == NULL) { 5623 if (ip_debug > 2) { 5624 /* ip1dbg */ 5625 pr_addr_dbg("ip_newroute_ipif_v6: v6src %s\n", 5626 AF_INET6, &ipif->ipif_v6lcl_addr); 5627 printf("ip_newroute_ipif_v6: " 5628 "if %s\n", dst_ill->ill_name); 5629 } 5630 goto err_ret; 5631 } 5632 if (ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) 5633 goto err_ret; 5634 5635 ASSERT(ire->ire_ipversion == IPV6_VERSION); 5636 5637 ip1dbg(("ip_newroute_ipif_v6: interface type %s (%d),", 5638 ip_nv_lookup(ire_nv_tbl, ire->ire_type), ire->ire_type)); 5639 if (ip_debug > 2) { 5640 /* ip1dbg */ 5641 pr_addr_dbg(" address %s\n", 5642 AF_INET6, &ire->ire_src_addr_v6); 5643 } 5644 save_ire = ire; 5645 ip2dbg(("ip_newroute_ipif: ire %p, ipif %p\n", 5646 (void *)ire, (void *)ipif)); 5647 5648 if ((fire != NULL) && (fire->ire_flags & RTF_MULTIRT)) { 5649 /* 5650 * an IRE_OFFSUBET was looked up 5651 * on that interface. 5652 * this ire has RTF_MULTIRT flag, 5653 * so the resolution loop 5654 * will be re-entered to resolve 5655 * additional routes on other 5656 * interfaces. For that purpose, 5657 * a copy of the packet is 5658 * made at this point. 
5659 */ 5660 fire->ire_last_used_time = lbolt; 5661 copy_mp = copymsg(first_mp); 5662 if (copy_mp) { 5663 MULTIRT_DEBUG_TAG(copy_mp); 5664 } 5665 } 5666 5667 switch (ire->ire_type) { 5668 case IRE_IF_NORESOLVER: { 5669 /* 5670 * We have what we need to build an IRE_CACHE. 5671 * 5672 * handle the Gated case, where we create 5673 * a NORESOLVER route for loopback. 5674 */ 5675 if (dst_ill->ill_net_type != IRE_IF_NORESOLVER) 5676 break; 5677 /* 5678 * The newly created ire will inherit the flags of the 5679 * parent ire, if any. 5680 */ 5681 ire = ire_create_v6( 5682 v6dstp, /* dest address */ 5683 &ipv6_all_ones, /* mask */ 5684 ire_v6srcp, /* source address */ 5685 NULL, /* gateway address */ 5686 &save_ire->ire_max_frag, 5687 NULL, /* no src nce */ 5688 dst_ill->ill_rq, /* recv-from queue */ 5689 dst_ill->ill_wq, /* send-to queue */ 5690 IRE_CACHE, 5691 src_ipif, 5692 NULL, 5693 (fire != NULL) ? /* Parent handle */ 5694 fire->ire_phandle : 0, 5695 save_ire->ire_ihandle, /* Interface handle */ 5696 (fire != NULL) ? 5697 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5698 0, 5699 &ire_uinfo_null, 5700 NULL, 5701 NULL, 5702 ipst); 5703 5704 if (ire == NULL) { 5705 ire_refrele(save_ire); 5706 break; 5707 } 5708 5709 err = ndp_noresolver(dst_ill, v6dstp); 5710 if (err != 0) { 5711 ire_refrele(save_ire); 5712 break; 5713 } 5714 5715 /* Prevent save_ire from getting deleted */ 5716 IRB_REFHOLD(save_ire->ire_bucket); 5717 /* Has it been removed already ? */ 5718 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5719 IRB_REFRELE(save_ire->ire_bucket); 5720 ire_refrele(save_ire); 5721 break; 5722 } 5723 5724 ire_add_then_send(q, ire, first_mp); 5725 if (ip6_asp_table_held) { 5726 ip6_asp_table_refrele(ipst); 5727 ip6_asp_table_held = B_FALSE; 5728 } 5729 5730 /* Assert that it is not deleted yet. 
*/ 5731 ASSERT(save_ire->ire_ptpn != NULL); 5732 IRB_REFRELE(save_ire->ire_bucket); 5733 ire_refrele(save_ire); 5734 if (fire != NULL) { 5735 ire_refrele(fire); 5736 fire = NULL; 5737 } 5738 5739 /* 5740 * The resolution loop is re-entered if we 5741 * actually are in a multirouting case. 5742 */ 5743 if (copy_mp != NULL) { 5744 boolean_t need_resolve = 5745 ire_multirt_need_resolve_v6(v6dstp, 5746 msg_getlabel(copy_mp), ipst); 5747 if (!need_resolve) { 5748 MULTIRT_DEBUG_UNTAG(copy_mp); 5749 freemsg(copy_mp); 5750 copy_mp = NULL; 5751 } else { 5752 /* 5753 * ipif_lookup_group_v6() calls 5754 * ire_lookup_multi_v6() that uses 5755 * ire_ftable_lookup_v6() to find 5756 * an IRE_INTERFACE for the group. 5757 * In the multirt case, 5758 * ire_lookup_multi_v6() then invokes 5759 * ire_multirt_lookup_v6() to find 5760 * the next resolvable ire. 5761 * As a result, we obtain a new 5762 * interface, derived from the 5763 * next ire. 5764 */ 5765 if (ipif_held) { 5766 ipif_refrele(ipif); 5767 ipif_held = B_FALSE; 5768 } 5769 ipif = ipif_lookup_group_v6(v6dstp, 5770 zoneid, ipst); 5771 ip2dbg(("ip_newroute_ipif: " 5772 "multirt dst %08x, ipif %p\n", 5773 ntohl(V4_PART_OF_V6((*v6dstp))), 5774 (void *)ipif)); 5775 if (ipif != NULL) { 5776 ipif_held = B_TRUE; 5777 mp = copy_mp; 5778 copy_mp = NULL; 5779 multirt_resolve_next = 5780 B_TRUE; 5781 continue; 5782 } else { 5783 freemsg(copy_mp); 5784 } 5785 } 5786 } 5787 ill_refrele(dst_ill); 5788 if (ipif_held) { 5789 ipif_refrele(ipif); 5790 ipif_held = B_FALSE; 5791 } 5792 if (src_ipif != NULL) 5793 ipif_refrele(src_ipif); 5794 return; 5795 } 5796 case IRE_IF_RESOLVER: { 5797 5798 ASSERT(dst_ill->ill_isv6); 5799 5800 /* 5801 * We obtain a partial IRE_CACHE which we will pass 5802 * along with the resolver query. When the response 5803 * comes back it will be there ready for us to add. 5804 */ 5805 /* 5806 * the newly created ire will inherit the flags of the 5807 * parent ire, if any. 
5808 */ 5809 ire = ire_create_v6( 5810 v6dstp, /* dest address */ 5811 &ipv6_all_ones, /* mask */ 5812 ire_v6srcp, /* source address */ 5813 NULL, /* gateway address */ 5814 &save_ire->ire_max_frag, 5815 NULL, /* src nce */ 5816 dst_ill->ill_rq, /* recv-from queue */ 5817 dst_ill->ill_wq, /* send-to queue */ 5818 IRE_CACHE, 5819 src_ipif, 5820 NULL, 5821 (fire != NULL) ? /* Parent handle */ 5822 fire->ire_phandle : 0, 5823 save_ire->ire_ihandle, /* Interface handle */ 5824 (fire != NULL) ? 5825 (fire->ire_flags & (RTF_SETSRC | RTF_MULTIRT)) : 5826 0, 5827 &ire_uinfo_null, 5828 NULL, 5829 NULL, 5830 ipst); 5831 5832 if (ire == NULL) { 5833 ire_refrele(save_ire); 5834 break; 5835 } 5836 5837 /* Resolve and add ire to the ctable */ 5838 err = ndp_resolver(dst_ill, v6dstp, first_mp, zoneid); 5839 switch (err) { 5840 case 0: 5841 /* Prevent save_ire from getting deleted */ 5842 IRB_REFHOLD(save_ire->ire_bucket); 5843 /* Has it been removed already ? */ 5844 if (save_ire->ire_marks & IRE_MARK_CONDEMNED) { 5845 IRB_REFRELE(save_ire->ire_bucket); 5846 ire_refrele(save_ire); 5847 break; 5848 } 5849 /* 5850 * We have a resolved cache entry, 5851 * add in the IRE. 5852 */ 5853 ire_add_then_send(q, ire, first_mp); 5854 if (ip6_asp_table_held) { 5855 ip6_asp_table_refrele(ipst); 5856 ip6_asp_table_held = B_FALSE; 5857 } 5858 5859 /* Assert that it is not deleted yet. */ 5860 ASSERT(save_ire->ire_ptpn != NULL); 5861 IRB_REFRELE(save_ire->ire_bucket); 5862 ire_refrele(save_ire); 5863 if (fire != NULL) { 5864 ire_refrele(fire); 5865 fire = NULL; 5866 } 5867 5868 /* 5869 * The resolution loop is re-entered if we 5870 * actually are in a multirouting case. 
5871 */ 5872 if (copy_mp != NULL) { 5873 boolean_t need_resolve = 5874 ire_multirt_need_resolve_v6(v6dstp, 5875 msg_getlabel(copy_mp), ipst); 5876 if (!need_resolve) { 5877 MULTIRT_DEBUG_UNTAG(copy_mp); 5878 freemsg(copy_mp); 5879 copy_mp = NULL; 5880 } else { 5881 /* 5882 * ipif_lookup_group_v6() calls 5883 * ire_lookup_multi_v6() that 5884 * uses ire_ftable_lookup_v6() 5885 * to find an IRE_INTERFACE for 5886 * the group. In the multirt 5887 * case, ire_lookup_multi_v6() 5888 * then invokes 5889 * ire_multirt_lookup_v6() to 5890 * find the next resolvable ire. 5891 * As a result, we obtain a new 5892 * interface, derived from the 5893 * next ire. 5894 */ 5895 if (ipif_held) { 5896 ipif_refrele(ipif); 5897 ipif_held = B_FALSE; 5898 } 5899 ipif = ipif_lookup_group_v6( 5900 v6dstp, zoneid, ipst); 5901 ip2dbg(("ip_newroute_ipif: " 5902 "multirt dst %08x, " 5903 "ipif %p\n", 5904 ntohl(V4_PART_OF_V6( 5905 (*v6dstp))), 5906 (void *)ipif)); 5907 if (ipif != NULL) { 5908 ipif_held = B_TRUE; 5909 mp = copy_mp; 5910 copy_mp = NULL; 5911 multirt_resolve_next = 5912 B_TRUE; 5913 continue; 5914 } else { 5915 freemsg(copy_mp); 5916 } 5917 } 5918 } 5919 ill_refrele(dst_ill); 5920 if (ipif_held) { 5921 ipif_refrele(ipif); 5922 ipif_held = B_FALSE; 5923 } 5924 if (src_ipif != NULL) 5925 ipif_refrele(src_ipif); 5926 return; 5927 5928 case EINPROGRESS: 5929 /* 5930 * mp was consumed - presumably queued. 5931 * No need for ire, presumably resolution is 5932 * in progress, and ire will be added when the 5933 * address is resolved. 5934 */ 5935 if (ip6_asp_table_held) { 5936 ip6_asp_table_refrele(ipst); 5937 ip6_asp_table_held = B_FALSE; 5938 } 5939 ire_delete(ire); 5940 ire_refrele(save_ire); 5941 if (fire != NULL) { 5942 ire_refrele(fire); 5943 fire = NULL; 5944 } 5945 5946 /* 5947 * The resolution loop is re-entered if we 5948 * actually are in a multirouting case. 
5949 */ 5950 if (copy_mp != NULL) { 5951 boolean_t need_resolve = 5952 ire_multirt_need_resolve_v6(v6dstp, 5953 msg_getlabel(copy_mp), ipst); 5954 if (!need_resolve) { 5955 MULTIRT_DEBUG_UNTAG(copy_mp); 5956 freemsg(copy_mp); 5957 copy_mp = NULL; 5958 } else { 5959 /* 5960 * ipif_lookup_group_v6() calls 5961 * ire_lookup_multi_v6() that 5962 * uses ire_ftable_lookup_v6() 5963 * to find an IRE_INTERFACE for 5964 * the group. In the multirt 5965 * case, ire_lookup_multi_v6() 5966 * then invokes 5967 * ire_multirt_lookup_v6() to 5968 * find the next resolvable ire. 5969 * As a result, we obtain a new 5970 * interface, derived from the 5971 * next ire. 5972 */ 5973 if (ipif_held) { 5974 ipif_refrele(ipif); 5975 ipif_held = B_FALSE; 5976 } 5977 ipif = ipif_lookup_group_v6( 5978 v6dstp, zoneid, ipst); 5979 ip2dbg(("ip_newroute_ipif: " 5980 "multirt dst %08x, " 5981 "ipif %p\n", 5982 ntohl(V4_PART_OF_V6( 5983 (*v6dstp))), 5984 (void *)ipif)); 5985 if (ipif != NULL) { 5986 ipif_held = B_TRUE; 5987 mp = copy_mp; 5988 copy_mp = NULL; 5989 multirt_resolve_next = 5990 B_TRUE; 5991 continue; 5992 } else { 5993 freemsg(copy_mp); 5994 } 5995 } 5996 } 5997 ill_refrele(dst_ill); 5998 if (ipif_held) { 5999 ipif_refrele(ipif); 6000 ipif_held = B_FALSE; 6001 } 6002 if (src_ipif != NULL) 6003 ipif_refrele(src_ipif); 6004 return; 6005 default: 6006 /* Some transient error */ 6007 ire_refrele(save_ire); 6008 break; 6009 } 6010 break; 6011 } 6012 default: 6013 break; 6014 } 6015 if (ip6_asp_table_held) { 6016 ip6_asp_table_refrele(ipst); 6017 ip6_asp_table_held = B_FALSE; 6018 } 6019 } while (multirt_resolve_next); 6020 6021 err_ret: 6022 if (ip6_asp_table_held) 6023 ip6_asp_table_refrele(ipst); 6024 if (ire != NULL) 6025 ire_refrele(ire); 6026 if (fire != NULL) 6027 ire_refrele(fire); 6028 if (ipif != NULL && ipif_held) 6029 ipif_refrele(ipif); 6030 if (src_ipif != NULL) 6031 ipif_refrele(src_ipif); 6032 6033 /* Multicast - no point in trying to generate ICMP error */ 6034 if (dst_ill != 
NULL) { 6035 ill = dst_ill; 6036 ill_held = B_TRUE; 6037 } 6038 if (mp->b_prev || mp->b_next) { 6039 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6040 } else { 6041 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 6042 } 6043 ip1dbg(("ip_newroute_ipif_v6: dropped\n")); 6044 mp->b_next = NULL; 6045 mp->b_prev = NULL; 6046 freemsg(first_mp); 6047 if (ill_held) 6048 ill_refrele(ill); 6049 } 6050 6051 /* 6052 * Parse and process any hop-by-hop or destination options. 6053 * 6054 * Assumes that q is an ill read queue so that ICMP errors for link-local 6055 * destinations are sent out the correct interface. 6056 * 6057 * Returns -1 if there was an error and mp has been consumed. 6058 * Returns 0 if no special action is needed. 6059 * Returns 1 if the packet contained a router alert option for this node 6060 * which is verified to be "interesting/known" for our implementation. 6061 * 6062 * XXX Note: In future as more hbh or dest options are defined, 6063 * it may be better to have different routines for hbh and dest 6064 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 6065 * may have same value in different namespaces. Or is it same namespace ?? 
 * Current code checks for each opt_type (other than pads) if it is in
 * the expected nexthdr (hbh or dest)
 *
 * Arguments:
 *	q	 - queue whose q_ptr is the ill; WR(q) is used for ICMP errors.
 *	mp	 - the packet, optionally led by an M_CTL mblk.
 *	ip6h	 - the IPv6 header; ICMP error offsets are relative to it.
 *	optptr	 - first option to parse.
 *	optlen	 - number of bytes of options starting at optptr.
 *	hdr_type - IPPROTO_HOPOPTS or IPPROTO_DSTOPTS; options seen in the
 *		   wrong kind of header are handled as errors.
 *	ipst	 - IP stack instance.
 */
static int
ip_process_options_v6(queue_t *q, mblk_t *mp, ip6_t *ip6h,
    uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_stack_t *ipst)
{
	uint8_t opt_type;
	uint_t optused;
	int ret = 0;
	mblk_t *first_mp;
	const char *errtype;
	zoneid_t zoneid;
	ill_t *ill = q->q_ptr;
	ipif_t *ipif;

	/* first_mp is what gets freed/handed to ICMP; mp is the data mblk. */
	first_mp = mp;
	if (mp->b_datap->db_type == M_CTL) {
		mp = mp->b_cont;
	}

	/* Each pass consumes exactly one option of optused bytes. */
	while (optlen != 0) {
		opt_type = *optptr;
		if (opt_type == IP6OPT_PAD1) {
			/* Pad1 is the only option without a length byte. */
			optused = 1;
		} else {
			/* Every other option has a type and a length byte. */
			if (optlen < 2)
				goto bad_opt;
			errtype = "malformed";
			/*
			 * ip6opt_ls is defined outside this function
			 * (presumably the configured label-security option
			 * type -- confirm); a match is simply stepped over.
			 */
			if (opt_type == ip6opt_ls) {
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
			} else switch (opt_type) {
			case IP6OPT_PADN:
				/*
				 * Note:We don't verify that (N-2) pad octets
				 * are zero as required by spec. Adhere to
				 * "be liberal in what you accept..." part of
				 * implementation philosophy (RFC791,RFC1122)
				 */
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				break;

			case IP6OPT_JUMBO:
				/* Both outcomes currently end in opt_error. */
				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				goto opt_error; /* XXX Not implemented! */

			case IP6OPT_ROUTER_ALERT: {
				struct ip6_opt_router *or;

				if (hdr_type != IPPROTO_HOPOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;
				or = (struct ip6_opt_router *)optptr;
				/* Check total length and alignment */
				if (optused != sizeof (*or) ||
				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
					goto opt_error;
				/*
				 * Check value; only MLD and RSVP alerts are
				 * reported to the caller, anything else is
				 * silently ignored.
				 */
				switch (*((uint16_t *)or->ip6or_value)) {
				case IP6_ALERT_MLD:
				case IP6_ALERT_RSVP:
					ret = 1;
				}
				break;
			}
			case IP6OPT_HOME_ADDRESS: {
				/*
				 * Minimal support for the home address option
				 * (which is required by all IPv6 nodes).
				 * Implement by just swapping the home address
				 * and source address.
				 * XXX Note: this has IPsec implications since
				 * AH needs to take this into account.
				 * Also, when IPsec is used we need to ensure
				 * that this is only processed once
				 * in the received packet (to avoid swapping
				 * back and forth).
				 * NOTE:This option processing is considered
				 * to be unsafe and prone to a denial of
				 * service attack.
				 * The current processing is not safe even with
				 * IPsec secured IP packets. Since the home
				 * address option processing requirement still
				 * is in the IETF draft and in the process of
				 * being redefined for its usage, it has been
				 * decided to turn off the option by default.
				 * If this section of code needs to be executed,
				 * ndd variable ip6_ignore_home_address_opt
				 * should be set to 0 at the user's own risk.
				 */
				struct ip6_opt_home_address *oh;
				in6_addr_t tmp;

				if (ipst->ips_ipv6_ignore_home_address_opt)
					goto opt_error;

				if (hdr_type != IPPROTO_DSTOPTS)
					goto opt_error;
				optused = 2 + optptr[1];
				if (optused > optlen)
					goto bad_opt;

				/*
				 * We did this dest. opt the first time
				 * around (i.e. before AH processing).
				 * If we've done AH... stop now.
				 */
				if (first_mp != mp) {
					ipsec_in_t *ii;

					ii = (ipsec_in_t *)first_mp->b_rptr;
					if (ii->ipsec_in_ah_sa != NULL)
						break;
				}

				oh = (struct ip6_opt_home_address *)optptr;
				/* Check total length and alignment */
				if (optused < sizeof (*oh) ||
				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
					goto opt_error;
				/* Swap ip6_src and the home address */
				tmp = ip6h->ip6_src;
				/* XXX Note: only 8 byte alignment option */
				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
				*(in6_addr_t *)oh->ip6oh_addr = tmp;
				break;
			}

			case IP6OPT_TUNNEL_LIMIT:
				if (hdr_type != IPPROTO_DSTOPTS) {
					goto opt_error;
				}
				optused = 2 + optptr[1];
				if (optused > optlen) {
					goto bad_opt;
				}
				/* Type + length + one byte of limit. */
				if (optused != 3) {
					goto opt_error;
				}
				break;

			default:
				errtype = "unknown";
				/* FALLTHROUGH */
			opt_error:
				/*
				 * Reached by falling through from default and
				 * by goto from the cases above.  The action
				 * taken is selected by IP6OPT_TYPE(opt_type).
				 */
				/* Determine which zone should send error */
				zoneid = ipif_lookup_addr_zoneid_v6(
				    &ip6h->ip6_dst, ill, ipst);
				switch (IP6OPT_TYPE(opt_type)) {
				case IP6OPT_TYPE_SKIP:
					/* Step over it and keep parsing. */
					optused = 2 + optptr[1];
					if (optused > optlen)
						goto bad_opt;
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x skipped\n",
					    errtype, opt_type));
					break;
				case IP6OPT_TYPE_DISCARD:
					ip1dbg(("ip_process_options_v6: %s "
					    "opt 0x%x; packet dropped\n",
					    errtype, opt_type));
					freemsg(first_mp);
					return (-1);
				case IP6OPT_TYPE_ICMP:
					/* No zone to answer from: just drop. */
					if (zoneid == ALL_ZONES) {
						freemsg(first_mp);
						return (-1);
					}
					icmp_param_problem_v6(WR(q), first_mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, B_FALSE, zoneid, ipst);
					return (-1);
				case IP6OPT_TYPE_FORCEICMP:
					/*
					 * If we don't have a zone and the dst
					 * addr is multicast, then pick a zone
					 * based on the inbound interface.
					 */
					if (zoneid == ALL_ZONES &&
					    IN6_IS_ADDR_MULTICAST(
					    &ip6h->ip6_dst)) {
						ipif = ipif_select_source_v6(
						    ill, &ip6h->ip6_src,
						    B_TRUE,
						    IPV6_PREFER_SRC_DEFAULT,
						    ALL_ZONES);
						if (ipif != NULL) {
							zoneid =
							    ipif->ipif_zoneid;
							ipif_refrele(ipif);
						}
					}
					if (zoneid == ALL_ZONES) {
						freemsg(first_mp);
						return (-1);
					}
					icmp_param_problem_v6(WR(q), first_mp,
					    ICMP6_PARAMPROB_OPTION,
					    (uint32_t)(optptr -
					    (uint8_t *)ip6h),
					    B_FALSE, B_TRUE, zoneid, ipst);
					return (-1);
				default:
					ASSERT(0);
				}
			}
		}
		optlen -= optused;
		optptr += optused;
	}
	return (ret);

bad_opt:
	/*
	 * The option's length runs past the option area (or the length
	 * byte itself is missing).
	 */
	/* Determine which zone should send error */
	zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst);
	if (zoneid == ALL_ZONES) {
		freemsg(first_mp);
	} else {
		icmp_param_problem_v6(WR(q), first_mp, ICMP6_PARAMPROB_OPTION,
		    (uint32_t)(optptr - (uint8_t *)ip6h),
		    B_FALSE, B_FALSE, zoneid, ipst);
	}
	return (-1);
}

/*
 * Process a routing header that is not yet empty.
 * Because of RFC 5095, we now reject all route headers.
 *
 * Both mp and hada_mp are always consumed: they are freed, except that
 * mp is handed to icmp_param_problem_v6() when source-routed forwarding
 * is enabled and no hada_mp accompanies the packet.
 */
static void
ip_process_rthdr(queue_t *q, mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
    ill_t *ill, mblk_t *hada_mp)
{
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(rth->ip6r_segleft != 0);

	if (!ipst->ips_ipv6_forward_src_routed) {
		/* XXX Check for source routed out same interface?
*/ 6314 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 6315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 6316 freemsg(hada_mp); 6317 freemsg(mp); 6318 return; 6319 } 6320 if (hada_mp != NULL) { 6321 freemsg(hada_mp); 6322 freemsg(mp); 6323 return; 6324 } 6325 /* Sent by forwarding path, and router is global zone */ 6326 icmp_param_problem_v6(WR(q), mp, ICMP6_PARAMPROB_HEADER, 6327 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), B_FALSE, 6328 B_FALSE, GLOBAL_ZONEID, ipst); 6329 } 6330 6331 /* 6332 * Read side put procedure for IPv6 module. 6333 */ 6334 void 6335 ip_rput_v6(queue_t *q, mblk_t *mp) 6336 { 6337 mblk_t *first_mp; 6338 mblk_t *hada_mp = NULL; 6339 ip6_t *ip6h; 6340 boolean_t ll_multicast = B_FALSE; 6341 boolean_t mctl_present = B_FALSE; 6342 ill_t *ill; 6343 struct iocblk *iocp; 6344 uint_t flags = 0; 6345 mblk_t *dl_mp; 6346 ip_stack_t *ipst; 6347 int check; 6348 6349 ill = (ill_t *)q->q_ptr; 6350 ipst = ill->ill_ipst; 6351 if (ill->ill_state_flags & ILL_CONDEMNED) { 6352 union DL_primitives *dl; 6353 6354 dl = (union DL_primitives *)mp->b_rptr; 6355 /* 6356 * Things are opening or closing - only accept DLPI 6357 * ack messages. If the stream is closing and ip_wsrv 6358 * has completed, ip_close is out of the qwait, but has 6359 * not yet completed qprocsoff. Don't proceed any further 6360 * because the ill has been cleaned up and things hanging 6361 * off the ill have been freed. 6362 */ 6363 if ((mp->b_datap->db_type != M_PCPROTO) || 6364 (dl->dl_primitive == DL_UNITDATA_IND)) { 6365 inet_freemsg(mp); 6366 return; 6367 } 6368 } 6369 6370 dl_mp = NULL; 6371 switch (mp->b_datap->db_type) { 6372 case M_DATA: { 6373 int hlen; 6374 uchar_t *ucp; 6375 struct ether_header *eh; 6376 dl_unitdata_ind_t *dui; 6377 6378 /* 6379 * This is a work-around for CR 6451644, a bug in Nemo. It 6380 * should be removed when that problem is fixed. 
6381 */ 6382 if (ill->ill_mactype == DL_ETHER && 6383 (hlen = MBLKHEAD(mp)) >= sizeof (struct ether_header) && 6384 (ucp = mp->b_rptr)[-1] == (ETHERTYPE_IPV6 & 0xFF) && 6385 ucp[-2] == (ETHERTYPE_IPV6 >> 8)) { 6386 if (hlen >= sizeof (struct ether_vlan_header) && 6387 ucp[-5] == 0 && ucp[-6] == 0x81) 6388 ucp -= sizeof (struct ether_vlan_header); 6389 else 6390 ucp -= sizeof (struct ether_header); 6391 /* 6392 * If it's a group address, then fabricate a 6393 * DL_UNITDATA_IND message. 6394 */ 6395 if ((ll_multicast = (ucp[0] & 1)) != 0 && 6396 (dl_mp = allocb(DL_UNITDATA_IND_SIZE + 16, 6397 BPRI_HI)) != NULL) { 6398 eh = (struct ether_header *)ucp; 6399 dui = (dl_unitdata_ind_t *)dl_mp->b_rptr; 6400 DB_TYPE(dl_mp) = M_PROTO; 6401 dl_mp->b_wptr = (uchar_t *)(dui + 1) + 16; 6402 dui->dl_primitive = DL_UNITDATA_IND; 6403 dui->dl_dest_addr_length = 8; 6404 dui->dl_dest_addr_offset = DL_UNITDATA_IND_SIZE; 6405 dui->dl_src_addr_length = 8; 6406 dui->dl_src_addr_offset = DL_UNITDATA_IND_SIZE + 6407 8; 6408 dui->dl_group_address = 1; 6409 ucp = (uchar_t *)(dui + 1); 6410 if (ill->ill_sap_length > 0) 6411 ucp += ill->ill_sap_length; 6412 bcopy(&eh->ether_dhost, ucp, 6); 6413 bcopy(&eh->ether_shost, ucp + 8, 6); 6414 ucp = (uchar_t *)(dui + 1); 6415 if (ill->ill_sap_length < 0) 6416 ucp += 8 + ill->ill_sap_length; 6417 bcopy(&eh->ether_type, ucp, 2); 6418 bcopy(&eh->ether_type, ucp + 8, 2); 6419 } 6420 } 6421 break; 6422 } 6423 6424 case M_PROTO: 6425 case M_PCPROTO: 6426 if (((dl_unitdata_ind_t *)mp->b_rptr)->dl_primitive != 6427 DL_UNITDATA_IND) { 6428 /* Go handle anything other than data elsewhere. */ 6429 ip_rput_dlpi(q, mp); 6430 return; 6431 } 6432 ll_multicast = ip_get_dlpi_mbcast(ill, mp); 6433 6434 /* Save the DLPI header. 
*/ 6435 dl_mp = mp; 6436 mp = mp->b_cont; 6437 dl_mp->b_cont = NULL; 6438 break; 6439 case M_BREAK: 6440 panic("ip_rput_v6: got an M_BREAK"); 6441 /*NOTREACHED*/ 6442 case M_IOCACK: 6443 iocp = (struct iocblk *)mp->b_rptr; 6444 switch (iocp->ioc_cmd) { 6445 case DL_IOC_HDR_INFO: 6446 ill = (ill_t *)q->q_ptr; 6447 ill_fastpath_ack(ill, mp); 6448 return; 6449 default: 6450 putnext(q, mp); 6451 return; 6452 } 6453 /* FALLTHRU */ 6454 case M_ERROR: 6455 case M_HANGUP: 6456 mutex_enter(&ill->ill_lock); 6457 if (ill->ill_state_flags & ILL_CONDEMNED) { 6458 mutex_exit(&ill->ill_lock); 6459 freemsg(mp); 6460 return; 6461 } 6462 ill_refhold_locked(ill); 6463 mutex_exit(&ill->ill_lock); 6464 qwriter_ip(ill, q, mp, ip_rput_other, CUR_OP, B_FALSE); 6465 return; 6466 case M_CTL: 6467 if ((MBLKL(mp) > sizeof (int)) && 6468 ((da_ipsec_t *)mp->b_rptr)->da_type == IPHADA_M_CTL) { 6469 ASSERT(MBLKL(mp) >= sizeof (da_ipsec_t)); 6470 mctl_present = B_TRUE; 6471 break; 6472 } 6473 putnext(q, mp); 6474 return; 6475 case M_IOCNAK: 6476 iocp = (struct iocblk *)mp->b_rptr; 6477 switch (iocp->ioc_cmd) { 6478 case DL_IOC_HDR_INFO: 6479 ip_rput_other(NULL, q, mp, NULL); 6480 return; 6481 default: 6482 break; 6483 } 6484 /* FALLTHRU */ 6485 default: 6486 putnext(q, mp); 6487 return; 6488 } 6489 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 6490 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 6491 (mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 6492 /* 6493 * if db_ref > 1 then copymsg and free original. Packet may be 6494 * changed and do not want other entity who has a reference to this 6495 * message to trip over the changes. This is a blind change because 6496 * trying to catch all places that might change packet is too 6497 * difficult (since it may be a module above this one). 
6498 */ 6499 if (mp->b_datap->db_ref > 1) { 6500 mblk_t *mp1; 6501 6502 mp1 = copymsg(mp); 6503 freemsg(mp); 6504 if (mp1 == NULL) { 6505 first_mp = NULL; 6506 goto discard; 6507 } 6508 mp = mp1; 6509 } 6510 first_mp = mp; 6511 if (mctl_present) { 6512 hada_mp = first_mp; 6513 mp = first_mp->b_cont; 6514 } 6515 6516 if ((check = ip_check_v6_mblk(mp, ill)) == IP6_MBLK_HDR_ERR) { 6517 freemsg(mp); 6518 return; 6519 } 6520 6521 ip6h = (ip6_t *)mp->b_rptr; 6522 6523 /* 6524 * ip:::receive must see ipv6 packets with a full header, 6525 * and so is placed after the IP6_MBLK_HDR_ERR check. 6526 */ 6527 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 6528 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 6529 int, 0); 6530 6531 if (check != IP6_MBLK_OK) { 6532 freemsg(mp); 6533 return; 6534 } 6535 6536 DTRACE_PROBE4(ip6__physical__in__start, 6537 ill_t *, ill, ill_t *, NULL, 6538 ip6_t *, ip6h, mblk_t *, first_mp); 6539 6540 FW_HOOKS6(ipst->ips_ip6_physical_in_event, 6541 ipst->ips_ipv6firewall_physical_in, 6542 ill, NULL, ip6h, first_mp, mp, ll_multicast, ipst); 6543 6544 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, first_mp); 6545 6546 if (first_mp == NULL) 6547 return; 6548 6549 /* 6550 * Attach any necessary label information to this packet. 6551 */ 6552 if (is_system_labeled() && !tsol_get_pkt_label(mp, IPV6_VERSION)) { 6553 if (ip6opt_ls != 0) 6554 ip0dbg(("tsol_get_pkt_label v6 failed\n")); 6555 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 6556 goto discard; 6557 } 6558 6559 /* IP observability hook. 
*/ 6560 if (ipst->ips_ip6_observe.he_interested) { 6561 zoneid_t dzone; 6562 6563 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 6564 ALL_ZONES); 6565 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, 6566 ill, ipst); 6567 } 6568 6569 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) == 6570 IPV6_DEFAULT_VERS_AND_FLOW) { 6571 /* 6572 * It may be a bit too expensive to do this mapped address 6573 * check here, but in the interest of robustness, it seems 6574 * like the correct place. 6575 * TODO: Avoid this check for e.g. connected TCP sockets 6576 */ 6577 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src)) { 6578 ip1dbg(("ip_rput_v6: pkt with mapped src addr\n")); 6579 goto discard; 6580 } 6581 6582 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src)) { 6583 ip1dbg(("ip_rput_v6: pkt with loopback src")); 6584 goto discard; 6585 } else if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) { 6586 ip1dbg(("ip_rput_v6: pkt with loopback dst")); 6587 goto discard; 6588 } 6589 6590 flags |= (ll_multicast ? IP6_IN_LLMCAST : 0); 6591 ip_rput_data_v6(q, ill, mp, ip6h, flags, hada_mp, dl_mp); 6592 } else { 6593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion); 6594 goto discard; 6595 } 6596 freemsg(dl_mp); 6597 return; 6598 6599 discard: 6600 if (dl_mp != NULL) 6601 freeb(dl_mp); 6602 freemsg(first_mp); 6603 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6604 } 6605 6606 /* 6607 * Walk through the IPv6 packet in mp and see if there's an AH header 6608 * in it. See if the AH header needs to get done before other headers in 6609 * the packet. (Worker function for ipsec_early_ah_v6().) 
6610 */ 6611 #define IPSEC_HDR_DONT_PROCESS 0 6612 #define IPSEC_HDR_PROCESS 1 6613 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 6614 static int 6615 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 6616 { 6617 uint_t length; 6618 uint_t ehdrlen; 6619 uint8_t *whereptr; 6620 uint8_t *endptr; 6621 uint8_t *nexthdrp; 6622 ip6_dest_t *desthdr; 6623 ip6_rthdr_t *rthdr; 6624 ip6_t *ip6h; 6625 6626 /* 6627 * For now just pullup everything. In general, the less pullups, 6628 * the better, but there's so much squirrelling through anyway, 6629 * it's just easier this way. 6630 */ 6631 if (!pullupmsg(mp, -1)) { 6632 return (IPSEC_MEMORY_ERROR); 6633 } 6634 6635 ip6h = (ip6_t *)mp->b_rptr; 6636 length = IPV6_HDR_LEN; 6637 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 6638 endptr = mp->b_wptr; 6639 6640 /* 6641 * We can't just use the argument nexthdr in the place 6642 * of nexthdrp becaue we don't dereference nexthdrp 6643 * till we confirm whether it is a valid address. 6644 */ 6645 nexthdrp = &ip6h->ip6_nxt; 6646 while (whereptr < endptr) { 6647 /* Is there enough left for len + nexthdr? */ 6648 if (whereptr + MIN_EHDR_LEN > endptr) 6649 return (IPSEC_MEMORY_ERROR); 6650 6651 switch (*nexthdrp) { 6652 case IPPROTO_HOPOPTS: 6653 case IPPROTO_DSTOPTS: 6654 /* Assumes the headers are identical for hbh and dst */ 6655 desthdr = (ip6_dest_t *)whereptr; 6656 ehdrlen = 8 * (desthdr->ip6d_len + 1); 6657 if ((uchar_t *)desthdr + ehdrlen > endptr) 6658 return (IPSEC_MEMORY_ERROR); 6659 /* 6660 * Return DONT_PROCESS because the destination 6661 * options header may be for each hop in a 6662 * routing-header, and we only want AH if we're 6663 * finished with routing headers. 
6664 */ 6665 if (*nexthdrp == IPPROTO_DSTOPTS) 6666 return (IPSEC_HDR_DONT_PROCESS); 6667 nexthdrp = &desthdr->ip6d_nxt; 6668 break; 6669 case IPPROTO_ROUTING: 6670 rthdr = (ip6_rthdr_t *)whereptr; 6671 6672 /* 6673 * If there's more hops left on the routing header, 6674 * return now with DON'T PROCESS. 6675 */ 6676 if (rthdr->ip6r_segleft > 0) 6677 return (IPSEC_HDR_DONT_PROCESS); 6678 6679 ehdrlen = 8 * (rthdr->ip6r_len + 1); 6680 if ((uchar_t *)rthdr + ehdrlen > endptr) 6681 return (IPSEC_MEMORY_ERROR); 6682 nexthdrp = &rthdr->ip6r_nxt; 6683 break; 6684 case IPPROTO_FRAGMENT: 6685 /* Wait for reassembly */ 6686 return (IPSEC_HDR_DONT_PROCESS); 6687 case IPPROTO_AH: 6688 *nexthdr = IPPROTO_AH; 6689 return (IPSEC_HDR_PROCESS); 6690 case IPPROTO_NONE: 6691 /* No next header means we're finished */ 6692 default: 6693 return (IPSEC_HDR_DONT_PROCESS); 6694 } 6695 length += ehdrlen; 6696 whereptr += ehdrlen; 6697 } 6698 /* 6699 * Malformed/truncated packet. 6700 */ 6701 return (IPSEC_MEMORY_ERROR); 6702 } 6703 6704 /* 6705 * Path for AH if options are present. If this is the first time we are 6706 * sending a datagram to AH, allocate a IPSEC_IN message and prepend it. 6707 * Otherwise, just fanout. Return value answers the boolean question: 6708 * "Did I consume the mblk you sent me?" 6709 * 6710 * Sometimes AH needs to be done before other IPv6 headers for security 6711 * reasons. This function (and its ipsec_needs_processing_v6() above) 6712 * indicates if that is so, and fans out to the appropriate IPsec protocol 6713 * for the datagram passed in. 
6714 */ 6715 static boolean_t 6716 ipsec_early_ah_v6(queue_t *q, mblk_t *first_mp, boolean_t mctl_present, 6717 ill_t *ill, ill_t *inill, mblk_t *hada_mp, zoneid_t zoneid) 6718 { 6719 mblk_t *mp; 6720 uint8_t nexthdr; 6721 ipsec_in_t *ii = NULL; 6722 ah_t *ah; 6723 ipsec_status_t ipsec_rc; 6724 ip_stack_t *ipst = ill->ill_ipst; 6725 netstack_t *ns = ipst->ips_netstack; 6726 ipsec_stack_t *ipss = ns->netstack_ipsec; 6727 6728 ASSERT((hada_mp == NULL) || (!mctl_present)); 6729 6730 switch (ipsec_needs_processing_v6( 6731 (mctl_present ? first_mp->b_cont : first_mp), &nexthdr)) { 6732 case IPSEC_MEMORY_ERROR: 6733 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6734 freemsg(hada_mp); 6735 freemsg(first_mp); 6736 return (B_TRUE); 6737 case IPSEC_HDR_DONT_PROCESS: 6738 return (B_FALSE); 6739 } 6740 6741 /* Default means send it to AH! */ 6742 ASSERT(nexthdr == IPPROTO_AH); 6743 if (!mctl_present) { 6744 mp = first_mp; 6745 first_mp = ipsec_in_alloc(B_FALSE, ipst->ips_netstack); 6746 if (first_mp == NULL) { 6747 ip1dbg(("ipsec_early_ah_v6: IPSEC_IN " 6748 "allocation failure.\n")); 6749 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6750 freemsg(hada_mp); 6751 freemsg(mp); 6752 return (B_TRUE); 6753 } 6754 /* 6755 * Store the ill_index so that when we come back 6756 * from IPSEC we ride on the same queue. 6757 */ 6758 ii = (ipsec_in_t *)first_mp->b_rptr; 6759 ii->ipsec_in_ill_index = ill->ill_phyint->phyint_ifindex; 6760 ii->ipsec_in_rill_index = inill->ill_phyint->phyint_ifindex; 6761 first_mp->b_cont = mp; 6762 } 6763 /* 6764 * Cache hardware acceleration info. 
6765 */ 6766 if (hada_mp != NULL) { 6767 ASSERT(ii != NULL); 6768 IPSECHW_DEBUG(IPSECHW_PKT, ("ipsec_early_ah_v6: " 6769 "caching data attr.\n")); 6770 ii->ipsec_in_accelerated = B_TRUE; 6771 ii->ipsec_in_da = hada_mp; 6772 } 6773 6774 if (!ipsec_loaded(ipss)) { 6775 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, zoneid, ipst); 6776 return (B_TRUE); 6777 } 6778 6779 ah = ipsec_inbound_ah_sa(first_mp, ns); 6780 if (ah == NULL) 6781 return (B_TRUE); 6782 ASSERT(ii->ipsec_in_ah_sa != NULL); 6783 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != NULL); 6784 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func(first_mp, ah); 6785 6786 switch (ipsec_rc) { 6787 case IPSEC_STATUS_SUCCESS: 6788 /* we're done with IPsec processing, send it up */ 6789 ip_fanout_proto_again(first_mp, ill, inill, NULL); 6790 break; 6791 case IPSEC_STATUS_FAILED: 6792 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsInDiscards); 6793 break; 6794 case IPSEC_STATUS_PENDING: 6795 /* no action needed */ 6796 break; 6797 } 6798 return (B_TRUE); 6799 } 6800 6801 static boolean_t 6802 ip_iptun_input_v6(mblk_t *ipsec_mp, mblk_t *data_mp, 6803 size_t hdr_len, uint8_t nexthdr, zoneid_t zoneid, ill_t *ill, 6804 ip_stack_t *ipst) 6805 { 6806 conn_t *connp; 6807 6808 ASSERT(ipsec_mp == NULL || ipsec_mp->b_cont == data_mp); 6809 6810 connp = ipcl_classify_v6(data_mp, nexthdr, hdr_len, zoneid, ipst); 6811 if (connp != NULL) { 6812 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 6813 connp->conn_recv(connp, ipsec_mp != NULL ? ipsec_mp : data_mp, 6814 NULL); 6815 CONN_DEC_REF(connp); 6816 return (B_TRUE); 6817 } 6818 return (B_FALSE); 6819 } 6820 6821 /* 6822 * Validate the IPv6 mblk for alignment. 
6823 */ 6824 int 6825 ip_check_v6_mblk(mblk_t *mp, ill_t *ill) 6826 { 6827 int pkt_len, ip6_len; 6828 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 6829 6830 /* check for alignment and full IPv6 header */ 6831 if (!OK_32PTR((uchar_t *)ip6h) || 6832 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 6833 if (!pullupmsg(mp, IPV6_HDR_LEN)) { 6834 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6835 ip1dbg(("ip_rput_v6: pullupmsg failed\n")); 6836 return (IP6_MBLK_HDR_ERR); 6837 } 6838 ip6h = (ip6_t *)mp->b_rptr; 6839 } 6840 6841 ASSERT(OK_32PTR((uchar_t *)ip6h) && 6842 (mp->b_wptr - (uchar_t *)ip6h) >= IPV6_HDR_LEN); 6843 6844 if (mp->b_cont == NULL) 6845 pkt_len = mp->b_wptr - mp->b_rptr; 6846 else 6847 pkt_len = msgdsize(mp); 6848 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6849 6850 /* 6851 * Check for bogus (too short packet) and packet which 6852 * was padded by the link layer. 6853 */ 6854 if (ip6_len != pkt_len) { 6855 ssize_t diff; 6856 6857 if (ip6_len > pkt_len) { 6858 ip1dbg(("ip_rput_data_v6: packet too short %d %d\n", 6859 ip6_len, pkt_len)); 6860 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 6861 return (IP6_MBLK_LEN_ERR); 6862 } 6863 diff = (ssize_t)(pkt_len - ip6_len); 6864 6865 if (!adjmsg(mp, -diff)) { 6866 ip1dbg(("ip_rput_data_v6: adjmsg failed\n")); 6867 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6868 return (IP6_MBLK_LEN_ERR); 6869 } 6870 6871 /* 6872 * adjmsg may have freed an mblk from the chain, hence 6873 * invalidate any hw checksum here. This will force IP to 6874 * calculate the checksum in sw, but only for this packet. 6875 */ 6876 DB_CKSUMFLAGS(mp) = 0; 6877 } 6878 return (IP6_MBLK_OK); 6879 } 6880 6881 /* 6882 * ip_rput_data_v6 -- received IPv6 packets in M_DATA messages show up here. 6883 * ip_rput_v6 has already verified alignment, the min length, the version, 6884 * and db_ref = 1. 6885 * 6886 * The ill passed in (the arg named inill) is the ill that the packet 6887 * actually arrived on. 
We need to remember this when saving the 6888 * input interface index into potential IPV6_PKTINFO data in 6889 * ip_add_info_v6(). 6890 * 6891 * This routine doesn't free dl_mp; that's the caller's responsibility on 6892 * return. (Note that the callers are complex enough that there's no tail 6893 * recursion here anyway.) 6894 */ 6895 void 6896 ip_rput_data_v6(queue_t *q, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 6897 uint_t flags, mblk_t *hada_mp, mblk_t *dl_mp) 6898 { 6899 ire_t *ire = NULL; 6900 ill_t *ill = inill; 6901 ill_t *outill; 6902 uint8_t *whereptr; 6903 uint8_t nexthdr; 6904 uint16_t remlen; 6905 uint_t prev_nexthdr_offset; 6906 uint_t used; 6907 size_t old_pkt_len; 6908 size_t pkt_len; 6909 uint16_t ip6_len; 6910 uint_t hdr_len; 6911 boolean_t mctl_present; 6912 mblk_t *first_mp; 6913 mblk_t *first_mp1; 6914 boolean_t no_forward; 6915 ip6_hbh_t *hbhhdr; 6916 boolean_t ll_multicast = (flags & IP6_IN_LLMCAST); 6917 conn_t *connp; 6918 uint32_t ports; 6919 zoneid_t zoneid = GLOBAL_ZONEID; 6920 uint16_t hck_flags, reass_hck_flags; 6921 uint32_t reass_sum; 6922 boolean_t cksum_err; 6923 mblk_t *mp1; 6924 ip_stack_t *ipst = inill->ill_ipst; 6925 6926 EXTRACT_PKT_MP(mp, first_mp, mctl_present); 6927 6928 if (hada_mp != NULL) { 6929 /* 6930 * It's an IPsec accelerated packet. 6931 * Keep a pointer to the data attributes around until 6932 * we allocate the ipsecinfo structure. 6933 */ 6934 IPSECHW_DEBUG(IPSECHW_PKT, 6935 ("ip_rput_data_v6: inbound HW accelerated IPsec pkt\n")); 6936 hada_mp->b_cont = NULL; 6937 /* 6938 * Since it is accelerated, it came directly from 6939 * the ill. 
6940 */ 6941 ASSERT(mctl_present == B_FALSE); 6942 ASSERT(mp->b_datap->db_type != M_CTL); 6943 } 6944 6945 ip6h = (ip6_t *)mp->b_rptr; 6946 ip6_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 6947 old_pkt_len = pkt_len = ip6_len; 6948 6949 if (ILL_HCKSUM_CAPABLE(ill) && !mctl_present && dohwcksum) 6950 hck_flags = DB_CKSUMFLAGS(mp); 6951 else 6952 hck_flags = 0; 6953 6954 /* Clear checksum flags in case we need to forward */ 6955 DB_CKSUMFLAGS(mp) = 0; 6956 reass_sum = reass_hck_flags = 0; 6957 6958 nexthdr = ip6h->ip6_nxt; 6959 6960 prev_nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 6961 (uchar_t *)ip6h); 6962 whereptr = (uint8_t *)&ip6h[1]; 6963 remlen = pkt_len - IPV6_HDR_LEN; /* Track how much is left */ 6964 6965 /* Process hop by hop header options */ 6966 if (nexthdr == IPPROTO_HOPOPTS) { 6967 uint_t ehdrlen; 6968 uint8_t *optptr; 6969 6970 if (remlen < MIN_EHDR_LEN) 6971 goto pkt_too_short; 6972 if (mp->b_cont != NULL && 6973 whereptr + MIN_EHDR_LEN > mp->b_wptr) { 6974 if (!pullupmsg(mp, IPV6_HDR_LEN + MIN_EHDR_LEN)) { 6975 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6976 freemsg(hada_mp); 6977 freemsg(first_mp); 6978 return; 6979 } 6980 ip6h = (ip6_t *)mp->b_rptr; 6981 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 6982 } 6983 hbhhdr = (ip6_hbh_t *)whereptr; 6984 nexthdr = hbhhdr->ip6h_nxt; 6985 prev_nexthdr_offset = (uint_t)(whereptr - (uint8_t *)ip6h); 6986 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 6987 6988 if (remlen < ehdrlen) 6989 goto pkt_too_short; 6990 if (mp->b_cont != NULL && 6991 whereptr + ehdrlen > mp->b_wptr) { 6992 if (!pullupmsg(mp, IPV6_HDR_LEN + ehdrlen)) { 6993 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 6994 freemsg(hada_mp); 6995 freemsg(first_mp); 6996 return; 6997 } 6998 ip6h = (ip6_t *)mp->b_rptr; 6999 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7000 hbhhdr = (ip6_hbh_t *)whereptr; 7001 } 7002 7003 optptr = whereptr + 2; 7004 whereptr += ehdrlen; 7005 remlen -= ehdrlen; 7006 switch (ip_process_options_v6(q, first_mp, 
ip6h, optptr, 7007 ehdrlen - 2, IPPROTO_HOPOPTS, ipst)) { 7008 case -1: 7009 /* 7010 * Packet has been consumed and any 7011 * needed ICMP messages sent. 7012 */ 7013 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7014 freemsg(hada_mp); 7015 return; 7016 case 0: 7017 /* no action needed */ 7018 break; 7019 case 1: 7020 /* Known router alert */ 7021 goto ipv6forus; 7022 } 7023 } 7024 7025 /* 7026 * On incoming v6 multicast packets we will bypass the ire table, 7027 * and assume that the read queue corresponds to the targetted 7028 * interface. 7029 * 7030 * The effect of this is the same as the IPv4 original code, but is 7031 * much cleaner I think. See ip_rput for how that was done. 7032 */ 7033 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7034 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 7035 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, pkt_len); 7036 7037 /* 7038 * So that we don't end up with dups, only one ill in an IPMP 7039 * group is nominated to receive multicast data traffic. 7040 * However, link-locals on any underlying interfaces will have 7041 * joined their solicited-node multicast addresses and we must 7042 * accept those packets. (We don't attempt to precisely 7043 * filter out duplicate solicited-node multicast packets since 7044 * e.g. an IPMP interface and underlying interface may have 7045 * the same solicited-node multicast address.) Note that we 7046 * won't generally have duplicates because we only issue a 7047 * DL_ENABMULTI_REQ on one interface in a group; the exception 7048 * is when PHYI_MULTI_BCAST is set. 7049 */ 7050 if (IS_UNDER_IPMP(ill) && !ill->ill_nom_cast && 7051 !IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 7052 goto drop_pkt; 7053 } 7054 7055 /* 7056 * XXX TODO Give to mrouted to for multicast forwarding. 
7057 */ 7058 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 7059 ALL_ZONES) == NULL) { 7060 if (ip_debug > 3) { 7061 /* ip2dbg */ 7062 pr_addr_dbg("ip_rput_data_v6: got mcast packet" 7063 " which is not for us: %s\n", AF_INET6, 7064 &ip6h->ip6_dst); 7065 } 7066 drop_pkt: BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7067 freemsg(hada_mp); 7068 freemsg(first_mp); 7069 return; 7070 } 7071 if (ip_debug > 3) { 7072 /* ip2dbg */ 7073 pr_addr_dbg("ip_rput_data_v6: multicast for us: %s\n", 7074 AF_INET6, &ip6h->ip6_dst); 7075 } 7076 zoneid = GLOBAL_ZONEID; 7077 goto ipv6forus; 7078 } 7079 7080 /* 7081 * Find an ire that matches destination. For link-local addresses 7082 * we have to match the ill. 7083 * TBD for site local addresses. 7084 */ 7085 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) { 7086 ire = ire_ctable_lookup_v6(&ip6h->ip6_dst, NULL, 7087 IRE_CACHE|IRE_LOCAL, ill->ill_ipif, ALL_ZONES, NULL, 7088 MATCH_IRE_TYPE | MATCH_IRE_ILL, ipst); 7089 } else { 7090 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, ALL_ZONES, 7091 msg_getlabel(mp), ipst); 7092 7093 if (ire != NULL && ire->ire_stq != NULL && 7094 ire->ire_zoneid != GLOBAL_ZONEID && 7095 ire->ire_zoneid != ALL_ZONES) { 7096 /* 7097 * Should only use IREs that are visible from the 7098 * global zone for forwarding. 7099 */ 7100 ire_refrele(ire); 7101 ire = ire_cache_lookup_v6(&ip6h->ip6_dst, 7102 GLOBAL_ZONEID, msg_getlabel(mp), ipst); 7103 } 7104 } 7105 7106 if (ire == NULL) { 7107 /* 7108 * No matching IRE found. Mark this packet as having 7109 * originated externally. 
7110 */ 7111 if (!(ill->ill_flags & ILLF_ROUTER) || ll_multicast) { 7112 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7113 if (!(ill->ill_flags & ILLF_ROUTER)) { 7114 BUMP_MIB(ill->ill_ip_mib, 7115 ipIfStatsInAddrErrors); 7116 } 7117 freemsg(hada_mp); 7118 freemsg(first_mp); 7119 return; 7120 } 7121 if (ip6h->ip6_hops <= 1) { 7122 if (hada_mp != NULL) 7123 goto hada_drop; 7124 /* Sent by forwarding path, and router is global zone */ 7125 icmp_time_exceeded_v6(WR(q), first_mp, 7126 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7127 GLOBAL_ZONEID, ipst); 7128 return; 7129 } 7130 /* 7131 * Per RFC 3513 section 2.5.2, we must not forward packets with 7132 * an unspecified source address. 7133 */ 7134 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7135 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7136 freemsg(hada_mp); 7137 freemsg(first_mp); 7138 return; 7139 } 7140 mp->b_prev = (mblk_t *)(uintptr_t) 7141 ill->ill_phyint->phyint_ifindex; 7142 ip_newroute_v6(q, mp, &ip6h->ip6_dst, &ip6h->ip6_src, 7143 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ? ill : NULL, 7144 GLOBAL_ZONEID, ipst); 7145 return; 7146 } 7147 /* we have a matching IRE */ 7148 if (ire->ire_stq != NULL) { 7149 /* 7150 * To be quicker, we may wish not to chase pointers 7151 * (ire->ire_ipif->ipif_ill...) and instead store the 7152 * forwarding policy in the ire. An unfortunate side- 7153 * effect of this would be requiring an ire flush whenever 7154 * the ILLF_ROUTER flag changes. For now, chase pointers 7155 * once and store in the boolean no_forward. 7156 * 7157 * This appears twice to keep it out of the non-forwarding, 7158 * yes-it's-for-us-on-the-right-interface case. 7159 */ 7160 no_forward = ((ill->ill_flags & 7161 ire->ire_ipif->ipif_ill->ill_flags & ILLF_ROUTER) == 0); 7162 7163 ASSERT(first_mp == mp); 7164 /* 7165 * This ire has a send-to queue - forward the packet. 
7166 */ 7167 if (no_forward || ll_multicast || (hada_mp != NULL)) { 7168 freemsg(hada_mp); 7169 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7170 if (no_forward) { 7171 BUMP_MIB(ill->ill_ip_mib, 7172 ipIfStatsInAddrErrors); 7173 } 7174 freemsg(mp); 7175 ire_refrele(ire); 7176 return; 7177 } 7178 /* 7179 * ipIfStatsHCInForwDatagrams should only be increment if there 7180 * will be an attempt to forward the packet, which is why we 7181 * increment after the above condition has been checked. 7182 */ 7183 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 7184 if (ip6h->ip6_hops <= 1) { 7185 ip1dbg(("ip_rput_data_v6: hop limit expired.\n")); 7186 /* Sent by forwarding path, and router is global zone */ 7187 icmp_time_exceeded_v6(WR(q), mp, 7188 ICMP6_TIME_EXCEED_TRANSIT, ll_multicast, B_FALSE, 7189 GLOBAL_ZONEID, ipst); 7190 ire_refrele(ire); 7191 return; 7192 } 7193 /* 7194 * Per RFC 3513 section 2.5.2, we must not forward packets with 7195 * an unspecified source address. 7196 */ 7197 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7198 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7199 freemsg(mp); 7200 ire_refrele(ire); 7201 return; 7202 } 7203 7204 if (is_system_labeled()) { 7205 mblk_t *mp1; 7206 7207 if ((mp1 = tsol_ip_forward(ire, mp)) == NULL) { 7208 BUMP_MIB(ill->ill_ip_mib, 7209 ipIfStatsForwProhibits); 7210 freemsg(mp); 7211 ire_refrele(ire); 7212 return; 7213 } 7214 /* Size may have changed */ 7215 mp = mp1; 7216 ip6h = (ip6_t *)mp->b_rptr; 7217 pkt_len = msgdsize(mp); 7218 } 7219 7220 if (pkt_len > ire->ire_max_frag) { 7221 int max_frag = ire->ire_max_frag; 7222 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTooBigErrors); 7223 /* 7224 * Handle labeled packet resizing. 
7225 */ 7226 if (is_system_labeled()) { 7227 max_frag = tsol_pmtu_adjust(mp, max_frag, 7228 pkt_len - old_pkt_len, AF_INET6); 7229 } 7230 7231 /* Sent by forwarding path, and router is global zone */ 7232 icmp_pkt2big_v6(WR(q), mp, max_frag, 7233 ll_multicast, B_TRUE, GLOBAL_ZONEID, ipst); 7234 ire_refrele(ire); 7235 return; 7236 } 7237 7238 /* 7239 * Check to see if we're forwarding the packet to a 7240 * different link from which it came. If so, check the 7241 * source and destination addresses since routers must not 7242 * forward any packets with link-local source or 7243 * destination addresses to other links. Otherwise (if 7244 * we're forwarding onto the same link), conditionally send 7245 * a redirect message. 7246 */ 7247 if (ire->ire_rfq != q && 7248 !IS_IN_SAME_ILLGRP(ill, (ill_t *)ire->ire_rfq->q_ptr)) { 7249 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) || 7250 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 7251 BUMP_MIB(ill->ill_ip_mib, 7252 ipIfStatsInAddrErrors); 7253 freemsg(mp); 7254 ire_refrele(ire); 7255 return; 7256 } 7257 /* TBD add site-local check at site boundary? */ 7258 } else if (ipst->ips_ipv6_send_redirects) { 7259 in6_addr_t *v6targ; 7260 in6_addr_t gw_addr_v6; 7261 ire_t *src_ire_v6 = NULL; 7262 7263 /* 7264 * Don't send a redirect when forwarding a source 7265 * routed packet. 7266 */ 7267 if (ip_source_routed_v6(ip6h, mp, ipst)) 7268 goto forward; 7269 7270 mutex_enter(&ire->ire_lock); 7271 gw_addr_v6 = ire->ire_gateway_addr_v6; 7272 mutex_exit(&ire->ire_lock); 7273 if (!IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) { 7274 v6targ = &gw_addr_v6; 7275 /* 7276 * We won't send redirects to a router 7277 * that doesn't have a link local 7278 * address, but will forward. 
7279 */ 7280 if (!IN6_IS_ADDR_LINKLOCAL(v6targ)) { 7281 BUMP_MIB(ill->ill_ip_mib, 7282 ipIfStatsInAddrErrors); 7283 goto forward; 7284 } 7285 } else { 7286 v6targ = &ip6h->ip6_dst; 7287 } 7288 7289 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 7290 NULL, NULL, IRE_INTERFACE, ire->ire_ipif, NULL, 7291 GLOBAL_ZONEID, 0, NULL, 7292 MATCH_IRE_IPIF | MATCH_IRE_TYPE, 7293 ipst); 7294 7295 if (src_ire_v6 != NULL) { 7296 /* 7297 * The source is directly connected. 7298 */ 7299 mp1 = copymsg(mp); 7300 if (mp1 != NULL) { 7301 icmp_send_redirect_v6(WR(q), 7302 mp1, v6targ, &ip6h->ip6_dst, 7303 ill, B_FALSE); 7304 } 7305 ire_refrele(src_ire_v6); 7306 } 7307 } 7308 7309 forward: 7310 /* Hoplimit verified above */ 7311 ip6h->ip6_hops--; 7312 7313 outill = ire->ire_ipif->ipif_ill; 7314 7315 DTRACE_PROBE4(ip6__forwarding__start, 7316 ill_t *, inill, ill_t *, outill, 7317 ip6_t *, ip6h, mblk_t *, mp); 7318 7319 FW_HOOKS6(ipst->ips_ip6_forwarding_event, 7320 ipst->ips_ipv6firewall_forwarding, 7321 inill, outill, ip6h, mp, mp, 0, ipst); 7322 7323 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp); 7324 7325 if (mp != NULL) { 7326 UPDATE_IB_PKT_COUNT(ire); 7327 ire->ire_last_used_time = lbolt; 7328 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 7329 ip_xmit_v6(mp, ire, 0, NULL, B_FALSE, NULL); 7330 } 7331 IRE_REFRELE(ire); 7332 return; 7333 } 7334 7335 /* 7336 * Need to put on correct queue for reassembly to find it. 7337 * No need to use put() since reassembly has its own locks. 7338 * Note: multicast packets and packets destined to addresses 7339 * assigned to loopback (ire_rfq is NULL) will be reassembled on 7340 * the arriving ill. Unlike the IPv4 case, enabling strict 7341 * destination multihoming will prevent accepting packets 7342 * addressed to an IRE_LOCAL on lo0. 
7343 */ 7344 if (ire->ire_rfq != q) { 7345 if ((ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill)) 7346 == NULL) { 7347 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 7348 freemsg(hada_mp); 7349 freemsg(first_mp); 7350 return; 7351 } 7352 if (ire->ire_rfq != NULL) { 7353 q = ire->ire_rfq; 7354 ill = (ill_t *)q->q_ptr; 7355 ASSERT(ill != NULL); 7356 } 7357 } 7358 7359 zoneid = ire->ire_zoneid; 7360 UPDATE_IB_PKT_COUNT(ire); 7361 ire->ire_last_used_time = lbolt; 7362 /* Don't use the ire after this point, we'll NULL it out to be sure. */ 7363 ire_refrele(ire); 7364 ire = NULL; 7365 ipv6forus: 7366 /* 7367 * Looks like this packet is for us one way or another. 7368 * This is where we'll process destination headers etc. 7369 */ 7370 for (; ; ) { 7371 switch (nexthdr) { 7372 case IPPROTO_TCP: { 7373 uint16_t *up; 7374 uint32_t sum; 7375 int offset; 7376 7377 hdr_len = pkt_len - remlen; 7378 7379 if (hada_mp != NULL) { 7380 ip0dbg(("tcp hada drop\n")); 7381 goto hada_drop; 7382 } 7383 7384 7385 /* TCP needs all of the TCP header */ 7386 if (remlen < TCP_MIN_HEADER_LENGTH) 7387 goto pkt_too_short; 7388 if (mp->b_cont != NULL && 7389 whereptr + TCP_MIN_HEADER_LENGTH > mp->b_wptr) { 7390 if (!pullupmsg(mp, 7391 hdr_len + TCP_MIN_HEADER_LENGTH)) { 7392 BUMP_MIB(ill->ill_ip_mib, 7393 ipIfStatsInDiscards); 7394 freemsg(first_mp); 7395 return; 7396 } 7397 hck_flags = 0; 7398 ip6h = (ip6_t *)mp->b_rptr; 7399 whereptr = (uint8_t *)ip6h + hdr_len; 7400 } 7401 /* 7402 * Extract the offset field from the TCP header. 7403 */ 7404 offset = ((uchar_t *)ip6h)[hdr_len + 12] >> 4; 7405 if (offset != 5) { 7406 if (offset < 5) { 7407 ip1dbg(("ip_rput_data_v6: short " 7408 "TCP data offset")); 7409 BUMP_MIB(ill->ill_ip_mib, 7410 ipIfStatsInDiscards); 7411 freemsg(first_mp); 7412 return; 7413 } 7414 /* 7415 * There must be TCP options. 7416 * Make sure we can grab them. 
7417 */ 7418 offset <<= 2; 7419 if (remlen < offset) 7420 goto pkt_too_short; 7421 if (mp->b_cont != NULL && 7422 whereptr + offset > mp->b_wptr) { 7423 if (!pullupmsg(mp, 7424 hdr_len + offset)) { 7425 BUMP_MIB(ill->ill_ip_mib, 7426 ipIfStatsInDiscards); 7427 freemsg(first_mp); 7428 return; 7429 } 7430 hck_flags = 0; 7431 ip6h = (ip6_t *)mp->b_rptr; 7432 whereptr = (uint8_t *)ip6h + hdr_len; 7433 } 7434 } 7435 7436 up = (uint16_t *)&ip6h->ip6_src; 7437 /* 7438 * TCP checksum calculation. First sum up the 7439 * pseudo-header fields: 7440 * - Source IPv6 address 7441 * - Destination IPv6 address 7442 * - TCP payload length 7443 * - TCP protocol ID 7444 */ 7445 sum = htons(IPPROTO_TCP + remlen) + 7446 up[0] + up[1] + up[2] + up[3] + 7447 up[4] + up[5] + up[6] + up[7] + 7448 up[8] + up[9] + up[10] + up[11] + 7449 up[12] + up[13] + up[14] + up[15]; 7450 7451 /* Fold initial sum */ 7452 sum = (sum & 0xffff) + (sum >> 16); 7453 7454 mp1 = mp->b_cont; 7455 7456 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7457 IP6_STAT(ipst, ip6_in_sw_cksum); 7458 7459 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7460 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7461 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7462 mp, mp1, cksum_err); 7463 7464 if (cksum_err) { 7465 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 7466 7467 if (hck_flags & HCK_FULLCKSUM) { 7468 IP6_STAT(ipst, 7469 ip6_tcp_in_full_hw_cksum_err); 7470 } else if (hck_flags & HCK_PARTIALCKSUM) { 7471 IP6_STAT(ipst, 7472 ip6_tcp_in_part_hw_cksum_err); 7473 } else { 7474 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err); 7475 } 7476 freemsg(first_mp); 7477 return; 7478 } 7479 tcp_fanout: 7480 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, inill, 7481 (flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 7482 IP_FF_IPINFO), hdr_len, mctl_present, zoneid); 7483 return; 7484 } 7485 case IPPROTO_SCTP: 7486 { 7487 sctp_hdr_t *sctph; 7488 uint32_t calcsum, pktsum; 7489 uint_t hdr_len = pkt_len - remlen; 7490 sctp_stack_t *sctps; 7491 7492 sctps = 
inill->ill_ipst->ips_netstack->netstack_sctp; 7493 7494 /* SCTP needs all of the SCTP header */ 7495 if (remlen < sizeof (*sctph)) { 7496 goto pkt_too_short; 7497 } 7498 if (whereptr + sizeof (*sctph) > mp->b_wptr) { 7499 ASSERT(mp->b_cont != NULL); 7500 if (!pullupmsg(mp, hdr_len + sizeof (*sctph))) { 7501 BUMP_MIB(ill->ill_ip_mib, 7502 ipIfStatsInDiscards); 7503 freemsg(mp); 7504 return; 7505 } 7506 ip6h = (ip6_t *)mp->b_rptr; 7507 whereptr = (uint8_t *)ip6h + hdr_len; 7508 } 7509 7510 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_len); 7511 /* checksum */ 7512 pktsum = sctph->sh_chksum; 7513 sctph->sh_chksum = 0; 7514 calcsum = sctp_cksum(mp, hdr_len); 7515 if (calcsum != pktsum) { 7516 BUMP_MIB(&sctps->sctps_mib, sctpChecksumError); 7517 freemsg(mp); 7518 return; 7519 } 7520 sctph->sh_chksum = pktsum; 7521 ports = *(uint32_t *)(mp->b_rptr + hdr_len); 7522 if ((connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, 7523 ports, zoneid, mp, sctps)) == NULL) { 7524 ip_fanout_sctp_raw(first_mp, ill, 7525 (ipha_t *)ip6h, B_FALSE, ports, 7526 mctl_present, 7527 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), 7528 B_TRUE, zoneid); 7529 return; 7530 } 7531 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 7532 sctp_input(connp, (ipha_t *)ip6h, mp, first_mp, ill, 7533 B_FALSE, mctl_present); 7534 return; 7535 } 7536 case IPPROTO_UDP: { 7537 uint16_t *up; 7538 uint32_t sum; 7539 7540 hdr_len = pkt_len - remlen; 7541 7542 if (hada_mp != NULL) { 7543 ip0dbg(("udp hada drop\n")); 7544 goto hada_drop; 7545 } 7546 7547 /* Verify that at least the ports are present */ 7548 if (remlen < UDPH_SIZE) 7549 goto pkt_too_short; 7550 if (mp->b_cont != NULL && 7551 whereptr + UDPH_SIZE > mp->b_wptr) { 7552 if (!pullupmsg(mp, hdr_len + UDPH_SIZE)) { 7553 BUMP_MIB(ill->ill_ip_mib, 7554 ipIfStatsInDiscards); 7555 freemsg(first_mp); 7556 return; 7557 } 7558 hck_flags = 0; 7559 ip6h = (ip6_t *)mp->b_rptr; 7560 whereptr = (uint8_t *)ip6h + hdr_len; 7561 } 7562 7563 /* 7564 * Before going through the regular 
checksum 7565 * calculation, make sure the received checksum 7566 * is non-zero. RFC 2460 says, a 0x0000 checksum 7567 * in a UDP packet (within IPv6 packet) is invalid 7568 * and should be replaced by 0xffff. This makes 7569 * sense as regular checksum calculation will 7570 * pass for both the cases i.e. 0x0000 and 0xffff. 7571 * Removing one of the case makes error detection 7572 * stronger. 7573 */ 7574 7575 if (((udpha_t *)whereptr)->uha_checksum == 0) { 7576 /* 0x0000 checksum is invalid */ 7577 ip1dbg(("ip_rput_data_v6: Invalid UDP " 7578 "checksum value 0x0000\n")); 7579 BUMP_MIB(ill->ill_ip_mib, 7580 udpIfStatsInCksumErrs); 7581 freemsg(first_mp); 7582 return; 7583 } 7584 7585 up = (uint16_t *)&ip6h->ip6_src; 7586 7587 /* 7588 * UDP checksum calculation. First sum up the 7589 * pseudo-header fields: 7590 * - Source IPv6 address 7591 * - Destination IPv6 address 7592 * - UDP payload length 7593 * - UDP protocol ID 7594 */ 7595 7596 sum = htons(IPPROTO_UDP + remlen) + 7597 up[0] + up[1] + up[2] + up[3] + 7598 up[4] + up[5] + up[6] + up[7] + 7599 up[8] + up[9] + up[10] + up[11] + 7600 up[12] + up[13] + up[14] + up[15]; 7601 7602 /* Fold initial sum */ 7603 sum = (sum & 0xffff) + (sum >> 16); 7604 7605 if (reass_hck_flags != 0) { 7606 hck_flags = reass_hck_flags; 7607 7608 IP_CKSUM_RECV_REASS(hck_flags, 7609 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7610 sum, reass_sum, cksum_err); 7611 } else { 7612 mp1 = mp->b_cont; 7613 7614 IP_CKSUM_RECV(hck_flags, sum, (uchar_t *) 7615 ((uchar_t *)mp->b_rptr + DB_CKSUMSTART(mp)), 7616 (int32_t)(whereptr - (uchar_t *)mp->b_rptr), 7617 mp, mp1, cksum_err); 7618 } 7619 7620 if ((hck_flags & (HCK_FULLCKSUM|HCK_PARTIALCKSUM)) == 0) 7621 IP6_STAT(ipst, ip6_in_sw_cksum); 7622 7623 if (cksum_err) { 7624 BUMP_MIB(ill->ill_ip_mib, 7625 udpIfStatsInCksumErrs); 7626 7627 if (hck_flags & HCK_FULLCKSUM) 7628 IP6_STAT(ipst, 7629 ip6_udp_in_full_hw_cksum_err); 7630 else if (hck_flags & HCK_PARTIALCKSUM) 7631 IP6_STAT(ipst, 7632 
ip6_udp_in_part_hw_cksum_err); 7633 else 7634 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err); 7635 7636 freemsg(first_mp); 7637 return; 7638 } 7639 goto udp_fanout; 7640 } 7641 case IPPROTO_ICMPV6: { 7642 uint16_t *up; 7643 uint32_t sum; 7644 uint_t hdr_len = pkt_len - remlen; 7645 7646 if (hada_mp != NULL) { 7647 ip0dbg(("icmp hada drop\n")); 7648 goto hada_drop; 7649 } 7650 7651 up = (uint16_t *)&ip6h->ip6_src; 7652 sum = htons(IPPROTO_ICMPV6 + remlen) + 7653 up[0] + up[1] + up[2] + up[3] + 7654 up[4] + up[5] + up[6] + up[7] + 7655 up[8] + up[9] + up[10] + up[11] + 7656 up[12] + up[13] + up[14] + up[15]; 7657 sum = (sum & 0xffff) + (sum >> 16); 7658 sum = IP_CSUM(mp, hdr_len, sum); 7659 if (sum != 0) { 7660 /* IPv6 ICMP checksum failed */ 7661 ip1dbg(("ip_rput_data_v6: ICMPv6 checksum " 7662 "failed %x\n", 7663 sum)); 7664 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 7665 BUMP_MIB(ill->ill_icmp6_mib, 7666 ipv6IfIcmpInErrors); 7667 freemsg(first_mp); 7668 return; 7669 } 7670 7671 icmp_fanout: 7672 /* Check variable for testing applications */ 7673 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 7674 freemsg(first_mp); 7675 return; 7676 } 7677 /* 7678 * Assume that there is always at least one conn for 7679 * ICMPv6 (in.ndpd) i.e. don't optimize the case 7680 * where there is no conn. 7681 */ 7682 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 7683 ilm_t *ilm; 7684 ilm_walker_t ilw; 7685 7686 ASSERT(!IS_LOOPBACK(ill)); 7687 /* 7688 * In the multicast case, applications may have 7689 * joined the group from different zones, so we 7690 * need to deliver the packet to each of them. 7691 * Loop through the multicast memberships 7692 * structures (ilm) on the receive ill and send 7693 * a copy of the packet up each matching one. 
7694 */ 7695 ilm = ilm_walker_start(&ilw, inill); 7696 for (; ilm != NULL; 7697 ilm = ilm_walker_step(&ilw, ilm)) { 7698 if (!IN6_ARE_ADDR_EQUAL( 7699 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 7700 continue; 7701 if (!ipif_lookup_zoneid( 7702 ilw.ilw_walk_ill, ilm->ilm_zoneid, 7703 IPIF_UP, NULL)) 7704 continue; 7705 7706 first_mp1 = ip_copymsg(first_mp); 7707 if (first_mp1 == NULL) 7708 continue; 7709 icmp_inbound_v6(q, first_mp1, 7710 ilw.ilw_walk_ill, inill, 7711 hdr_len, mctl_present, 0, 7712 ilm->ilm_zoneid, dl_mp); 7713 } 7714 ilm_walker_finish(&ilw); 7715 } else { 7716 first_mp1 = ip_copymsg(first_mp); 7717 if (first_mp1 != NULL) 7718 icmp_inbound_v6(q, first_mp1, ill, 7719 inill, hdr_len, mctl_present, 0, 7720 zoneid, dl_mp); 7721 } 7722 goto proto_fanout; 7723 } 7724 case IPPROTO_ENCAP: 7725 case IPPROTO_IPV6: 7726 if (ip_iptun_input_v6(mctl_present ? first_mp : NULL, 7727 mp, pkt_len - remlen, nexthdr, zoneid, ill, ipst)) { 7728 return; 7729 } 7730 /* 7731 * If there was no IP tunnel data-link bound to 7732 * receive this packet, then we fall through to 7733 * allow potential raw sockets bound to either of 7734 * these protocols to pick it up. 7735 */ 7736 /* FALLTHRU */ 7737 proto_fanout: 7738 default: { 7739 /* 7740 * Handle protocols with which IPv6 is less intimate. 7741 */ 7742 uint_t proto_flags = IP_FF_RAWIP|IP_FF_IPINFO; 7743 7744 if (hada_mp != NULL) { 7745 ip0dbg(("default hada drop\n")); 7746 goto hada_drop; 7747 } 7748 7749 /* 7750 * Enable sending ICMP for "Unknown" nexthdr 7751 * case. i.e. where we did not FALLTHRU from 7752 * IPPROTO_ICMPV6 processing case above. 
7753 * If we did FALLTHRU, then the packet has already been 7754 * processed for IPPF, don't process it again in 7755 * ip_fanout_proto_v6; set IP6_NO_IPPOLICY in the 7756 * flags 7757 */ 7758 if (nexthdr != IPPROTO_ICMPV6) 7759 proto_flags |= IP_FF_SEND_ICMP; 7760 else 7761 proto_flags |= IP6_NO_IPPOLICY; 7762 7763 ip_fanout_proto_v6(q, first_mp, ip6h, ill, inill, 7764 nexthdr, prev_nexthdr_offset, (flags|proto_flags), 7765 mctl_present, zoneid); 7766 return; 7767 } 7768 7769 case IPPROTO_DSTOPTS: { 7770 uint_t ehdrlen; 7771 uint8_t *optptr; 7772 ip6_dest_t *desthdr; 7773 7774 /* If packet is too short, look no further */ 7775 if (remlen < MIN_EHDR_LEN) 7776 goto pkt_too_short; 7777 7778 /* Check if AH is present. */ 7779 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7780 inill, hada_mp, zoneid)) { 7781 return; 7782 } 7783 7784 /* 7785 * Reinitialize pointers, as ipsec_early_ah_v6() does 7786 * complete pullups. We don't have to do more pullups 7787 * as a result. 7788 */ 7789 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7790 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7791 ip6h = (ip6_t *)mp->b_rptr; 7792 7793 desthdr = (ip6_dest_t *)whereptr; 7794 nexthdr = desthdr->ip6d_nxt; 7795 prev_nexthdr_offset = (uint_t)(whereptr - 7796 (uint8_t *)ip6h); 7797 ehdrlen = 8 * (desthdr->ip6d_len + 1); 7798 if (remlen < ehdrlen) 7799 goto pkt_too_short; 7800 optptr = whereptr + 2; 7801 /* 7802 * Note: XXX This code does not seem to make 7803 * distinction between Destination Options Header 7804 * being before/after Routing Header which can 7805 * happen if we are at the end of source route. 7806 * This may become significant in future. 7807 * (No real significant Destination Options are 7808 * defined/implemented yet ). 7809 */ 7810 switch (ip_process_options_v6(q, first_mp, ip6h, optptr, 7811 ehdrlen - 2, IPPROTO_DSTOPTS, ipst)) { 7812 case -1: 7813 /* 7814 * Packet has been consumed and any needed 7815 * ICMP errors sent. 
7816 */ 7817 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 7818 freemsg(hada_mp); 7819 return; 7820 case 0: 7821 /* No action needed continue */ 7822 break; 7823 case 1: 7824 /* 7825 * Unnexpected return value 7826 * (Router alert is a Hop-by-Hop option) 7827 */ 7828 #ifdef DEBUG 7829 panic("ip_rput_data_v6: router " 7830 "alert hbh opt indication in dest opt"); 7831 /*NOTREACHED*/ 7832 #else 7833 freemsg(hada_mp); 7834 freemsg(first_mp); 7835 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 7836 return; 7837 #endif 7838 } 7839 used = ehdrlen; 7840 break; 7841 } 7842 case IPPROTO_FRAGMENT: { 7843 ip6_frag_t *fraghdr; 7844 size_t no_frag_hdr_len; 7845 7846 if (hada_mp != NULL) { 7847 ip0dbg(("frag hada drop\n")); 7848 goto hada_drop; 7849 } 7850 7851 ASSERT(first_mp == mp); 7852 if (remlen < sizeof (ip6_frag_t)) 7853 goto pkt_too_short; 7854 7855 if (mp->b_cont != NULL && 7856 whereptr + sizeof (ip6_frag_t) > mp->b_wptr) { 7857 if (!pullupmsg(mp, 7858 pkt_len - remlen + sizeof (ip6_frag_t))) { 7859 BUMP_MIB(ill->ill_ip_mib, 7860 ipIfStatsInDiscards); 7861 freemsg(mp); 7862 return; 7863 } 7864 hck_flags = 0; 7865 ip6h = (ip6_t *)mp->b_rptr; 7866 whereptr = (uint8_t *)ip6h + pkt_len - remlen; 7867 } 7868 7869 fraghdr = (ip6_frag_t *)whereptr; 7870 used = (uint_t)sizeof (ip6_frag_t); 7871 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds); 7872 7873 /* 7874 * Invoke the CGTP (multirouting) filtering module to 7875 * process the incoming packet. Packets identified as 7876 * duplicates must be discarded. Filtering is active 7877 * only if the the ip_cgtp_filter ndd variable is 7878 * non-zero. 
7879 */ 7880 if (ipst->ips_ip_cgtp_filter && 7881 ipst->ips_ip_cgtp_filter_ops != NULL) { 7882 int cgtp_flt_pkt; 7883 netstackid_t stackid; 7884 7885 stackid = ipst->ips_netstack->netstack_stackid; 7886 7887 cgtp_flt_pkt = 7888 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6( 7889 stackid, inill->ill_phyint->phyint_ifindex, 7890 ip6h, fraghdr); 7891 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 7892 freemsg(mp); 7893 return; 7894 } 7895 } 7896 7897 /* Restore the flags */ 7898 DB_CKSUMFLAGS(mp) = hck_flags; 7899 7900 mp = ip_rput_frag_v6(ill, inill, mp, ip6h, fraghdr, 7901 remlen - used, &prev_nexthdr_offset, 7902 &reass_sum, &reass_hck_flags); 7903 if (mp == NULL) { 7904 /* Reassembly is still pending */ 7905 return; 7906 } 7907 /* The first mblk are the headers before the frag hdr */ 7908 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs); 7909 7910 first_mp = mp; /* mp has most likely changed! */ 7911 no_frag_hdr_len = mp->b_wptr - mp->b_rptr; 7912 ip6h = (ip6_t *)mp->b_rptr; 7913 nexthdr = ((char *)ip6h)[prev_nexthdr_offset]; 7914 whereptr = mp->b_rptr + no_frag_hdr_len; 7915 remlen = ntohs(ip6h->ip6_plen) + 7916 (uint16_t)(IPV6_HDR_LEN - no_frag_hdr_len); 7917 pkt_len = msgdsize(mp); 7918 used = 0; 7919 break; 7920 } 7921 case IPPROTO_HOPOPTS: { 7922 if (hada_mp != NULL) { 7923 ip0dbg(("hop hada drop\n")); 7924 goto hada_drop; 7925 } 7926 /* 7927 * Illegal header sequence. 7928 * (Hop-by-hop headers are processed above 7929 * and required to immediately follow IPv6 header) 7930 */ 7931 icmp_param_problem_v6(WR(q), first_mp, 7932 ICMP6_PARAMPROB_NEXTHEADER, 7933 prev_nexthdr_offset, 7934 B_FALSE, B_FALSE, zoneid, ipst); 7935 return; 7936 } 7937 case IPPROTO_ROUTING: { 7938 uint_t ehdrlen; 7939 ip6_rthdr_t *rthdr; 7940 7941 /* If packet is too short, look no further */ 7942 if (remlen < MIN_EHDR_LEN) 7943 goto pkt_too_short; 7944 7945 /* Check if AH is present. 
*/ 7946 if (ipsec_early_ah_v6(q, first_mp, mctl_present, ill, 7947 inill, hada_mp, zoneid)) { 7948 return; 7949 } 7950 7951 /* 7952 * Reinitialize pointers, as ipsec_early_ah_v6() does 7953 * complete pullups. We don't have to do more pullups 7954 * as a result. 7955 */ 7956 whereptr = (uint8_t *)((uintptr_t)mp->b_rptr + 7957 (uintptr_t)(whereptr - ((uint8_t *)ip6h))); 7958 ip6h = (ip6_t *)mp->b_rptr; 7959 7960 rthdr = (ip6_rthdr_t *)whereptr; 7961 nexthdr = rthdr->ip6r_nxt; 7962 prev_nexthdr_offset = (uint_t)(whereptr - 7963 (uint8_t *)ip6h); 7964 ehdrlen = 8 * (rthdr->ip6r_len + 1); 7965 if (remlen < ehdrlen) 7966 goto pkt_too_short; 7967 if (rthdr->ip6r_segleft != 0) { 7968 /* Not end of source route */ 7969 if (ll_multicast) { 7970 BUMP_MIB(ill->ill_ip_mib, 7971 ipIfStatsForwProhibits); 7972 freemsg(hada_mp); 7973 freemsg(mp); 7974 return; 7975 } 7976 ip_process_rthdr(q, mp, ip6h, rthdr, ill, 7977 hada_mp); 7978 return; 7979 } 7980 used = ehdrlen; 7981 break; 7982 } 7983 case IPPROTO_AH: 7984 case IPPROTO_ESP: { 7985 /* 7986 * Fast path for AH/ESP. If this is the first time 7987 * we are sending a datagram to AH/ESP, allocate 7988 * a IPSEC_IN message and prepend it. Otherwise, 7989 * just fanout. 7990 */ 7991 7992 ipsec_in_t *ii; 7993 int ipsec_rc; 7994 ipsec_stack_t *ipss; 7995 7996 ipss = ipst->ips_netstack->netstack_ipsec; 7997 if (!mctl_present) { 7998 ASSERT(first_mp == mp); 7999 first_mp = ipsec_in_alloc(B_FALSE, 8000 ipst->ips_netstack); 8001 if (first_mp == NULL) { 8002 ip1dbg(("ip_rput_data_v6: IPSEC_IN " 8003 "allocation failure.\n")); 8004 BUMP_MIB(ill->ill_ip_mib, 8005 ipIfStatsInDiscards); 8006 freemsg(mp); 8007 return; 8008 } 8009 /* 8010 * Store the ill_index so that when we come back 8011 * from IPSEC we ride on the same queue. 
8012 */ 8013 ii = (ipsec_in_t *)first_mp->b_rptr; 8014 ii->ipsec_in_ill_index = 8015 ill->ill_phyint->phyint_ifindex; 8016 ii->ipsec_in_rill_index = 8017 inill->ill_phyint->phyint_ifindex; 8018 first_mp->b_cont = mp; 8019 /* 8020 * Cache hardware acceleration info. 8021 */ 8022 if (hada_mp != NULL) { 8023 IPSECHW_DEBUG(IPSECHW_PKT, 8024 ("ip_rput_data_v6: " 8025 "caching data attr.\n")); 8026 ii->ipsec_in_accelerated = B_TRUE; 8027 ii->ipsec_in_da = hada_mp; 8028 hada_mp = NULL; 8029 } 8030 } else { 8031 ii = (ipsec_in_t *)first_mp->b_rptr; 8032 } 8033 8034 if (!ipsec_loaded(ipss)) { 8035 ip_proto_not_sup(q, first_mp, IP_FF_SEND_ICMP, 8036 zoneid, ipst); 8037 return; 8038 } 8039 8040 /* select inbound SA and have IPsec process the pkt */ 8041 if (nexthdr == IPPROTO_ESP) { 8042 esph_t *esph = ipsec_inbound_esp_sa(first_mp, 8043 ipst->ips_netstack); 8044 if (esph == NULL) 8045 return; 8046 ASSERT(ii->ipsec_in_esp_sa != NULL); 8047 ASSERT(ii->ipsec_in_esp_sa->ipsa_input_func != 8048 NULL); 8049 ipsec_rc = ii->ipsec_in_esp_sa->ipsa_input_func( 8050 first_mp, esph); 8051 } else { 8052 ah_t *ah = ipsec_inbound_ah_sa(first_mp, 8053 ipst->ips_netstack); 8054 if (ah == NULL) 8055 return; 8056 ASSERT(ii->ipsec_in_ah_sa != NULL); 8057 ASSERT(ii->ipsec_in_ah_sa->ipsa_input_func != 8058 NULL); 8059 ipsec_rc = ii->ipsec_in_ah_sa->ipsa_input_func( 8060 first_mp, ah); 8061 } 8062 8063 switch (ipsec_rc) { 8064 case IPSEC_STATUS_SUCCESS: 8065 break; 8066 case IPSEC_STATUS_FAILED: 8067 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8068 /* FALLTHRU */ 8069 case IPSEC_STATUS_PENDING: 8070 return; 8071 } 8072 /* we're done with IPsec processing, send it up */ 8073 ip_fanout_proto_again(first_mp, ill, inill, NULL); 8074 return; 8075 } 8076 case IPPROTO_NONE: 8077 /* All processing is done. Count as "delivered". 
*/ 8078 freemsg(hada_mp); 8079 freemsg(first_mp); 8080 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 8081 return; 8082 } 8083 whereptr += used; 8084 ASSERT(remlen >= used); 8085 remlen -= used; 8086 } 8087 /* NOTREACHED */ 8088 8089 pkt_too_short: 8090 ip1dbg(("ip_rput_data_v6: packet too short %d %lu %d\n", 8091 ip6_len, pkt_len, remlen)); 8092 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 8093 freemsg(hada_mp); 8094 freemsg(first_mp); 8095 return; 8096 udp_fanout: 8097 if (mctl_present || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 8098 connp = NULL; 8099 } else { 8100 connp = ipcl_classify_v6(mp, IPPROTO_UDP, hdr_len, zoneid, 8101 ipst); 8102 if ((connp != NULL) && (connp->conn_upq == NULL)) { 8103 CONN_DEC_REF(connp); 8104 connp = NULL; 8105 } 8106 } 8107 8108 if (connp == NULL) { 8109 uint32_t ports; 8110 8111 ports = *(uint32_t *)(mp->b_rptr + hdr_len + 8112 UDP_PORTS_OFFSET); 8113 IP6_STAT(ipst, ip6_udp_slow_path); 8114 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, inill, 8115 (flags|IP_FF_SEND_ICMP|IP_FF_IPINFO), mctl_present, 8116 zoneid); 8117 return; 8118 } 8119 8120 if ((IPCL_IS_NONSTR(connp) && PROTO_FLOW_CNTRLD(connp)) || 8121 (!IPCL_IS_NONSTR(connp) && CONN_UDP_FLOWCTLD(connp))) { 8122 freemsg(first_mp); 8123 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 8124 CONN_DEC_REF(connp); 8125 return; 8126 } 8127 8128 /* Initiate IPPF processing */ 8129 if (IP6_IN_IPP(flags, ipst)) { 8130 ip_process(IPP_LOCAL_IN, &mp, ill->ill_phyint->phyint_ifindex); 8131 if (mp == NULL) { 8132 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8133 CONN_DEC_REF(connp); 8134 return; 8135 } 8136 } 8137 8138 if (connp->conn_ip_recvpktinfo || 8139 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) { 8140 mp = ip_add_info_v6(mp, inill, &ip6h->ip6_dst); 8141 if (mp == NULL) { 8142 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8143 CONN_DEC_REF(connp); 8144 return; 8145 } 8146 } 8147 8148 IP6_STAT(ipst, ip6_udp_fast_path); 8149 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
8150 8151 /* Send it upstream */ 8152 (connp->conn_recv)(connp, mp, NULL); 8153 8154 CONN_DEC_REF(connp); 8155 freemsg(hada_mp); 8156 return; 8157 8158 hada_drop: 8159 ip1dbg(("ip_rput_data_v6: malformed accelerated packet\n")); 8160 /* IPsec kstats: bump counter here */ 8161 freemsg(hada_mp); 8162 freemsg(first_mp); 8163 } 8164 8165 /* 8166 * Reassemble fragment. 8167 * When it returns a completed message the first mblk will only contain 8168 * the headers prior to the fragment header. 8169 * 8170 * prev_nexthdr_offset is an offset indication of where the nexthdr field is 8171 * of the preceding header. This is needed to patch the previous header's 8172 * nexthdr field when reassembly completes. * * remlen is added to the fragment offset to compute where this fragment * ends (end = offset + remlen). * * cksum_val and cksum_flags may be NULL; when supplied they are cleared on * entry and receive the accumulated checksum value and flags when a packet * is returned. * * Returns NULL when no packet is handed back: the fragment was queued for * reassembly, dropped, or rejected. On those paths mp has been freed, * linked into the reassembly queue, or passed to icmp_param_problem_v6() * or ip_reassemble(). * NOTE(review): presumably both of those helpers take ownership of mp -- * confirm against their definitions. * * A packet whose fragment header has offset 0 and no more-fragments flag * bypasses the reassembly queue entirely. The queue itself is manipulated * under the hash bucket's ipfb_lock, which is taken and dropped inside * this function. 8173 */ 8174 static mblk_t * 8175 ip_rput_frag_v6(ill_t *ill, ill_t *inill, mblk_t *mp, ip6_t *ip6h, 8176 ip6_frag_t *fraghdr, uint_t remlen, uint_t *prev_nexthdr_offset, 8177 uint32_t *cksum_val, uint16_t *cksum_flags) 8178 { 8179 uint32_t ident = ntohl(fraghdr->ip6f_ident); 8180 uint16_t offset; 8181 boolean_t more_frags; 8182 uint8_t nexthdr = fraghdr->ip6f_nxt; 8183 in6_addr_t *v6dst_ptr; 8184 in6_addr_t *v6src_ptr; 8185 uint_t end; 8186 uint_t hdr_length; 8187 size_t count; 8188 ipf_t *ipf; 8189 ipf_t **ipfp; 8190 ipfb_t *ipfb; 8191 mblk_t *mp1; 8192 uint8_t ecn_info = 0; 8193 size_t msg_len; 8194 mblk_t *tail_mp; 8195 mblk_t *t_mp; 8196 boolean_t pruned = B_FALSE; 8197 uint32_t sum_val; 8198 uint16_t sum_flags; 8199 ip_stack_t *ipst = ill->ill_ipst; 8200 8201 if (cksum_val != NULL) 8202 *cksum_val = 0; 8203 if (cksum_flags != NULL) 8204 *cksum_flags = 0; 8205 8206 /* 8207 * We utilize hardware computed checksum info only for UDP since 8208 * IP fragmentation is a normal occurrence for the protocol. In 8209 * addition, checksum offload support for IP fragments carrying 8210 * UDP payload is commonly implemented across network adapters.
8211 */ 8212 ASSERT(inill != NULL); 8213 if (nexthdr == IPPROTO_UDP && dohwcksum && ILL_HCKSUM_CAPABLE(inill) && 8214 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 8215 mblk_t *mp1 = mp->b_cont; 8216 int32_t len; 8217 8218 /* Record checksum information from the packet */ 8219 sum_val = (uint32_t)DB_CKSUM16(mp); 8220 sum_flags = DB_CKSUMFLAGS(mp); 8221 8222 /* fragmented payload offset from beginning of mblk */ 8223 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 8224 8225 if ((sum_flags & HCK_PARTIALCKSUM) && 8226 (mp1 == NULL || mp1->b_cont == NULL) && 8227 offset >= (uint16_t)DB_CKSUMSTART(mp) && 8228 ((len = offset - (uint16_t)DB_CKSUMSTART(mp)) & 1) == 0) { 8229 uint32_t adj; 8230 /* 8231 * Partial checksum has been calculated by hardware 8232 * and attached to the packet; in addition, any 8233 * prepended extraneous data is even byte aligned. 8234 * If any such data exists, we adjust the checksum; 8235 * this would also handle any postpended data. 8236 */ 8237 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 8238 mp, mp1, len, adj); 8239 8240 /* One's complement subtract extraneous checksum */ 8241 if (adj >= sum_val) 8242 sum_val = ~(adj - sum_val) & 0xFFFF; 8243 else 8244 sum_val -= adj; 8245 } 8246 } else { 8247 sum_val = 0; 8248 sum_flags = 0; 8249 } 8250 8251 /* Clear hardware checksumming flag */ 8252 DB_CKSUMFLAGS(mp) = 0; 8253 8254 /* 8255 * Note: Fragment offset in header is in 8-octet units. 8256 * Clearing least significant 3 bits not only extracts 8257 * it but also gets it in units of octets. 8258 */ 8259 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 8260 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 8261 8262 /* 8263 * Is the more frags flag on and the payload length not a multiple 8264 * of eight?
8265 */ 8266 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 8267 zoneid_t zoneid; 8268 8269 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8270 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8271 if (zoneid == ALL_ZONES) { 8272 freemsg(mp); 8273 return (NULL); 8274 } 8275 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8276 (uint32_t)((char *)&ip6h->ip6_plen - 8277 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8278 return (NULL); 8279 } 8280 8281 v6src_ptr = &ip6h->ip6_src; 8282 v6dst_ptr = &ip6h->ip6_dst; 8283 end = remlen; 8284 8285 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 8286 end += offset; 8287 8288 /* 8289 * Would fragment cause reassembled packet to have a payload length 8290 * greater than IP_MAXPACKET - the max payload size? 8291 */ 8292 if (end > IP_MAXPACKET) { 8293 zoneid_t zoneid; 8294 8295 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8296 zoneid = ipif_lookup_addr_zoneid_v6(&ip6h->ip6_dst, ill, ipst); 8297 if (zoneid == ALL_ZONES) { 8298 freemsg(mp); 8299 return (NULL); 8300 } 8301 icmp_param_problem_v6(ill->ill_wq, mp, ICMP6_PARAMPROB_HEADER, 8302 (uint32_t)((char *)&fraghdr->ip6f_offlg - 8303 (char *)ip6h), B_FALSE, B_FALSE, zoneid, ipst); 8304 return (NULL); 8305 } 8306 8307 /* 8308 * This packet just has one fragment. Reassembly not 8309 * needed. 8310 */ 8311 if (!more_frags && offset == 0) { 8312 goto reass_done; 8313 } 8314 8315 /* 8316 * Drop the fragment as early as possible, if 8317 * we don't have resource(s) to re-assemble. 8318 */ 8319 if (ipst->ips_ip_reass_queue_bytes == 0) { 8320 freemsg(mp); 8321 return (NULL); 8322 } 8323 8324 /* Record the ECN field info. */ 8325 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 8326 /* 8327 * If this is not the first fragment, dump the unfragmentable 8328 * portion of the packet. 8329 */ 8330 if (offset) 8331 mp->b_rptr = (uchar_t *)&fraghdr[1]; 8332 8333 /* 8334 * Fragmentation reassembly.
Each ILL has a hash table for 8335 * queueing packets undergoing reassembly for all IPIFs 8336 * associated with the ILL. The hash is based on the packet 8337 * IP ident field. The ILL frag hash table was allocated 8338 * as a timer block at the time the ILL was created. Whenever 8339 * there is anything on the reassembly queue, the timer will 8340 * be running. 8341 */ 8342 msg_len = MBLKSIZE(mp); 8343 tail_mp = mp; 8344 while (tail_mp->b_cont != NULL) { 8345 tail_mp = tail_mp->b_cont; 8346 msg_len += MBLKSIZE(tail_mp); 8347 } 8348 /* 8349 * If the reassembly list for this ILL will get too big 8350 * prune it. 8351 */ 8352 8353 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 8354 ipst->ips_ip_reass_queue_bytes) { 8355 ill_frag_prune(ill, 8356 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 8357 (ipst->ips_ip_reass_queue_bytes - msg_len)); 8358 pruned = B_TRUE; 8359 } 8360 8361 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 8362 mutex_enter(&ipfb->ipfb_lock); 8363 8364 ipfp = &ipfb->ipfb_ipf; 8365 /* Try to find an existing fragment queue for this packet. */ 8366 for (;;) { 8367 ipf = ipfp[0]; 8368 if (ipf) { 8369 /* 8370 * It has to match on ident, source address, and 8371 * dest address. 8372 */ 8373 if (ipf->ipf_ident == ident && 8374 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 8375 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 8376 8377 /* 8378 * If we have received too many 8379 * duplicate fragments for this packet 8380 * free it. 8381 */ 8382 if (ipf->ipf_num_dups > ip_max_frag_dups) { 8383 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8384 freemsg(mp); 8385 mutex_exit(&ipfb->ipfb_lock); 8386 return (NULL); 8387 } 8388 8389 break; 8390 } 8391 ipfp = &ipf->ipf_hash_next; 8392 continue; 8393 } 8394 8395 8396 /* 8397 * If we pruned the list, do we want to store this new 8398 * fragment? We apply an optimization here based on the 8399 * fact that most fragments will be received in order.
8400 * So if the offset of this incoming fragment is zero, 8401 * it is the first fragment of a new packet. We will 8402 * keep it. Otherwise drop the fragment, as we have 8403 * probably pruned the packet already (since the 8404 * packet cannot be found). 8405 */ 8406 8407 if (pruned && offset != 0) { 8408 mutex_exit(&ipfb->ipfb_lock); 8409 freemsg(mp); 8410 return (NULL); 8411 } 8412 8413 /* New guy. Allocate a frag message. */ 8414 mp1 = allocb(sizeof (*ipf), BPRI_MED); 8415 if (!mp1) { 8416 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8417 freemsg(mp); /* * Shared "nothing to return" exit: drops the bucket lock and * returns NULL. Later code in this function jumps back here * with ipfb_lock still held. */ 8418 partial_reass_done: 8419 mutex_exit(&ipfb->ipfb_lock); 8420 return (NULL); 8421 } 8422 8423 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 8424 /* 8425 * Too many fragmented packets in this hash bucket. 8426 * Free the oldest. 8427 */ 8428 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 8429 } 8430 8431 mp1->b_cont = mp; 8432 8433 /* Initialize the fragment header. */ 8434 ipf = (ipf_t *)mp1->b_rptr; 8435 ipf->ipf_mp = mp1; 8436 ipf->ipf_ptphn = ipfp; 8437 ipfp[0] = ipf; 8438 ipf->ipf_hash_next = NULL; 8439 ipf->ipf_ident = ident; 8440 ipf->ipf_v6src = *v6src_ptr; 8441 ipf->ipf_v6dst = *v6dst_ptr; 8442 /* Record reassembly start time.
*/ 8443 ipf->ipf_timestamp = gethrestime_sec(); 8444 /* Record ipf generation and account for frag header */ 8445 ipf->ipf_gen = ill->ill_ipf_gen++; 8446 ipf->ipf_count = MBLKSIZE(mp1); 8447 ipf->ipf_protocol = nexthdr; 8448 ipf->ipf_nf_hdr_len = 0; 8449 ipf->ipf_prev_nexthdr_offset = 0; 8450 ipf->ipf_last_frag_seen = B_FALSE; 8451 ipf->ipf_ecn = ecn_info; 8452 ipf->ipf_num_dups = 0; 8453 ipfb->ipfb_frag_pkts++; 8454 ipf->ipf_checksum = 0; 8455 ipf->ipf_checksum_flags = 0; 8456 8457 /* Store checksum value in fragment header */ 8458 if (sum_flags != 0) { 8459 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8460 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8461 ipf->ipf_checksum = sum_val; 8462 ipf->ipf_checksum_flags = sum_flags; 8463 } 8464 8465 /* 8466 * We handle reassembly two ways. In the easy case, 8467 * where all the fragments show up in order, we do 8468 * minimal bookkeeping, and just clip new pieces on 8469 * the end. If we ever see a hole, then we go off 8470 * to ip_reassemble which has to mark the pieces and 8471 * keep track of the number of holes, etc. Obviously, 8472 * the point of having both mechanisms is so we can 8473 * handle the easy case as efficiently as possible. 8474 */ 8475 if (offset == 0) { 8476 /* Easy case, in-order reassembly so far. */ 8477 /* Update the byte count */ 8478 ipf->ipf_count += msg_len; 8479 ipf->ipf_tail_mp = tail_mp; 8480 /* 8481 * Keep track of next expected offset in 8482 * ipf_end. 8483 */ 8484 ipf->ipf_end = end; 8485 ipf->ipf_nf_hdr_len = hdr_length; 8486 ipf->ipf_prev_nexthdr_offset = *prev_nexthdr_offset; 8487 } else { 8488 /* Hard case, hole at the beginning. */ 8489 ipf->ipf_tail_mp = NULL; 8490 /* 8491 * ipf_end == 0 means that we have given up 8492 * on easy reassembly. 8493 */ 8494 ipf->ipf_end = 0; 8495 8496 /* Forget checksum offload from now on */ 8497 ipf->ipf_checksum_flags = 0; 8498 8499 /* 8500 * ipf_hole_cnt is set by ip_reassemble. 8501 * ipf_count is updated by ip_reassemble.
8502 * No need to check for return value here 8503 * as we don't expect reassembly to complete or 8504 * fail for the first fragment itself. 8505 */ 8506 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 8507 msg_len); 8508 } 8509 /* Update per ipfb and ill byte counts */ 8510 ipfb->ipfb_count += ipf->ipf_count; 8511 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8512 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 8513 /* If the frag timer wasn't already going, start it. */ 8514 mutex_enter(&ill->ill_lock); 8515 ill_frag_timer_start(ill); 8516 mutex_exit(&ill->ill_lock); 8517 goto partial_reass_done; 8518 } 8519 8520 /* 8521 * If the packet's flag has changed (it could be coming up 8522 * from an interface different than the previous, therefore 8523 * possibly different checksum capability), then forget about 8524 * any stored checksum states. Otherwise add the value to 8525 * the existing one stored in the fragment header. 8526 */ 8527 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 8528 sum_val += ipf->ipf_checksum; 8529 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8530 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 8531 ipf->ipf_checksum = sum_val; 8532 } else if (ipf->ipf_checksum_flags != 0) { 8533 /* Forget checksum offload from now on */ 8534 ipf->ipf_checksum_flags = 0; 8535 } 8536 8537 /* 8538 * We have a new piece of a datagram which is already being 8539 * reassembled. Update the ECN info if all IP fragments 8540 * are ECN capable. If there is one which is not, clear 8541 * all the info. If there is at least one which has CE 8542 * code point, IP needs to report that up to transport.
8543 */ 8544 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 8545 if (ecn_info == IPH_ECN_CE) 8546 ipf->ipf_ecn = IPH_ECN_CE; 8547 } else { 8548 ipf->ipf_ecn = IPH_ECN_NECT; 8549 } 8550 8551 if (offset && ipf->ipf_end == offset) { 8552 /* The new fragment fits at the end */ 8553 ipf->ipf_tail_mp->b_cont = mp; 8554 /* Update the byte count */ 8555 ipf->ipf_count += msg_len; 8556 /* Update per ipfb and ill byte counts */ 8557 ipfb->ipfb_count += msg_len; 8558 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8559 atomic_add_32(&ill->ill_frag_count, msg_len); 8560 if (more_frags) { 8561 /* More to come. */ 8562 ipf->ipf_end = end; 8563 ipf->ipf_tail_mp = tail_mp; 8564 goto partial_reass_done; 8565 } 8566 } else { 8567 /* 8568 * Go do the hard cases. 8569 * Call ip_reassemble(). 8570 */ 8571 int ret; 8572 8573 if (offset == 0) { 8574 if (ipf->ipf_prev_nexthdr_offset == 0) { 8575 ipf->ipf_nf_hdr_len = hdr_length; 8576 ipf->ipf_prev_nexthdr_offset = 8577 *prev_nexthdr_offset; 8578 } 8579 } 8580 /* Save current byte count */ 8581 count = ipf->ipf_count; 8582 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 8583 8584 /* Count of bytes added and subtracted (freeb()ed) */ 8585 count = ipf->ipf_count - count; 8586 if (count) { 8587 /* Update per ipfb and ill byte counts */ 8588 ipfb->ipfb_count += count; 8589 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 8590 atomic_add_32(&ill->ill_frag_count, count); 8591 } 8592 if (ret == IP_REASS_PARTIAL) { 8593 goto partial_reass_done; 8594 } else if (ret == IP_REASS_FAILED) { 8595 /* Reassembly failed. Free up all resources */ 8596 ill_frag_free_pkts(ill, ipfb, ipf, 1); 8597 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 8598 IP_REASS_SET_START(t_mp, 0); 8599 IP_REASS_SET_END(t_mp, 0); 8600 } 8601 freemsg(mp); 8602 goto partial_reass_done; 8603 } 8604 8605 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 8606 } 8607 /* 8608 * We have completed reassembly.
Unhook the frag header from 8609 * the reassembly list. 8610 * 8611 * Grab the unfragmentable header length next header value out 8612 * of the first fragment 8613 */ 8614 ASSERT(ipf->ipf_nf_hdr_len != 0); 8615 hdr_length = ipf->ipf_nf_hdr_len; 8616 8617 /* 8618 * Before we free the frag header, record the ECN info 8619 * to report back to the transport. 8620 */ 8621 ecn_info = ipf->ipf_ecn; 8622 8623 /* 8624 * Store the nextheader field in the header preceding the fragment 8625 * header 8626 */ 8627 nexthdr = ipf->ipf_protocol; 8628 *prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 8629 ipfp = ipf->ipf_ptphn; 8630 8631 /* We need to supply these to caller */ 8632 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 8633 sum_val = ipf->ipf_checksum; 8634 else 8635 sum_val = 0; 8636 8637 mp1 = ipf->ipf_mp; 8638 count = ipf->ipf_count; 8639 ipf = ipf->ipf_hash_next; 8640 if (ipf) 8641 ipf->ipf_ptphn = ipfp; 8642 ipfp[0] = ipf; 8643 atomic_add_32(&ill->ill_frag_count, -count); 8644 ASSERT(ipfb->ipfb_count >= count); 8645 ipfb->ipfb_count -= count; 8646 ipfb->ipfb_frag_pkts--; 8647 mutex_exit(&ipfb->ipfb_lock); 8648 /* Ditch the frag header. */ 8649 mp = mp1->b_cont; 8650 freeb(mp1); 8651 8652 /* 8653 * Make sure the packet is good by doing some sanity 8654 * check. If bad we can silently drop the packet. 8655 */ 8656 reass_done: 8657 if (hdr_length < sizeof (ip6_frag_t)) { 8658 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 8659 ip1dbg(("ip_rput_frag_v6: bad packet\n")); 8660 freemsg(mp); 8661 return (NULL); 8662 } 8663 8664 /* 8665 * Remove the fragment header from the initial header by 8666 * splitting the mblk into the non-fragmentable header and 8667 * everything after the fragment extension header. This has the 8668 * side effect of putting all the headers that need destination 8669 * processing into the b_cont block-- on return this fact is 8670 * used in order to avoid having to look at the extensions 8671 * already processed.
8672 * 8673 * Note that this code assumes that the unfragmentable portion 8674 * of the header is in the first mblk and increments 8675 * the read pointer past it. If this assumption is broken 8676 * this code fails badly. 8677 */ 8678 if (mp->b_rptr + hdr_length != mp->b_wptr) { 8679 mblk_t *nmp; 8680 8681 if (!(nmp = dupb(mp))) { 8682 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 8683 ip1dbg(("ip_rput_frag_v6: dupb failed\n")); 8684 freemsg(mp); 8685 return (NULL); 8686 } 8687 nmp->b_cont = mp->b_cont; 8688 mp->b_cont = nmp; 8689 nmp->b_rptr += hdr_length; 8690 } 8691 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 8692 8693 ip6h = (ip6_t *)mp->b_rptr; 8694 ((char *)ip6h)[*prev_nexthdr_offset] = nexthdr; 8695 8696 /* Restore original IP length in header. */ 8697 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 8698 /* Record the ECN info. */ 8699 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 8700 ip6h->ip6_vcf |= htonl(ecn_info << 20); 8701 8702 /* Reassembly is successful; return checksum information if needed */ 8703 if (cksum_val != NULL) 8704 *cksum_val = sum_val; 8705 if (cksum_flags != NULL) 8706 *cksum_flags = sum_flags; 8707 8708 return (mp); 8709 } 8710 8711 /* 8712 * Given an mblk and a ptr, find the destination address in an IPv6 routing 8713 * header. 8714 */ 8715 static in6_addr_t 8716 pluck_out_dst(mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 8717 { 8718 ip6_rthdr0_t *rt0; 8719 int segleft, numaddr; 8720 in6_addr_t *ap, rv = oldrv; 8721 8722 rt0 = (ip6_rthdr0_t *)whereptr; 8723 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 8724 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 8725 uint8_t *, whereptr); 8726 return (rv); 8727 } 8728 segleft = rt0->ip6r0_segleft; 8729 numaddr = rt0->ip6r0_len / 2; 8730 8731 if ((rt0->ip6r0_len & 0x1) || 8732 whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr || 8733 (segleft > rt0->ip6r0_len / 2)) { 8734 /* 8735 * Corrupt packet.
Either the routing header length is odd 8736 * (can't happen) or mismatched compared to the packet, or the 8737 * number of addresses is. Return what we can. This will 8738 * only be a problem on forwarded packets that get squeezed 8739 * through an outbound tunnel enforcing IPsec Tunnel Mode. 8740 */ 8741 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 8742 whereptr); 8743 return (rv); 8744 } 8745 8746 if (segleft != 0) { 8747 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 8748 rv = ap[numaddr - 1]; 8749 } 8750 8751 return (rv); 8752 } 8753 8754 /* 8755 * Walk through the options to see if there is a routing header. 8756 * If present get the destination which is the last address of 8757 * the option. 8758 */ 8759 in6_addr_t 8760 ip_get_dst_v6(ip6_t *ip6h, mblk_t *mp, boolean_t *is_fragment) 8761 { 8762 mblk_t *current_mp = mp; 8763 uint8_t nexthdr; 8764 uint8_t *whereptr; 8765 int ehdrlen; 8766 in6_addr_t rv; 8767 8768 whereptr = (uint8_t *)ip6h; 8769 ehdrlen = sizeof (ip6_t); 8770 8771 /* We assume at least the IPv6 base header is within one mblk. */ 8772 ASSERT(mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen); 8773 8774 rv = ip6h->ip6_dst; 8775 nexthdr = ip6h->ip6_nxt; 8776 if (is_fragment != NULL) 8777 *is_fragment = B_FALSE; 8778 8779 /* 8780 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 8781 * no extension headers will be split across mblks. 8782 */ 8783 8784 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 8785 nexthdr == IPPROTO_ROUTING) { 8786 if (nexthdr == IPPROTO_ROUTING) 8787 rv = pluck_out_dst(current_mp, whereptr, rv); 8788 8789 /* 8790 * All IPv6 extension headers have the next-header in byte 8791 * 0, and the (length - 8) in 8-byte-words. 8792 */ 8793 while (whereptr + ehdrlen >= current_mp->b_wptr) { 8794 ehdrlen -= (current_mp->b_wptr - whereptr); 8795 current_mp = current_mp->b_cont; 8796 if (current_mp == NULL) { 8797 /* Bad packet. Return what we can. 
*/ 8798 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 8799 mp, mblk_t *, current_mp, ip6_t *, ip6h); 8800 goto done; 8801 } 8802 whereptr = current_mp->b_rptr; 8803 } 8804 whereptr += ehdrlen; 8805 8806 nexthdr = *whereptr; 8807 ASSERT(whereptr + 1 < current_mp->b_wptr); 8808 ehdrlen = (*(whereptr + 1) + 1) * 8; 8809 } 8810 8811 done: 8812 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 8813 *is_fragment = B_TRUE; 8814 return (rv); 8815 } 8816 8817 /* 8818 * ip_source_routed_v6: 8819 * This function is called by redirect code in ip_rput_data_v6 to 8820 * know whether this packet is source routed through this node i.e 8821 * whether this node (router) is part of the journey. This 8822 * function is called under two cases : 8823 * 8824 * case 1 : Routing header was processed by this node and 8825 * ip_process_rthdr replaced ip6_dst with the next hop 8826 * and we are forwarding the packet to the next hop. 8827 * 8828 * case 2 : Routing header was not processed by this node and we 8829 * are just forwarding the packet. 8830 * 8831 * For case (1) we don't want to send redirects. For case(2) we 8832 * want to send redirects. 
8833 */ 8834 static boolean_t 8835 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 8836 { 8837 uint8_t nexthdr; 8838 in6_addr_t *addrptr; 8839 ip6_rthdr0_t *rthdr; 8840 uint8_t numaddr; 8841 ip6_hbh_t *hbhhdr; 8842 uint_t ehdrlen; 8843 uint8_t *byteptr; 8844 8845 ip2dbg(("ip_source_routed_v6\n")); 8846 nexthdr = ip6h->ip6_nxt; 8847 ehdrlen = IPV6_HDR_LEN; 8848 8849 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 8850 while (nexthdr == IPPROTO_HOPOPTS || 8851 nexthdr == IPPROTO_DSTOPTS) { 8852 byteptr = (uint8_t *)ip6h + ehdrlen; 8853 /* 8854 * Check if we have already processed 8855 * packets or we are just a forwarding 8856 * router which only pulled up msgs up 8857 * to IPV6HDR and one HBH ext header 8858 */ 8859 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8860 ip2dbg(("ip_source_routed_v6: Extension" 8861 " headers not processed\n")); 8862 return (B_FALSE); 8863 } 8864 hbhhdr = (ip6_hbh_t *)byteptr; 8865 nexthdr = hbhhdr->ip6h_nxt; 8866 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 8867 } 8868 switch (nexthdr) { 8869 case IPPROTO_ROUTING: 8870 byteptr = (uint8_t *)ip6h + ehdrlen; 8871 /* 8872 * If for some reason, we haven't pulled up 8873 * the routing hdr data mblk, then we must 8874 * not have processed it at all. So for sure 8875 * we are not part of the source routed journey. 8876 */ 8877 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 8878 ip2dbg(("ip_source_routed_v6: Routing" 8879 " header not processed\n")); 8880 return (B_FALSE); 8881 } 8882 rthdr = (ip6_rthdr0_t *)byteptr; 8883 /* 8884 * Either we are an intermediate router or the 8885 * last hop before destination and we have 8886 * already processed the routing header. 8887 * If segment_left is greater than or equal to zero, 8888 * then we must be the (numaddr - segleft) entry 8889 * of the routing header. Although ip6r0_segleft 8890 * is a unit8_t variable, we still check for zero 8891 * or greater value, if in case the data type 8892 * is changed someday in future. 
8893 */ 8894 if (rthdr->ip6r0_segleft > 0 || 8895 rthdr->ip6r0_segleft == 0) { 8896 ire_t *ire = NULL; 8897 8898 numaddr = rthdr->ip6r0_len / 2; 8899 addrptr = (in6_addr_t *)((char *)rthdr + 8900 sizeof (*rthdr)); 8901 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 8902 if (addrptr != NULL) { 8903 ire = ire_ctable_lookup_v6(addrptr, NULL, 8904 IRE_LOCAL, NULL, ALL_ZONES, NULL, 8905 MATCH_IRE_TYPE, 8906 ipst); 8907 if (ire != NULL) { 8908 ire_refrele(ire); 8909 return (B_TRUE); 8910 } 8911 ip1dbg(("ip_source_routed_v6: No ire found\n")); 8912 } 8913 } 8914 /* FALLTHRU */ 8915 default: 8916 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 8917 return (B_FALSE); 8918 } 8919 } 8920 8921 /* 8922 * ip_wput_v6 -- Packets sent down from transport modules show up here. 8923 * Assumes that the following set of headers appear in the first 8924 * mblk: 8925 * ip6i_t (if present) CAN also appear as a separate mblk. 8926 * ip6_t 8927 * Any extension headers 8928 * TCP/UDP/SCTP header (if present) 8929 * The routine can handle an ICMPv6 header that is not in the first mblk. 8930 * 8931 * The order to determine the outgoing interface is as follows: 8932 * 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. 8933 * 2. If q is an ill queue and (link local or multicast destination) then 8934 * use that ill. 8935 * 3. If IPV6_BOUND_IF has been set use that ill. 8936 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. Otherwise 8937 * look for the best IRE match for the unspecified group to determine 8938 * the ill. 8939 * 5. For unicast: Just do an IRE lookup for the best match. 8940 * 8941 * arg2 is always a queue_t *. 8942 * When that queue is an ill_t (i.e. q_next != NULL), then arg must be 8943 * the zoneid. 8944 * When that queue is not an ill_t, then arg must be a conn_t pointer. 
8945 */ 8946 void 8947 ip_output_v6(void *arg, mblk_t *mp, void *arg2, int caller) 8948 { 8949 conn_t *connp = NULL; 8950 queue_t *q = (queue_t *)arg2; 8951 ire_t *ire = NULL; 8952 ire_t *sctp_ire = NULL; 8953 ip6_t *ip6h; 8954 in6_addr_t *v6dstp; 8955 ill_t *ill = NULL; 8956 ipif_t *ipif; 8957 ip6i_t *ip6i; 8958 int cksum_request; /* -1 => normal. */ 8959 /* 1 => Skip TCP/UDP/SCTP checksum */ 8960 /* Otherwise contains insert offset for checksum */ 8961 int unspec_src; 8962 boolean_t do_outrequests; /* Increment OutRequests? */ 8963 mib2_ipIfStatsEntry_t *mibptr; 8964 int match_flags = MATCH_IRE_ILL; 8965 mblk_t *first_mp; 8966 boolean_t mctl_present; 8967 ipsec_out_t *io; 8968 boolean_t multirt_need_resolve = B_FALSE; 8969 mblk_t *copy_mp = NULL; 8970 int err = 0; 8971 int ip6i_flags = 0; 8972 zoneid_t zoneid; 8973 ill_t *saved_ill = NULL; 8974 boolean_t conn_lock_held; 8975 boolean_t need_decref = B_FALSE; 8976 ip_stack_t *ipst; 8977 8978 if (q->q_next != NULL) { 8979 ill = (ill_t *)q->q_ptr; 8980 ipst = ill->ill_ipst; 8981 } else { 8982 connp = (conn_t *)arg; 8983 ASSERT(connp != NULL); 8984 ipst = connp->conn_netstack->netstack_ip; 8985 } 8986 8987 /* 8988 * Highest bit in version field is Reachability Confirmation bit 8989 * used by NUD in ip_xmit_v6(). 8990 */ 8991 #ifdef _BIG_ENDIAN 8992 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 8993 #else 8994 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 8995 #endif 8996 8997 /* 8998 * M_CTL comes from 5 places 8999 * 9000 * 1) TCP sends down IPSEC_OUT(M_CTL) for detached connections 9001 * both V4 and V6 datagrams. 9002 * 9003 * 2) AH/ESP sends down M_CTL after doing their job with both 9004 * V4 and V6 datagrams. 9005 * 9006 * 3) NDP callbacks when nce is resolved and IPSEC_OUT has been 9007 * attached. 
9008 * 9009 * 4) Notifications from an external resolver (for XRESOLV ifs) 9010 * 9011 * 5) AH/ESP send down IPSEC_CTL(M_CTL) to be relayed to hardware for 9012 * IPsec hardware acceleration support. 9013 * 9014 * We need to handle (1)'s IPv6 case and (3) here. For the 9015 * IPv4 case in (1), and (2), IPSEC processing has already 9016 * started. The code in ip_wput() already knows how to handle 9017 * continuing IPSEC processing (for IPv4 and IPv6). All other 9018 * M_CTLs (including case (4)) are passed on to ip_wput_nondata() 9019 * for handling. 9020 */ 9021 first_mp = mp; 9022 mctl_present = B_FALSE; 9023 io = NULL; 9024 9025 /* Multidata transmit? */ 9026 if (DB_TYPE(mp) == M_MULTIDATA) { 9027 /* 9028 * We should never get here, since all Multidata messages 9029 * originating from tcp should have been directed over to 9030 * tcp_multisend() in the first place. 9031 */ 9032 BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 9033 freemsg(mp); 9034 return; 9035 } else if (DB_TYPE(mp) == M_CTL) { 9036 uint32_t mctltype = 0; 9037 uint32_t mlen = MBLKL(first_mp); 9038 9039 mp = mp->b_cont; 9040 mctl_present = B_TRUE; 9041 io = (ipsec_out_t *)first_mp->b_rptr; 9042 9043 /* 9044 * Validate this M_CTL message. The only three types of 9045 * M_CTL messages we expect to see in this code path are 9046 * ipsec_out_t or ipsec_in_t structures (allocated as 9047 * ipsec_info_t unions), or ipsec_ctl_t structures. 9048 * The ipsec_out_type and ipsec_in_type overlap in the two 9049 * data structures, and they are either set to IPSEC_OUT 9050 * or IPSEC_IN depending on which data structure it is. 9051 * ipsec_ctl_t is an IPSEC_CTL. 9052 * 9053 * All other M_CTL messages are sent to ip_wput_nondata() 9054 * for handling. 
9055 */ 9056 if (mlen >= sizeof (io->ipsec_out_type)) 9057 mctltype = io->ipsec_out_type; 9058 9059 if ((mlen == sizeof (ipsec_ctl_t)) && 9060 (mctltype == IPSEC_CTL)) { 9061 ip_output(arg, first_mp, arg2, caller); 9062 return; 9063 } 9064 9065 if ((mlen < sizeof (ipsec_info_t)) || 9066 (mctltype != IPSEC_OUT && mctltype != IPSEC_IN) || 9067 mp == NULL) { 9068 ip_wput_nondata(NULL, q, first_mp, NULL); 9069 return; 9070 } 9071 /* NDP callbacks have q_next non-NULL. That's case #3. */ 9072 if (q->q_next == NULL) { 9073 ip6h = (ip6_t *)mp->b_rptr; 9074 /* 9075 * For a freshly-generated TCP dgram that needs IPV6 9076 * processing, don't call ip_wput immediately. We can 9077 * tell this by the ipsec_out_proc_begin. In-progress 9078 * IPSEC_OUT messages have proc_begin set to TRUE, 9079 * and we want to send all IPSEC_IN messages to 9080 * ip_wput() for IPsec processing or finishing. 9081 */ 9082 if (mctltype == IPSEC_IN || 9083 IPVER(ip6h) != IPV6_VERSION || 9084 io->ipsec_out_proc_begin) { 9085 mibptr = &ipst->ips_ip6_mib; 9086 goto notv6; 9087 } 9088 } 9089 } else if (DB_TYPE(mp) != M_DATA) { 9090 ip_wput_nondata(NULL, q, mp, NULL); 9091 return; 9092 } 9093 9094 ip6h = (ip6_t *)mp->b_rptr; 9095 9096 if (IPVER(ip6h) != IPV6_VERSION) { 9097 mibptr = &ipst->ips_ip6_mib; 9098 goto notv6; 9099 } 9100 9101 if (is_system_labeled() && DB_TYPE(mp) == M_DATA && 9102 (connp == NULL || !connp->conn_ulp_labeled)) { 9103 cred_t *cr; 9104 pid_t pid; 9105 9106 if (connp != NULL) { 9107 ASSERT(CONN_CRED(connp) != NULL); 9108 cr = BEST_CRED(mp, connp, &pid); 9109 err = tsol_check_label_v6(cr, &mp, 9110 connp->conn_mac_mode, ipst, pid); 9111 } else if ((cr = msg_getcred(mp, &pid)) != NULL) { 9112 err = tsol_check_label_v6(cr, &mp, CONN_MAC_DEFAULT, 9113 ipst, pid); 9114 } 9115 if (mctl_present) 9116 first_mp->b_cont = mp; 9117 else 9118 first_mp = mp; 9119 if (err != 0) { 9120 DTRACE_PROBE3( 9121 tsol_ip_log_drop_checklabel_ip6, char *, 9122 "conn(1), failed to check/update mp(2)", 9123 
conn_t, connp, mblk_t, mp); 9124 freemsg(first_mp); 9125 return; 9126 } 9127 ip6h = (ip6_t *)mp->b_rptr; 9128 } 9129 if (q->q_next != NULL) { 9130 /* 9131 * We don't know if this ill will be used for IPv6 9132 * until the ILLF_IPV6 flag is set via SIOCSLIFNAME. 9133 * ipif_set_values() sets the ill_isv6 flag to true if 9134 * ILLF_IPV6 is set. If the ill_isv6 flag isn't true, 9135 * just drop the packet. 9136 */ 9137 if (!ill->ill_isv6) { 9138 ip1dbg(("ip_wput_v6: Received an IPv6 packet before " 9139 "ILLF_IPV6 was set\n")); 9140 freemsg(first_mp); 9141 return; 9142 } 9143 /* For uniformity do a refhold */ 9144 mutex_enter(&ill->ill_lock); 9145 if (!ILL_CAN_LOOKUP(ill)) { 9146 mutex_exit(&ill->ill_lock); 9147 freemsg(first_mp); 9148 return; 9149 } 9150 ill_refhold_locked(ill); 9151 mutex_exit(&ill->ill_lock); 9152 mibptr = ill->ill_ip_mib; 9153 9154 ASSERT(mibptr != NULL); 9155 unspec_src = 0; 9156 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9157 do_outrequests = B_FALSE; 9158 zoneid = (zoneid_t)(uintptr_t)arg; 9159 } else { 9160 ASSERT(connp != NULL); 9161 zoneid = connp->conn_zoneid; 9162 9163 /* is queue flow controlled? */ 9164 if ((q->q_first || connp->conn_draining) && 9165 (caller == IP_WPUT)) { 9166 /* 9167 * 1) TCP sends down M_CTL for detached connections. 9168 * 2) AH/ESP sends down M_CTL. 9169 * 9170 * We don't flow control either of the above. Only 9171 * UDP and others are flow controlled for which we 9172 * can't have a M_CTL. 9173 */ 9174 ASSERT(first_mp == mp); 9175 (void) putq(q, mp); 9176 return; 9177 } 9178 mibptr = &ipst->ips_ip6_mib; 9179 unspec_src = connp->conn_unspec_src; 9180 do_outrequests = B_TRUE; 9181 if (mp->b_flag & MSGHASREF) { 9182 mp->b_flag &= ~MSGHASREF; 9183 ASSERT(connp->conn_ulp == IPPROTO_SCTP); 9184 SCTP_EXTRACT_IPINFO(mp, sctp_ire); 9185 need_decref = B_TRUE; 9186 } 9187 9188 /* 9189 * If there is a policy, try to attach an ipsec_out in 9190 * the front. 
At the end, first_mp either points to a 9191 * M_DATA message or IPSEC_OUT message linked to a 9192 * M_DATA message. We have to do it now as we might 9193 * lose the "conn" if we go through ip_newroute. 9194 */ 9195 if (!mctl_present && 9196 (connp->conn_out_enforce_policy || 9197 connp->conn_latch != NULL)) { 9198 ASSERT(first_mp == mp); 9199 /* XXX Any better way to get the protocol fast ? */ 9200 if (((mp = ipsec_attach_ipsec_out(&mp, connp, NULL, 9201 connp->conn_ulp, ipst->ips_netstack)) == NULL)) { 9202 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9203 if (need_decref) 9204 CONN_DEC_REF(connp); 9205 return; 9206 } else { 9207 ASSERT(mp->b_datap->db_type == M_CTL); 9208 first_mp = mp; 9209 mp = mp->b_cont; 9210 mctl_present = B_TRUE; 9211 io = (ipsec_out_t *)first_mp->b_rptr; 9212 } 9213 } 9214 } 9215 9216 /* check for alignment and full IPv6 header */ 9217 if (!OK_32PTR((uchar_t *)ip6h) || 9218 (mp->b_wptr - (uchar_t *)ip6h) < IPV6_HDR_LEN) { 9219 ip0dbg(("ip_wput_v6: bad alignment or length\n")); 9220 if (do_outrequests) 9221 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9222 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9223 freemsg(first_mp); 9224 if (ill != NULL) 9225 ill_refrele(ill); 9226 if (need_decref) 9227 CONN_DEC_REF(connp); 9228 return; 9229 } 9230 v6dstp = &ip6h->ip6_dst; 9231 cksum_request = -1; 9232 ip6i = NULL; 9233 9234 /* 9235 * Once neighbor discovery has completed, ndp_process() will provide 9236 * locally generated packets for which processing can be reattempted. 9237 * In these cases, connp is NULL and the original zone is part of a 9238 * prepended ipsec_out_t. 9239 */ 9240 if (io != NULL) { 9241 /* 9242 * When coming from icmp_input_v6, the zoneid might not match 9243 * for the loopback case, because inside icmp_input_v6 the 9244 * queue_t is a conn queue from the sending side. 
9245 */ 9246 zoneid = io->ipsec_out_zoneid; 9247 ASSERT(zoneid != ALL_ZONES); 9248 } 9249 9250 if (ip6h->ip6_nxt == IPPROTO_RAW) { 9251 /* 9252 * This is an ip6i_t header followed by an ip6_hdr. 9253 * Check which fields are set. 9254 * 9255 * When the packet comes from a transport we should have 9256 * all needed headers in the first mblk. However, when 9257 * going through ip_newroute*_v6 the ip6i might be in 9258 * a separate mblk when we return here. In that case 9259 * we pullup everything to ensure that extension and transport 9260 * headers "stay" in the first mblk. 9261 */ 9262 ip6i = (ip6i_t *)ip6h; 9263 ip6i_flags = ip6i->ip6i_flags; 9264 9265 ASSERT((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t) || 9266 ((mp->b_wptr - (uchar_t *)ip6i) >= 9267 sizeof (ip6i_t) + IPV6_HDR_LEN)); 9268 9269 if ((mp->b_wptr - (uchar_t *)ip6i) == sizeof (ip6i_t)) { 9270 if (!pullupmsg(mp, -1)) { 9271 ip1dbg(("ip_wput_v6: pullupmsg failed\n")); 9272 if (do_outrequests) { 9273 BUMP_MIB(mibptr, 9274 ipIfStatsHCOutRequests); 9275 } 9276 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9277 freemsg(first_mp); 9278 if (ill != NULL) 9279 ill_refrele(ill); 9280 if (need_decref) 9281 CONN_DEC_REF(connp); 9282 return; 9283 } 9284 ip6h = (ip6_t *)mp->b_rptr; 9285 v6dstp = &ip6h->ip6_dst; 9286 ip6i = (ip6i_t *)ip6h; 9287 } 9288 ip6h = (ip6_t *)&ip6i[1]; 9289 9290 /* 9291 * Advance rptr past the ip6i_t to get ready for 9292 * transmitting the packet. However, if the packet gets 9293 * passed to ip_newroute*_v6 then rptr is moved back so 9294 * that the ip6i_t header can be inspected when the 9295 * packet comes back here after passing through 9296 * ire_add_then_send. 
9297 */ 9298 mp->b_rptr = (uchar_t *)ip6h; 9299 9300 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 9301 ASSERT(ip6i->ip6i_ifindex != 0); 9302 if (ill != NULL) 9303 ill_refrele(ill); 9304 ill = ill_lookup_on_ifindex(ip6i->ip6i_ifindex, 1, 9305 NULL, NULL, NULL, NULL, ipst); 9306 if (ill == NULL) { 9307 if (do_outrequests) { 9308 BUMP_MIB(mibptr, 9309 ipIfStatsHCOutRequests); 9310 } 9311 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9312 ip1dbg(("ip_wput_v6: bad ifindex %d\n", 9313 ip6i->ip6i_ifindex)); 9314 if (need_decref) 9315 CONN_DEC_REF(connp); 9316 freemsg(first_mp); 9317 return; 9318 } 9319 mibptr = ill->ill_ip_mib; 9320 /* 9321 * Preserve the index so that when we return from 9322 * IPSEC processing, we know where to send the packet. 9323 */ 9324 if (mctl_present) { 9325 ASSERT(io != NULL); 9326 io->ipsec_out_ill_index = ip6i->ip6i_ifindex; 9327 } 9328 } 9329 if (ip6i->ip6i_flags & IP6I_VERIFY_SRC) { 9330 cred_t *cr = msg_getcred(mp, NULL); 9331 9332 /* rpcmod doesn't send down db_credp for UDP packets */ 9333 if (cr == NULL) { 9334 if (connp != NULL) 9335 cr = connp->conn_cred; 9336 else 9337 cr = ill->ill_credp; 9338 } 9339 9340 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)); 9341 if (secpolicy_net_rawaccess(cr) != 0) { 9342 /* 9343 * Use IPCL_ZONEID to honor SO_ALLZONES. 9344 */ 9345 ire = ire_route_lookup_v6(&ip6h->ip6_src, 9346 0, 0, (IRE_LOCAL|IRE_LOOPBACK), NULL, 9347 NULL, connp != NULL ? 
9348 IPCL_ZONEID(connp) : zoneid, NULL, 9349 MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, ipst); 9350 if (ire == NULL) { 9351 if (do_outrequests) 9352 BUMP_MIB(mibptr, 9353 ipIfStatsHCOutRequests); 9354 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9355 ip1dbg(("ip_wput_v6: bad source " 9356 "addr\n")); 9357 freemsg(first_mp); 9358 if (ill != NULL) 9359 ill_refrele(ill); 9360 if (need_decref) 9361 CONN_DEC_REF(connp); 9362 return; 9363 } 9364 ire_refrele(ire); 9365 } 9366 /* No need to verify again when using ip_newroute */ 9367 ip6i->ip6i_flags &= ~IP6I_VERIFY_SRC; 9368 } 9369 if (!(ip6i->ip6i_flags & IP6I_NEXTHOP)) { 9370 /* 9371 * Make sure they match since ip_newroute*_v6 etc might 9372 * (unknown to them) inspect ip6i_nexthop when 9373 * they think they access ip6_dst. 9374 */ 9375 ip6i->ip6i_nexthop = ip6h->ip6_dst; 9376 } 9377 if (ip6i->ip6i_flags & IP6I_NO_ULP_CKSUM) 9378 cksum_request = 1; 9379 if (ip6i->ip6i_flags & IP6I_RAW_CHECKSUM) 9380 cksum_request = ip6i->ip6i_checksum_off; 9381 if (ip6i->ip6i_flags & IP6I_UNSPEC_SRC) 9382 unspec_src = 1; 9383 9384 if (do_outrequests && ill != NULL) { 9385 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9386 do_outrequests = B_FALSE; 9387 } 9388 /* 9389 * Store ip6i_t info that we need after we come back 9390 * from IPSEC processing. 9391 */ 9392 if (mctl_present) { 9393 ASSERT(io != NULL); 9394 io->ipsec_out_unspec_src = unspec_src; 9395 } 9396 } 9397 if (connp != NULL && connp->conn_dontroute) 9398 ip6h->ip6_hops = 1; 9399 9400 if (IN6_IS_ADDR_MULTICAST(v6dstp)) 9401 goto ipv6multicast; 9402 9403 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9404 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9405 ASSERT(ill != NULL); 9406 goto send_from_ill; 9407 } 9408 9409 /* 9410 * 2. If q is an ill queue and there's a link-local destination 9411 * then use that ill. 9412 */ 9413 if (ill != NULL && IN6_IS_ADDR_LINKLOCAL(v6dstp)) 9414 goto send_from_ill; 9415 9416 /* 3. If IPV6_BOUND_IF has been set use that ill. 
*/ 9417 if (connp != NULL && connp->conn_outgoing_ill != NULL) { 9418 ill_t *conn_outgoing_ill; 9419 9420 conn_outgoing_ill = conn_get_held_ill(connp, 9421 &connp->conn_outgoing_ill, &err); 9422 if (err == ILL_LOOKUP_FAILED) { 9423 if (ill != NULL) 9424 ill_refrele(ill); 9425 if (need_decref) 9426 CONN_DEC_REF(connp); 9427 freemsg(first_mp); 9428 return; 9429 } 9430 if (ill != NULL) 9431 ill_refrele(ill); 9432 ill = conn_outgoing_ill; 9433 mibptr = ill->ill_ip_mib; 9434 goto send_from_ill; 9435 } 9436 9437 /* 9438 * 4. For unicast: Just do an IRE lookup for the best match. 9439 * If we get here for a link-local address it is rather random 9440 * what interface we pick on a multihomed host. 9441 * *If* there is an IRE_CACHE (and the link-local address 9442 * isn't duplicated on multi links) this will find the IRE_CACHE. 9443 * Otherwise it will use one of the matching IRE_INTERFACE routes 9444 * for the link-local prefix. Hence, applications 9445 * *should* be encouraged to specify an outgoing interface when sending 9446 * to a link local address. 9447 */ 9448 if (connp == NULL || (IP_FLOW_CONTROLLED_ULP(connp->conn_ulp) && 9449 !connp->conn_fully_bound)) { 9450 /* 9451 * We cache IRE_CACHEs to avoid lookups. We don't do 9452 * this for the tcp global queue and listen end point 9453 * as it does not really have a real destination to 9454 * talk to. 9455 */ 9456 ire = ire_cache_lookup_v6(v6dstp, zoneid, msg_getlabel(mp), 9457 ipst); 9458 } else { 9459 /* 9460 * IRE_MARK_CONDEMNED is marked in ire_delete. We don't 9461 * grab a lock here to check for CONDEMNED as it is okay 9462 * to send a packet or two with the IRE_CACHE that is going 9463 * away. 9464 */ 9465 mutex_enter(&connp->conn_lock); 9466 ire = sctp_ire != NULL ? 
sctp_ire : connp->conn_ire_cache; 9467 if (ire != NULL && 9468 IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) && 9469 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 9470 9471 IRE_REFHOLD(ire); 9472 mutex_exit(&connp->conn_lock); 9473 9474 } else { 9475 boolean_t cached = B_FALSE; 9476 9477 connp->conn_ire_cache = NULL; 9478 mutex_exit(&connp->conn_lock); 9479 /* Release the old ire */ 9480 if (ire != NULL && sctp_ire == NULL) 9481 IRE_REFRELE_NOTR(ire); 9482 9483 ire = ire_cache_lookup_v6(v6dstp, zoneid, 9484 msg_getlabel(mp), ipst); 9485 if (ire != NULL) { 9486 IRE_REFHOLD_NOTR(ire); 9487 9488 mutex_enter(&connp->conn_lock); 9489 if (CONN_CACHE_IRE(connp) && 9490 (connp->conn_ire_cache == NULL)) { 9491 rw_enter(&ire->ire_bucket->irb_lock, 9492 RW_READER); 9493 if (!(ire->ire_marks & 9494 IRE_MARK_CONDEMNED)) { 9495 connp->conn_ire_cache = ire; 9496 cached = B_TRUE; 9497 } 9498 rw_exit(&ire->ire_bucket->irb_lock); 9499 } 9500 mutex_exit(&connp->conn_lock); 9501 9502 /* 9503 * We can continue to use the ire but since it 9504 * was not cached, we should drop the extra 9505 * reference. 9506 */ 9507 if (!cached) 9508 IRE_REFRELE_NOTR(ire); 9509 } 9510 } 9511 } 9512 9513 if (ire != NULL) { 9514 if (do_outrequests) { 9515 /* Handle IRE_LOCAL's that might appear here */ 9516 if (ire->ire_type == IRE_CACHE) { 9517 mibptr = ((ill_t *)ire->ire_stq->q_ptr)-> 9518 ill_ip_mib; 9519 } else { 9520 mibptr = ire->ire_ipif->ipif_ill->ill_ip_mib; 9521 } 9522 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9523 } 9524 9525 /* 9526 * Check if the ire has the RTF_MULTIRT flag, inherited 9527 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9528 */ 9529 if (ire->ire_flags & RTF_MULTIRT) { 9530 /* 9531 * Force hop limit of multirouted packets if required. 9532 * The hop limit of such packets is bounded by the 9533 * ip_multirt_ttl ndd variable. 9534 * NDP packets must have a hop limit of 255; don't 9535 * change the hop limit in that case. 
9536 */ 9537 if ((ipst->ips_ip_multirt_ttl > 0) && 9538 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9539 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9540 if (ip_debug > 3) { 9541 ip2dbg(("ip_wput_v6: forcing multirt " 9542 "hop limit to %d (was %d) ", 9543 ipst->ips_ip_multirt_ttl, 9544 ip6h->ip6_hops)); 9545 pr_addr_dbg("v6dst %s\n", AF_INET6, 9546 &ire->ire_addr_v6); 9547 } 9548 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9549 } 9550 9551 /* 9552 * We look at this point if there are pending 9553 * unresolved routes. ire_multirt_need_resolve_v6() 9554 * checks in O(n) that all IRE_OFFSUBNET ire 9555 * entries for the packet's destination and 9556 * flagged RTF_MULTIRT are currently resolved. 9557 * If some remain unresolved, we do a copy 9558 * of the current message. It will be used 9559 * to initiate additional route resolutions. 9560 */ 9561 multirt_need_resolve = 9562 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9563 msg_getlabel(first_mp), ipst); 9564 ip2dbg(("ip_wput_v6: ire %p, " 9565 "multirt_need_resolve %d, first_mp %p\n", 9566 (void *)ire, multirt_need_resolve, 9567 (void *)first_mp)); 9568 if (multirt_need_resolve) { 9569 copy_mp = copymsg(first_mp); 9570 if (copy_mp != NULL) { 9571 MULTIRT_DEBUG_TAG(copy_mp); 9572 } 9573 } 9574 } 9575 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9576 connp, caller, ip6i_flags, zoneid); 9577 if (need_decref) { 9578 CONN_DEC_REF(connp); 9579 connp = NULL; 9580 } 9581 IRE_REFRELE(ire); 9582 9583 /* 9584 * Try to resolve another multiroute if 9585 * ire_multirt_need_resolve_v6() deemed it necessary. 9586 * copy_mp will be consumed (sent or freed) by 9587 * ip_newroute_v6(). 
9588 */ 9589 if (copy_mp != NULL) { 9590 if (mctl_present) { 9591 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9592 } else { 9593 ip6h = (ip6_t *)copy_mp->b_rptr; 9594 } 9595 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 9596 &ip6h->ip6_src, NULL, zoneid, ipst); 9597 } 9598 if (ill != NULL) 9599 ill_refrele(ill); 9600 return; 9601 } 9602 9603 /* 9604 * No full IRE for this destination. Send it to 9605 * ip_newroute_v6 to see if anything else matches. 9606 * Mark this packet as having originated on this 9607 * machine. 9608 * Update rptr if there was an ip6i_t header. 9609 */ 9610 mp->b_prev = NULL; 9611 mp->b_next = NULL; 9612 if (ip6i != NULL) 9613 mp->b_rptr -= sizeof (ip6i_t); 9614 9615 if (unspec_src) { 9616 if (ip6i == NULL) { 9617 /* 9618 * Add ip6i_t header to carry unspec_src 9619 * until the packet comes back in ip_wput_v6. 9620 */ 9621 mp = ip_add_info_v6(mp, NULL, v6dstp); 9622 if (mp == NULL) { 9623 if (do_outrequests) 9624 BUMP_MIB(mibptr, 9625 ipIfStatsHCOutRequests); 9626 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9627 if (mctl_present) 9628 freeb(first_mp); 9629 if (ill != NULL) 9630 ill_refrele(ill); 9631 if (need_decref) 9632 CONN_DEC_REF(connp); 9633 return; 9634 } 9635 ip6i = (ip6i_t *)mp->b_rptr; 9636 9637 if (mctl_present) { 9638 ASSERT(first_mp != mp); 9639 first_mp->b_cont = mp; 9640 } else { 9641 first_mp = mp; 9642 } 9643 9644 if ((mp->b_wptr - (uchar_t *)ip6i) == 9645 sizeof (ip6i_t)) { 9646 /* 9647 * ndp_resolver called from ip_newroute_v6 9648 * expects pulled up message. 
9649 */ 9650 if (!pullupmsg(mp, -1)) { 9651 ip1dbg(("ip_wput_v6: pullupmsg" 9652 " failed\n")); 9653 if (do_outrequests) { 9654 BUMP_MIB(mibptr, 9655 ipIfStatsHCOutRequests); 9656 } 9657 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9658 freemsg(first_mp); 9659 if (ill != NULL) 9660 ill_refrele(ill); 9661 if (need_decref) 9662 CONN_DEC_REF(connp); 9663 return; 9664 } 9665 ip6i = (ip6i_t *)mp->b_rptr; 9666 } 9667 ip6h = (ip6_t *)&ip6i[1]; 9668 v6dstp = &ip6h->ip6_dst; 9669 } 9670 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 9671 if (mctl_present) { 9672 ASSERT(io != NULL); 9673 io->ipsec_out_unspec_src = unspec_src; 9674 } 9675 } 9676 if (do_outrequests) 9677 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9678 if (need_decref) 9679 CONN_DEC_REF(connp); 9680 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, NULL, zoneid, ipst); 9681 if (ill != NULL) 9682 ill_refrele(ill); 9683 return; 9684 9685 9686 /* 9687 * Handle multicast packets with or without an conn. 9688 * Assumes that the transports set ip6_hops taking 9689 * IPV6_MULTICAST_HOPS (and the other ways to set the hoplimit) 9690 * into account. 9691 */ 9692 ipv6multicast: 9693 ip2dbg(("ip_wput_v6: multicast\n")); 9694 9695 /* 9696 * Hold the conn_lock till we refhold the ill of interest that is 9697 * pointed to from the conn. Since we cannot do an ill/ipif_refrele 9698 * while holding any locks, postpone the refrele until after the 9699 * conn_lock is dropped. 9700 */ 9701 if (connp != NULL) { 9702 mutex_enter(&connp->conn_lock); 9703 conn_lock_held = B_TRUE; 9704 } else { 9705 conn_lock_held = B_FALSE; 9706 } 9707 if (ip6i != NULL && (ip6i->ip6i_flags & IP6I_IFINDEX)) { 9708 /* 1. If an ip6i_t with IP6I_IFINDEX set then use that ill. */ 9709 ASSERT(ill != NULL); 9710 } else if (ill != NULL) { 9711 /* 9712 * 2. If q is an ill queue and (link local or multicast 9713 * destination) then use that ill. 9714 * We don't need the ipif initialization here. 
9715 * This useless assert below is just to prevent lint from 9716 * reporting a null body if statement. 9717 */ 9718 ASSERT(ill != NULL); 9719 } else if (connp != NULL) { 9720 /* 9721 * 3. If IPV6_BOUND_IF has been set use that ill. 9722 * 9723 * 4. For multicast: if IPV6_MULTICAST_IF has been set use it. 9724 * Otherwise look for the best IRE match for the unspecified 9725 * group to determine the ill. 9726 * 9727 * conn_multicast_ill is used for only IPv6 packets. 9728 * conn_multicast_ipif is used for only IPv4 packets. 9729 * Thus a PF_INET6 socket send both IPv4 and IPv6 9730 * multicast packets using different IP*_MULTICAST_IF 9731 * interfaces. 9732 */ 9733 if (connp->conn_outgoing_ill != NULL) { 9734 err = ill_check_and_refhold(connp->conn_outgoing_ill); 9735 if (err == ILL_LOOKUP_FAILED) { 9736 ip1dbg(("ip_output_v6: multicast" 9737 " conn_outgoing_ill no ipif\n")); 9738 multicast_discard: 9739 ASSERT(saved_ill == NULL); 9740 if (conn_lock_held) 9741 mutex_exit(&connp->conn_lock); 9742 if (ill != NULL) 9743 ill_refrele(ill); 9744 freemsg(first_mp); 9745 if (do_outrequests) 9746 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9747 if (need_decref) 9748 CONN_DEC_REF(connp); 9749 return; 9750 } 9751 ill = connp->conn_outgoing_ill; 9752 } else if (connp->conn_multicast_ill != NULL) { 9753 err = ill_check_and_refhold(connp->conn_multicast_ill); 9754 if (err == ILL_LOOKUP_FAILED) { 9755 ip1dbg(("ip_output_v6: multicast" 9756 " conn_multicast_ill no ipif\n")); 9757 goto multicast_discard; 9758 } 9759 ill = connp->conn_multicast_ill; 9760 } else { 9761 mutex_exit(&connp->conn_lock); 9762 conn_lock_held = B_FALSE; 9763 ipif = ipif_lookup_group_v6(v6dstp, zoneid, ipst); 9764 if (ipif == NULL) { 9765 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9766 goto multicast_discard; 9767 } 9768 /* 9769 * We have a ref to this ipif, so we can safely 9770 * access ipif_ill. 
9771 */ 9772 ill = ipif->ipif_ill; 9773 mutex_enter(&ill->ill_lock); 9774 if (!ILL_CAN_LOOKUP(ill)) { 9775 mutex_exit(&ill->ill_lock); 9776 ipif_refrele(ipif); 9777 ill = NULL; 9778 ip1dbg(("ip_output_v6: multicast no ipif\n")); 9779 goto multicast_discard; 9780 } 9781 ill_refhold_locked(ill); 9782 mutex_exit(&ill->ill_lock); 9783 ipif_refrele(ipif); 9784 /* 9785 * Save binding until IPV6_MULTICAST_IF 9786 * changes it 9787 */ 9788 mutex_enter(&connp->conn_lock); 9789 connp->conn_multicast_ill = ill; 9790 mutex_exit(&connp->conn_lock); 9791 } 9792 } 9793 if (conn_lock_held) 9794 mutex_exit(&connp->conn_lock); 9795 9796 if (saved_ill != NULL) 9797 ill_refrele(saved_ill); 9798 9799 ASSERT(ill != NULL); 9800 /* 9801 * For multicast loopback interfaces replace the multicast address 9802 * with a unicast address for the ire lookup. 9803 */ 9804 if (IS_LOOPBACK(ill)) 9805 v6dstp = &ill->ill_ipif->ipif_v6lcl_addr; 9806 9807 mibptr = ill->ill_ip_mib; 9808 if (do_outrequests) { 9809 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9810 do_outrequests = B_FALSE; 9811 } 9812 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 9813 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 9814 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 9815 9816 /* 9817 * As we may lose the conn by the time we reach ip_wput_ire_v6 9818 * we copy conn_multicast_loop and conn_dontroute on to an 9819 * ipsec_out. In case if this datagram goes out secure, 9820 * we need the ill_index also. Copy that also into the 9821 * ipsec_out. 
9822 */ 9823 if (mctl_present) { 9824 io = (ipsec_out_t *)first_mp->b_rptr; 9825 ASSERT(first_mp->b_datap->db_type == M_CTL); 9826 ASSERT(io->ipsec_out_type == IPSEC_OUT); 9827 } else { 9828 ASSERT(mp == first_mp); 9829 if ((first_mp = ipsec_alloc_ipsec_out(ipst->ips_netstack)) == 9830 NULL) { 9831 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9832 freemsg(mp); 9833 if (ill != NULL) 9834 ill_refrele(ill); 9835 if (need_decref) 9836 CONN_DEC_REF(connp); 9837 return; 9838 } 9839 io = (ipsec_out_t *)first_mp->b_rptr; 9840 /* This is not a secure packet */ 9841 io->ipsec_out_secure = B_FALSE; 9842 io->ipsec_out_use_global_policy = B_TRUE; 9843 io->ipsec_out_zoneid = 9844 (zoneid != ALL_ZONES ? zoneid : GLOBAL_ZONEID); 9845 first_mp->b_cont = mp; 9846 mctl_present = B_TRUE; 9847 } 9848 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9849 io->ipsec_out_unspec_src = unspec_src; 9850 if (connp != NULL) 9851 io->ipsec_out_dontroute = connp->conn_dontroute; 9852 9853 send_from_ill: 9854 ASSERT(ill != NULL); 9855 ASSERT(mibptr == ill->ill_ip_mib); 9856 9857 if (do_outrequests) { 9858 BUMP_MIB(mibptr, ipIfStatsHCOutRequests); 9859 do_outrequests = B_FALSE; 9860 } 9861 9862 /* 9863 * Because nce_xmit() calls ip_output_v6() and NCEs are always tied to 9864 * an underlying interface, IS_UNDER_IPMP() may be true even when 9865 * building IREs that will be used for data traffic. As such, use the 9866 * packet's source address to determine whether the traffic is test 9867 * traffic, and set MATCH_IRE_MARK_TESTHIDDEN if so. 9868 * 9869 * Separately, we also need to mark probe packets so that ND can 9870 * process them specially; see the comments in nce_queue_mp_common(). 
9871 */ 9872 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 9873 ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL)) { 9874 if (ip6i == NULL) { 9875 if ((mp = ip_add_info_v6(mp, NULL, v6dstp)) == NULL) { 9876 if (mctl_present) 9877 freeb(first_mp); 9878 goto discard; 9879 } 9880 9881 if (mctl_present) 9882 first_mp->b_cont = mp; 9883 else 9884 first_mp = mp; 9885 9886 /* ndp_resolver() expects a pulled-up message */ 9887 if (MBLKL(mp) == sizeof (ip6i_t) && 9888 pullupmsg(mp, -1) == 0) { 9889 ip1dbg(("ip_output_v6: pullupmsg failed\n")); 9890 discard: BUMP_MIB(mibptr, ipIfStatsOutDiscards); 9891 ill_refrele(ill); 9892 if (need_decref) 9893 CONN_DEC_REF(connp); 9894 return; 9895 } 9896 ip6i = (ip6i_t *)mp->b_rptr; 9897 ip6h = (ip6_t *)&ip6i[1]; 9898 v6dstp = &ip6h->ip6_dst; 9899 mp->b_rptr = (uchar_t *)ip6h; /* rewound below */ 9900 } 9901 ip6i->ip6i_flags |= IP6I_IPMP_PROBE; 9902 match_flags |= MATCH_IRE_MARK_TESTHIDDEN; 9903 } 9904 9905 if (io != NULL) 9906 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 9907 9908 /* 9909 * When a specific ill is specified (using IPV6_PKTINFO, 9910 * IPV6_MULTICAST_IF, or IPV6_BOUND_IF) we will only match 9911 * on routing entries (ftable and ctable) that have a matching 9912 * ire->ire_ipif->ipif_ill. Thus this can only be used 9913 * for destinations that are on-link for the specific ill 9914 * and that can appear on multiple links. Thus it is useful 9915 * for multicast destinations, link-local destinations, and 9916 * at some point perhaps for site-local destinations (if the 9917 * node sits at a site boundary). 9918 * We create the cache entries in the regular ctable since 9919 * it can not "confuse" things for other destinations. 9920 * table. 9921 * 9922 * NOTE : conn_ire_cache is not used for caching ire_ctable_lookups. 9923 * It is used only when ire_cache_lookup is used above. 
9924 */ 9925 ire = ire_ctable_lookup_v6(v6dstp, 0, 0, ill->ill_ipif, 9926 zoneid, msg_getlabel(mp), match_flags, ipst); 9927 if (ire != NULL) { 9928 /* 9929 * Check if the ire has the RTF_MULTIRT flag, inherited 9930 * from an IRE_OFFSUBNET ire entry in ip_newroute(). 9931 */ 9932 if (ire->ire_flags & RTF_MULTIRT) { 9933 /* 9934 * Force hop limit of multirouted packets if required. 9935 * The hop limit of such packets is bounded by the 9936 * ip_multirt_ttl ndd variable. 9937 * NDP packets must have a hop limit of 255; don't 9938 * change the hop limit in that case. 9939 */ 9940 if ((ipst->ips_ip_multirt_ttl > 0) && 9941 (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl) && 9942 (ip6h->ip6_hops != IPV6_MAX_HOPS)) { 9943 if (ip_debug > 3) { 9944 ip2dbg(("ip_wput_v6: forcing multirt " 9945 "hop limit to %d (was %d) ", 9946 ipst->ips_ip_multirt_ttl, 9947 ip6h->ip6_hops)); 9948 pr_addr_dbg("v6dst %s\n", AF_INET6, 9949 &ire->ire_addr_v6); 9950 } 9951 ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 9952 } 9953 9954 /* 9955 * We look at this point if there are pending 9956 * unresolved routes. ire_multirt_need_resolve_v6() 9957 * checks in O(n) that all IRE_OFFSUBNET ire 9958 * entries for the packet's destination and 9959 * flagged RTF_MULTIRT are currently resolved. 9960 * If some remain unresolved, we make a copy 9961 * of the current message. It will be used 9962 * to initiate additional route resolutions. 
9963 */ 9964 multirt_need_resolve = 9965 ire_multirt_need_resolve_v6(&ire->ire_addr_v6, 9966 msg_getlabel(first_mp), ipst); 9967 ip2dbg(("ip_wput_v6[send_from_ill]: ire %p, " 9968 "multirt_need_resolve %d, first_mp %p\n", 9969 (void *)ire, multirt_need_resolve, 9970 (void *)first_mp)); 9971 if (multirt_need_resolve) { 9972 copy_mp = copymsg(first_mp); 9973 if (copy_mp != NULL) { 9974 MULTIRT_DEBUG_TAG(copy_mp); 9975 } 9976 } 9977 } 9978 9979 ip1dbg(("ip_wput_v6: send on %s, ire = %p, ill index = %d\n", 9980 ill->ill_name, (void *)ire, 9981 ill->ill_phyint->phyint_ifindex)); 9982 ip_wput_ire_v6(q, first_mp, ire, unspec_src, cksum_request, 9983 connp, caller, ip6i_flags, zoneid); 9984 ire_refrele(ire); 9985 if (need_decref) { 9986 CONN_DEC_REF(connp); 9987 connp = NULL; 9988 } 9989 9990 /* 9991 * Try to resolve another multiroute if 9992 * ire_multirt_need_resolve_v6() deemed it necessary. 9993 * copy_mp will be consumed (sent or freed) by 9994 * ip_newroute_[ipif_]v6(). 9995 */ 9996 if (copy_mp != NULL) { 9997 if (mctl_present) { 9998 ip6h = (ip6_t *)copy_mp->b_cont->b_rptr; 9999 } else { 10000 ip6h = (ip6_t *)copy_mp->b_rptr; 10001 } 10002 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10003 ipif = ipif_lookup_group_v6(&ip6h->ip6_dst, 10004 zoneid, ipst); 10005 if (ipif == NULL) { 10006 ip1dbg(("ip_wput_v6: No ipif for " 10007 "multicast\n")); 10008 MULTIRT_DEBUG_UNTAG(copy_mp); 10009 freemsg(copy_mp); 10010 return; 10011 } 10012 ip_newroute_ipif_v6(q, copy_mp, ipif, 10013 &ip6h->ip6_dst, &ip6h->ip6_src, unspec_src, 10014 zoneid); 10015 ipif_refrele(ipif); 10016 } else { 10017 ip_newroute_v6(q, copy_mp, &ip6h->ip6_dst, 10018 &ip6h->ip6_src, ill, zoneid, ipst); 10019 } 10020 } 10021 ill_refrele(ill); 10022 return; 10023 } 10024 if (need_decref) { 10025 CONN_DEC_REF(connp); 10026 connp = NULL; 10027 } 10028 10029 /* Update rptr if there was an ip6i_t header. 
*/ 10030 if (ip6i != NULL) 10031 mp->b_rptr -= sizeof (ip6i_t); 10032 if (unspec_src) { 10033 if (ip6i == NULL) { 10034 /* 10035 * Add ip6i_t header to carry unspec_src 10036 * until the packet comes back in ip_wput_v6. 10037 */ 10038 if (mctl_present) { 10039 first_mp->b_cont = 10040 ip_add_info_v6(mp, NULL, v6dstp); 10041 mp = first_mp->b_cont; 10042 if (mp == NULL) 10043 freeb(first_mp); 10044 } else { 10045 first_mp = mp = ip_add_info_v6(mp, NULL, 10046 v6dstp); 10047 } 10048 if (mp == NULL) { 10049 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10050 ill_refrele(ill); 10051 return; 10052 } 10053 ip6i = (ip6i_t *)mp->b_rptr; 10054 if ((mp->b_wptr - (uchar_t *)ip6i) == 10055 sizeof (ip6i_t)) { 10056 /* 10057 * ndp_resolver called from ip_newroute_v6 10058 * expects a pulled up message. 10059 */ 10060 if (!pullupmsg(mp, -1)) { 10061 ip1dbg(("ip_wput_v6: pullupmsg" 10062 " failed\n")); 10063 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10064 freemsg(first_mp); 10065 return; 10066 } 10067 ip6i = (ip6i_t *)mp->b_rptr; 10068 } 10069 ip6h = (ip6_t *)&ip6i[1]; 10070 v6dstp = &ip6h->ip6_dst; 10071 } 10072 ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 10073 if (mctl_present) { 10074 ASSERT(io != NULL); 10075 io->ipsec_out_unspec_src = unspec_src; 10076 } 10077 } 10078 if (IN6_IS_ADDR_MULTICAST(v6dstp)) { 10079 ip_newroute_ipif_v6(q, first_mp, ill->ill_ipif, v6dstp, 10080 &ip6h->ip6_src, unspec_src, zoneid); 10081 } else { 10082 ip_newroute_v6(q, first_mp, v6dstp, &ip6h->ip6_src, ill, 10083 zoneid, ipst); 10084 } 10085 ill_refrele(ill); 10086 return; 10087 10088 notv6: 10089 /* FIXME?: assume the caller calls the right version of ip_output? */ 10090 if (q->q_next == NULL) { 10091 connp = Q_TO_CONN(q); 10092 10093 /* 10094 * We can change conn_send for all types of conn, even 10095 * though only TCP uses it right now. 10096 * FIXME: sctp could use conn_send but doesn't currently. 
10097 */ 10098 ip_setpktversion(connp, B_FALSE, B_TRUE, ipst); 10099 } 10100 BUMP_MIB(mibptr, ipIfStatsOutWrongIPVersion); 10101 (void) ip_output(arg, first_mp, arg2, caller); 10102 if (ill != NULL) 10103 ill_refrele(ill); 10104 } 10105 10106 /* 10107 * If this is a conn_t queue, then we pass in the conn. This includes the 10108 * zoneid. 10109 * Otherwise, this is a message for an ill_t queue, 10110 * in which case we use the global zoneid since those are all part of 10111 * the global zone. 10112 */ 10113 void 10114 ip_wput_v6(queue_t *q, mblk_t *mp) 10115 { 10116 if (CONN_Q(q)) 10117 ip_output_v6(Q_TO_CONN(q), mp, q, IP_WPUT); 10118 else 10119 ip_output_v6(GLOBAL_ZONEID, mp, q, IP_WPUT); 10120 } 10121 10122 /* 10123 * NULL send-to queue - packet is to be delivered locally. 10124 */ 10125 void 10126 ip_wput_local_v6(queue_t *q, ill_t *ill, ip6_t *ip6h, mblk_t *first_mp, 10127 ire_t *ire, int fanout_flags, zoneid_t zoneid) 10128 { 10129 uint32_t ports; 10130 mblk_t *mp = first_mp, *first_mp1; 10131 boolean_t mctl_present; 10132 uint8_t nexthdr; 10133 uint16_t hdr_length; 10134 ipsec_out_t *io; 10135 mib2_ipIfStatsEntry_t *mibptr; 10136 ilm_t *ilm; 10137 uint_t nexthdr_offset; 10138 ip_stack_t *ipst = ill->ill_ipst; 10139 10140 if (DB_TYPE(mp) == M_CTL) { 10141 io = (ipsec_out_t *)mp->b_rptr; 10142 if (!io->ipsec_out_secure) { 10143 mp = mp->b_cont; 10144 freeb(first_mp); 10145 first_mp = mp; 10146 mctl_present = B_FALSE; 10147 } else { 10148 mctl_present = B_TRUE; 10149 mp = first_mp->b_cont; 10150 ipsec_out_to_in(first_mp); 10151 } 10152 } else { 10153 mctl_present = B_FALSE; 10154 } 10155 10156 /* 10157 * Remove reachability confirmation bit from version field 10158 * before passing the packet on to any firewall hooks or 10159 * looping back the packet. 
10160 */ 10161 if (ip6h->ip6_vcf & IP_FORWARD_PROG) 10162 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10163 10164 DTRACE_PROBE4(ip6__loopback__in__start, 10165 ill_t *, ill, ill_t *, NULL, 10166 ip6_t *, ip6h, mblk_t *, first_mp); 10167 10168 FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 10169 ipst->ips_ipv6firewall_loopback_in, 10170 ill, NULL, ip6h, first_mp, mp, 0, ipst); 10171 10172 DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, first_mp); 10173 10174 if (first_mp == NULL) 10175 return; 10176 10177 if (ipst->ips_ip6_observe.he_interested) { 10178 zoneid_t szone, dzone, lookup_zoneid = ALL_ZONES; 10179 zoneid_t stackzoneid = netstackid_to_zoneid( 10180 ipst->ips_netstack->netstack_stackid); 10181 10182 szone = (stackzoneid == GLOBAL_ZONEID) ? zoneid : stackzoneid; 10183 /* 10184 * ::1 is special, as we cannot lookup its zoneid by 10185 * address. For this case, restrict the lookup to the 10186 * source zone. 10187 */ 10188 if (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)) 10189 lookup_zoneid = zoneid; 10190 dzone = ip_get_zoneid_v6(&ip6h->ip6_dst, mp, ill, ipst, 10191 lookup_zoneid); 10192 ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst); 10193 } 10194 10195 DTRACE_IP7(receive, mblk_t *, first_mp, conn_t *, NULL, void_ip_t *, 10196 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 10197 int, 1); 10198 10199 nexthdr = ip6h->ip6_nxt; 10200 mibptr = ill->ill_ip_mib; 10201 10202 /* Fastpath */ 10203 switch (nexthdr) { 10204 case IPPROTO_TCP: 10205 case IPPROTO_UDP: 10206 case IPPROTO_ICMPV6: 10207 case IPPROTO_SCTP: 10208 hdr_length = IPV6_HDR_LEN; 10209 nexthdr_offset = (uint_t)((uchar_t *)&ip6h->ip6_nxt - 10210 (uchar_t *)ip6h); 10211 break; 10212 default: { 10213 uint8_t *nexthdrp; 10214 10215 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10216 &hdr_length, &nexthdrp)) { 10217 /* Malformed packet */ 10218 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10219 freemsg(first_mp); 10220 return; 10221 } 10222 nexthdr = *nexthdrp; 10223 nexthdr_offset = nexthdrp - (uint8_t 
*)ip6h; 10224 break; 10225 } 10226 } 10227 10228 UPDATE_OB_PKT_COUNT(ire); 10229 ire->ire_last_used_time = lbolt; 10230 10231 switch (nexthdr) { 10232 case IPPROTO_TCP: 10233 if (DB_TYPE(mp) == M_DATA) { 10234 /* 10235 * M_DATA mblk, so init mblk (chain) for 10236 * no struio(). 10237 */ 10238 mblk_t *mp1 = mp; 10239 10240 do { 10241 mp1->b_datap->db_struioflag = 0; 10242 } while ((mp1 = mp1->b_cont) != NULL); 10243 } 10244 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10245 TCP_PORTS_OFFSET); 10246 ip_fanout_tcp_v6(q, first_mp, ip6h, ill, ill, 10247 fanout_flags|IP_FF_SEND_ICMP|IP_FF_SYN_ADDIRE| 10248 IP_FF_IPINFO|IP6_NO_IPPOLICY|IP_FF_LOOPBACK, 10249 hdr_length, mctl_present, ire->ire_zoneid); 10250 return; 10251 10252 case IPPROTO_UDP: 10253 ports = *(uint32_t *)(mp->b_rptr + hdr_length + 10254 UDP_PORTS_OFFSET); 10255 ip_fanout_udp_v6(q, first_mp, ip6h, ports, ill, ill, 10256 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO| 10257 IP6_NO_IPPOLICY, mctl_present, ire->ire_zoneid); 10258 return; 10259 10260 case IPPROTO_SCTP: 10261 { 10262 ports = *(uint32_t *)(mp->b_rptr + hdr_length); 10263 ip_fanout_sctp(first_mp, ill, (ipha_t *)ip6h, ports, 10264 fanout_flags|IP_FF_SEND_ICMP|IP_FF_IPINFO, 10265 mctl_present, IP6_NO_IPPOLICY, ire->ire_zoneid); 10266 return; 10267 } 10268 case IPPROTO_ICMPV6: { 10269 icmp6_t *icmp6; 10270 10271 /* check for full IPv6+ICMPv6 header */ 10272 if ((mp->b_wptr - mp->b_rptr) < 10273 (hdr_length + ICMP6_MINLEN)) { 10274 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10275 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10276 " failed\n")); 10277 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10278 freemsg(first_mp); 10279 return; 10280 } 10281 ip6h = (ip6_t *)mp->b_rptr; 10282 } 10283 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10284 10285 /* Update output mib stats */ 10286 icmp_update_out_mib_v6(ill, icmp6); 10287 10288 /* Check variable for testing applications */ 10289 if (ipst->ips_ipv6_drop_inbound_icmpv6) { 10290 freemsg(first_mp); 10291 
return; 10292 } 10293 /* 10294 * Assume that there is always at least one conn for 10295 * ICMPv6 (in.ndpd) i.e. don't optimize the case 10296 * where there is no conn. 10297 */ 10298 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 10299 !IS_LOOPBACK(ill)) { 10300 ilm_walker_t ilw; 10301 10302 /* 10303 * In the multicast case, applications may have 10304 * joined the group from different zones, so we 10305 * need to deliver the packet to each of them. 10306 * Loop through the multicast memberships 10307 * structures (ilm) on the receive ill and send 10308 * a copy of the packet up each matching one. 10309 * However, we don't do this for multicasts sent 10310 * on the loopback interface (PHYI_LOOPBACK flag 10311 * set) as they must stay in the sender's zone. 10312 */ 10313 ilm = ilm_walker_start(&ilw, ill); 10314 for (; ilm != NULL; 10315 ilm = ilm_walker_step(&ilw, ilm)) { 10316 if (!IN6_ARE_ADDR_EQUAL( 10317 &ilm->ilm_v6addr, &ip6h->ip6_dst)) 10318 continue; 10319 if ((fanout_flags & 10320 IP_FF_NO_MCAST_LOOP) && 10321 ilm->ilm_zoneid == ire->ire_zoneid) 10322 continue; 10323 if (!ipif_lookup_zoneid( 10324 ilw.ilw_walk_ill, ilm->ilm_zoneid, 10325 IPIF_UP, NULL)) 10326 continue; 10327 10328 first_mp1 = ip_copymsg(first_mp); 10329 if (first_mp1 == NULL) 10330 continue; 10331 icmp_inbound_v6(q, first_mp1, 10332 ilw.ilw_walk_ill, ill, hdr_length, 10333 mctl_present, IP6_NO_IPPOLICY, 10334 ilm->ilm_zoneid, NULL); 10335 } 10336 ilm_walker_finish(&ilw); 10337 } else { 10338 first_mp1 = ip_copymsg(first_mp); 10339 if (first_mp1 != NULL) 10340 icmp_inbound_v6(q, first_mp1, ill, ill, 10341 hdr_length, mctl_present, 10342 IP6_NO_IPPOLICY, ire->ire_zoneid, 10343 NULL); 10344 } 10345 } 10346 /* FALLTHRU */ 10347 default: { 10348 /* 10349 * Handle protocols with which IPv6 is less intimate. 10350 */ 10351 fanout_flags |= IP_FF_RAWIP|IP_FF_IPINFO; 10352 10353 /* 10354 * Enable sending ICMP for "Unknown" nexthdr 10355 * case. i.e. 
where we did not FALLTHRU from 10356 * IPPROTO_ICMPV6 processing case above. 10357 */ 10358 if (nexthdr != IPPROTO_ICMPV6) 10359 fanout_flags |= IP_FF_SEND_ICMP; 10360 /* 10361 * Note: There can be more than one stream bound 10362 * to a particular protocol. When this is the case, 10363 * each one gets a copy of any incoming packets. 10364 */ 10365 ip_fanout_proto_v6(q, first_mp, ip6h, ill, ill, nexthdr, 10366 nexthdr_offset, fanout_flags|IP6_NO_IPPOLICY, 10367 mctl_present, ire->ire_zoneid); 10368 return; 10369 } 10370 } 10371 } 10372 10373 /* 10374 * Send packet using IRE. 10375 * Checksumming is controlled by cksum_request: 10376 * -1 => normal i.e. TCP/UDP/SCTP/ICMPv6 are checksummed and nothing else. 10377 * 1 => Skip TCP/UDP/SCTP checksum 10378 * Otherwise => checksum_request contains insert offset for checksum 10379 * 10380 * Assumes that the following set of headers appear in the first 10381 * mblk: 10382 * ip6_t 10383 * Any extension headers 10384 * TCP/UDP/SCTP header (if present) 10385 * The routine can handle an ICMPv6 header that is not in the first mblk. 10386 * 10387 * NOTE : This function does not ire_refrele the ire passed in as the 10388 * argument unlike ip_wput_ire where the REFRELE is done. 10389 * Refer to ip_wput_ire for more on this. 10390 */ 10391 static void 10392 ip_wput_ire_v6(queue_t *q, mblk_t *mp, ire_t *ire, int unspec_src, 10393 int cksum_request, conn_t *connp, int caller, int flags, zoneid_t zoneid) 10394 { 10395 ip6_t *ip6h; 10396 uint8_t nexthdr; 10397 uint16_t hdr_length; 10398 uint_t reachable = 0x0; 10399 ill_t *ill; 10400 mib2_ipIfStatsEntry_t *mibptr; 10401 mblk_t *first_mp; 10402 boolean_t mctl_present; 10403 ipsec_out_t *io; 10404 boolean_t conn_dontroute; /* conn value for multicast */ 10405 boolean_t conn_multicast_loop; /* conn value for multicast */ 10406 boolean_t multicast_forward; /* Should we forward ? 
*/ 10407 int max_frag; 10408 ip_stack_t *ipst = ire->ire_ipst; 10409 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 10410 10411 ill = ire_to_ill(ire); 10412 first_mp = mp; 10413 multicast_forward = B_FALSE; 10414 10415 if (mp->b_datap->db_type != M_CTL) { 10416 ip6h = (ip6_t *)first_mp->b_rptr; 10417 } else { 10418 io = (ipsec_out_t *)first_mp->b_rptr; 10419 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10420 /* 10421 * Grab the zone id now because the M_CTL can be discarded by 10422 * ip_wput_ire_parse_ipsec_out() below. 10423 */ 10424 ASSERT(zoneid == io->ipsec_out_zoneid); 10425 ASSERT(zoneid != ALL_ZONES); 10426 ip6h = (ip6_t *)first_mp->b_cont->b_rptr; 10427 /* 10428 * For the multicast case, ipsec_out carries conn_dontroute and 10429 * conn_multicast_loop as conn may not be available here. We 10430 * need this for multicast loopback and forwarding which is done 10431 * later in the code. 10432 */ 10433 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10434 conn_dontroute = io->ipsec_out_dontroute; 10435 conn_multicast_loop = io->ipsec_out_multicast_loop; 10436 /* 10437 * If conn_dontroute is not set or conn_multicast_loop 10438 * is set, we need to do forwarding/loopback. For 10439 * datagrams from ip_wput_multicast, conn_dontroute is 10440 * set to B_TRUE and conn_multicast_loop is set to 10441 * B_FALSE so that we neither do forwarding nor 10442 * loopback. 10443 */ 10444 if (!conn_dontroute || conn_multicast_loop) 10445 multicast_forward = B_TRUE; 10446 } 10447 } 10448 10449 /* 10450 * If the sender didn't supply the hop limit and there is a default 10451 * unicast hop limit associated with the output interface, we use 10452 * that if the packet is unicast. Interface specific unicast hop 10453 * limits as set via the SIOCSLIFLNKINFO ioctl. 
10454 */ 10455 if (ill->ill_max_hops != 0 && !(flags & IP6I_HOPLIMIT) && 10456 !(IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 10457 ip6h->ip6_hops = ill->ill_max_hops; 10458 } 10459 10460 if (ire->ire_type == IRE_LOCAL && ire->ire_zoneid != zoneid && 10461 ire->ire_zoneid != ALL_ZONES) { 10462 /* 10463 * When a zone sends a packet to another zone, we try to deliver 10464 * the packet under the same conditions as if the destination 10465 * was a real node on the network. To do so, we look for a 10466 * matching route in the forwarding table. 10467 * RTF_REJECT and RTF_BLACKHOLE are handled just like 10468 * ip_newroute_v6() does. 10469 * Note that IRE_LOCAL are special, since they are used 10470 * when the zoneid doesn't match in some cases. This means that 10471 * we need to handle ipha_src differently since ire_src_addr 10472 * belongs to the receiving zone instead of the sending zone. 10473 * When ip_restrict_interzone_loopback is set, then 10474 * ire_cache_lookup_v6() ensures that IRE_LOCAL are only used 10475 * for loopback between zones when the logical "Ethernet" would 10476 * have looped them back. 
10477 */ 10478 ire_t *src_ire; 10479 10480 src_ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, 10481 NULL, NULL, zoneid, 0, NULL, (MATCH_IRE_RECURSIVE | 10482 MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE), ipst); 10483 if (src_ire != NULL && 10484 !(src_ire->ire_flags & (RTF_REJECT | RTF_BLACKHOLE)) && 10485 (!ipst->ips_ip_restrict_interzone_loopback || 10486 ire_local_same_lan(ire, src_ire))) { 10487 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && 10488 !unspec_src) { 10489 ip6h->ip6_src = src_ire->ire_src_addr_v6; 10490 } 10491 ire_refrele(src_ire); 10492 } else { 10493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutNoRoutes); 10494 if (src_ire != NULL) { 10495 if (src_ire->ire_flags & RTF_BLACKHOLE) { 10496 ire_refrele(src_ire); 10497 freemsg(first_mp); 10498 return; 10499 } 10500 ire_refrele(src_ire); 10501 } 10502 if (ip_hdr_complete_v6(ip6h, zoneid, ipst)) { 10503 /* Failed */ 10504 freemsg(first_mp); 10505 return; 10506 } 10507 icmp_unreachable_v6(q, first_mp, 10508 ICMP6_DST_UNREACH_NOROUTE, B_FALSE, B_FALSE, 10509 zoneid, ipst); 10510 return; 10511 } 10512 } 10513 10514 if (mp->b_datap->db_type == M_CTL || 10515 ipss->ipsec_outbound_v6_policy_present) { 10516 mp = ip_wput_ire_parse_ipsec_out(first_mp, NULL, ip6h, ire, 10517 connp, unspec_src, zoneid); 10518 if (mp == NULL) { 10519 return; 10520 } 10521 } 10522 10523 first_mp = mp; 10524 if (mp->b_datap->db_type == M_CTL) { 10525 io = (ipsec_out_t *)mp->b_rptr; 10526 ASSERT(io->ipsec_out_type == IPSEC_OUT); 10527 mp = mp->b_cont; 10528 mctl_present = B_TRUE; 10529 } else { 10530 mctl_present = B_FALSE; 10531 } 10532 10533 ip6h = (ip6_t *)mp->b_rptr; 10534 nexthdr = ip6h->ip6_nxt; 10535 mibptr = ill->ill_ip_mib; 10536 10537 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) && !unspec_src) { 10538 ipif_t *ipif; 10539 10540 /* 10541 * Select the source address using ipif_select_source_v6. 
10542 */ 10543 ipif = ipif_select_source_v6(ill, &ip6h->ip6_dst, B_FALSE, 10544 IPV6_PREFER_SRC_DEFAULT, zoneid); 10545 if (ipif == NULL) { 10546 if (ip_debug > 2) { 10547 /* ip1dbg */ 10548 pr_addr_dbg("ip_wput_ire_v6: no src for " 10549 "dst %s\n", AF_INET6, &ip6h->ip6_dst); 10550 printf("through interface %s\n", ill->ill_name); 10551 } 10552 freemsg(first_mp); 10553 return; 10554 } 10555 ip6h->ip6_src = ipif->ipif_v6src_addr; 10556 ipif_refrele(ipif); 10557 } 10558 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10559 if ((connp != NULL && connp->conn_multicast_loop) || 10560 !IS_LOOPBACK(ill)) { 10561 if (ilm_lookup_ill_v6(ill, &ip6h->ip6_dst, B_FALSE, 10562 ALL_ZONES) != NULL) { 10563 mblk_t *nmp; 10564 int fanout_flags = 0; 10565 10566 if (connp != NULL && 10567 !connp->conn_multicast_loop) { 10568 fanout_flags |= IP_FF_NO_MCAST_LOOP; 10569 } 10570 ip1dbg(("ip_wput_ire_v6: " 10571 "Loopback multicast\n")); 10572 nmp = ip_copymsg(first_mp); 10573 if (nmp != NULL) { 10574 ip6_t *nip6h; 10575 mblk_t *mp_ip6h; 10576 10577 if (mctl_present) { 10578 nip6h = (ip6_t *) 10579 nmp->b_cont->b_rptr; 10580 mp_ip6h = nmp->b_cont; 10581 } else { 10582 nip6h = (ip6_t *)nmp->b_rptr; 10583 mp_ip6h = nmp; 10584 } 10585 10586 DTRACE_PROBE4( 10587 ip6__loopback__out__start, 10588 ill_t *, NULL, 10589 ill_t *, ill, 10590 ip6_t *, nip6h, 10591 mblk_t *, nmp); 10592 10593 FW_HOOKS6( 10594 ipst->ips_ip6_loopback_out_event, 10595 ipst->ips_ipv6firewall_loopback_out, 10596 NULL, ill, nip6h, nmp, mp_ip6h, 10597 0, ipst); 10598 10599 DTRACE_PROBE1( 10600 ip6__loopback__out__end, 10601 mblk_t *, nmp); 10602 10603 /* 10604 * DTrace this as ip:::send. A blocked 10605 * packet will fire the send probe, but 10606 * not the receive probe. 
10607 */ 10608 DTRACE_IP7(send, mblk_t *, nmp, 10609 conn_t *, NULL, void_ip_t *, nip6h, 10610 __dtrace_ipsr_ill_t *, ill, 10611 ipha_t *, NULL, ip6_t *, nip6h, 10612 int, 1); 10613 10614 if (nmp != NULL) { 10615 /* 10616 * Deliver locally and to 10617 * every local zone, except 10618 * the sending zone when 10619 * IPV6_MULTICAST_LOOP is 10620 * disabled. 10621 */ 10622 ip_wput_local_v6(RD(q), ill, 10623 nip6h, nmp, ire, 10624 fanout_flags, zoneid); 10625 } 10626 } else { 10627 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10628 ip1dbg(("ip_wput_ire_v6: " 10629 "copymsg failed\n")); 10630 } 10631 } 10632 } 10633 if (ip6h->ip6_hops == 0 || 10634 IN6_IS_ADDR_MC_NODELOCAL(&ip6h->ip6_dst) || 10635 IS_LOOPBACK(ill)) { 10636 /* 10637 * Local multicast or just loopback on loopback 10638 * interface. 10639 */ 10640 BUMP_MIB(mibptr, ipIfStatsHCOutMcastPkts); 10641 UPDATE_MIB(mibptr, ipIfStatsHCOutMcastOctets, 10642 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 10643 ip1dbg(("ip_wput_ire_v6: local multicast only\n")); 10644 freemsg(first_mp); 10645 return; 10646 } 10647 } 10648 10649 if (ire->ire_stq != NULL) { 10650 uint32_t sum; 10651 uint_t ill_index = ((ill_t *)ire->ire_stq->q_ptr)-> 10652 ill_phyint->phyint_ifindex; 10653 queue_t *dev_q = ire->ire_stq->q_next; 10654 10655 /* 10656 * non-NULL send-to queue - packet is to be sent 10657 * out an interface. 10658 */ 10659 10660 /* Driver is flow-controlling? */ 10661 if (!IP_FLOW_CONTROLLED_ULP(nexthdr) && 10662 DEV_Q_FLOW_BLOCKED(dev_q)) { 10663 /* 10664 * Queue packet if we have an conn to give back 10665 * pressure. We can't queue packets intended for 10666 * hardware acceleration since we've tossed that 10667 * state already. If the packet is being fed back 10668 * from ire_send_v6, we don't know the position in 10669 * the queue to enqueue the packet and we discard 10670 * the packet. 
10671 */ 10672 if (ipst->ips_ip_output_queue && connp != NULL && 10673 !mctl_present && caller != IRE_SEND) { 10674 if (caller == IP_WSRV) { 10675 idl_tx_list_t *idl_txl; 10676 10677 idl_txl = &ipst->ips_idl_tx_list[0]; 10678 connp->conn_did_putbq = 1; 10679 (void) putbq(connp->conn_wq, mp); 10680 conn_drain_insert(connp, idl_txl); 10681 /* 10682 * caller == IP_WSRV implies we are 10683 * the service thread, and the 10684 * queue is already noenabled. 10685 * The check for canput and 10686 * the putbq is not atomic. 10687 * So we need to check again. 10688 */ 10689 if (canput(dev_q)) 10690 connp->conn_did_putbq = 0; 10691 } else { 10692 (void) putq(connp->conn_wq, mp); 10693 } 10694 return; 10695 } 10696 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10697 freemsg(first_mp); 10698 return; 10699 } 10700 10701 /* 10702 * Look for reachability confirmations from the transport. 10703 */ 10704 if (ip6h->ip6_vcf & IP_FORWARD_PROG) { 10705 reachable |= IPV6_REACHABILITY_CONFIRMATION; 10706 ip6h->ip6_vcf &= ~IP_FORWARD_PROG; 10707 if (mctl_present) 10708 io->ipsec_out_reachable = B_TRUE; 10709 } 10710 /* Fastpath */ 10711 switch (nexthdr) { 10712 case IPPROTO_TCP: 10713 case IPPROTO_UDP: 10714 case IPPROTO_ICMPV6: 10715 case IPPROTO_SCTP: 10716 hdr_length = IPV6_HDR_LEN; 10717 break; 10718 default: { 10719 uint8_t *nexthdrp; 10720 10721 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, 10722 &hdr_length, &nexthdrp)) { 10723 /* Malformed packet */ 10724 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10725 freemsg(first_mp); 10726 return; 10727 } 10728 nexthdr = *nexthdrp; 10729 break; 10730 } 10731 } 10732 10733 if (cksum_request != -1 && nexthdr != IPPROTO_ICMPV6) { 10734 uint16_t *up; 10735 uint16_t *insp; 10736 10737 /* 10738 * The packet header is processed once for all, even 10739 * in the multirouting case. We disable hardware 10740 * checksum if the packet is multirouted, as it will be 10741 * replicated via several interfaces, and not all of 10742 * them may have this capability. 
10743 */ 10744 if (cksum_request == 1 && 10745 !(ire->ire_flags & RTF_MULTIRT)) { 10746 /* Skip the transport checksum */ 10747 goto cksum_done; 10748 } 10749 /* 10750 * Do user-configured raw checksum. 10751 * Compute checksum and insert at offset "cksum_request" 10752 */ 10753 10754 /* check for enough headers for checksum */ 10755 cksum_request += hdr_length; /* offset from rptr */ 10756 if ((mp->b_wptr - mp->b_rptr) < 10757 (cksum_request + sizeof (int16_t))) { 10758 if (!pullupmsg(mp, 10759 cksum_request + sizeof (int16_t))) { 10760 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10761 " failed\n")); 10762 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10763 freemsg(first_mp); 10764 return; 10765 } 10766 ip6h = (ip6_t *)mp->b_rptr; 10767 } 10768 insp = (uint16_t *)((uchar_t *)ip6h + cksum_request); 10769 ASSERT(((uintptr_t)insp & 0x1) == 0); 10770 up = (uint16_t *)&ip6h->ip6_src; 10771 /* 10772 * icmp has placed length and routing 10773 * header adjustment in *insp. 10774 */ 10775 sum = htons(nexthdr) + 10776 up[0] + up[1] + up[2] + up[3] + 10777 up[4] + up[5] + up[6] + up[7] + 10778 up[8] + up[9] + up[10] + up[11] + 10779 up[12] + up[13] + up[14] + up[15]; 10780 sum = (sum & 0xffff) + (sum >> 16); 10781 *insp = IP_CSUM(mp, hdr_length, sum); 10782 } else if (nexthdr == IPPROTO_TCP) { 10783 uint16_t *up; 10784 10785 /* 10786 * Check for full IPv6 header + enough TCP header 10787 * to get at the checksum field. 
10788 */ 10789 if ((mp->b_wptr - mp->b_rptr) < 10790 (hdr_length + TCP_CHECKSUM_OFFSET + 10791 TCP_CHECKSUM_SIZE)) { 10792 if (!pullupmsg(mp, hdr_length + 10793 TCP_CHECKSUM_OFFSET + TCP_CHECKSUM_SIZE)) { 10794 ip1dbg(("ip_wput_v6: TCP hdr pullupmsg" 10795 " failed\n")); 10796 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10797 freemsg(first_mp); 10798 return; 10799 } 10800 ip6h = (ip6_t *)mp->b_rptr; 10801 } 10802 10803 up = (uint16_t *)&ip6h->ip6_src; 10804 /* 10805 * Note: The TCP module has stored the length value 10806 * into the tcp checksum field, so we don't 10807 * need to explicitly sum it in here. 10808 */ 10809 sum = up[0] + up[1] + up[2] + up[3] + 10810 up[4] + up[5] + up[6] + up[7] + 10811 up[8] + up[9] + up[10] + up[11] + 10812 up[12] + up[13] + up[14] + up[15]; 10813 10814 /* Fold the initial sum */ 10815 sum = (sum & 0xffff) + (sum >> 16); 10816 10817 up = (uint16_t *)(((uchar_t *)ip6h) + 10818 hdr_length + TCP_CHECKSUM_OFFSET); 10819 10820 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_TCP, 10821 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10822 ire->ire_max_frag, mctl_present, sum); 10823 10824 /* Software checksum? 
*/ 10825 if (DB_CKSUMFLAGS(mp) == 0) { 10826 IP6_STAT(ipst, ip6_out_sw_cksum); 10827 IP6_STAT_UPDATE(ipst, 10828 ip6_tcp_out_sw_cksum_bytes, 10829 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10830 hdr_length); 10831 } 10832 } else if (nexthdr == IPPROTO_UDP) { 10833 uint16_t *up; 10834 10835 /* 10836 * check for full IPv6 header + enough UDP header 10837 * to get at the UDP checksum field 10838 */ 10839 if ((mp->b_wptr - mp->b_rptr) < (hdr_length + 10840 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10841 if (!pullupmsg(mp, hdr_length + 10842 UDP_CHECKSUM_OFFSET + UDP_CHECKSUM_SIZE)) { 10843 ip1dbg(("ip_wput_v6: UDP hdr pullupmsg" 10844 " failed\n")); 10845 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10846 freemsg(first_mp); 10847 return; 10848 } 10849 ip6h = (ip6_t *)mp->b_rptr; 10850 } 10851 up = (uint16_t *)&ip6h->ip6_src; 10852 /* 10853 * Note: The UDP module has stored the length value 10854 * into the udp checksum field, so we don't 10855 * need to explicitly sum it in here. 10856 */ 10857 sum = up[0] + up[1] + up[2] + up[3] + 10858 up[4] + up[5] + up[6] + up[7] + 10859 up[8] + up[9] + up[10] + up[11] + 10860 up[12] + up[13] + up[14] + up[15]; 10861 10862 /* Fold the initial sum */ 10863 sum = (sum & 0xffff) + (sum >> 16); 10864 10865 up = (uint16_t *)(((uchar_t *)ip6h) + 10866 hdr_length + UDP_CHECKSUM_OFFSET); 10867 10868 IP_CKSUM_XMIT(ill, ire, mp, ip6h, up, IPPROTO_UDP, 10869 hdr_length, ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10870 ire->ire_max_frag, mctl_present, sum); 10871 10872 /* Software checksum? 
*/ 10873 if (DB_CKSUMFLAGS(mp) == 0) { 10874 IP6_STAT(ipst, ip6_out_sw_cksum); 10875 IP6_STAT_UPDATE(ipst, 10876 ip6_udp_out_sw_cksum_bytes, 10877 (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN) - 10878 hdr_length); 10879 } 10880 } else if (nexthdr == IPPROTO_ICMPV6) { 10881 uint16_t *up; 10882 icmp6_t *icmp6; 10883 10884 /* check for full IPv6+ICMPv6 header */ 10885 if ((mp->b_wptr - mp->b_rptr) < 10886 (hdr_length + ICMP6_MINLEN)) { 10887 if (!pullupmsg(mp, hdr_length + ICMP6_MINLEN)) { 10888 ip1dbg(("ip_wput_v6: ICMP hdr pullupmsg" 10889 " failed\n")); 10890 BUMP_MIB(mibptr, ipIfStatsOutDiscards); 10891 freemsg(first_mp); 10892 return; 10893 } 10894 ip6h = (ip6_t *)mp->b_rptr; 10895 } 10896 icmp6 = (icmp6_t *)((uchar_t *)ip6h + hdr_length); 10897 up = (uint16_t *)&ip6h->ip6_src; 10898 /* 10899 * icmp has placed length and routing 10900 * header adjustment in icmp6_cksum. 10901 */ 10902 sum = htons(IPPROTO_ICMPV6) + 10903 up[0] + up[1] + up[2] + up[3] + 10904 up[4] + up[5] + up[6] + up[7] + 10905 up[8] + up[9] + up[10] + up[11] + 10906 up[12] + up[13] + up[14] + up[15]; 10907 sum = (sum & 0xffff) + (sum >> 16); 10908 icmp6->icmp6_cksum = IP_CSUM(mp, hdr_length, sum); 10909 10910 /* Update output mib stats */ 10911 icmp_update_out_mib_v6(ill, icmp6); 10912 } else if (nexthdr == IPPROTO_SCTP) { 10913 sctp_hdr_t *sctph; 10914 10915 if (MBLKL(mp) < (hdr_length + sizeof (*sctph))) { 10916 if (!pullupmsg(mp, hdr_length + 10917 sizeof (*sctph))) { 10918 ip1dbg(("ip_wput_v6: SCTP hdr pullupmsg" 10919 " failed\n")); 10920 BUMP_MIB(ill->ill_ip_mib, 10921 ipIfStatsOutDiscards); 10922 freemsg(mp); 10923 return; 10924 } 10925 ip6h = (ip6_t *)mp->b_rptr; 10926 } 10927 sctph = (sctp_hdr_t *)(mp->b_rptr + hdr_length); 10928 sctph->sh_chksum = 0; 10929 sctph->sh_chksum = sctp_cksum(mp, hdr_length); 10930 } 10931 10932 cksum_done: 10933 /* 10934 * We force the insertion of a fragment header using the 10935 * IPH_FRAG_HDR flag in two cases: 10936 * - after reception of an ICMPv6 "packet too 
big" message 10937 * with a MTU < 1280 (cf. RFC 2460 section 5) 10938 * - for multirouted IPv6 packets, so that the receiver can 10939 * discard duplicates according to their fragment identifier 10940 * 10941 * Two flags modifed from the API can modify this behavior. 10942 * The first is IPV6_USE_MIN_MTU. With this API the user 10943 * can specify how to manage PMTUD for unicast and multicast. 10944 * 10945 * IPV6_DONTFRAG disallows fragmentation. 10946 */ 10947 max_frag = ire->ire_max_frag; 10948 switch (IP6I_USE_MIN_MTU_API(flags)) { 10949 case IPV6_USE_MIN_MTU_DEFAULT: 10950 case IPV6_USE_MIN_MTU_UNICAST: 10951 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 10952 max_frag = IPV6_MIN_MTU; 10953 } 10954 break; 10955 10956 case IPV6_USE_MIN_MTU_NEVER: 10957 max_frag = IPV6_MIN_MTU; 10958 break; 10959 } 10960 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN > max_frag || 10961 (ire->ire_frag_flag & IPH_FRAG_HDR)) { 10962 if (connp != NULL && (flags & IP6I_DONTFRAG)) { 10963 icmp_pkt2big_v6(ire->ire_stq, first_mp, 10964 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 10965 return; 10966 } 10967 10968 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN != 10969 (mp->b_cont ? 
msgdsize(mp) : 10970 mp->b_wptr - (uchar_t *)ip6h)) { 10971 ip0dbg(("Packet length mismatch: %d, %ld\n", 10972 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN, 10973 msgdsize(mp))); 10974 freemsg(first_mp); 10975 return; 10976 } 10977 /* Do IPSEC processing first */ 10978 if (mctl_present) { 10979 ipsec_out_process(q, first_mp, ire, ill_index); 10980 return; 10981 } 10982 ASSERT(mp->b_prev == NULL); 10983 ip2dbg(("Fragmenting Size = %d, mtu = %d\n", 10984 ntohs(ip6h->ip6_plen) + 10985 IPV6_HDR_LEN, max_frag)); 10986 ASSERT(mp == first_mp); 10987 /* Initiate IPPF processing */ 10988 if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 10989 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 10990 if (mp == NULL) { 10991 return; 10992 } 10993 } 10994 ip_wput_frag_v6(mp, ire, reachable, connp, 10995 caller, max_frag); 10996 return; 10997 } 10998 /* Do IPSEC processing first */ 10999 if (mctl_present) { 11000 int extra_len = ipsec_out_extra_length(first_mp); 11001 11002 if (ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN + extra_len > 11003 max_frag && connp != NULL && 11004 (flags & IP6I_DONTFRAG)) { 11005 /* 11006 * IPsec headers will push the packet over the 11007 * MTU limit. Issue an ICMPv6 Packet Too Big 11008 * message for this packet if the upper-layer 11009 * that issued this packet will be able to 11010 * react to the icmp_pkt2big_v6() that we'll 11011 * generate. 11012 */ 11013 icmp_pkt2big_v6(ire->ire_stq, first_mp, 11014 max_frag, B_FALSE, B_TRUE, zoneid, ipst); 11015 return; 11016 } 11017 ipsec_out_process(q, first_mp, ire, ill_index); 11018 return; 11019 } 11020 /* 11021 * XXX multicast: add ip_mforward_v6() here. 11022 * Check conn_dontroute 11023 */ 11024 #ifdef lint 11025 /* 11026 * XXX The only purpose of this statement is to avoid lint 11027 * errors. See the above "XXX multicast". When that gets 11028 * fixed, remove this whole #ifdef lint section. 11029 */ 11030 ip3dbg(("multicast forward is %s.\n", 11031 (multicast_forward ? 
"TRUE" : "FALSE"))); 11032 #endif 11033 11034 UPDATE_OB_PKT_COUNT(ire); 11035 ire->ire_last_used_time = lbolt; 11036 ASSERT(mp == first_mp); 11037 ip_xmit_v6(mp, ire, reachable, connp, caller, NULL); 11038 } else { 11039 /* 11040 * DTrace this as ip:::send. A blocked packet will fire the 11041 * send probe, but not the receive probe. 11042 */ 11043 DTRACE_IP7(send, mblk_t *, first_mp, conn_t *, NULL, 11044 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, 11045 NULL, ip6_t *, ip6h, int, 1); 11046 DTRACE_PROBE4(ip6__loopback__out__start, 11047 ill_t *, NULL, ill_t *, ill, 11048 ip6_t *, ip6h, mblk_t *, first_mp); 11049 FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 11050 ipst->ips_ipv6firewall_loopback_out, 11051 NULL, ill, ip6h, first_mp, mp, 0, ipst); 11052 DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, first_mp); 11053 if (first_mp != NULL) { 11054 ip_wput_local_v6(RD(q), ill, ip6h, first_mp, ire, 0, 11055 zoneid); 11056 } 11057 } 11058 } 11059 11060 /* 11061 * Outbound IPv6 fragmentation routine using MDT. 11062 */ 11063 static void 11064 ip_wput_frag_mdt_v6(mblk_t *mp, ire_t *ire, size_t max_chunk, 11065 size_t unfragmentable_len, uint8_t nexthdr, uint_t prev_nexthdr_offset) 11066 { 11067 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11068 uint_t pkts, wroff, hdr_chunk_len, pbuf_idx; 11069 mblk_t *hdr_mp, *md_mp = NULL; 11070 int i1; 11071 multidata_t *mmd; 11072 unsigned char *hdr_ptr, *pld_ptr; 11073 ip_pdescinfo_t pdi; 11074 uint32_t ident; 11075 size_t len; 11076 uint16_t offset; 11077 queue_t *stq = ire->ire_stq; 11078 ill_t *ill = (ill_t *)stq->q_ptr; 11079 ip_stack_t *ipst = ill->ill_ipst; 11080 11081 ASSERT(DB_TYPE(mp) == M_DATA); 11082 ASSERT(MBLKL(mp) > unfragmentable_len); 11083 11084 /* 11085 * Move read ptr past unfragmentable portion, we don't want this part 11086 * of the data in our fragments. 11087 */ 11088 mp->b_rptr += unfragmentable_len; 11089 11090 /* Calculate how many packets we will send out */ 11091 i1 = (mp->b_cont == NULL) ? 
MBLKL(mp) : msgsize(mp); 11092 pkts = (i1 + max_chunk - 1) / max_chunk; 11093 ASSERT(pkts > 1); 11094 11095 /* Allocate a message block which will hold all the IP Headers. */ 11096 wroff = ipst->ips_ip_wroff_extra; 11097 hdr_chunk_len = wroff + unfragmentable_len + sizeof (ip6_frag_t); 11098 11099 i1 = pkts * hdr_chunk_len; 11100 /* 11101 * Create the header buffer, Multidata and destination address 11102 * and SAP attribute that should be associated with it. 11103 */ 11104 if ((hdr_mp = allocb(i1, BPRI_HI)) == NULL || 11105 ((hdr_mp->b_wptr += i1), 11106 (mmd = mmd_alloc(hdr_mp, &md_mp, KM_NOSLEEP)) == NULL) || 11107 !ip_md_addr_attr(mmd, NULL, ire->ire_nce->nce_res_mp)) { 11108 freemsg(mp); 11109 if (md_mp == NULL) { 11110 freemsg(hdr_mp); 11111 } else { 11112 free_mmd: IP6_STAT(ipst, ip6_frag_mdt_discarded); 11113 freemsg(md_mp); 11114 } 11115 IP6_STAT(ipst, ip6_frag_mdt_allocfail); 11116 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11117 return; 11118 } 11119 IP6_STAT(ipst, ip6_frag_mdt_allocd); 11120 11121 /* 11122 * Add a payload buffer to the Multidata; this operation must not 11123 * fail, or otherwise our logic in this routine is broken. There 11124 * is no memory allocation done by the routine, so any returned 11125 * failure simply tells us that we've done something wrong. 11126 * 11127 * A failure tells us that either we're adding the same payload 11128 * buffer more than once, or we're trying to add more buffers than 11129 * allowed. None of the above cases should happen, and we panic 11130 * because either there's horrible heap corruption, and/or 11131 * programming mistake. 11132 */ 11133 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) { 11134 goto pbuf_panic; 11135 } 11136 11137 hdr_ptr = hdr_mp->b_rptr; 11138 pld_ptr = mp->b_rptr; 11139 11140 pdi.flags = PDESC_HBUF_REF | PDESC_PBUF_REF; 11141 11142 ident = htonl(atomic_add_32_nv(&ire->ire_ident, 1)); 11143 11144 /* 11145 * len is the total length of the fragmentable data in this 11146 * datagram. 
For each fragment sent, we will decrement len 11147 * by the amount of fragmentable data sent in that fragment 11148 * until len reaches zero. 11149 */ 11150 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11151 11152 offset = 0; 11153 prev_nexthdr_offset += wroff; 11154 11155 while (len != 0) { 11156 size_t mlen; 11157 ip6_t *fip6h; 11158 ip6_frag_t *fraghdr; 11159 int error; 11160 11161 ASSERT((hdr_ptr + hdr_chunk_len) <= hdr_mp->b_wptr); 11162 mlen = MIN(len, max_chunk); 11163 len -= mlen; 11164 11165 fip6h = (ip6_t *)(hdr_ptr + wroff); 11166 ASSERT(OK_32PTR(fip6h)); 11167 bcopy(ip6h, fip6h, unfragmentable_len); 11168 hdr_ptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11169 11170 fip6h->ip6_plen = htons((uint16_t)(mlen + 11171 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11172 11173 fraghdr = (ip6_frag_t *)((unsigned char *)fip6h + 11174 unfragmentable_len); 11175 fraghdr->ip6f_nxt = nexthdr; 11176 fraghdr->ip6f_reserved = 0; 11177 fraghdr->ip6f_offlg = htons(offset) | 11178 ((len != 0) ? IP6F_MORE_FRAG : 0); 11179 fraghdr->ip6f_ident = ident; 11180 11181 /* 11182 * Record offset and size of header and data of the next packet 11183 * in the multidata message. 11184 */ 11185 PDESC_HDR_ADD(&pdi, hdr_ptr, wroff, 11186 unfragmentable_len + sizeof (ip6_frag_t), 0); 11187 PDESC_PLD_INIT(&pdi); 11188 i1 = MIN(mp->b_wptr - pld_ptr, mlen); 11189 ASSERT(i1 > 0); 11190 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, pld_ptr, i1); 11191 if (i1 == mlen) { 11192 pld_ptr += mlen; 11193 } else { 11194 i1 = mlen - i1; 11195 mp = mp->b_cont; 11196 ASSERT(mp != NULL); 11197 ASSERT(MBLKL(mp) >= i1); 11198 /* 11199 * Attach the next payload message block to the 11200 * multidata message. 
11201 */ 11202 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11203 goto pbuf_panic; 11204 PDESC_PLD_SPAN_ADD(&pdi, pbuf_idx, mp->b_rptr, i1); 11205 pld_ptr = mp->b_rptr + i1; 11206 } 11207 11208 if ((mmd_addpdesc(mmd, (pdescinfo_t *)&pdi, &error, 11209 KM_NOSLEEP)) == NULL) { 11210 /* 11211 * Any failure other than ENOMEM indicates that we 11212 * have passed in invalid pdesc info or parameters 11213 * to mmd_addpdesc, which must not happen. 11214 * 11215 * EINVAL is a result of failure on boundary checks 11216 * against the pdesc info contents. It should not 11217 * happen, and we panic because either there's 11218 * horrible heap corruption, and/or programming 11219 * mistake. 11220 */ 11221 if (error != ENOMEM) { 11222 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: " 11223 "pdesc logic error detected for " 11224 "mmd %p pinfo %p (%d)\n", 11225 (void *)mmd, (void *)&pdi, error); 11226 /* NOTREACHED */ 11227 } 11228 IP6_STAT(ipst, ip6_frag_mdt_addpdescfail); 11229 /* Free unattached payload message blocks as well */ 11230 md_mp->b_cont = mp->b_cont; 11231 goto free_mmd; 11232 } 11233 11234 /* Advance fragment offset. */ 11235 offset += mlen; 11236 11237 /* Advance to location for next header in the buffer. */ 11238 hdr_ptr += hdr_chunk_len; 11239 11240 /* Did we reach the next payload message block? */ 11241 if (pld_ptr == mp->b_wptr && mp->b_cont != NULL) { 11242 mp = mp->b_cont; 11243 /* 11244 * Attach the next message block with payload 11245 * data to the multidata message. 
11246 */ 11247 if ((pbuf_idx = mmd_addpldbuf(mmd, mp)) < 0) 11248 goto pbuf_panic; 11249 pld_ptr = mp->b_rptr; 11250 } 11251 } 11252 11253 ASSERT(hdr_mp->b_wptr == hdr_ptr); 11254 ASSERT(mp->b_wptr == pld_ptr); 11255 11256 /* Update IP statistics */ 11257 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates, pkts); 11258 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11259 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits, pkts); 11260 /* 11261 * The ipv6 header len is accounted for in unfragmentable_len so 11262 * when calculating the fragmentation overhead just add the frag 11263 * header len. 11264 */ 11265 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 11266 (ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN)) + 11267 pkts * (unfragmentable_len + sizeof (ip6_frag_t))); 11268 IP6_STAT_UPDATE(ipst, ip6_frag_mdt_pkt_out, pkts); 11269 11270 ire->ire_ob_pkt_count += pkts; 11271 if (ire->ire_ipif != NULL) 11272 atomic_add_32(&ire->ire_ipif->ipif_ob_pkt_count, pkts); 11273 11274 ire->ire_last_used_time = lbolt; 11275 /* Send it down */ 11276 putnext(stq, md_mp); 11277 return; 11278 11279 pbuf_panic: 11280 cmn_err(CE_PANIC, "ip_wput_frag_mdt_v6: payload buffer logic " 11281 "error for mmd %p pbuf %p (%d)", (void *)mmd, (void *)mp, 11282 pbuf_idx); 11283 /* NOTREACHED */ 11284 } 11285 11286 /* 11287 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 11288 * We have not optimized this in terms of number of mblks 11289 * allocated. For instance, for each fragment sent we always allocate a 11290 * mblk to hold the IPv6 header and fragment header. 11291 * 11292 * Assumes that all the extension headers are contained in the first mblk. 11293 * 11294 * The fragment header is inserted after an hop-by-hop options header 11295 * and after [an optional destinations header followed by] a routing header. 11296 * 11297 * NOTE : This function does not ire_refrele the ire passed in as 11298 * the argument. 
11299 */ 11300 void 11301 ip_wput_frag_v6(mblk_t *mp, ire_t *ire, uint_t reachable, conn_t *connp, 11302 int caller, int max_frag) 11303 { 11304 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 11305 ip6_t *fip6h; 11306 mblk_t *hmp; 11307 mblk_t *hmp0; 11308 mblk_t *dmp; 11309 ip6_frag_t *fraghdr; 11310 size_t unfragmentable_len; 11311 size_t len; 11312 size_t mlen; 11313 size_t max_chunk; 11314 uint32_t ident; 11315 uint16_t off_flags; 11316 uint16_t offset = 0; 11317 ill_t *ill; 11318 uint8_t nexthdr; 11319 uint_t prev_nexthdr_offset; 11320 uint8_t *ptr; 11321 ip_stack_t *ipst = ire->ire_ipst; 11322 11323 ASSERT(ire->ire_type == IRE_CACHE); 11324 ill = (ill_t *)ire->ire_stq->q_ptr; 11325 11326 if (max_frag <= 0) { 11327 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11328 freemsg(mp); 11329 return; 11330 } 11331 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 11332 11333 /* 11334 * Determine the length of the unfragmentable portion of this 11335 * datagram. This consists of the IPv6 header, a potential 11336 * hop-by-hop options header, a potential pre-routing-header 11337 * destination options header, and a potential routing header. 
11338 */ 11339 nexthdr = ip6h->ip6_nxt; 11340 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 11341 ptr = (uint8_t *)&ip6h[1]; 11342 11343 if (nexthdr == IPPROTO_HOPOPTS) { 11344 ip6_hbh_t *hbh_hdr; 11345 uint_t hdr_len; 11346 11347 hbh_hdr = (ip6_hbh_t *)ptr; 11348 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 11349 nexthdr = hbh_hdr->ip6h_nxt; 11350 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 11351 - (uint8_t *)ip6h; 11352 ptr += hdr_len; 11353 } 11354 if (nexthdr == IPPROTO_DSTOPTS) { 11355 ip6_dest_t *dest_hdr; 11356 uint_t hdr_len; 11357 11358 dest_hdr = (ip6_dest_t *)ptr; 11359 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 11360 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 11361 nexthdr = dest_hdr->ip6d_nxt; 11362 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 11363 - (uint8_t *)ip6h; 11364 ptr += hdr_len; 11365 } 11366 } 11367 if (nexthdr == IPPROTO_ROUTING) { 11368 ip6_rthdr_t *rthdr; 11369 uint_t hdr_len; 11370 11371 rthdr = (ip6_rthdr_t *)ptr; 11372 nexthdr = rthdr->ip6r_nxt; 11373 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 11374 - (uint8_t *)ip6h; 11375 hdr_len = 8 * (rthdr->ip6r_len + 1); 11376 ptr += hdr_len; 11377 } 11378 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 11379 11380 max_chunk = (min(max_frag, ire->ire_max_frag) - unfragmentable_len - 11381 sizeof (ip6_frag_t)) & ~7; 11382 11383 /* Check if we can use MDT to send out the frags. */ 11384 ASSERT(!IRE_IS_LOCAL(ire)); 11385 if (ipst->ips_ip_multidata_outbound && reachable == 0 && 11386 !(ire->ire_flags & RTF_MULTIRT) && ILL_MDT_CAPABLE(ill) && 11387 IP_CAN_FRAG_MDT(mp, unfragmentable_len, max_chunk)) { 11388 ip_wput_frag_mdt_v6(mp, ire, max_chunk, unfragmentable_len, 11389 nexthdr, prev_nexthdr_offset); 11390 return; 11391 } 11392 11393 /* 11394 * Allocate an mblk with enough room for the link-layer 11395 * header, the unfragmentable part of the datagram, and the 11396 * fragment header. 
This (or a copy) will be used as the 11397 * first mblk for each fragment we send. 11398 */ 11399 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 11400 ipst->ips_ip_wroff_extra, mp); 11401 if (hmp == NULL) { 11402 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11403 freemsg(mp); 11404 return; 11405 } 11406 hmp->b_rptr += ipst->ips_ip_wroff_extra; 11407 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 11408 11409 fip6h = (ip6_t *)hmp->b_rptr; 11410 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 11411 11412 bcopy(ip6h, fip6h, unfragmentable_len); 11413 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 11414 11415 ident = atomic_add_32_nv(&ire->ire_ident, 1); 11416 11417 fraghdr->ip6f_nxt = nexthdr; 11418 fraghdr->ip6f_reserved = 0; 11419 fraghdr->ip6f_offlg = 0; 11420 fraghdr->ip6f_ident = htonl(ident); 11421 11422 /* 11423 * len is the total length of the fragmentable data in this 11424 * datagram. For each fragment sent, we will decrement len 11425 * by the amount of fragmentable data sent in that fragment 11426 * until len reaches zero. 11427 */ 11428 len = ntohs(ip6h->ip6_plen) - (unfragmentable_len - IPV6_HDR_LEN); 11429 11430 /* 11431 * Move read ptr past unfragmentable portion, we don't want this part 11432 * of the data in our fragments. 
11433 */ 11434 mp->b_rptr += unfragmentable_len; 11435 11436 while (len != 0) { 11437 mlen = MIN(len, max_chunk); 11438 len -= mlen; 11439 if (len != 0) { 11440 /* Not last */ 11441 hmp0 = copyb(hmp); 11442 if (hmp0 == NULL) { 11443 freeb(hmp); 11444 freemsg(mp); 11445 BUMP_MIB(ill->ill_ip_mib, 11446 ipIfStatsOutFragFails); 11447 ip1dbg(("ip_wput_frag_v6: copyb failed\n")); 11448 return; 11449 } 11450 off_flags = IP6F_MORE_FRAG; 11451 } else { 11452 /* Last fragment */ 11453 hmp0 = hmp; 11454 hmp = NULL; 11455 off_flags = 0; 11456 } 11457 fip6h = (ip6_t *)(hmp0->b_rptr); 11458 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len); 11459 11460 fip6h->ip6_plen = htons((uint16_t)(mlen + 11461 unfragmentable_len - IPV6_HDR_LEN + sizeof (ip6_frag_t))); 11462 /* 11463 * Note: Optimization alert. 11464 * In IPv6 (and IPv4) protocol header, Fragment Offset 11465 * ("offset") is 13 bits wide and in 8-octet units. 11466 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 11467 * it occupies the most significant 13 bits. 11468 * (least significant 13 bits in IPv4). 11469 * We do not do any shifts here. Not shifting is same effect 11470 * as taking offset value in octet units, dividing by 8 and 11471 * then shifting 3 bits left to line it up in place in proper 11472 * place protocol header. 
11473 */ 11474 fraghdr->ip6f_offlg = htons(offset) | off_flags; 11475 11476 if (!(dmp = ip_carve_mp(&mp, mlen))) { 11477 /* mp has already been freed by ip_carve_mp() */ 11478 if (hmp != NULL) 11479 freeb(hmp); 11480 freeb(hmp0); 11481 ip1dbg(("ip_carve_mp: failed\n")); 11482 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 11483 return; 11484 } 11485 hmp0->b_cont = dmp; 11486 /* Get the priority marking, if any */ 11487 hmp0->b_band = dmp->b_band; 11488 UPDATE_OB_PKT_COUNT(ire); 11489 ire->ire_last_used_time = lbolt; 11490 ip_xmit_v6(hmp0, ire, reachable | IP6_NO_IPPOLICY, connp, 11491 caller, NULL); 11492 reachable = 0; /* No need to redo state machine in loop */ 11493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 11494 offset += mlen; 11495 } 11496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 11497 } 11498 11499 /* 11500 * Determine if the ill and multicast aspects of that packets 11501 * "matches" the conn. 11502 */ 11503 boolean_t 11504 conn_wantpacket_v6(conn_t *connp, ill_t *ill, ip6_t *ip6h, int fanout_flags, 11505 zoneid_t zoneid) 11506 { 11507 ill_t *bound_ill; 11508 boolean_t wantpacket; 11509 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 11510 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 11511 11512 /* 11513 * conn_incoming_ill is set by IPV6_BOUND_IF which limits 11514 * unicast and multicast reception to conn_incoming_ill. 11515 * conn_wantpacket_v6 is called both for unicast and 11516 * multicast. 11517 */ 11518 bound_ill = connp->conn_incoming_ill; 11519 if (bound_ill != NULL) { 11520 if (IS_IPMP(bound_ill)) { 11521 if (bound_ill->ill_grp != ill->ill_grp) 11522 return (B_FALSE); 11523 } else { 11524 if (bound_ill != ill) 11525 return (B_FALSE); 11526 } 11527 } 11528 11529 if (connp->conn_multi_router) 11530 return (B_TRUE); 11531 11532 if (!IN6_IS_ADDR_MULTICAST(v6dst_ptr) && 11533 !IN6_IS_ADDR_V4MAPPED_CLASSD(v6dst_ptr)) { 11534 /* 11535 * Unicast case: we match the conn only if it's in the specified 11536 * zone. 
11537 */ 11538 return (IPCL_ZONE_MATCH(connp, zoneid)); 11539 } 11540 11541 if ((fanout_flags & IP_FF_NO_MCAST_LOOP) && 11542 (connp->conn_zoneid == zoneid || zoneid == ALL_ZONES)) { 11543 /* 11544 * Loopback case: the sending endpoint has IP_MULTICAST_LOOP 11545 * disabled, therefore we don't dispatch the multicast packet to 11546 * the sending zone. 11547 */ 11548 return (B_FALSE); 11549 } 11550 11551 if (IS_LOOPBACK(ill) && connp->conn_zoneid != zoneid && 11552 zoneid != ALL_ZONES) { 11553 /* 11554 * Multicast packet on the loopback interface: we only match 11555 * conns who joined the group in the specified zone. 11556 */ 11557 return (B_FALSE); 11558 } 11559 11560 mutex_enter(&connp->conn_lock); 11561 wantpacket = 11562 ilg_lookup_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, ill) != NULL; 11563 mutex_exit(&connp->conn_lock); 11564 11565 return (wantpacket); 11566 } 11567 11568 11569 /* 11570 * Transmit a packet and update any NUD state based on the flags 11571 * XXX need to "recover" any ip6i_t when doing putq! 11572 * 11573 * NOTE : This function does not ire_refrele the ire passed in as the 11574 * argument. 
11575 */ 11576 void 11577 ip_xmit_v6(mblk_t *mp, ire_t *ire, uint_t flags, conn_t *connp, 11578 int caller, ipsec_out_t *io) 11579 { 11580 mblk_t *mp1; 11581 nce_t *nce = ire->ire_nce; 11582 ill_t *ill; 11583 ill_t *out_ill; 11584 uint64_t delta; 11585 ip6_t *ip6h; 11586 queue_t *stq = ire->ire_stq; 11587 ire_t *ire1 = NULL; 11588 ire_t *save_ire = ire; 11589 boolean_t multirt_send = B_FALSE; 11590 mblk_t *next_mp = NULL; 11591 ip_stack_t *ipst = ire->ire_ipst; 11592 boolean_t fp_prepend = B_FALSE; 11593 uint32_t hlen; 11594 11595 ip6h = (ip6_t *)mp->b_rptr; 11596 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ire->ire_addr_v6)); 11597 ASSERT(ire->ire_ipversion == IPV6_VERSION); 11598 ASSERT(nce != NULL); 11599 ASSERT(mp->b_datap->db_type == M_DATA); 11600 ASSERT(stq != NULL); 11601 11602 ill = ire_to_ill(ire); 11603 if (!ill) { 11604 ip0dbg(("ip_xmit_v6: ire_to_ill failed\n")); 11605 freemsg(mp); 11606 return; 11607 } 11608 11609 /* Flow-control check has been done in ip_wput_ire_v6 */ 11610 if (IP_FLOW_CONTROLLED_ULP(ip6h->ip6_nxt) || caller == IP_WPUT || 11611 caller == IP_WSRV || canput(stq->q_next)) { 11612 uint32_t ill_index; 11613 11614 /* 11615 * In most cases, the emission loop below is entered only 11616 * once. Only in the case where the ire holds the 11617 * RTF_MULTIRT flag, do we loop to process all RTF_MULTIRT 11618 * flagged ires in the bucket, and send the packet 11619 * through all crossed RTF_MULTIRT routes. 11620 */ 11621 if (ire->ire_flags & RTF_MULTIRT) { 11622 /* 11623 * Multirouting case. The bucket where ire is stored 11624 * probably holds other RTF_MULTIRT flagged ires 11625 * to the destination. In this call to ip_xmit_v6, 11626 * we attempt to send the packet through all 11627 * those ires. Thus, we first ensure that ire is the 11628 * first RTF_MULTIRT ire in the bucket, 11629 * before walking the ire list. 
11630 */ 11631 ire_t *first_ire; 11632 irb_t *irb = ire->ire_bucket; 11633 ASSERT(irb != NULL); 11634 multirt_send = B_TRUE; 11635 11636 /* Make sure we do not omit any multiroute ire. */ 11637 IRB_REFHOLD(irb); 11638 for (first_ire = irb->irb_ire; 11639 first_ire != NULL; 11640 first_ire = first_ire->ire_next) { 11641 if ((first_ire->ire_flags & RTF_MULTIRT) && 11642 (IN6_ARE_ADDR_EQUAL(&first_ire->ire_addr_v6, 11643 &ire->ire_addr_v6)) && 11644 !(first_ire->ire_marks & 11645 (IRE_MARK_CONDEMNED | IRE_MARK_TESTHIDDEN))) 11646 break; 11647 } 11648 11649 if ((first_ire != NULL) && (first_ire != ire)) { 11650 IRE_REFHOLD(first_ire); 11651 /* ire will be released by the caller */ 11652 ire = first_ire; 11653 nce = ire->ire_nce; 11654 stq = ire->ire_stq; 11655 ill = ire_to_ill(ire); 11656 } 11657 IRB_REFRELE(irb); 11658 } else if (connp != NULL && IPCL_IS_TCP(connp) && 11659 connp->conn_mdt_ok && !connp->conn_tcp->tcp_mdt && 11660 ILL_MDT_USABLE(ill)) { 11661 /* 11662 * This tcp connection was marked as MDT-capable, but 11663 * it has been turned off due changes in the interface. 11664 * Now that the interface support is back, turn it on 11665 * by notifying tcp. We don't directly modify tcp_mdt, 11666 * since we leave all the details to the tcp code that 11667 * knows better. 11668 */ 11669 mblk_t *mdimp = ip_mdinfo_alloc(ill->ill_mdt_capab); 11670 11671 if (mdimp == NULL) { 11672 ip0dbg(("ip_xmit_v6: can't re-enable MDT for " 11673 "connp %p (ENOMEM)\n", (void *)connp)); 11674 } else { 11675 CONN_INC_REF(connp); 11676 SQUEUE_ENTER_ONE(connp->conn_sqp, mdimp, 11677 tcp_input, connp, SQ_FILL, 11678 SQTAG_TCP_INPUT_MCTL); 11679 } 11680 } 11681 11682 do { 11683 mblk_t *mp_ip6h; 11684 11685 if (multirt_send) { 11686 irb_t *irb; 11687 /* 11688 * We are in a multiple send case, need to get 11689 * the next ire and make a duplicate of the 11690 * packet. ire1 holds here the next ire to 11691 * process in the bucket. 
If multirouting is 11692 * expected, any non-RTF_MULTIRT ire that has 11693 * the right destination address is ignored. 11694 */ 11695 irb = ire->ire_bucket; 11696 ASSERT(irb != NULL); 11697 11698 IRB_REFHOLD(irb); 11699 for (ire1 = ire->ire_next; 11700 ire1 != NULL; 11701 ire1 = ire1->ire_next) { 11702 if (!(ire1->ire_flags & RTF_MULTIRT)) 11703 continue; 11704 if (!IN6_ARE_ADDR_EQUAL( 11705 &ire1->ire_addr_v6, 11706 &ire->ire_addr_v6)) 11707 continue; 11708 if (ire1->ire_marks & 11709 IRE_MARK_CONDEMNED) 11710 continue; 11711 11712 /* Got one */ 11713 if (ire1 != save_ire) { 11714 IRE_REFHOLD(ire1); 11715 } 11716 break; 11717 } 11718 IRB_REFRELE(irb); 11719 11720 if (ire1 != NULL) { 11721 next_mp = copyb(mp); 11722 if ((next_mp == NULL) || 11723 ((mp->b_cont != NULL) && 11724 ((next_mp->b_cont = 11725 dupmsg(mp->b_cont)) == NULL))) { 11726 freemsg(next_mp); 11727 next_mp = NULL; 11728 ire_refrele(ire1); 11729 ire1 = NULL; 11730 } 11731 } 11732 11733 /* Last multiroute ire; don't loop anymore. */ 11734 if (ire1 == NULL) { 11735 multirt_send = B_FALSE; 11736 } 11737 } 11738 11739 ill_index = 11740 ((ill_t *)stq->q_ptr)->ill_phyint->phyint_ifindex; 11741 11742 /* Initiate IPPF processing */ 11743 if (IP6_OUT_IPP(flags, ipst)) { 11744 ip_process(IPP_LOCAL_OUT, &mp, ill_index); 11745 if (mp == NULL) { 11746 BUMP_MIB(ill->ill_ip_mib, 11747 ipIfStatsOutDiscards); 11748 if (next_mp != NULL) 11749 freemsg(next_mp); 11750 if (ire != save_ire) { 11751 ire_refrele(ire); 11752 } 11753 return; 11754 } 11755 ip6h = (ip6_t *)mp->b_rptr; 11756 } 11757 mp_ip6h = mp; 11758 11759 /* 11760 * Check for fastpath, we need to hold nce_lock to 11761 * prevent fastpath update from chaining nce_fp_mp. 
11762 */ 11763 11764 ASSERT(nce->nce_ipversion != IPV4_VERSION); 11765 mutex_enter(&nce->nce_lock); 11766 if ((mp1 = nce->nce_fp_mp) != NULL) { 11767 uchar_t *rptr; 11768 11769 hlen = MBLKL(mp1); 11770 rptr = mp->b_rptr - hlen; 11771 /* 11772 * make sure there is room for the fastpath 11773 * datalink header 11774 */ 11775 if (rptr < mp->b_datap->db_base) { 11776 mp1 = copyb(mp1); 11777 mutex_exit(&nce->nce_lock); 11778 if (mp1 == NULL) { 11779 BUMP_MIB(ill->ill_ip_mib, 11780 ipIfStatsOutDiscards); 11781 freemsg(mp); 11782 if (next_mp != NULL) 11783 freemsg(next_mp); 11784 if (ire != save_ire) { 11785 ire_refrele(ire); 11786 } 11787 return; 11788 } 11789 mp1->b_cont = mp; 11790 11791 /* Get the priority marking, if any */ 11792 mp1->b_band = mp->b_band; 11793 mp = mp1; 11794 } else { 11795 mp->b_rptr = rptr; 11796 /* 11797 * fastpath - pre-pend datalink 11798 * header 11799 */ 11800 bcopy(mp1->b_rptr, rptr, hlen); 11801 mutex_exit(&nce->nce_lock); 11802 fp_prepend = B_TRUE; 11803 } 11804 } else { 11805 /* 11806 * Get the DL_UNITDATA_REQ. 11807 */ 11808 mp1 = nce->nce_res_mp; 11809 if (mp1 == NULL) { 11810 mutex_exit(&nce->nce_lock); 11811 ip1dbg(("ip_xmit_v6: No resolution " 11812 "block ire = %p\n", (void *)ire)); 11813 freemsg(mp); 11814 if (next_mp != NULL) 11815 freemsg(next_mp); 11816 if (ire != save_ire) { 11817 ire_refrele(ire); 11818 } 11819 return; 11820 } 11821 /* 11822 * Prepend the DL_UNITDATA_REQ. 
11823 */ 11824 mp1 = copyb(mp1); 11825 mutex_exit(&nce->nce_lock); 11826 if (mp1 == NULL) { 11827 BUMP_MIB(ill->ill_ip_mib, 11828 ipIfStatsOutDiscards); 11829 freemsg(mp); 11830 if (next_mp != NULL) 11831 freemsg(next_mp); 11832 if (ire != save_ire) { 11833 ire_refrele(ire); 11834 } 11835 return; 11836 } 11837 mp1->b_cont = mp; 11838 11839 /* Get the priority marking, if any */ 11840 mp1->b_band = mp->b_band; 11841 mp = mp1; 11842 } 11843 11844 out_ill = (ill_t *)stq->q_ptr; 11845 11846 DTRACE_PROBE4(ip6__physical__out__start, 11847 ill_t *, NULL, ill_t *, out_ill, 11848 ip6_t *, ip6h, mblk_t *, mp); 11849 11850 FW_HOOKS6(ipst->ips_ip6_physical_out_event, 11851 ipst->ips_ipv6firewall_physical_out, 11852 NULL, out_ill, ip6h, mp, mp_ip6h, 0, ipst); 11853 11854 DTRACE_PROBE1(ip6__physical__out__end, mblk_t *, mp); 11855 11856 if (mp == NULL) { 11857 if (multirt_send) { 11858 ASSERT(ire1 != NULL); 11859 if (ire != save_ire) { 11860 ire_refrele(ire); 11861 } 11862 /* 11863 * Proceed with the next RTF_MULTIRT 11864 * ire, also set up the send-to queue 11865 * accordingly. 11866 */ 11867 ire = ire1; 11868 ire1 = NULL; 11869 stq = ire->ire_stq; 11870 nce = ire->ire_nce; 11871 ill = ire_to_ill(ire); 11872 mp = next_mp; 11873 next_mp = NULL; 11874 continue; 11875 } else { 11876 ASSERT(next_mp == NULL); 11877 ASSERT(ire1 == NULL); 11878 break; 11879 } 11880 } 11881 11882 if (ipst->ips_ip6_observe.he_interested) { 11883 zoneid_t szone; 11884 11885 /* 11886 * Both of these functions expect b_rptr to 11887 * be where the IPv6 header starts, so advance 11888 * past the link layer header. 
11889 */ 11890 if (fp_prepend) 11891 mp_ip6h->b_rptr += hlen; 11892 szone = ip_get_zoneid_v6(&ip6h->ip6_src, 11893 mp_ip6h, out_ill, ipst, ALL_ZONES); 11894 ipobs_hook(mp_ip6h, IPOBS_HOOK_OUTBOUND, szone, 11895 ALL_ZONES, out_ill, ipst); 11896 if (fp_prepend) 11897 mp_ip6h->b_rptr -= hlen; 11898 } 11899 11900 /* 11901 * Update ire and MIB counters; for save_ire, this has 11902 * been done by the caller. 11903 */ 11904 if (ire != save_ire) { 11905 UPDATE_OB_PKT_COUNT(ire); 11906 ire->ire_last_used_time = lbolt; 11907 11908 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 11909 BUMP_MIB(ill->ill_ip_mib, 11910 ipIfStatsHCOutMcastPkts); 11911 UPDATE_MIB(ill->ill_ip_mib, 11912 ipIfStatsHCOutMcastOctets, 11913 ntohs(ip6h->ip6_plen) + 11914 IPV6_HDR_LEN); 11915 } 11916 } 11917 11918 /* 11919 * Send it down. XXX Do we want to flow control AH/ESP 11920 * packets that carry TCP payloads? We don't flow 11921 * control TCP packets, but we should also not 11922 * flow-control TCP packets that have been protected. 11923 * We don't have an easy way to find out if an AH/ESP 11924 * packet was originally TCP or not currently. 11925 */ 11926 if (io == NULL) { 11927 BUMP_MIB(ill->ill_ip_mib, 11928 ipIfStatsHCOutTransmits); 11929 UPDATE_MIB(ill->ill_ip_mib, 11930 ipIfStatsHCOutOctets, 11931 ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN); 11932 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 11933 void_ip_t *, ip6h, __dtrace_ipsr_ill_t *, 11934 out_ill, ipha_t *, NULL, ip6_t *, ip6h, 11935 int, 0); 11936 11937 putnext(stq, mp); 11938 } else { 11939 /* 11940 * Safety Pup says: make sure this is 11941 * going to the right interface! 
11942 */ 11943 if (io->ipsec_out_capab_ill_index != 11944 ill_index) { 11945 /* IPsec kstats: bump lose counter */ 11946 freemsg(mp1); 11947 } else { 11948 BUMP_MIB(ill->ill_ip_mib, 11949 ipIfStatsHCOutTransmits); 11950 UPDATE_MIB(ill->ill_ip_mib, 11951 ipIfStatsHCOutOctets, 11952 ntohs(ip6h->ip6_plen) + 11953 IPV6_HDR_LEN); 11954 DTRACE_IP7(send, mblk_t *, mp, 11955 conn_t *, NULL, void_ip_t *, ip6h, 11956 __dtrace_ipsr_ill_t *, out_ill, 11957 ipha_t *, NULL, ip6_t *, ip6h, int, 11958 0); 11959 ipsec_hw_putnext(stq, mp); 11960 } 11961 } 11962 11963 if (nce->nce_flags & (NCE_F_NONUD|NCE_F_PERMANENT)) { 11964 if (ire != save_ire) { 11965 ire_refrele(ire); 11966 } 11967 if (multirt_send) { 11968 ASSERT(ire1 != NULL); 11969 /* 11970 * Proceed with the next RTF_MULTIRT 11971 * ire, also set up the send-to queue 11972 * accordingly. 11973 */ 11974 ire = ire1; 11975 ire1 = NULL; 11976 stq = ire->ire_stq; 11977 nce = ire->ire_nce; 11978 ill = ire_to_ill(ire); 11979 mp = next_mp; 11980 next_mp = NULL; 11981 continue; 11982 } 11983 ASSERT(next_mp == NULL); 11984 ASSERT(ire1 == NULL); 11985 return; 11986 } 11987 11988 ASSERT(nce->nce_state != ND_INCOMPLETE); 11989 11990 /* 11991 * Check for upper layer advice 11992 */ 11993 if (flags & IPV6_REACHABILITY_CONFIRMATION) { 11994 /* 11995 * It should be o.k. to check the state without 11996 * a lock here, at most we lose an advice. 
11997 */ 11998 nce->nce_last = TICK_TO_MSEC(lbolt64); 11999 if (nce->nce_state != ND_REACHABLE) { 12000 12001 mutex_enter(&nce->nce_lock); 12002 nce->nce_state = ND_REACHABLE; 12003 nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 12004 mutex_exit(&nce->nce_lock); 12005 (void) untimeout(nce->nce_timeout_id); 12006 if (ip_debug > 2) { 12007 /* ip1dbg */ 12008 pr_addr_dbg("ip_xmit_v6: state" 12009 " for %s changed to" 12010 " REACHABLE\n", AF_INET6, 12011 &ire->ire_addr_v6); 12012 } 12013 } 12014 if (ire != save_ire) { 12015 ire_refrele(ire); 12016 } 12017 if (multirt_send) { 12018 ASSERT(ire1 != NULL); 12019 /* 12020 * Proceed with the next RTF_MULTIRT 12021 * ire, also set up the send-to queue 12022 * accordingly. 12023 */ 12024 ire = ire1; 12025 ire1 = NULL; 12026 stq = ire->ire_stq; 12027 nce = ire->ire_nce; 12028 ill = ire_to_ill(ire); 12029 mp = next_mp; 12030 next_mp = NULL; 12031 continue; 12032 } 12033 ASSERT(next_mp == NULL); 12034 ASSERT(ire1 == NULL); 12035 return; 12036 } 12037 12038 delta = TICK_TO_MSEC(lbolt64) - nce->nce_last; 12039 ip1dbg(("ip_xmit_v6: delta = %" PRId64 12040 " ill_reachable_time = %d \n", delta, 12041 ill->ill_reachable_time)); 12042 if (delta > (uint64_t)ill->ill_reachable_time) { 12043 nce = ire->ire_nce; 12044 mutex_enter(&nce->nce_lock); 12045 switch (nce->nce_state) { 12046 case ND_REACHABLE: 12047 case ND_STALE: 12048 /* 12049 * ND_REACHABLE is identical to 12050 * ND_STALE in this specific case. If 12051 * reachable time has expired for this 12052 * neighbor (delta is greater than 12053 * reachable time), conceptually, the 12054 * neighbor cache is no longer in 12055 * REACHABLE state, but already in 12056 * STALE state. So the correct 12057 * transition here is to ND_DELAY. 
12058 */ 12059 nce->nce_state = ND_DELAY; 12060 mutex_exit(&nce->nce_lock); 12061 NDP_RESTART_TIMER(nce, 12062 ipst->ips_delay_first_probe_time); 12063 if (ip_debug > 3) { 12064 /* ip2dbg */ 12065 pr_addr_dbg("ip_xmit_v6: state" 12066 " for %s changed to" 12067 " DELAY\n", AF_INET6, 12068 &ire->ire_addr_v6); 12069 } 12070 break; 12071 case ND_DELAY: 12072 case ND_PROBE: 12073 mutex_exit(&nce->nce_lock); 12074 /* Timers have already started */ 12075 break; 12076 case ND_UNREACHABLE: 12077 /* 12078 * ndp timer has detected that this nce 12079 * is unreachable and initiated deleting 12080 * this nce and all its associated IREs. 12081 * This is a race where we found the 12082 * ire before it was deleted and have 12083 * just sent out a packet using this 12084 * unreachable nce. 12085 */ 12086 mutex_exit(&nce->nce_lock); 12087 break; 12088 default: 12089 ASSERT(0); 12090 } 12091 } 12092 12093 if (multirt_send) { 12094 ASSERT(ire1 != NULL); 12095 /* 12096 * Proceed with the next RTF_MULTIRT ire, 12097 * Also set up the send-to queue accordingly. 12098 */ 12099 if (ire != save_ire) { 12100 ire_refrele(ire); 12101 } 12102 ire = ire1; 12103 ire1 = NULL; 12104 stq = ire->ire_stq; 12105 nce = ire->ire_nce; 12106 ill = ire_to_ill(ire); 12107 mp = next_mp; 12108 next_mp = NULL; 12109 } 12110 } while (multirt_send); 12111 /* 12112 * In the multirouting case, release the last ire used for 12113 * emission. save_ire will be released by the caller. 12114 */ 12115 if (ire != save_ire) { 12116 ire_refrele(ire); 12117 } 12118 } else { 12119 /* 12120 * Can't apply backpressure, just discard the packet. 12121 */ 12122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 12123 freemsg(mp); 12124 return; 12125 } 12126 } 12127 12128 /* 12129 * pr_addr_dbg function provides the needed buffer space to call 12130 * inet_ntop() function's 3rd argument. This function should be 12131 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 12132 * stack buffer space in it's own stack frame. 
This function uses 12133 * a buffer from it's own stack and prints the information. 12134 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 12135 * 12136 * Note: This function can call inet_ntop() once. 12137 */ 12138 void 12139 pr_addr_dbg(char *fmt1, int af, const void *addr) 12140 { 12141 char buf[INET6_ADDRSTRLEN]; 12142 12143 if (fmt1 == NULL) { 12144 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 12145 return; 12146 } 12147 12148 /* 12149 * This does not compare debug level and just prints 12150 * out. Thus it is the responsibility of the caller 12151 * to check the appropriate debug-level before calling 12152 * this function. 12153 */ 12154 if (ip_debug > 0) { 12155 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 12156 } 12157 12158 12159 } 12160 12161 12162 /* 12163 * Return the length in bytes of the IPv6 headers (base header, ip6i_t 12164 * if needed and extension headers) that will be needed based on the 12165 * ip6_pkt_t structure passed by the caller. 12166 * 12167 * The returned length does not include the length of the upper level 12168 * protocol (ULP) header. 
12169 */ 12170 int 12171 ip_total_hdrs_len_v6(ip6_pkt_t *ipp) 12172 { 12173 int len; 12174 12175 len = IPV6_HDR_LEN; 12176 if (ipp->ipp_fields & IPPF_HAS_IP6I) 12177 len += sizeof (ip6i_t); 12178 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12179 ASSERT(ipp->ipp_hopoptslen != 0); 12180 len += ipp->ipp_hopoptslen; 12181 } 12182 if (ipp->ipp_fields & IPPF_RTHDR) { 12183 ASSERT(ipp->ipp_rthdrlen != 0); 12184 len += ipp->ipp_rthdrlen; 12185 } 12186 /* 12187 * En-route destination options 12188 * Only do them if there's a routing header as well 12189 */ 12190 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12191 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12192 ASSERT(ipp->ipp_rtdstoptslen != 0); 12193 len += ipp->ipp_rtdstoptslen; 12194 } 12195 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12196 ASSERT(ipp->ipp_dstoptslen != 0); 12197 len += ipp->ipp_dstoptslen; 12198 } 12199 return (len); 12200 } 12201 12202 /* 12203 * All-purpose routine to build a header chain of an IPv6 header 12204 * followed by any required extension headers and a proto header, 12205 * preceeded (where necessary) by an ip6i_t private header. 12206 * 12207 * The fields of the IPv6 header that are derived from the ip6_pkt_t 12208 * will be filled in appropriately. 12209 * Thus the caller must fill in the rest of the IPv6 header, such as 12210 * traffic class/flowid, source address (if not set here), hoplimit (if not 12211 * set here) and destination address. 12212 * 12213 * The extension headers and ip6i_t header will all be fully filled in. 12214 */ 12215 void 12216 ip_build_hdrs_v6(uchar_t *ext_hdrs, uint_t ext_hdrs_len, 12217 ip6_pkt_t *ipp, uint8_t protocol) 12218 { 12219 uint8_t *nxthdr_ptr; 12220 uint8_t *cp; 12221 ip6i_t *ip6i; 12222 ip6_t *ip6h = (ip6_t *)ext_hdrs; 12223 12224 /* 12225 * If sending private ip6i_t header down (checksum info, nexthop, 12226 * or ifindex), adjust ip header pointer and set ip6i_t header pointer, 12227 * then fill it in. (The checksum info will be filled in by icmp). 
12228 */ 12229 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 12230 ip6i = (ip6i_t *)ip6h; 12231 ip6h = (ip6_t *)&ip6i[1]; 12232 12233 ip6i->ip6i_flags = 0; 12234 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12235 if (ipp->ipp_fields & IPPF_IFINDEX || 12236 ipp->ipp_fields & IPPF_SCOPE_ID) { 12237 ASSERT(ipp->ipp_ifindex != 0); 12238 ip6i->ip6i_flags |= IP6I_IFINDEX; 12239 ip6i->ip6i_ifindex = ipp->ipp_ifindex; 12240 } 12241 if (ipp->ipp_fields & IPPF_ADDR) { 12242 /* 12243 * Enable per-packet source address verification if 12244 * IPV6_PKTINFO specified the source address. 12245 * ip6_src is set in the transport's _wput function. 12246 */ 12247 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12248 &ipp->ipp_addr)); 12249 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 12250 } 12251 if (ipp->ipp_fields & IPPF_UNICAST_HOPS) { 12252 ip6h->ip6_hops = ipp->ipp_unicast_hops; 12253 /* 12254 * We need to set this flag so that IP doesn't 12255 * rewrite the IPv6 header's hoplimit with the 12256 * current default value. 12257 */ 12258 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 12259 } 12260 if (ipp->ipp_fields & IPPF_NEXTHOP) { 12261 ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 12262 &ipp->ipp_nexthop)); 12263 ip6i->ip6i_flags |= IP6I_NEXTHOP; 12264 ip6i->ip6i_nexthop = ipp->ipp_nexthop; 12265 } 12266 /* 12267 * tell IP this is an ip6i_t private header 12268 */ 12269 ip6i->ip6i_nxt = IPPROTO_RAW; 12270 } 12271 /* Initialize IPv6 header */ 12272 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 12273 if (ipp->ipp_fields & IPPF_TCLASS) { 12274 ip6h->ip6_vcf = (ip6h->ip6_vcf & ~IPV6_FLOWINFO_TCLASS) | 12275 (ipp->ipp_tclass << 20); 12276 } 12277 if (ipp->ipp_fields & IPPF_ADDR) 12278 ip6h->ip6_src = ipp->ipp_addr; 12279 12280 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 12281 cp = (uint8_t *)&ip6h[1]; 12282 /* 12283 * Here's where we have to start stringing together 12284 * any extension headers in the right order: 12285 * Hop-by-hop, destination, routing, and final destination opts. 
12286 */ 12287 if (ipp->ipp_fields & IPPF_HOPOPTS) { 12288 /* Hop-by-hop options */ 12289 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 12290 12291 *nxthdr_ptr = IPPROTO_HOPOPTS; 12292 nxthdr_ptr = &hbh->ip6h_nxt; 12293 12294 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 12295 cp += ipp->ipp_hopoptslen; 12296 } 12297 /* 12298 * En-route destination options 12299 * Only do them if there's a routing header as well 12300 */ 12301 if ((ipp->ipp_fields & (IPPF_RTDSTOPTS|IPPF_RTHDR)) == 12302 (IPPF_RTDSTOPTS|IPPF_RTHDR)) { 12303 ip6_dest_t *dst = (ip6_dest_t *)cp; 12304 12305 *nxthdr_ptr = IPPROTO_DSTOPTS; 12306 nxthdr_ptr = &dst->ip6d_nxt; 12307 12308 bcopy(ipp->ipp_rtdstopts, cp, ipp->ipp_rtdstoptslen); 12309 cp += ipp->ipp_rtdstoptslen; 12310 } 12311 /* 12312 * Routing header next 12313 */ 12314 if (ipp->ipp_fields & IPPF_RTHDR) { 12315 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 12316 12317 *nxthdr_ptr = IPPROTO_ROUTING; 12318 nxthdr_ptr = &rt->ip6r_nxt; 12319 12320 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 12321 cp += ipp->ipp_rthdrlen; 12322 } 12323 /* 12324 * Do ultimate destination options 12325 */ 12326 if (ipp->ipp_fields & IPPF_DSTOPTS) { 12327 ip6_dest_t *dest = (ip6_dest_t *)cp; 12328 12329 *nxthdr_ptr = IPPROTO_DSTOPTS; 12330 nxthdr_ptr = &dest->ip6d_nxt; 12331 12332 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 12333 cp += ipp->ipp_dstoptslen; 12334 } 12335 /* 12336 * Now set the last header pointer to the proto passed in 12337 */ 12338 *nxthdr_ptr = protocol; 12339 ASSERT((int)(cp - ext_hdrs) == ext_hdrs_len); 12340 } 12341 12342 /* 12343 * Return a pointer to the routing header extension header 12344 * in the IPv6 header(s) chain passed in. 
12345 * If none found, return NULL 12346 * Assumes that all extension headers are in same mblk as the v6 header 12347 */ 12348 ip6_rthdr_t * 12349 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 12350 { 12351 ip6_dest_t *desthdr; 12352 ip6_frag_t *fraghdr; 12353 uint_t hdrlen; 12354 uint8_t nexthdr; 12355 uint8_t *ptr = (uint8_t *)&ip6h[1]; 12356 12357 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 12358 return ((ip6_rthdr_t *)ptr); 12359 12360 /* 12361 * The routing header will precede all extension headers 12362 * other than the hop-by-hop and destination options 12363 * extension headers, so if we see anything other than those, 12364 * we're done and didn't find it. 12365 * We could see a destination options header alone but no 12366 * routing header, in which case we'll return NULL as soon as 12367 * we see anything after that. 12368 * Hop-by-hop and destination option headers are identical, 12369 * so we can use either one we want as a template. 12370 */ 12371 nexthdr = ip6h->ip6_nxt; 12372 while (ptr < endptr) { 12373 /* Is there enough left for len + nexthdr? */ 12374 if (ptr + MIN_EHDR_LEN > endptr) 12375 return (NULL); 12376 12377 switch (nexthdr) { 12378 case IPPROTO_HOPOPTS: 12379 case IPPROTO_DSTOPTS: 12380 /* Assumes the headers are identical for hbh and dst */ 12381 desthdr = (ip6_dest_t *)ptr; 12382 hdrlen = 8 * (desthdr->ip6d_len + 1); 12383 nexthdr = desthdr->ip6d_nxt; 12384 break; 12385 12386 case IPPROTO_ROUTING: 12387 return ((ip6_rthdr_t *)ptr); 12388 12389 case IPPROTO_FRAGMENT: 12390 fraghdr = (ip6_frag_t *)ptr; 12391 hdrlen = sizeof (ip6_frag_t); 12392 nexthdr = fraghdr->ip6f_nxt; 12393 break; 12394 12395 default: 12396 return (NULL); 12397 } 12398 ptr += hdrlen; 12399 } 12400 return (NULL); 12401 } 12402 12403 /* 12404 * Called for source-routed packets originating on this node. 
12405 * Manipulates the original routing header by moving every entry up 12406 * one slot, placing the first entry in the v6 header's v6_dst field, 12407 * and placing the ultimate destination in the routing header's last 12408 * slot. 12409 * 12410 * Returns the checksum diference between the ultimate destination 12411 * (last hop in the routing header when the packet is sent) and 12412 * the first hop (ip6_dst when the packet is sent) 12413 */ 12414 /* ARGSUSED2 */ 12415 uint32_t 12416 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 12417 { 12418 uint_t numaddr; 12419 uint_t i; 12420 in6_addr_t *addrptr; 12421 in6_addr_t tmp; 12422 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 12423 uint32_t cksm; 12424 uint32_t addrsum = 0; 12425 uint16_t *ptr; 12426 12427 /* 12428 * Perform any processing needed for source routing. 12429 * We know that all extension headers will be in the same mblk 12430 * as the IPv6 header. 12431 */ 12432 12433 /* 12434 * If no segments left in header, or the header length field is zero, 12435 * don't move hop addresses around; 12436 * Checksum difference is zero. 12437 */ 12438 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 12439 return (0); 12440 12441 ptr = (uint16_t *)&ip6h->ip6_dst; 12442 cksm = 0; 12443 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12444 cksm += ptr[i]; 12445 } 12446 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12447 12448 /* 12449 * Here's where the fun begins - we have to 12450 * move all addresses up one spot, take the 12451 * first hop and make it our first ip6_dst, 12452 * and place the ultimate destination in the 12453 * newly-opened last slot. 
12454 */ 12455 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 12456 numaddr = rthdr->ip6r0_len / 2; 12457 tmp = *addrptr; 12458 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 12459 *addrptr = addrptr[1]; 12460 } 12461 *addrptr = ip6h->ip6_dst; 12462 ip6h->ip6_dst = tmp; 12463 12464 /* 12465 * From the checksummed ultimate destination subtract the checksummed 12466 * current ip6_dst (the first hop address). Return that number. 12467 * (In the v4 case, the second part of this is done in each routine 12468 * that calls ip_massage_options(). We do it all in this one place 12469 * for v6). 12470 */ 12471 ptr = (uint16_t *)&ip6h->ip6_dst; 12472 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 12473 addrsum += ptr[i]; 12474 } 12475 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 12476 if ((int)cksm < 0) 12477 cksm--; 12478 cksm = (cksm & 0xFFFF) + (cksm >> 16); 12479 12480 return (cksm); 12481 } 12482 12483 /* 12484 * Propagate a multicast group membership operation (join/leave) (*fn) on 12485 * all interfaces crossed by the related multirt routes. 12486 * The call is considered successful if the operation succeeds 12487 * on at least one interface. 12488 * The function is called if the destination address in the packet to send 12489 * is multirouted. 
12490 */ 12491 int 12492 ip_multirt_apply_membership_v6(int (*fn)(conn_t *, boolean_t, 12493 const in6_addr_t *, int, mcast_record_t, const in6_addr_t *, mblk_t *), 12494 ire_t *ire, conn_t *connp, boolean_t checkonly, const in6_addr_t *v6grp, 12495 mcast_record_t fmode, const in6_addr_t *v6src, mblk_t *first_mp) 12496 { 12497 ire_t *ire_gw; 12498 irb_t *irb; 12499 int index, error = 0; 12500 opt_restart_t *or; 12501 ip_stack_t *ipst = ire->ire_ipst; 12502 12503 irb = ire->ire_bucket; 12504 ASSERT(irb != NULL); 12505 12506 ASSERT(DB_TYPE(first_mp) == M_CTL); 12507 or = (opt_restart_t *)first_mp->b_rptr; 12508 12509 IRB_REFHOLD(irb); 12510 for (; ire != NULL; ire = ire->ire_next) { 12511 if ((ire->ire_flags & RTF_MULTIRT) == 0) 12512 continue; 12513 if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6grp)) 12514 continue; 12515 12516 ire_gw = ire_ftable_lookup_v6(&ire->ire_gateway_addr_v6, 0, 0, 12517 IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, NULL, 12518 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE, ipst); 12519 /* No resolver exists for the gateway; skip this ire. */ 12520 if (ire_gw == NULL) 12521 continue; 12522 index = ire_gw->ire_ipif->ipif_ill->ill_phyint->phyint_ifindex; 12523 /* 12524 * A resolver exists: we can get the interface on which we have 12525 * to apply the operation. 12526 */ 12527 error = fn(connp, checkonly, v6grp, index, fmode, v6src, 12528 first_mp); 12529 if (error == 0) 12530 or->or_private = CGTP_MCAST_SUCCESS; 12531 12532 if (ip_debug > 0) { 12533 ulong_t off; 12534 char *ksym; 12535 12536 ksym = kobj_getsymname((uintptr_t)fn, &off); 12537 ip2dbg(("ip_multirt_apply_membership_v6: " 12538 "called %s, multirt group 0x%08x via itf 0x%08x, " 12539 "error %d [success %u]\n", 12540 ksym ? 
ksym : "?", 12541 ntohl(V4_PART_OF_V6((*v6grp))), 12542 ntohl(V4_PART_OF_V6(ire_gw->ire_src_addr_v6)), 12543 error, or->or_private)); 12544 } 12545 12546 ire_refrele(ire_gw); 12547 if (error == EINPROGRESS) { 12548 IRB_REFRELE(irb); 12549 return (error); 12550 } 12551 } 12552 IRB_REFRELE(irb); 12553 /* 12554 * Consider the call as successful if we succeeded on at least 12555 * one interface. Otherwise, return the last encountered error. 12556 */ 12557 return (or->or_private == CGTP_MCAST_SUCCESS ? 0 : error); 12558 } 12559 12560 void 12561 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 12562 { 12563 kstat_t *ksp; 12564 12565 ip6_stat_t template = { 12566 { "ip6_udp_fast_path", KSTAT_DATA_UINT64 }, 12567 { "ip6_udp_slow_path", KSTAT_DATA_UINT64 }, 12568 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 12569 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 12570 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 12571 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 12572 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12573 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12574 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12575 { "ip6_tcp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12576 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 12577 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 12578 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 12579 { "ip6_udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 12580 { "ip6_frag_mdt_pkt_out", KSTAT_DATA_UINT64 }, 12581 { "ip6_frag_mdt_discarded", KSTAT_DATA_UINT64 }, 12582 { "ip6_frag_mdt_allocfail", KSTAT_DATA_UINT64 }, 12583 { "ip6_frag_mdt_addpdescfail", KSTAT_DATA_UINT64 }, 12584 { "ip6_frag_mdt_allocd", KSTAT_DATA_UINT64 }, 12585 }; 12586 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 12587 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 12588 KSTAT_FLAG_VIRTUAL, stackid); 12589 12590 if (ksp == NULL) 12591 return (NULL); 12592 12593 bcopy(&template, ip6_statisticsp, sizeof 
(template)); 12594 ksp->ks_data = (void *)ip6_statisticsp; 12595 ksp->ks_private = (void *)(uintptr_t)stackid; 12596 12597 kstat_install(ksp); 12598 return (ksp); 12599 } 12600 12601 void 12602 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 12603 { 12604 if (ksp != NULL) { 12605 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 12606 kstat_delete_netstack(ksp, stackid); 12607 } 12608 } 12609 12610 /* 12611 * The following two functions set and get the value for the 12612 * IPV6_SRC_PREFERENCES socket option. 12613 */ 12614 int 12615 ip6_set_src_preferences(conn_t *connp, uint32_t prefs) 12616 { 12617 /* 12618 * We only support preferences that are covered by 12619 * IPV6_PREFER_SRC_MASK. 12620 */ 12621 if (prefs & ~IPV6_PREFER_SRC_MASK) 12622 return (EINVAL); 12623 12624 /* 12625 * Look for conflicting preferences or default preferences. If 12626 * both bits of a related pair are clear, the application wants the 12627 * system's default value for that pair. Both bits in a pair can't 12628 * be set. 
12629 */ 12630 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 12631 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 12632 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 12633 IPV6_PREFER_SRC_MIPMASK) { 12634 return (EINVAL); 12635 } 12636 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 12637 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 12638 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 12639 IPV6_PREFER_SRC_TMPMASK) { 12640 return (EINVAL); 12641 } 12642 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 12643 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 12644 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 12645 IPV6_PREFER_SRC_CGAMASK) { 12646 return (EINVAL); 12647 } 12648 12649 connp->conn_src_preferences = prefs; 12650 return (0); 12651 } 12652 12653 size_t 12654 ip6_get_src_preferences(conn_t *connp, uint32_t *val) 12655 { 12656 *val = connp->conn_src_preferences; 12657 return (sizeof (connp->conn_src_preferences)); 12658 } 12659 12660 int 12661 ip6_set_pktinfo(cred_t *cr, conn_t *connp, struct in6_pktinfo *pkti) 12662 { 12663 ire_t *ire; 12664 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12665 12666 /* 12667 * Verify the source address and ifindex. Privileged users can use 12668 * any source address. For ancillary data the source address is 12669 * checked in ip_wput_v6. 
12670 */ 12671 if (pkti->ipi6_ifindex != 0) { 12672 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 12673 if (!phyint_exists(pkti->ipi6_ifindex, ipst)) { 12674 rw_exit(&ipst->ips_ill_g_lock); 12675 return (ENXIO); 12676 } 12677 rw_exit(&ipst->ips_ill_g_lock); 12678 } 12679 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) && 12680 secpolicy_net_rawaccess(cr) != 0) { 12681 ire = ire_route_lookup_v6(&pkti->ipi6_addr, 0, 0, 12682 (IRE_LOCAL|IRE_LOOPBACK), NULL, NULL, 12683 connp->conn_zoneid, NULL, MATCH_IRE_TYPE, ipst); 12684 if (ire != NULL) 12685 ire_refrele(ire); 12686 else 12687 return (ENXIO); 12688 } 12689 return (0); 12690 } 12691 12692 /* 12693 * Get the size of the IP options (including the IP headers size) 12694 * without including the AH header's size. If till_ah is B_FALSE, 12695 * and if AH header is present, dest options beyond AH header will 12696 * also be included in the returned size. 12697 */ 12698 int 12699 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 12700 { 12701 ip6_t *ip6h; 12702 uint8_t nexthdr; 12703 uint8_t *whereptr; 12704 ip6_hbh_t *hbhhdr; 12705 ip6_dest_t *dsthdr; 12706 ip6_rthdr_t *rthdr; 12707 int ehdrlen; 12708 int size; 12709 ah_t *ah; 12710 12711 ip6h = (ip6_t *)mp->b_rptr; 12712 size = IPV6_HDR_LEN; 12713 nexthdr = ip6h->ip6_nxt; 12714 whereptr = (uint8_t *)&ip6h[1]; 12715 for (;;) { 12716 /* Assume IP has already stripped it */ 12717 ASSERT(nexthdr != IPPROTO_FRAGMENT && nexthdr != IPPROTO_RAW); 12718 switch (nexthdr) { 12719 case IPPROTO_HOPOPTS: 12720 hbhhdr = (ip6_hbh_t *)whereptr; 12721 nexthdr = hbhhdr->ip6h_nxt; 12722 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 12723 break; 12724 case IPPROTO_DSTOPTS: 12725 dsthdr = (ip6_dest_t *)whereptr; 12726 nexthdr = dsthdr->ip6d_nxt; 12727 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12728 break; 12729 case IPPROTO_ROUTING: 12730 rthdr = (ip6_rthdr_t *)whereptr; 12731 nexthdr = rthdr->ip6r_nxt; 12732 ehdrlen = 8 * (rthdr->ip6r_len + 1); 12733 break; 12734 default : 12735 if (till_ah) { 12736 
ASSERT(nexthdr == IPPROTO_AH); 12737 return (size); 12738 } 12739 /* 12740 * If we don't have a AH header to traverse, 12741 * return now. This happens normally for 12742 * outbound datagrams where we have not inserted 12743 * the AH header. 12744 */ 12745 if (nexthdr != IPPROTO_AH) { 12746 return (size); 12747 } 12748 12749 /* 12750 * We don't include the AH header's size 12751 * to be symmetrical with other cases where 12752 * we either don't have a AH header (outbound) 12753 * or peek into the AH header yet (inbound and 12754 * not pulled up yet). 12755 */ 12756 ah = (ah_t *)whereptr; 12757 nexthdr = ah->ah_nexthdr; 12758 ehdrlen = (ah->ah_length << 2) + 8; 12759 12760 if (nexthdr == IPPROTO_DSTOPTS) { 12761 if (whereptr + ehdrlen >= mp->b_wptr) { 12762 /* 12763 * The destination options header 12764 * is not part of the first mblk. 12765 */ 12766 whereptr = mp->b_cont->b_rptr; 12767 } else { 12768 whereptr += ehdrlen; 12769 } 12770 12771 dsthdr = (ip6_dest_t *)whereptr; 12772 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 12773 size += ehdrlen; 12774 } 12775 return (size); 12776 } 12777 whereptr += ehdrlen; 12778 size += ehdrlen; 12779 } 12780 } 12781 12782 /* 12783 * Utility routine that checks if `v6srcp' is a valid address on underlying 12784 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 12785 * associated with `v6srcp' on success. NOTE: if this is not called from 12786 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 12787 * group during or after this lookup. 
12788 */ 12789 static boolean_t 12790 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 12791 { 12792 ipif_t *ipif; 12793 12794 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 12795 if (ipif != NULL) { 12796 if (ipifp != NULL) 12797 *ipifp = ipif; 12798 else 12799 ipif_refrele(ipif); 12800 return (B_TRUE); 12801 } 12802 12803 if (ip_debug > 2) { 12804 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 12805 "src %s\n", AF_INET6, v6srcp); 12806 } 12807 return (B_FALSE); 12808 } 12809