1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/policy.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/iptun/iptun_impl.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/rawip_impl.h> 98 #include <inet/rts_impl.h> 99 #include <sys/squeue_impl.h> 100 #include <sys/squeue.h> 101 102 #include <sys/tsol/label.h> 103 #include <sys/tsol/tnet.h> 104 105 /* Temporary; for CR 6451644 
work-around */
#include <sys/ethernet.h>

/*
 * Naming conventions:
 *      These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 * IPv6 functions will end with _v6 in the ip module.
 * IPv6 functions will end with _ipv6 in the transport modules.
 * IPv6 macros:
 *	Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *	Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *	And then there are ..V4_PART_OF_V6.
 *	The intent is that macros in the ip module end with _V6.
 * IPv6 global variables will start with ipv6_
 * IPv6 structures will start with ipv6
 * IPv6 defined constants should start with IPV6_
 *	(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
130 */ 131 uchar_t ip6opt_ls; 132 133 const in6_addr_t ipv6_all_ones = 134 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 135 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 136 137 #ifdef _BIG_ENDIAN 138 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 139 #else /* _BIG_ENDIAN */ 140 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 141 #endif /* _BIG_ENDIAN */ 142 143 #ifdef _BIG_ENDIAN 144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 145 #else /* _BIG_ENDIAN */ 146 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 147 #endif /* _BIG_ENDIAN */ 148 149 #ifdef _BIG_ENDIAN 150 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 151 #else /* _BIG_ENDIAN */ 152 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 153 #endif /* _BIG_ENDIAN */ 154 155 #ifdef _BIG_ENDIAN 156 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 157 #else /* _BIG_ENDIAN */ 158 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 159 #endif /* _BIG_ENDIAN */ 160 161 #ifdef _BIG_ENDIAN 162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 163 #else /* _BIG_ENDIAN */ 164 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 165 #endif /* _BIG_ENDIAN */ 166 167 #ifdef _BIG_ENDIAN 168 const in6_addr_t ipv6_solicited_node_mcast = 169 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 170 #else /* _BIG_ENDIAN */ 171 const in6_addr_t ipv6_solicited_node_mcast = 172 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 173 #endif /* _BIG_ENDIAN */ 174 175 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *); 176 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *); 177 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *, 178 ip_recv_attr_t *); 179 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *, 180 ip_recv_attr_t *); 181 static void 
icmp_send_redirect_v6(mblk_t *, in6_addr_t *, 182 in6_addr_t *, ip_recv_attr_t *); 183 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *, 184 ip_recv_attr_t *); 185 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 186 187 /* 188 * icmp_inbound_v6 deals with ICMP messages that are handled by IP. 189 * If the ICMP message is consumed by IP, i.e., it should not be delivered 190 * to any IPPROTO_ICMP raw sockets, then it returns NULL. 191 * Likewise, if the ICMP error is misformed (too short, etc), then it 192 * returns NULL. The caller uses this to determine whether or not to send 193 * to raw sockets. 194 * 195 * All error messages are passed to the matching transport stream. 196 * 197 * See comment for icmp_inbound_v4() on how IPsec is handled. 198 */ 199 mblk_t * 200 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira) 201 { 202 icmp6_t *icmp6; 203 ip6_t *ip6h; /* Outer header */ 204 int ip_hdr_length; /* Outer header length */ 205 boolean_t interested; 206 ill_t *ill = ira->ira_ill; 207 ip_stack_t *ipst = ill->ill_ipst; 208 mblk_t *mp_ret = NULL; 209 210 ip6h = (ip6_t *)mp->b_rptr; 211 212 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 213 214 /* Check for Martian packets */ 215 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 216 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 217 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill); 218 freemsg(mp); 219 return (NULL); 220 } 221 222 /* Make sure ira_l2src is set for ndp_input */ 223 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 224 ip_setl2src(mp, ira, ira->ira_rill); 225 226 ip_hdr_length = ira->ira_ip_hdr_length; 227 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 228 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 229 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 230 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 231 freemsg(mp); 232 return (NULL); 233 } 234 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 235 if (ip6h == NULL) { 236 
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 237 freemsg(mp); 238 return (NULL); 239 } 240 } 241 242 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 243 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6); 244 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 245 icmp6->icmp6_code)); 246 247 /* 248 * We will set "interested" to "true" if we should pass a copy to 249 * the transport i.e., if it is an error message. 250 */ 251 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 252 253 switch (icmp6->icmp6_type) { 254 case ICMP6_DST_UNREACH: 255 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 256 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 257 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 258 break; 259 260 case ICMP6_TIME_EXCEEDED: 261 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 262 break; 263 264 case ICMP6_PARAM_PROB: 265 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 266 break; 267 268 case ICMP6_PACKET_TOO_BIG: 269 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs); 270 break; 271 272 case ICMP6_ECHO_REQUEST: 273 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 274 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 275 !ipst->ips_ipv6_resp_echo_mcast) 276 break; 277 278 /* 279 * We must have exclusive use of the mblk to convert it to 280 * a response. 281 * If not, we copy it. 
282 */ 283 if (mp->b_datap->db_ref > 1) { 284 mblk_t *mp1; 285 286 mp1 = copymsg(mp); 287 if (mp1 == NULL) { 288 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 289 ip_drop_input("ipIfStatsInDiscards - copymsg", 290 mp, ill); 291 freemsg(mp); 292 return (NULL); 293 } 294 freemsg(mp); 295 mp = mp1; 296 ip6h = (ip6_t *)mp->b_rptr; 297 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 298 } 299 300 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 301 icmp_send_reply_v6(mp, ip6h, icmp6, ira); 302 return (NULL); 303 304 case ICMP6_ECHO_REPLY: 305 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 306 break; 307 308 case ND_ROUTER_SOLICIT: 309 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 310 break; 311 312 case ND_ROUTER_ADVERT: 313 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 314 break; 315 316 case ND_NEIGHBOR_SOLICIT: 317 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 318 ndp_input(mp, ira); 319 return (NULL); 320 321 case ND_NEIGHBOR_ADVERT: 322 BUMP_MIB(ill->ill_icmp6_mib, 323 ipv6IfIcmpInNeighborAdvertisements); 324 ndp_input(mp, ira); 325 return (NULL); 326 327 case ND_REDIRECT: 328 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 329 330 if (ipst->ips_ipv6_ignore_redirect) 331 break; 332 333 /* We now allow a RAW socket to receive this. */ 334 interested = B_TRUE; 335 break; 336 337 /* 338 * The next three icmp messages will be handled by MLD. 339 * Pass all valid MLD packets up to any process(es) 340 * listening on a raw ICMP socket. 341 */ 342 case MLD_LISTENER_QUERY: 343 case MLD_LISTENER_REPORT: 344 case MLD_LISTENER_REDUCTION: 345 mp = mld_input(mp, ira); 346 return (mp); 347 default: 348 break; 349 } 350 /* 351 * See if there is an ICMP client to avoid an extra copymsg/freemsg 352 * if there isn't one. 353 */ 354 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) { 355 /* If there is an ICMP client and we want one too, copy it. 
*/ 356 357 if (!interested) { 358 /* Caller will deliver to RAW sockets */ 359 return (mp); 360 } 361 mp_ret = copymsg(mp); 362 if (mp_ret == NULL) { 363 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 364 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 365 } 366 } else if (!interested) { 367 /* Neither we nor raw sockets are interested. Drop packet now */ 368 freemsg(mp); 369 return (NULL); 370 } 371 372 /* 373 * ICMP error or redirect packet. Make sure we have enough of 374 * the header and that db_ref == 1 since we might end up modifying 375 * the packet. 376 */ 377 if (mp->b_cont != NULL) { 378 if (ip_pullup(mp, -1, ira) == NULL) { 379 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 380 ip_drop_input("ipIfStatsInDiscards - ip_pullup", 381 mp, ill); 382 freemsg(mp); 383 return (mp_ret); 384 } 385 } 386 387 if (mp->b_datap->db_ref > 1) { 388 mblk_t *mp1; 389 390 mp1 = copymsg(mp); 391 if (mp1 == NULL) { 392 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 393 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 394 freemsg(mp); 395 return (mp_ret); 396 } 397 freemsg(mp); 398 mp = mp1; 399 } 400 401 /* 402 * In case mp has changed, verify the message before any further 403 * processes. 404 */ 405 ip6h = (ip6_t *)mp->b_rptr; 406 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 407 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 408 freemsg(mp); 409 return (mp_ret); 410 } 411 412 switch (icmp6->icmp6_type) { 413 case ND_REDIRECT: 414 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira); 415 break; 416 case ICMP6_PACKET_TOO_BIG: 417 /* Update DCE and adjust MTU is icmp header if needed */ 418 icmp_inbound_too_big_v6(icmp6, ira); 419 /* FALLTHRU */ 420 default: 421 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 422 break; 423 } 424 425 return (mp_ret); 426 } 427 428 /* 429 * Send an ICMP echo reply. 430 * The caller has already updated the payload part of the packet. 
431 * We handle the ICMP checksum, IP source address selection and feed 432 * the packet into ip_output_simple. 433 */ 434 static void 435 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 436 ip_recv_attr_t *ira) 437 { 438 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 439 ill_t *ill = ira->ira_ill; 440 ip_stack_t *ipst = ill->ill_ipst; 441 ip_xmit_attr_t ixas; 442 in6_addr_t origsrc; 443 444 /* 445 * Remove any extension headers (do not reverse a source route) 446 * and clear the flow id (keep traffic class for now). 447 */ 448 if (ip_hdr_length != IPV6_HDR_LEN) { 449 int i; 450 451 for (i = 0; i < IPV6_HDR_LEN; i++) { 452 mp->b_rptr[ip_hdr_length - i - 1] = 453 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 454 } 455 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 456 ip6h = (ip6_t *)mp->b_rptr; 457 ip6h->ip6_nxt = IPPROTO_ICMPV6; 458 i = ntohs(ip6h->ip6_plen); 459 i -= (ip_hdr_length - IPV6_HDR_LEN); 460 ip6h->ip6_plen = htons(i); 461 ip_hdr_length = IPV6_HDR_LEN; 462 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 463 } 464 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 465 466 /* Reverse the source and destination addresses. */ 467 origsrc = ip6h->ip6_src; 468 ip6h->ip6_src = ip6h->ip6_dst; 469 ip6h->ip6_dst = origsrc; 470 471 /* set the hop limit */ 472 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 473 474 /* 475 * Prepare for checksum by putting icmp length in the icmp 476 * checksum field. 
The checksum is calculated in ip_output 477 */ 478 icmp6->icmp6_cksum = ip6h->ip6_plen; 479 480 bzero(&ixas, sizeof (ixas)); 481 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 482 ixas.ixa_zoneid = ira->ira_zoneid; 483 ixas.ixa_cred = kcred; 484 ixas.ixa_cpid = NOPID; 485 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 486 ixas.ixa_ifindex = 0; 487 ixas.ixa_ipst = ipst; 488 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 489 490 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 491 /* 492 * This packet should go out the same way as it 493 * came in i.e in clear, independent of the IPsec 494 * policy for transmitting packets. 495 */ 496 ixas.ixa_flags |= IXAF_NO_IPSEC; 497 } else { 498 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 500 /* Note: mp already consumed and ip_drop_packet done */ 501 return; 502 } 503 } 504 505 /* Was the destination (now source) link-local? Send out same group */ 506 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 507 ixas.ixa_flags |= IXAF_SCOPEID_SET; 508 if (IS_UNDER_IPMP(ill)) 509 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 510 else 511 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 512 } 513 514 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 515 /* 516 * Not one or our addresses (IRE_LOCALs), thus we let 517 * ip_output_simple pick the source. 518 */ 519 ip6h->ip6_src = ipv6_all_zeros; 520 ixas.ixa_flags |= IXAF_SET_SOURCE; 521 } 522 523 /* Should we send using dce_pmtu? */ 524 if (ipst->ips_ipv6_icmp_return_pmtu) 525 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 526 527 (void) ip_output_simple(mp, &ixas); 528 ixa_cleanup(&ixas); 529 530 } 531 532 /* 533 * Verify the ICMP messages for either for ICMP error or redirect packet. 534 * The caller should have fully pulled up the message. If it's a redirect 535 * packet, only basic checks on IP header will be done; otherwise, verify 536 * the packet by looking at the included ULP header. 
537 * 538 * Called before icmp_inbound_error_fanout_v6 is called. 539 */ 540 static boolean_t 541 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 542 { 543 ill_t *ill = ira->ira_ill; 544 uint16_t hdr_length; 545 uint8_t *nexthdrp; 546 uint8_t nexthdr; 547 ip_stack_t *ipst = ill->ill_ipst; 548 conn_t *connp; 549 ip6_t *ip6h; /* Inner header */ 550 551 ip6h = (ip6_t *)&icmp6[1]; 552 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 553 goto truncated; 554 555 if (icmp6->icmp6_type == ND_REDIRECT) { 556 hdr_length = sizeof (nd_redirect_t); 557 } else { 558 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 559 goto discard_pkt; 560 hdr_length = IPV6_HDR_LEN; 561 } 562 563 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 564 goto truncated; 565 566 /* 567 * Stop here for ICMP_REDIRECT. 568 */ 569 if (icmp6->icmp6_type == ND_REDIRECT) 570 return (B_TRUE); 571 572 /* 573 * ICMP errors only. 574 */ 575 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 576 goto discard_pkt; 577 nexthdr = *nexthdrp; 578 579 /* Try to pass the ICMP message to clients who need it */ 580 switch (nexthdr) { 581 case IPPROTO_UDP: 582 /* 583 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 584 * transport header. 585 */ 586 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 587 mp->b_wptr) 588 goto truncated; 589 break; 590 case IPPROTO_TCP: { 591 tcpha_t *tcpha; 592 593 /* 594 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 595 * transport header. 596 */ 597 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 598 mp->b_wptr) 599 goto truncated; 600 601 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 602 /* 603 * With IPMP we need to match across group, which we do 604 * since we have the upper ill from ira_ill. 
605 */ 606 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 607 ill->ill_phyint->phyint_ifindex, ipst); 608 if (connp == NULL) 609 goto discard_pkt; 610 611 if ((connp->conn_verifyicmp != NULL) && 612 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 613 CONN_DEC_REF(connp); 614 goto discard_pkt; 615 } 616 CONN_DEC_REF(connp); 617 break; 618 } 619 case IPPROTO_SCTP: 620 /* 621 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 622 * transport header. 623 */ 624 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 625 mp->b_wptr) 626 goto truncated; 627 break; 628 case IPPROTO_ESP: 629 case IPPROTO_AH: 630 break; 631 case IPPROTO_ENCAP: 632 case IPPROTO_IPV6: { 633 /* Look for self-encapsulated packets that caused an error */ 634 ip6_t *in_ip6h; 635 636 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 637 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 638 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 639 goto truncated; 640 break; 641 } 642 default: 643 break; 644 } 645 646 return (B_TRUE); 647 648 discard_pkt: 649 /* Bogus ICMP error. */ 650 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 651 return (B_FALSE); 652 653 truncated: 654 /* We pulled up everthing already. Must be truncated */ 655 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 656 return (B_FALSE); 657 } 658 659 /* 660 * Process received IPv6 ICMP Packet too big. 661 * The caller is responsible for validating the packet before passing it in 662 * and also to fanout the ICMP error to any matching transport conns. Assumes 663 * the message has been fully pulled up. 664 * 665 * Before getting here, the caller has called icmp_inbound_verify_v6() 666 * that should have verified with ULP to prevent undoing the changes we're 667 * going to make to DCE. For example, TCP might have verified that the packet 668 * which generated error is in the send window. 
669 * 670 * In some cases modified this MTU in the ICMP header packet; the caller 671 * should pass to the matching ULP after this returns. 672 */ 673 static void 674 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira) 675 { 676 uint32_t mtu; 677 dce_t *dce; 678 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */ 679 ip_stack_t *ipst = ill->ill_ipst; 680 int old_max_frag; 681 in6_addr_t final_dst; 682 ip6_t *ip6h; /* Inner IP header */ 683 684 /* Caller has already pulled up everything. */ 685 ip6h = (ip6_t *)&icmp6[1]; 686 final_dst = ip_get_dst_v6(ip6h, NULL, NULL); 687 688 /* 689 * For link local destinations matching simply on address is not 690 * sufficient. Same link local addresses for different ILL's is 691 * possible. 692 */ 693 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) { 694 dce = dce_lookup_and_add_v6(&final_dst, 695 ill->ill_phyint->phyint_ifindex, ipst); 696 } else { 697 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst); 698 } 699 if (dce == NULL) { 700 /* Couldn't add a unique one - ENOMEM */ 701 if (ip_debug > 2) { 702 /* ip1dbg */ 703 pr_addr_dbg("icmp_inbound_too_big_v6:" 704 "no dce for dst %s\n", AF_INET6, 705 &final_dst); 706 } 707 return; 708 } 709 710 mtu = ntohl(icmp6->icmp6_mtu); 711 712 mutex_enter(&dce->dce_lock); 713 if (dce->dce_flags & DCEF_PMTU) 714 old_max_frag = dce->dce_pmtu; 715 else 716 old_max_frag = ill->ill_mtu; 717 718 if (mtu < IPV6_MIN_MTU) { 719 ip1dbg(("Received mtu less than IPv6 " 720 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 721 mtu = IPV6_MIN_MTU; 722 /* 723 * If an mtu less than IPv6 min mtu is received, 724 * we must include a fragment header in 725 * subsequent packets. 726 */ 727 dce->dce_flags |= DCEF_TOO_SMALL_PMTU; 728 } else { 729 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU; 730 } 731 ip1dbg(("Received mtu from router: %d\n", mtu)); 732 dce->dce_pmtu = MIN(old_max_frag, mtu); 733 734 /* Prepare to send the new max frag size for the ULP. 
*/ 735 if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) { 736 /* 737 * If we need a fragment header in every packet 738 * (above case or multirouting), make sure the 739 * ULP takes it into account when computing the 740 * payload size. 741 */ 742 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t)); 743 } else { 744 icmp6->icmp6_mtu = htonl(dce->dce_pmtu); 745 } 746 /* We now have a PMTU for sure */ 747 dce->dce_flags |= DCEF_PMTU; 748 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 749 mutex_exit(&dce->dce_lock); 750 /* 751 * After dropping the lock the new value is visible to everyone. 752 * Then we bump the generation number so any cached values reinspect 753 * the dce_t. 754 */ 755 dce_increment_generation(dce); 756 dce_refrele(dce); 757 } 758 759 /* 760 * Fanout received ICMPv6 error packets to the transports. 761 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 762 * 763 * The caller must have called icmp_inbound_verify_v6. 764 */ 765 void 766 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 767 { 768 uint16_t *up; /* Pointer to ports in ULP header */ 769 uint32_t ports; /* reversed ports for fanout */ 770 ip6_t rip6h; /* With reversed addresses */ 771 ip6_t *ip6h; /* Inner IP header */ 772 uint16_t hdr_length; /* Inner IP header length */ 773 uint8_t *nexthdrp; 774 uint8_t nexthdr; 775 tcpha_t *tcpha; 776 conn_t *connp; 777 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */ 778 ip_stack_t *ipst = ill->ill_ipst; 779 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 780 781 /* Caller has already pulled up everything. 
*/ 782 ip6h = (ip6_t *)&icmp6[1]; 783 ASSERT(mp->b_cont == NULL); 784 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 785 786 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 787 goto drop_pkt; 788 nexthdr = *nexthdrp; 789 ira->ira_protocol = nexthdr; 790 791 /* 792 * We need a separate IP header with the source and destination 793 * addresses reversed to do fanout/classification because the ip6h in 794 * the ICMPv6 error is in the form we sent it out. 795 */ 796 rip6h.ip6_src = ip6h->ip6_dst; 797 rip6h.ip6_dst = ip6h->ip6_src; 798 rip6h.ip6_nxt = nexthdr; 799 800 /* Try to pass the ICMP message to clients who need it */ 801 switch (nexthdr) { 802 case IPPROTO_UDP: { 803 /* Attempt to find a client stream based on port. */ 804 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 805 806 /* Note that we send error to all matches. */ 807 ira->ira_flags |= IRAF_ICMP_ERROR; 808 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira); 809 ira->ira_flags &= ~IRAF_ICMP_ERROR; 810 return; 811 } 812 case IPPROTO_TCP: { 813 /* 814 * Attempt to find a client stream based on port. 815 * Note that we do a reverse lookup since the header is 816 * in the form we sent it out. 817 */ 818 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 819 /* 820 * With IPMP we need to match across group, which we do 821 * since we have the upper ill from ira_ill. 
822 */ 823 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 824 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 825 if (connp == NULL) { 826 goto drop_pkt; 827 } 828 829 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 830 (ira->ira_flags & IRAF_IPSEC_SECURE)) { 831 mp = ipsec_check_inbound_policy(mp, connp, 832 NULL, ip6h, ira); 833 if (mp == NULL) { 834 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 835 /* Note that mp is NULL */ 836 ip_drop_input("ipIfStatsInDiscards", mp, ill); 837 CONN_DEC_REF(connp); 838 return; 839 } 840 } 841 842 ira->ira_flags |= IRAF_ICMP_ERROR; 843 if (IPCL_IS_TCP(connp)) { 844 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 845 connp->conn_recvicmp, connp, ira, SQ_FILL, 846 SQTAG_TCP6_INPUT_ICMP_ERR); 847 } else { 848 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 849 ill_t *rill = ira->ira_rill; 850 851 ira->ira_ill = ira->ira_rill = NULL; 852 (connp->conn_recv)(connp, mp, NULL, ira); 853 CONN_DEC_REF(connp); 854 ira->ira_ill = ill; 855 ira->ira_rill = rill; 856 } 857 ira->ira_flags &= ~IRAF_ICMP_ERROR; 858 return; 859 860 } 861 case IPPROTO_SCTP: 862 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 863 /* Find a SCTP client stream for this packet. */ 864 ((uint16_t *)&ports)[0] = up[1]; 865 ((uint16_t *)&ports)[1] = up[0]; 866 867 ira->ira_flags |= IRAF_ICMP_ERROR; 868 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira); 869 ira->ira_flags &= ~IRAF_ICMP_ERROR; 870 return; 871 872 case IPPROTO_ESP: 873 case IPPROTO_AH: 874 if (!ipsec_loaded(ipss)) { 875 ip_proto_not_sup(mp, ira); 876 return; 877 } 878 879 if (nexthdr == IPPROTO_ESP) 880 mp = ipsecesp_icmp_error(mp, ira); 881 else 882 mp = ipsecah_icmp_error(mp, ira); 883 if (mp == NULL) 884 return; 885 886 /* Just in case ipsec didn't preserve the NULL b_cont */ 887 if (mp->b_cont != NULL) { 888 if (!pullupmsg(mp, -1)) 889 goto drop_pkt; 890 } 891 892 /* 893 * If succesful, the mp has been modified to not include 894 * the ESP/AH header so we can fanout to the ULP's icmp 895 * error handler. 
896 */ 897 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN) 898 goto drop_pkt; 899 900 ip6h = (ip6_t *)mp->b_rptr; 901 /* Don't call hdr_length_v6() unless you have to. */ 902 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 903 hdr_length = ip_hdr_length_v6(mp, ip6h); 904 else 905 hdr_length = IPV6_HDR_LEN; 906 907 /* Verify the modified message before any further processes. */ 908 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 909 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 910 freemsg(mp); 911 return; 912 } 913 914 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 915 return; 916 917 case IPPROTO_IPV6: { 918 /* Look for self-encapsulated packets that caused an error */ 919 ip6_t *in_ip6h; 920 921 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 922 923 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) && 924 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) { 925 /* 926 * Self-encapsulated case. As in the ipv4 case, 927 * we need to strip the 2nd IP header. Since mp 928 * is already pulled-up, we can simply bcopy 929 * the 3rd header + data over the 2nd header. 930 */ 931 uint16_t unused_len; 932 933 /* 934 * Make sure we don't do recursion more than once. 935 */ 936 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h, 937 &unused_len, &nexthdrp) || 938 *nexthdrp == IPPROTO_IPV6) { 939 goto drop_pkt; 940 } 941 942 /* 943 * Copy the 3rd header + remaining data on top 944 * of the 2nd header. 945 */ 946 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h); 947 948 /* 949 * Subtract length of the 2nd header. 950 */ 951 mp->b_wptr -= hdr_length; 952 953 ip6h = (ip6_t *)mp->b_rptr; 954 /* Don't call hdr_length_v6() unless you have to. */ 955 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 956 hdr_length = ip_hdr_length_v6(mp, ip6h); 957 else 958 hdr_length = IPV6_HDR_LEN; 959 960 /* 961 * Verify the modified message before any further 962 * processes. 
963 */ 964 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 965 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 966 freemsg(mp); 967 return; 968 } 969 970 /* 971 * Now recurse, and see what I _really_ should be 972 * doing here. 973 */ 974 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 975 return; 976 } 977 /* FALLTHRU */ 978 } 979 case IPPROTO_ENCAP: 980 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src, 981 &rip6h.ip6_dst, ipst)) != NULL) { 982 ira->ira_flags |= IRAF_ICMP_ERROR; 983 connp->conn_recvicmp(connp, mp, NULL, ira); 984 CONN_DEC_REF(connp); 985 ira->ira_flags &= ~IRAF_ICMP_ERROR; 986 return; 987 } 988 /* 989 * No IP tunnel is interested, fallthrough and see 990 * if a raw socket will want it. 991 */ 992 /* FALLTHRU */ 993 default: 994 ira->ira_flags |= IRAF_ICMP_ERROR; 995 ASSERT(ira->ira_protocol == nexthdr); 996 ip_fanout_proto_v6(mp, &rip6h, ira); 997 ira->ira_flags &= ~IRAF_ICMP_ERROR; 998 return; 999 } 1000 /* NOTREACHED */ 1001 drop_pkt: 1002 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1003 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1004 freemsg(mp); 1005 } 1006 1007 /* 1008 * Process received IPv6 ICMP Redirect messages. 1009 * Assumes the caller has verified that the headers are in the pulled up mblk. 1010 * Consumes mp. 
 */
/* ARGSUSED */
static void
icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
    ip_recv_attr_t *ira)
{
	ire_t		*ire, *nire;
	ire_t		*prev_ire = NULL;
	ire_t		*redir_ire;
	in6_addr_t	*src, *dst, *gateway;
	nd_opt_hdr_t	*opt;
	nce_t		*nce;
	int		ncec_flags = 0;
	int		err = 0;
	boolean_t	redirect_to_router = B_FALSE;
	int		len;
	int		optlen;
	/*
	 * ill is only used for MIB and drop accounting below; rill is the
	 * ill used for lookups and may be replaced by the IPMP ill.
	 */
	ill_t		*ill = ira->ira_rill;
	ill_t		*rill = ira->ira_rill;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
	 * and make it be the IPMP upper to avoid being confused by a packet
	 * addressed to a unicast address on a different ill.
	 *
	 * When rill is swapped here it differs from ira->ira_rill, which is
	 * how the exit path knows it has to call ill_refrele() on it.
	 */
	if (IS_UNDER_IPMP(rill)) {
		rill = ipmp_ill_hold_ipmp_ill(rill);
		if (rill == NULL) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
			    mp, ill);
			freemsg(mp);
			return;
		}
		ASSERT(rill != ira->ira_rill);
	}

	/* Bytes available from the start of the redirect header onwards */
	len = mp->b_wptr - (uchar_t *)rd;
	src = &ip6h->ip6_src;
	dst = &rd->nd_rd_dst;
	gateway = &rd->nd_rd_target;

	/*
	 * Verify if it is a valid redirect: link-local source, hop limit
	 * still at IPV6_MAX_HOPS, code 0, at least a full redirect header,
	 * and a destination that is neither v4-mapped nor multicast.
	 */
	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
	    (rd->nd_rd_code != 0) ||
	    (len < sizeof (nd_redirect_t)) ||
	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
	    (IN6_IS_ADDR_MULTICAST(dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
		goto fail_redirect;
	}

	/* The target must be link-local, or be the destination itself */
	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
		    mp, ill);
		goto fail_redirect;
	}

	/* Anything after the fixed redirect header is ND options */
	optlen = len - sizeof (nd_redirect_t);
	if (optlen != 0) {
		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
			    mp, ill);
			goto fail_redirect;
		}
	}

	/*
	 * target != destination means we are being pointed at a better
	 * router; target == destination means the destination is on-link.
	 */
	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
		redirect_to_router = B_TRUE;
		ncec_flags |= NCE_F_ISROUTER;
	} else {
		gateway = dst;	/* Add nce for dst */
	}


	/*
	 * Verify that the IP source address of the redirect is
	 * the same as the current first-hop router for the specified
	 * ICMP destination address.
	 * Also, Make sure we had a route for the dest in question and
	 * that route was pointing to the old gateway (the source of the
	 * redirect packet.)
	 * We do longest match and then compare ire_gateway_addr_v6 below.
	 */
	prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
	    ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);

	/*
	 * Check that
	 *	the redirect was not from ourselves
	 *	old gateway is still directly reachable
	 */
	if (prev_ire == NULL ||
	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
	    !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
		goto fail_redirect;
	}

	ASSERT(prev_ire->ire_ill != NULL);
	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
		ncec_flags |= NCE_F_NONUD;

	/*
	 * If the redirect carries a target link-layer address option, add
	 * (or refresh) a neighbor cache entry for the new first hop.
	 *
	 * NOTE(review): rill->ill_phys_addr_length bytes are consumed from
	 * the option without comparing against the option's own nd_opt_len
	 * here - confirm that ndp_verify_optlen() or
	 * nce_lookup_then_add_v6() guards against a short option.
	 */
	opt = (nd_opt_hdr_t *)&rd[1];
	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
	if (opt != NULL) {
		err = nce_lookup_then_add_v6(rill,
		    (uchar_t *)&opt[1],		/* Link layer address */
		    rill->ill_phys_addr_length,
		    gateway, ncec_flags, ND_STALE, &nce);
		switch (err) {
		case 0:
			nce_refrele(nce);
			break;
		case EEXIST:
			/*
			 * Check to see if link layer address has changed and
			 * process the ncec_state accordingly.
			 */
			nce_process(nce->nce_common,
			    (uchar_t *)&opt[1], 0, B_FALSE);
			nce_refrele(nce);
			break;
		default:
			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
			    err));
			goto fail_redirect;
		}
	}
	if (redirect_to_router) {
		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));

		/*
		 * Create a Route Association. This will allow us to remember
		 * a router told us to use the particular gateway.
		 */
		ire = ire_create_v6(
		    dst,
		    &ipv6_all_ones,		/* mask */
		    gateway,			/* gateway addr */
		    IRE_HOST,
		    prev_ire->ire_ill,
		    ALL_ZONES,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
		    NULL,
		    ipst);
	} else {
		ipif_t *ipif;
		in6_addr_t gw;

		/*
		 * Just create an on link entry, i.e. interface route.
		 * The gateway field is our link-local on the ill.
		 *
		 * The ipif list is walked under ill_lock and the address is
		 * copied out before the lock is dropped, so no ipif pointer
		 * is used after mutex_exit().
		 */
		mutex_enter(&rill->ill_lock);
		for (ipif = rill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
				break;
		}
		if (ipif == NULL) {
			/* We have no link-local address! */
			mutex_exit(&rill->ill_lock);
			goto fail_redirect;
		}
		gw = ipif->ipif_v6lcl_addr;
		mutex_exit(&rill->ill_lock);

		ire = ire_create_v6(
		    dst,				/* gateway == dst */
		    &ipv6_all_ones,			/* mask */
		    &gw,				/* gateway addr */
		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
		    prev_ire->ire_ill,
		    ALL_ZONES,
		    (RTF_DYNAMIC | RTF_HOST),
		    NULL,
		    ipst);
	}

	if (ire == NULL)
		goto fail_redirect;

	nire = ire_add(ire);
	/*
	 * Check if it was a duplicate entry. When ire_add() hands back a
	 * different IRE than the one we created, that entry is deleted and
	 * released here and the notification below is skipped.
	 */
	if (nire != NULL && nire != ire) {
		ASSERT(nire->ire_identical_ref > 1);
		ire_delete(nire);
		ire_refrele(nire);
		nire = NULL;
	}
	ire = nire;
	if (ire != NULL) {
		ire_refrele(ire);		/* Held in ire_add */

		/* tell routing sockets that we received a redirect */
		ip_rts_change_v6(RTM_REDIRECT,
		    &rd->nd_rd_dst,
		    &rd->nd_rd_target,
		    &ipv6_all_ones, 0, src,
		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);

		/*
		 * Delete any existing IRE_HOST type ires for this destination.
		 * This together with the added IRE has the effect of
		 * modifying an existing redirect.
		 * The lookup matches on the old gateway (src), and only an
		 * entry flagged RTF_DYNAMIC is removed.
		 */
		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
		    prev_ire->ire_ill, ALL_ZONES, NULL,
		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
		    NULL);

		if (redir_ire != NULL) {
			if (redir_ire->ire_flags & RTF_DYNAMIC)
				ire_delete(redir_ire);
			ire_refrele(redir_ire);
		}
	}

	/* Success: drop prev_ire here so the shared exit does not redo it */
	ire_refrele(prev_ire);
	prev_ire = NULL;

	/*
	 * Common exit for success and failure: release prev_ire if it is
	 * still held, free the message, and drop the IPMP ill hold if one
	 * was taken at the top.
	 */
fail_redirect:
	if (prev_ire != NULL)
		ire_refrele(prev_ire);
	freemsg(mp);
	if (rill != ira->ira_rill)
		ill_refrele(rill);
}

/*
 * Build and ship an IPv6 ICMP message using the packet data in mp,
 * and the ICMP header pointed to by "stuff". (May be called as
 * writer.)
1255 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1256 * verify that an icmp error packet can be sent. 1257 * 1258 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1259 * source address (see above function). 1260 */ 1261 static void 1262 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1263 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1264 { 1265 ip6_t *ip6h; 1266 in6_addr_t v6dst; 1267 size_t len_needed; 1268 size_t msg_len; 1269 mblk_t *mp1; 1270 icmp6_t *icmp6; 1271 in6_addr_t v6src; 1272 ill_t *ill = ira->ira_ill; 1273 ip_stack_t *ipst = ill->ill_ipst; 1274 ip_xmit_attr_t ixas; 1275 1276 ip6h = (ip6_t *)mp->b_rptr; 1277 1278 bzero(&ixas, sizeof (ixas)); 1279 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1280 ixas.ixa_zoneid = ira->ira_zoneid; 1281 ixas.ixa_ifindex = 0; 1282 ixas.ixa_ipst = ipst; 1283 ixas.ixa_cred = kcred; 1284 ixas.ixa_cpid = NOPID; 1285 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1286 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1287 1288 /* 1289 * If the source of the original packet was link-local, then 1290 * make sure we send on the same ill (group) as we received it on. 1291 */ 1292 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1293 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1294 if (IS_UNDER_IPMP(ill)) 1295 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1296 else 1297 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1298 } 1299 1300 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1301 /* 1302 * Apply IPsec based on how IPsec was applied to 1303 * the packet that had the error. 1304 * 1305 * If it was an outbound packet that caused the ICMP 1306 * error, then the caller will have setup the IRA 1307 * appropriately. 1308 */ 1309 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1310 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1311 /* Note: mp already consumed and ip_drop_packet done */ 1312 return; 1313 } 1314 } else { 1315 /* 1316 * This is in clear. 
The icmp message we are building 1317 * here should go out in clear, independent of our policy. 1318 */ 1319 ixas.ixa_flags |= IXAF_NO_IPSEC; 1320 } 1321 1322 /* 1323 * If the caller specified the source we use that. 1324 * Otherwise, if the packet was for one of our unicast addresses, make 1325 * sure we respond with that as the source. Otherwise 1326 * have ip_output_simple pick the source address. 1327 */ 1328 if (v6src_ptr != NULL) { 1329 v6src = *v6src_ptr; 1330 } else { 1331 ire_t *ire; 1332 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1333 1334 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1335 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1336 match_flags |= MATCH_IRE_ILL; 1337 1338 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1339 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1340 match_flags, 0, ipst, NULL); 1341 if (ire != NULL) { 1342 v6src = ip6h->ip6_dst; 1343 ire_refrele(ire); 1344 } else { 1345 v6src = ipv6_all_zeros; 1346 ixas.ixa_flags |= IXAF_SET_SOURCE; 1347 } 1348 } 1349 v6dst = ip6h->ip6_src; 1350 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1351 msg_len = msgdsize(mp); 1352 if (msg_len > len_needed) { 1353 if (!adjmsg(mp, len_needed - msg_len)) { 1354 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1355 freemsg(mp); 1356 return; 1357 } 1358 msg_len = len_needed; 1359 } 1360 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1361 if (mp1 == NULL) { 1362 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1363 freemsg(mp); 1364 return; 1365 } 1366 mp1->b_cont = mp; 1367 mp = mp1; 1368 1369 /* 1370 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1371 * node generates be accepted in peace by all on-host destinations. 1372 * If we do NOT assume that all on-host destinations trust 1373 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1374 * (Look for IXAF_TRUSTED_ICMP). 
1375 */ 1376 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1377 1378 ip6h = (ip6_t *)mp->b_rptr; 1379 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1380 1381 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1382 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1383 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1384 ip6h->ip6_dst = v6dst; 1385 ip6h->ip6_src = v6src; 1386 msg_len += IPV6_HDR_LEN + len; 1387 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1388 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1389 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1390 } 1391 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1392 icmp6 = (icmp6_t *)&ip6h[1]; 1393 bcopy(stuff, (char *)icmp6, len); 1394 /* 1395 * Prepare for checksum by putting icmp length in the icmp 1396 * checksum field. The checksum is calculated in ip_output_wire_v6. 1397 */ 1398 icmp6->icmp6_cksum = ip6h->ip6_plen; 1399 if (icmp6->icmp6_type == ND_REDIRECT) { 1400 ip6h->ip6_hops = IPV6_MAX_HOPS; 1401 } 1402 1403 (void) ip_output_simple(mp, &ixas); 1404 ixa_cleanup(&ixas); 1405 } 1406 1407 /* 1408 * Update the output mib when ICMPv6 packets are sent. 
1409 */ 1410 void 1411 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1412 { 1413 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1414 1415 switch (icmp6->icmp6_type) { 1416 case ICMP6_DST_UNREACH: 1417 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1418 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1420 break; 1421 1422 case ICMP6_TIME_EXCEEDED: 1423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1424 break; 1425 1426 case ICMP6_PARAM_PROB: 1427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1428 break; 1429 1430 case ICMP6_PACKET_TOO_BIG: 1431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1432 break; 1433 1434 case ICMP6_ECHO_REQUEST: 1435 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1436 break; 1437 1438 case ICMP6_ECHO_REPLY: 1439 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1440 break; 1441 1442 case ND_ROUTER_SOLICIT: 1443 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1444 break; 1445 1446 case ND_ROUTER_ADVERT: 1447 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1448 break; 1449 1450 case ND_NEIGHBOR_SOLICIT: 1451 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1452 break; 1453 1454 case ND_NEIGHBOR_ADVERT: 1455 BUMP_MIB(ill->ill_icmp6_mib, 1456 ipv6IfIcmpOutNeighborAdvertisements); 1457 break; 1458 1459 case ND_REDIRECT: 1460 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1461 break; 1462 1463 case MLD_LISTENER_QUERY: 1464 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1465 break; 1466 1467 case MLD_LISTENER_REPORT: 1468 case MLD_V2_LISTENER_REPORT: 1469 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1470 break; 1471 1472 case MLD_LISTENER_REDUCTION: 1473 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1474 break; 1475 } 1476 } 1477 1478 /* 1479 * Check if it is ok to send an ICMPv6 error packet in 1480 * response to the IP packet in mp. 
1481 * Free the message and return null if no 1482 * ICMP error packet should be sent. 1483 */ 1484 static mblk_t * 1485 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1486 { 1487 ill_t *ill = ira->ira_ill; 1488 ip_stack_t *ipst = ill->ill_ipst; 1489 boolean_t llbcast; 1490 ip6_t *ip6h; 1491 1492 if (!mp) 1493 return (NULL); 1494 1495 /* We view multicast and broadcast as the same.. */ 1496 llbcast = (ira->ira_flags & 1497 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1498 ip6h = (ip6_t *)mp->b_rptr; 1499 1500 /* Check if source address uniquely identifies the host */ 1501 1502 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1503 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1504 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1505 freemsg(mp); 1506 return (NULL); 1507 } 1508 1509 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1510 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1511 icmp6_t *icmp6; 1512 1513 if (mp->b_wptr - mp->b_rptr < len_needed) { 1514 if (!pullupmsg(mp, len_needed)) { 1515 BUMP_MIB(ill->ill_icmp6_mib, 1516 ipv6IfIcmpInErrors); 1517 freemsg(mp); 1518 return (NULL); 1519 } 1520 ip6h = (ip6_t *)mp->b_rptr; 1521 } 1522 icmp6 = (icmp6_t *)&ip6h[1]; 1523 /* Explicitly do not generate errors in response to redirects */ 1524 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1525 icmp6->icmp6_type == ND_REDIRECT) { 1526 freemsg(mp); 1527 return (NULL); 1528 } 1529 } 1530 /* 1531 * Check that the destination is not multicast and that the packet 1532 * was not sent on link layer broadcast or multicast. (Exception 1533 * is Packet too big message as per the draft - when mcast_ok is set.) 1534 */ 1535 if (!mcast_ok && 1536 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1537 freemsg(mp); 1538 return (NULL); 1539 } 1540 /* 1541 * If this is a labeled system, then check to see if we're allowed to 1542 * send a response to this particular sender. If not, then just drop. 
1543 */ 1544 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1545 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1546 freemsg(mp); 1547 return (NULL); 1548 } 1549 1550 if (icmp_err_rate_limit(ipst)) { 1551 /* 1552 * Only send ICMP error packets every so often. 1553 * This should be done on a per port/source basis, 1554 * but for now this will suffice. 1555 */ 1556 freemsg(mp); 1557 return (NULL); 1558 } 1559 return (mp); 1560 } 1561 1562 /* 1563 * Called when a packet was sent out the same link that it arrived on. 1564 * Check if it is ok to send a redirect and then send it. 1565 */ 1566 void 1567 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1568 ip_recv_attr_t *ira) 1569 { 1570 ill_t *ill = ira->ira_ill; 1571 ip_stack_t *ipst = ill->ill_ipst; 1572 in6_addr_t *v6targ; 1573 ire_t *src_ire_v6 = NULL; 1574 mblk_t *mp1; 1575 ire_t *nhop_ire = NULL; 1576 1577 /* 1578 * Don't send a redirect when forwarding a source 1579 * routed packet. 1580 */ 1581 if (ip_source_routed_v6(ip6h, mp, ipst)) 1582 return; 1583 1584 if (ire->ire_type & IRE_ONLINK) { 1585 /* Target is directly connected */ 1586 v6targ = &ip6h->ip6_dst; 1587 } else { 1588 /* Determine the most specific IRE used to send the packets */ 1589 nhop_ire = ire_nexthop(ire); 1590 if (nhop_ire == NULL) 1591 return; 1592 1593 /* 1594 * We won't send redirects to a router 1595 * that doesn't have a link local 1596 * address, but will forward. 
1597 */ 1598 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1599 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1600 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1601 ire_refrele(nhop_ire); 1602 return; 1603 } 1604 v6targ = &nhop_ire->ire_addr_v6; 1605 } 1606 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1607 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1608 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1609 1610 if (src_ire_v6 == NULL) { 1611 if (nhop_ire != NULL) 1612 ire_refrele(nhop_ire); 1613 return; 1614 } 1615 1616 /* 1617 * The source is directly connected. 1618 */ 1619 mp1 = copymsg(mp); 1620 if (mp1 != NULL) 1621 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1622 1623 if (nhop_ire != NULL) 1624 ire_refrele(nhop_ire); 1625 ire_refrele(src_ire_v6); 1626 } 1627 1628 /* 1629 * Generate an ICMPv6 redirect message. 1630 * Include target link layer address option if it exits. 1631 * Always include redirect header. 1632 */ 1633 static void 1634 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1635 ip_recv_attr_t *ira) 1636 { 1637 nd_redirect_t *rd; 1638 nd_opt_rd_hdr_t *rdh; 1639 uchar_t *buf; 1640 ncec_t *ncec = NULL; 1641 nd_opt_hdr_t *opt; 1642 int len; 1643 int ll_opt_len = 0; 1644 int max_redir_hdr_data_len; 1645 int pkt_len; 1646 in6_addr_t *srcp; 1647 ill_t *ill; 1648 boolean_t need_refrele; 1649 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1650 1651 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1652 if (mp == NULL) 1653 return; 1654 1655 if (IS_UNDER_IPMP(ira->ira_ill)) { 1656 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1657 if (ill == NULL) { 1658 ill = ira->ira_ill; 1659 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1660 ip_drop_output("no IPMP ill for sending redirect", 1661 mp, ill); 1662 freemsg(mp); 1663 return; 1664 } 1665 need_refrele = B_TRUE; 1666 } else { 1667 ill = ira->ira_ill; 1668 need_refrele = B_FALSE; 1669 } 1670 1671 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1672 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1673 ncec->ncec_lladdr != NULL) { 1674 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1675 ill->ill_phys_addr_length + 7)/8 * 8; 1676 } 1677 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1678 ASSERT(len % 4 == 0); 1679 buf = kmem_alloc(len, KM_NOSLEEP); 1680 if (buf == NULL) { 1681 if (ncec != NULL) 1682 ncec_refrele(ncec); 1683 if (need_refrele) 1684 ill_refrele(ill); 1685 freemsg(mp); 1686 return; 1687 } 1688 1689 rd = (nd_redirect_t *)buf; 1690 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1691 rd->nd_rd_code = 0; 1692 rd->nd_rd_reserved = 0; 1693 rd->nd_rd_target = *targetp; 1694 rd->nd_rd_dst = *dest; 1695 1696 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1697 if (ncec != NULL && ll_opt_len != 0) { 1698 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1699 opt->nd_opt_len = ll_opt_len/8; 1700 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1701 ill->ill_phys_addr_length); 1702 } 1703 if (ncec != NULL) 1704 ncec_refrele(ncec); 1705 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1706 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1707 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1708 max_redir_hdr_data_len = 1709 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1710 pkt_len = msgdsize(mp); 1711 /* Make sure mp is 8 byte aligned */ 1712 if (pkt_len > max_redir_hdr_data_len) { 1713 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1714 sizeof (nd_opt_rd_hdr_t))/8; 1715 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1716 } else { 1717 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1718 (void) adjmsg(mp, -(pkt_len % 8)); 1719 } 1720 rdh->nd_opt_rh_reserved1 = 0; 1721 rdh->nd_opt_rh_reserved2 = 0; 1722 /* ipif_v6lcl_addr contains the link-local source address */ 1723 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1724 1725 /* Redirects sent by router, and router is global zone */ 1726 ASSERT(ira->ira_zoneid == ALL_ZONES); 1727 
ira->ira_zoneid = GLOBAL_ZONEID; 1728 icmp_pkt_v6(mp, buf, len, srcp, ira); 1729 kmem_free(buf, len); 1730 if (need_refrele) 1731 ill_refrele(ill); 1732 } 1733 1734 1735 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1736 void 1737 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1738 ip_recv_attr_t *ira) 1739 { 1740 icmp6_t icmp6; 1741 1742 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1743 if (mp == NULL) 1744 return; 1745 1746 bzero(&icmp6, sizeof (icmp6_t)); 1747 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1748 icmp6.icmp6_code = code; 1749 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1750 } 1751 1752 /* 1753 * Generate an ICMP unreachable message. 1754 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1755 * constructed by the caller. 1756 */ 1757 void 1758 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1759 ip_recv_attr_t *ira) 1760 { 1761 icmp6_t icmp6; 1762 1763 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1764 if (mp == NULL) 1765 return; 1766 1767 bzero(&icmp6, sizeof (icmp6_t)); 1768 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1769 icmp6.icmp6_code = code; 1770 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1771 } 1772 1773 /* 1774 * Generate an ICMP pkt too big message. 1775 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1776 * constructed by the caller. 1777 */ 1778 void 1779 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1780 ip_recv_attr_t *ira) 1781 { 1782 icmp6_t icmp6; 1783 1784 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1785 if (mp == NULL) 1786 return; 1787 1788 bzero(&icmp6, sizeof (icmp6_t)); 1789 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1790 icmp6.icmp6_code = 0; 1791 icmp6.icmp6_mtu = htonl(mtu); 1792 1793 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1794 } 1795 1796 /* 1797 * Generate an ICMP parameter problem message. (May be called as writer.) 
1798 * 'offset' is the offset from the beginning of the packet in error. 1799 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1800 * constructed by the caller. 1801 */ 1802 static void 1803 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1804 boolean_t mcast_ok, ip_recv_attr_t *ira) 1805 { 1806 icmp6_t icmp6; 1807 1808 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1809 if (mp == NULL) 1810 return; 1811 1812 bzero((char *)&icmp6, sizeof (icmp6_t)); 1813 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1814 icmp6.icmp6_code = code; 1815 icmp6.icmp6_pptr = htonl(offset); 1816 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1817 } 1818 1819 void 1820 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1821 ip_recv_attr_t *ira) 1822 { 1823 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1824 uint16_t hdr_length; 1825 uint8_t *nexthdrp; 1826 uint32_t offset; 1827 ill_t *ill = ira->ira_ill; 1828 1829 /* Determine the offset of the bad nexthdr value */ 1830 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1831 /* Malformed packet */ 1832 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1833 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1834 freemsg(mp); 1835 return; 1836 } 1837 1838 offset = nexthdrp - mp->b_rptr; 1839 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1840 mcast_ok, ira); 1841 } 1842 1843 /* 1844 * Verify whether or not the IP address is a valid local address. 1845 * Could be a unicast, including one for a down interface. 1846 * If allow_mcbc then a multicast or broadcast address is also 1847 * acceptable. 1848 * 1849 * In the case of a multicast address, however, the 1850 * upper protocol is expected to reset the src address 1851 * to zero when we return IPVL_MCAST so that 1852 * no packets are emitted with multicast address as 1853 * source address. 
1854 * The addresses valid for bind are: 1855 * (1) - in6addr_any 1856 * (2) - IP address of an UP interface 1857 * (3) - IP address of a DOWN interface 1858 * (4) - a multicast address. In this case 1859 * the conn will only receive packets destined to 1860 * the specified multicast address. Note: the 1861 * application still has to issue an 1862 * IPV6_JOIN_GROUP socket option. 1863 * 1864 * In all the above cases, the bound address must be valid in the current zone. 1865 * When the address is loopback or multicast, there might be many matching IREs 1866 * so bind has to look up based on the zone. 1867 */ 1868 ip_laddr_t 1869 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1870 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1871 { 1872 ire_t *src_ire; 1873 uint_t match_flags; 1874 ill_t *ill = NULL; 1875 1876 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1877 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1878 1879 match_flags = MATCH_IRE_ZONEONLY; 1880 if (scopeid != 0) { 1881 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1882 if (ill == NULL) 1883 return (IPVL_BAD); 1884 match_flags |= MATCH_IRE_ILL; 1885 } 1886 1887 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1888 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1889 if (ill != NULL) 1890 ill_refrele(ill); 1891 1892 /* 1893 * If an address other than in6addr_any is requested, 1894 * we verify that it is a valid address for bind 1895 * Note: Following code is in if-else-if form for 1896 * readability compared to a condition check. 1897 */ 1898 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1899 /* 1900 * (2) Bind to address of local UP interface 1901 */ 1902 ire_refrele(src_ire); 1903 return (IPVL_UNICAST_UP); 1904 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1905 /* (4) bind to multicast address. 
*/ 1906 if (src_ire != NULL) 1907 ire_refrele(src_ire); 1908 1909 /* 1910 * Note: caller should take IPV6_MULTICAST_IF 1911 * into account when selecting a real source address. 1912 */ 1913 if (allow_mcbc) 1914 return (IPVL_MCAST); 1915 else 1916 return (IPVL_BAD); 1917 } else { 1918 ipif_t *ipif; 1919 1920 /* 1921 * (3) Bind to address of local DOWN interface? 1922 * (ipif_lookup_addr() looks up all interfaces 1923 * but we do not get here for UP interfaces 1924 * - case (2) above) 1925 */ 1926 if (src_ire != NULL) 1927 ire_refrele(src_ire); 1928 1929 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1930 if (ipif == NULL) 1931 return (IPVL_BAD); 1932 1933 /* Not a useful source? */ 1934 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1935 ipif_refrele(ipif); 1936 return (IPVL_BAD); 1937 } 1938 ipif_refrele(ipif); 1939 return (IPVL_UNICAST_DOWN); 1940 } 1941 } 1942 1943 /* 1944 * Verify that both the source and destination addresses are valid. If 1945 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1946 * i.e. have no route to it. Protocols like TCP want to verify destination 1947 * reachability, while tunnels do not. 1948 * 1949 * Determine the route, the interface, and (optionally) the source address 1950 * to use to reach a given destination. 1951 * Note that we allow connect to broadcast and multicast addresses when 1952 * IPDF_ALLOW_MCBC is set. 1953 * first_hop and dst_addr are normally the same, but if source routing 1954 * they will differ; in that case the first_hop is what we'll use for the 1955 * routing lookup but the dce and label checks will be done on dst_addr, 1956 * 1957 * If uinfo is set, then we fill in the best available information 1958 * we have for the destination. This is based on (in priority order) any 1959 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1960 * ill_mtu. 1961 * 1962 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1963 * always do the label check on dst_addr. 1964 * 1965 * Assumes that the caller has set ixa_scopeid for link-local communication. 1966 */ 1967 int 1968 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1969 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1970 uint32_t flags, uint_t mac_mode) 1971 { 1972 ire_t *ire; 1973 int error = 0; 1974 in6_addr_t setsrc; /* RTF_SETSRC */ 1975 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1976 ip_stack_t *ipst = ixa->ixa_ipst; 1977 dce_t *dce; 1978 uint_t pmtu; 1979 uint_t ifindex; 1980 uint_t generation; 1981 nce_t *nce; 1982 ill_t *ill = NULL; 1983 boolean_t multirt = B_FALSE; 1984 1985 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1986 1987 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1988 1989 /* 1990 * We never send to zero; the ULPs map it to the loopback address. 1991 * We can't allow it since we use zero to mean unitialized in some 1992 * places. 1993 */ 1994 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1995 1996 if (is_system_labeled()) { 1997 ts_label_t *tsl = NULL; 1998 1999 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 2000 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 2001 if (error != 0) 2002 return (error); 2003 if (tsl != NULL) { 2004 /* Update the label */ 2005 ip_xmit_attr_replace_tsl(ixa, tsl); 2006 } 2007 } 2008 2009 setsrc = ipv6_all_zeros; 2010 /* 2011 * Select a route; For IPMP interfaces, we would only select 2012 * a "hidden" route (i.e., going through a specific under_ill) 2013 * if ixa_ifindex has been specified. 2014 */ 2015 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation, 2016 &setsrc, &error, &multirt); 2017 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2018 if (error != 0) 2019 goto bad_addr; 2020 2021 /* 2022 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2023 * If IPDF_VERIFY_DST is set, the destination must be reachable. 
2024 * Otherwise the destination needn't be reachable. 2025 * 2026 * If we match on a reject or black hole, then we've got a 2027 * local failure. May as well fail out the connect() attempt, 2028 * since it's never going to succeed. 2029 */ 2030 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2031 /* 2032 * If we're verifying destination reachability, we always want 2033 * to complain here. 2034 * 2035 * If we're not verifying destination reachability but the 2036 * destination has a route, we still want to fail on the 2037 * temporary address and broadcast address tests. 2038 * 2039 * In both cases do we let the code continue so some reasonable 2040 * information is returned to the caller. That enables the 2041 * caller to use (and even cache) the IRE. conn_ip_ouput will 2042 * use the generation mismatch path to check for the unreachable 2043 * case thereby avoiding any specific check in the main path. 2044 */ 2045 ASSERT(generation == IRE_GENERATION_VERIFY); 2046 if (flags & IPDF_VERIFY_DST) { 2047 /* 2048 * Set errno but continue to set up ixa_ire to be 2049 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2050 * That allows callers to use ip_output to get an 2051 * ICMP error back. 2052 */ 2053 if (!(ire->ire_type & IRE_HOST)) 2054 error = ENETUNREACH; 2055 else 2056 error = EHOSTUNREACH; 2057 } 2058 } 2059 2060 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2061 !(flags & IPDF_ALLOW_MCBC)) { 2062 ire_refrele(ire); 2063 ire = ire_reject(ipst, B_FALSE); 2064 generation = IRE_GENERATION_VERIFY; 2065 error = ENETUNREACH; 2066 } 2067 2068 /* Cache things */ 2069 if (ixa->ixa_ire != NULL) 2070 ire_refrele_notr(ixa->ixa_ire); 2071 #ifdef DEBUG 2072 ire_refhold_notr(ire); 2073 ire_refrele(ire); 2074 #endif 2075 ixa->ixa_ire = ire; 2076 ixa->ixa_ire_generation = generation; 2077 2078 /* 2079 * For multicast with multirt we have a flag passed back from 2080 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2081 * possible multicast address. 
2082 * We also need a flag for multicast since we can't check 2083 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2084 */ 2085 if (multirt) { 2086 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2087 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2088 } else { 2089 ixa->ixa_postfragfn = ire->ire_postfragfn; 2090 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2091 } 2092 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2093 /* Get an nce to cache. */ 2094 nce = ire_to_nce(ire, NULL, firsthop); 2095 if (nce == NULL) { 2096 /* Allocation failure? */ 2097 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2098 } else { 2099 if (ixa->ixa_nce != NULL) 2100 nce_refrele(ixa->ixa_nce); 2101 ixa->ixa_nce = nce; 2102 } 2103 } 2104 2105 /* 2106 * If the source address is a loopback address, the 2107 * destination had best be local or multicast. 2108 * If we are sending to an IRE_LOCAL using a loopback source then 2109 * it had better be the same zoneid. 2110 */ 2111 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2112 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2113 ire = NULL; /* Stored in ixa_ire */ 2114 error = EADDRNOTAVAIL; 2115 goto bad_addr; 2116 } 2117 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2118 ire = NULL; /* Stored in ixa_ire */ 2119 error = EADDRNOTAVAIL; 2120 goto bad_addr; 2121 } 2122 } 2123 2124 /* 2125 * Does the caller want us to pick a source address? 2126 */ 2127 if (flags & IPDF_SELECT_SRC) { 2128 in6_addr_t src_addr; 2129 2130 /* 2131 * We use use ire_nexthop_ill to avoid the under ipmp 2132 * interface for source address selection. Note that for ipmp 2133 * probe packets, ixa_ifindex would have been specified, and 2134 * the ip_select_route() invocation would have picked an ire 2135 * will ire_ill pointing at an under interface. 
2136 */ 2137 ill = ire_nexthop_ill(ire); 2138 2139 /* If unreachable we have no ill but need some source */ 2140 if (ill == NULL) { 2141 src_addr = ipv6_loopback; 2142 /* Make sure we look for a better source address */ 2143 generation = SRC_GENERATION_VERIFY; 2144 } else { 2145 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2146 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2147 &src_addr, &generation, NULL); 2148 if (error != 0) { 2149 ire = NULL; /* Stored in ixa_ire */ 2150 goto bad_addr; 2151 } 2152 } 2153 2154 /* 2155 * We allow the source address to to down. 2156 * However, we check that we don't use the loopback address 2157 * as a source when sending out on the wire. 2158 */ 2159 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2160 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2161 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2162 ire = NULL; /* Stored in ixa_ire */ 2163 error = EADDRNOTAVAIL; 2164 goto bad_addr; 2165 } 2166 2167 *src_addrp = src_addr; 2168 ixa->ixa_src_generation = generation; 2169 } 2170 2171 /* 2172 * Make sure we don't leave an unreachable ixa_nce in place 2173 * since ip_select_route is used when we unplumb i.e., remove 2174 * references on ixa_ire, ixa_nce, and ixa_dce. 
2175 */ 2176 nce = ixa->ixa_nce; 2177 if (nce != NULL && nce->nce_is_condemned) { 2178 nce_refrele(nce); 2179 ixa->ixa_nce = NULL; 2180 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2181 } 2182 2183 2184 ifindex = 0; 2185 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2186 /* If we are creating a DCE we'd better have an ifindex */ 2187 if (ill != NULL) 2188 ifindex = ill->ill_phyint->phyint_ifindex; 2189 else 2190 flags &= ~IPDF_UNIQUE_DCE; 2191 } 2192 2193 if (flags & IPDF_UNIQUE_DCE) { 2194 /* Fallback to the default dce if allocation fails */ 2195 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2196 if (dce != NULL) { 2197 generation = dce->dce_generation; 2198 } else { 2199 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2200 &generation); 2201 } 2202 } else { 2203 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2204 } 2205 ASSERT(dce != NULL); 2206 if (ixa->ixa_dce != NULL) 2207 dce_refrele_notr(ixa->ixa_dce); 2208 #ifdef DEBUG 2209 dce_refhold_notr(dce); 2210 dce_refrele(dce); 2211 #endif 2212 ixa->ixa_dce = dce; 2213 ixa->ixa_dce_generation = generation; 2214 2215 /* 2216 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2217 * multicast. But pmtu discovery is only enabled for connected 2218 * sockets in general. 2219 */ 2220 2221 /* 2222 * Set initial value for fragmentation limit. Either conn_ip_output 2223 * or ULP might updates it when there are routing changes. 2224 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2225 */ 2226 pmtu = ip_get_pmtu(ixa); 2227 ixa->ixa_fragsize = pmtu; 2228 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2229 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2230 ixa->ixa_pmtu = pmtu; 2231 2232 /* 2233 * Extract information useful for some transports. 2234 * First we look for DCE metrics. Then we take what we have in 2235 * the metrics in the route, where the offlink is used if we have 2236 * one. 
2237 */ 2238 if (uinfo != NULL) { 2239 bzero(uinfo, sizeof (*uinfo)); 2240 2241 if (dce->dce_flags & DCEF_UINFO) 2242 *uinfo = dce->dce_uinfo; 2243 2244 rts_merge_metrics(uinfo, &ire->ire_metrics); 2245 2246 /* Allow ire_metrics to decrease the path MTU from above */ 2247 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2248 uinfo->iulp_mtu = pmtu; 2249 2250 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2251 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2252 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2253 } 2254 2255 if (ill != NULL) 2256 ill_refrele(ill); 2257 2258 return (error); 2259 2260 bad_addr: 2261 if (ire != NULL) 2262 ire_refrele(ire); 2263 2264 if (ill != NULL) 2265 ill_refrele(ill); 2266 2267 /* 2268 * Make sure we don't leave an unreachable ixa_nce in place 2269 * since ip_select_route is used when we unplumb i.e., remove 2270 * references on ixa_ire, ixa_nce, and ixa_dce. 2271 */ 2272 nce = ixa->ixa_nce; 2273 if (nce != NULL && nce->nce_is_condemned) { 2274 nce_refrele(nce); 2275 ixa->ixa_nce = NULL; 2276 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2277 } 2278 2279 return (error); 2280 } 2281 2282 /* 2283 * Handle protocols with which IP is less intimate. There 2284 * can be more than one stream bound to a particular 2285 * protocol. When this is the case, normally each one gets a copy 2286 * of any incoming packets. 2287 * 2288 * Zones notes: 2289 * Packets will be distributed to conns in all zones. This is really only 2290 * useful for ICMPv6 as only applications in the global zone can create raw 2291 * sockets for other protocols. 
2292 */ 2293 void 2294 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2295 { 2296 mblk_t *mp1; 2297 in6_addr_t laddr = ip6h->ip6_dst; 2298 conn_t *connp, *first_connp, *next_connp; 2299 connf_t *connfp; 2300 ill_t *ill = ira->ira_ill; 2301 ip_stack_t *ipst = ill->ill_ipst; 2302 2303 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2304 mutex_enter(&connfp->connf_lock); 2305 connp = connfp->connf_head; 2306 for (connp = connfp->connf_head; connp != NULL; 2307 connp = connp->conn_next) { 2308 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2309 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2310 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2311 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2312 break; 2313 } 2314 2315 if (connp == NULL) { 2316 /* 2317 * No one bound to this port. Is 2318 * there a client that wants all 2319 * unclaimed datagrams? 2320 */ 2321 mutex_exit(&connfp->connf_lock); 2322 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2323 ICMP6_PARAMPROB_NEXTHEADER, ira); 2324 return; 2325 } 2326 2327 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2328 2329 CONN_INC_REF(connp); 2330 first_connp = connp; 2331 2332 /* 2333 * XXX: Fix the multiple protocol listeners case. We should not 2334 * be walking the conn->conn_next list here. 
2335 */ 2336 connp = connp->conn_next; 2337 for (;;) { 2338 while (connp != NULL) { 2339 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2340 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2341 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2342 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2343 ira, connp))) 2344 break; 2345 connp = connp->conn_next; 2346 } 2347 2348 if (connp == NULL) { 2349 /* No more interested clients */ 2350 connp = first_connp; 2351 break; 2352 } 2353 if (((mp1 = dupmsg(mp)) == NULL) && 2354 ((mp1 = copymsg(mp)) == NULL)) { 2355 /* Memory allocation failed */ 2356 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2357 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2358 connp = first_connp; 2359 break; 2360 } 2361 2362 CONN_INC_REF(connp); 2363 mutex_exit(&connfp->connf_lock); 2364 2365 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2366 ira); 2367 2368 mutex_enter(&connfp->connf_lock); 2369 /* Follow the next pointer before releasing the conn. */ 2370 next_connp = connp->conn_next; 2371 CONN_DEC_REF(connp); 2372 connp = next_connp; 2373 } 2374 2375 /* Last one. Send it upstream. */ 2376 mutex_exit(&connfp->connf_lock); 2377 2378 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2379 2380 CONN_DEC_REF(connp); 2381 } 2382 2383 /* 2384 * Called when it is conceptually a ULP that would sent the packet 2385 * e.g., port unreachable and nexthdr unknown. Check that the packet 2386 * would have passed the IPsec global policy before sending the error. 2387 * 2388 * Send an ICMP error after patching up the packet appropriately. 2389 * Uses ip_drop_input and bumps the appropriate MIB. 2390 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2391 */ 2392 void 2393 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2394 ip_recv_attr_t *ira) 2395 { 2396 ip6_t *ip6h; 2397 boolean_t secure; 2398 ill_t *ill = ira->ira_ill; 2399 ip_stack_t *ipst = ill->ill_ipst; 2400 netstack_t *ns = ipst->ips_netstack; 2401 ipsec_stack_t *ipss = ns->netstack_ipsec; 2402 2403 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2404 2405 /* 2406 * We are generating an icmp error for some inbound packet. 2407 * Called from all ip_fanout_(udp, tcp, proto) functions. 2408 * Before we generate an error, check with global policy 2409 * to see whether this is allowed to enter the system. As 2410 * there is no "conn", we are checking with global policy. 2411 */ 2412 ip6h = (ip6_t *)mp->b_rptr; 2413 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2414 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2415 if (mp == NULL) 2416 return; 2417 } 2418 2419 /* We never send errors for protocols that we do implement */ 2420 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2421 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2422 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2423 freemsg(mp); 2424 return; 2425 } 2426 2427 switch (icmp_type) { 2428 case ICMP6_DST_UNREACH: 2429 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2430 2431 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2432 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2433 2434 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2435 break; 2436 case ICMP6_PARAM_PROB: 2437 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2438 2439 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2440 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2441 2442 /* Let the system determine the offset for this one */ 2443 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2444 break; 2445 default: 2446 #ifdef DEBUG 2447 panic("ip_fanout_send_icmp_v6: wrong type"); 2448 /*NOTREACHED*/ 2449 #else 2450 freemsg(mp); 2451 break; 2452 #endif 2453 } 2454 } 2455 2456 /* 2457 * 
Fanout for UDP packets that are multicast or ICMP errors. 2458 * (Unicast fanout is handled in ip_input_v6.) 2459 * 2460 * If SO_REUSEADDR is set all multicast packets 2461 * will be delivered to all conns bound to the same port. 2462 * 2463 * Fanout for UDP packets. 2464 * The caller puts <fport, lport> in the ports parameter. 2465 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2466 * 2467 * If SO_REUSEADDR is set all multicast and broadcast packets 2468 * will be delivered to all conns bound to the same port. 2469 * 2470 * Zones notes: 2471 * Earlier in ip_input on a system with multiple shared-IP zones we 2472 * duplicate the multicast and broadcast packets and send them up 2473 * with each explicit zoneid that exists on that ill. 2474 * This means that here we can match the zoneid with SO_ALLZONES being special. 2475 */ 2476 void 2477 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2478 ip_recv_attr_t *ira) 2479 { 2480 in6_addr_t laddr; 2481 conn_t *connp; 2482 connf_t *connfp; 2483 in6_addr_t faddr; 2484 ill_t *ill = ira->ira_ill; 2485 ip_stack_t *ipst = ill->ill_ipst; 2486 2487 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2488 2489 laddr = ip6h->ip6_dst; 2490 faddr = ip6h->ip6_src; 2491 2492 /* Attempt to find a client stream based on destination port. 
*/ 2493 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2494 mutex_enter(&connfp->connf_lock); 2495 connp = connfp->connf_head; 2496 while (connp != NULL) { 2497 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2498 conn_wantpacket_v6(connp, ira, ip6h) && 2499 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2500 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2501 break; 2502 connp = connp->conn_next; 2503 } 2504 2505 if (connp == NULL) 2506 goto notfound; 2507 2508 CONN_INC_REF(connp); 2509 2510 if (connp->conn_reuseaddr) { 2511 conn_t *first_connp = connp; 2512 conn_t *next_connp; 2513 mblk_t *mp1; 2514 2515 connp = connp->conn_next; 2516 for (;;) { 2517 while (connp != NULL) { 2518 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2519 fport, faddr) && 2520 conn_wantpacket_v6(connp, ira, ip6h) && 2521 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2522 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2523 ira, connp))) 2524 break; 2525 connp = connp->conn_next; 2526 } 2527 if (connp == NULL) { 2528 /* No more interested clients */ 2529 connp = first_connp; 2530 break; 2531 } 2532 if (((mp1 = dupmsg(mp)) == NULL) && 2533 ((mp1 = copymsg(mp)) == NULL)) { 2534 /* Memory allocation failed */ 2535 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2536 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2537 connp = first_connp; 2538 break; 2539 } 2540 2541 CONN_INC_REF(connp); 2542 mutex_exit(&connfp->connf_lock); 2543 2544 IP6_STAT(ipst, ip6_udp_fanmb); 2545 ip_fanout_udp_conn(connp, mp1, NULL, 2546 (ip6_t *)mp1->b_rptr, ira); 2547 2548 mutex_enter(&connfp->connf_lock); 2549 /* Follow the next pointer before releasing the conn. */ 2550 next_connp = connp->conn_next; 2551 IP6_STAT(ipst, ip6_udp_fanmb); 2552 CONN_DEC_REF(connp); 2553 connp = next_connp; 2554 } 2555 } 2556 2557 /* Last one. Send it upstream. 
*/ 2558 mutex_exit(&connfp->connf_lock); 2559 2560 IP6_STAT(ipst, ip6_udp_fanmb); 2561 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2562 CONN_DEC_REF(connp); 2563 return; 2564 2565 notfound: 2566 mutex_exit(&connfp->connf_lock); 2567 /* 2568 * No one bound to this port. Is 2569 * there a client that wants all 2570 * unclaimed datagrams? 2571 */ 2572 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2573 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2574 ip_fanout_proto_v6(mp, ip6h, ira); 2575 } else { 2576 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2577 ICMP6_DST_UNREACH_NOPORT, ira); 2578 } 2579 } 2580 2581 /* 2582 * int ip_find_hdr_v6() 2583 * 2584 * This routine is used by the upper layer protocols, iptun, and IPsec: 2585 * - Set extension header pointers to appropriate locations 2586 * - Determine IPv6 header length and return it 2587 * - Return a pointer to the last nexthdr value 2588 * 2589 * The caller must initialize ipp_fields. 2590 * The upper layer protocols normally set label_separate which makes the 2591 * routine put the TX label in ipp_label_v6. If this is not set then 2592 * the hop-by-hop options including the label are placed in ipp_hopopts. 2593 * 2594 * NOTE: If multiple extension headers of the same type are present, 2595 * ip_find_hdr_v6() will set the respective extension header pointers 2596 * to the first one that it encounters in the IPv6 header. It also 2597 * skips fragment headers. This routine deals with malformed packets 2598 * of various sorts in which case the returned length is up to the 2599 * malformed part. 
2600 */ 2601 int 2602 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2603 uint8_t *nexthdrp) 2604 { 2605 uint_t length, ehdrlen; 2606 uint8_t nexthdr; 2607 uint8_t *whereptr, *endptr; 2608 ip6_dest_t *tmpdstopts; 2609 ip6_rthdr_t *tmprthdr; 2610 ip6_hbh_t *tmphopopts; 2611 ip6_frag_t *tmpfraghdr; 2612 2613 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2614 ipp->ipp_hoplimit = ip6h->ip6_hops; 2615 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2616 ipp->ipp_addr = ip6h->ip6_dst; 2617 2618 length = IPV6_HDR_LEN; 2619 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2620 endptr = mp->b_wptr; 2621 2622 nexthdr = ip6h->ip6_nxt; 2623 while (whereptr < endptr) { 2624 /* Is there enough left for len + nexthdr? */ 2625 if (whereptr + MIN_EHDR_LEN > endptr) 2626 goto done; 2627 2628 switch (nexthdr) { 2629 case IPPROTO_HOPOPTS: { 2630 /* We check for any CIPSO */ 2631 uchar_t *secopt; 2632 boolean_t hbh_needed; 2633 uchar_t *after_secopt; 2634 2635 tmphopopts = (ip6_hbh_t *)whereptr; 2636 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2637 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2638 goto done; 2639 nexthdr = tmphopopts->ip6h_nxt; 2640 2641 if (!label_separate) { 2642 secopt = NULL; 2643 after_secopt = whereptr; 2644 } else { 2645 /* 2646 * We have dropped packets with bad options in 2647 * ip6_input. No need to check return value 2648 * here. 
2649 */ 2650 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2651 &secopt, &after_secopt, &hbh_needed); 2652 } 2653 if (secopt != NULL && after_secopt - whereptr > 0) { 2654 ipp->ipp_fields |= IPPF_LABEL_V6; 2655 ipp->ipp_label_v6 = secopt; 2656 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2657 } else { 2658 ipp->ipp_label_len_v6 = 0; 2659 after_secopt = whereptr; 2660 hbh_needed = B_TRUE; 2661 } 2662 /* return only 1st hbh */ 2663 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2664 ipp->ipp_fields |= IPPF_HOPOPTS; 2665 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2666 ipp->ipp_hopoptslen = ehdrlen - 2667 ipp->ipp_label_len_v6; 2668 } 2669 break; 2670 } 2671 case IPPROTO_DSTOPTS: 2672 tmpdstopts = (ip6_dest_t *)whereptr; 2673 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2674 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2675 goto done; 2676 nexthdr = tmpdstopts->ip6d_nxt; 2677 /* 2678 * ipp_dstopts is set to the destination header after a 2679 * routing header. 2680 * Assume it is a post-rthdr destination header 2681 * and adjust when we find an rthdr. 2682 */ 2683 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2684 ipp->ipp_fields |= IPPF_DSTOPTS; 2685 ipp->ipp_dstopts = tmpdstopts; 2686 ipp->ipp_dstoptslen = ehdrlen; 2687 } 2688 break; 2689 case IPPROTO_ROUTING: 2690 tmprthdr = (ip6_rthdr_t *)whereptr; 2691 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2692 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2693 goto done; 2694 nexthdr = tmprthdr->ip6r_nxt; 2695 /* return only 1st rthdr */ 2696 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2697 ipp->ipp_fields |= IPPF_RTHDR; 2698 ipp->ipp_rthdr = tmprthdr; 2699 ipp->ipp_rthdrlen = ehdrlen; 2700 } 2701 /* 2702 * Make any destination header we've seen be a 2703 * pre-rthdr destination header. 
2704 */ 2705 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2706 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2707 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2708 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2709 ipp->ipp_dstopts = NULL; 2710 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2711 ipp->ipp_dstoptslen = 0; 2712 } 2713 break; 2714 case IPPROTO_FRAGMENT: 2715 tmpfraghdr = (ip6_frag_t *)whereptr; 2716 ehdrlen = sizeof (ip6_frag_t); 2717 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2718 goto done; 2719 nexthdr = tmpfraghdr->ip6f_nxt; 2720 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2721 ipp->ipp_fields |= IPPF_FRAGHDR; 2722 ipp->ipp_fraghdr = tmpfraghdr; 2723 ipp->ipp_fraghdrlen = ehdrlen; 2724 } 2725 break; 2726 case IPPROTO_NONE: 2727 default: 2728 goto done; 2729 } 2730 length += ehdrlen; 2731 whereptr += ehdrlen; 2732 } 2733 done: 2734 if (nexthdrp != NULL) 2735 *nexthdrp = nexthdr; 2736 return (length); 2737 } 2738 2739 /* 2740 * Try to determine where and what are the IPv6 header length and 2741 * pointer to nexthdr value for the upper layer protocol (or an 2742 * unknown next hdr). 2743 * 2744 * Parameters returns a pointer to the nexthdr value; 2745 * Must handle malformed packets of various sorts. 2746 * Function returns failure for malformed cases. 2747 */ 2748 boolean_t 2749 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 2750 uint8_t **nexthdrpp) 2751 { 2752 uint16_t length; 2753 uint_t ehdrlen; 2754 uint8_t *nexthdrp; 2755 uint8_t *whereptr; 2756 uint8_t *endptr; 2757 ip6_dest_t *desthdr; 2758 ip6_rthdr_t *rthdr; 2759 ip6_frag_t *fraghdr; 2760 2761 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 2762 length = IPV6_HDR_LEN; 2763 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2764 endptr = mp->b_wptr; 2765 2766 nexthdrp = &ip6h->ip6_nxt; 2767 while (whereptr < endptr) { 2768 /* Is there enough left for len + nexthdr? 
*/ 2769 if (whereptr + MIN_EHDR_LEN > endptr) 2770 break; 2771 2772 switch (*nexthdrp) { 2773 case IPPROTO_HOPOPTS: 2774 case IPPROTO_DSTOPTS: 2775 /* Assumes the headers are identical for hbh and dst */ 2776 desthdr = (ip6_dest_t *)whereptr; 2777 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2778 if ((uchar_t *)desthdr + ehdrlen > endptr) 2779 return (B_FALSE); 2780 nexthdrp = &desthdr->ip6d_nxt; 2781 break; 2782 case IPPROTO_ROUTING: 2783 rthdr = (ip6_rthdr_t *)whereptr; 2784 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2785 if ((uchar_t *)rthdr + ehdrlen > endptr) 2786 return (B_FALSE); 2787 nexthdrp = &rthdr->ip6r_nxt; 2788 break; 2789 case IPPROTO_FRAGMENT: 2790 fraghdr = (ip6_frag_t *)whereptr; 2791 ehdrlen = sizeof (ip6_frag_t); 2792 if ((uchar_t *)&fraghdr[1] > endptr) 2793 return (B_FALSE); 2794 nexthdrp = &fraghdr->ip6f_nxt; 2795 break; 2796 case IPPROTO_NONE: 2797 /* No next header means we're finished */ 2798 default: 2799 *hdr_length_ptr = length; 2800 *nexthdrpp = nexthdrp; 2801 return (B_TRUE); 2802 } 2803 length += ehdrlen; 2804 whereptr += ehdrlen; 2805 *hdr_length_ptr = length; 2806 *nexthdrpp = nexthdrp; 2807 } 2808 switch (*nexthdrp) { 2809 case IPPROTO_HOPOPTS: 2810 case IPPROTO_DSTOPTS: 2811 case IPPROTO_ROUTING: 2812 case IPPROTO_FRAGMENT: 2813 /* 2814 * If any know extension headers are still to be processed, 2815 * the packet's malformed (or at least all the IP header(s) are 2816 * not in the same mblk - and that should never happen. 2817 */ 2818 return (B_FALSE); 2819 2820 default: 2821 /* 2822 * If we get here, we know that all of the IP headers were in 2823 * the same mblk, even if the ULP header is in the next mblk. 2824 */ 2825 *hdr_length_ptr = length; 2826 *nexthdrpp = nexthdrp; 2827 return (B_TRUE); 2828 } 2829 } 2830 2831 /* 2832 * Return the length of the IPv6 related headers (including extension headers) 2833 * Returns a length even if the packet is malformed. 
2834 */ 2835 int 2836 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2837 { 2838 uint16_t hdr_len; 2839 uint8_t *nexthdrp; 2840 2841 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 2842 return (hdr_len); 2843 } 2844 2845 /* 2846 * Parse and process any hop-by-hop or destination options. 2847 * 2848 * Assumes that q is an ill read queue so that ICMP errors for link-local 2849 * destinations are sent out the correct interface. 2850 * 2851 * Returns -1 if there was an error and mp has been consumed. 2852 * Returns 0 if no special action is needed. 2853 * Returns 1 if the packet contained a router alert option for this node 2854 * which is verified to be "interesting/known" for our implementation. 2855 * 2856 * XXX Note: In future as more hbh or dest options are defined, 2857 * it may be better to have different routines for hbh and dest 2858 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2859 * may have same value in different namespaces. Or is it same namespace ?? 2860 * Current code checks for each opt_type (other than pads) if it is in 2861 * the expected nexthdr (hbh or dest) 2862 */ 2863 int 2864 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2865 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2866 { 2867 uint8_t opt_type; 2868 uint_t optused; 2869 int ret = 0; 2870 const char *errtype; 2871 ill_t *ill = ira->ira_ill; 2872 ip_stack_t *ipst = ill->ill_ipst; 2873 2874 while (optlen != 0) { 2875 opt_type = *optptr; 2876 if (opt_type == IP6OPT_PAD1) { 2877 optused = 1; 2878 } else { 2879 if (optlen < 2) 2880 goto bad_opt; 2881 errtype = "malformed"; 2882 if (opt_type == ip6opt_ls) { 2883 optused = 2 + optptr[1]; 2884 if (optused > optlen) 2885 goto bad_opt; 2886 } else switch (opt_type) { 2887 case IP6OPT_PADN: 2888 /* 2889 * Note:We don't verify that (N-2) pad octets 2890 * are zero as required by spec. Adhere to 2891 * "be liberal in what you accept..." 
part of 2892 * implementation philosophy (RFC791,RFC1122) 2893 */ 2894 optused = 2 + optptr[1]; 2895 if (optused > optlen) 2896 goto bad_opt; 2897 break; 2898 2899 case IP6OPT_JUMBO: 2900 if (hdr_type != IPPROTO_HOPOPTS) 2901 goto opt_error; 2902 goto opt_error; /* XXX Not implemented! */ 2903 2904 case IP6OPT_ROUTER_ALERT: { 2905 struct ip6_opt_router *or; 2906 2907 if (hdr_type != IPPROTO_HOPOPTS) 2908 goto opt_error; 2909 optused = 2 + optptr[1]; 2910 if (optused > optlen) 2911 goto bad_opt; 2912 or = (struct ip6_opt_router *)optptr; 2913 /* Check total length and alignment */ 2914 if (optused != sizeof (*or) || 2915 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2916 goto opt_error; 2917 /* Check value */ 2918 switch (*((uint16_t *)or->ip6or_value)) { 2919 case IP6_ALERT_MLD: 2920 case IP6_ALERT_RSVP: 2921 ret = 1; 2922 } 2923 break; 2924 } 2925 case IP6OPT_HOME_ADDRESS: { 2926 /* 2927 * Minimal support for the home address option 2928 * (which is required by all IPv6 nodes). 2929 * Implement by just swapping the home address 2930 * and source address. 2931 * XXX Note: this has IPsec implications since 2932 * AH needs to take this into account. 2933 * Also, when IPsec is used we need to ensure 2934 * that this is only processed once 2935 * in the received packet (to avoid swapping 2936 * back and forth). 2937 * NOTE:This option processing is considered 2938 * to be unsafe and prone to a denial of 2939 * service attack. 2940 * The current processing is not safe even with 2941 * IPsec secured IP packets. Since the home 2942 * address option processing requirement still 2943 * is in the IETF draft and in the process of 2944 * being redefined for its usage, it has been 2945 * decided to turn off the option by default. 2946 * If this section of code needs to be executed, 2947 * ndd variable ip6_ignore_home_address_opt 2948 * should be set to 0 at the user's own risk. 
2949 */ 2950 struct ip6_opt_home_address *oh; 2951 in6_addr_t tmp; 2952 2953 if (ipst->ips_ipv6_ignore_home_address_opt) 2954 goto opt_error; 2955 2956 if (hdr_type != IPPROTO_DSTOPTS) 2957 goto opt_error; 2958 optused = 2 + optptr[1]; 2959 if (optused > optlen) 2960 goto bad_opt; 2961 2962 /* 2963 * We did this dest. opt the first time 2964 * around (i.e. before AH processing). 2965 * If we've done AH... stop now. 2966 */ 2967 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2968 ira->ira_ipsec_ah_sa != NULL) 2969 break; 2970 2971 oh = (struct ip6_opt_home_address *)optptr; 2972 /* Check total length and alignment */ 2973 if (optused < sizeof (*oh) || 2974 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2975 goto opt_error; 2976 /* Swap ip6_src and the home address */ 2977 tmp = ip6h->ip6_src; 2978 /* XXX Note: only 8 byte alignment option */ 2979 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2980 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2981 break; 2982 } 2983 2984 case IP6OPT_TUNNEL_LIMIT: 2985 if (hdr_type != IPPROTO_DSTOPTS) { 2986 goto opt_error; 2987 } 2988 optused = 2 + optptr[1]; 2989 if (optused > optlen) { 2990 goto bad_opt; 2991 } 2992 if (optused != 3) { 2993 goto opt_error; 2994 } 2995 break; 2996 2997 default: 2998 errtype = "unknown"; 2999 /* FALLTHROUGH */ 3000 opt_error: 3001 /* Determine which zone should send error */ 3002 switch (IP6OPT_TYPE(opt_type)) { 3003 case IP6OPT_TYPE_SKIP: 3004 optused = 2 + optptr[1]; 3005 if (optused > optlen) 3006 goto bad_opt; 3007 ip1dbg(("ip_process_options_v6: %s " 3008 "opt 0x%x skipped\n", 3009 errtype, opt_type)); 3010 break; 3011 case IP6OPT_TYPE_DISCARD: 3012 ip1dbg(("ip_process_options_v6: %s " 3013 "opt 0x%x; packet dropped\n", 3014 errtype, opt_type)); 3015 BUMP_MIB(ill->ill_ip_mib, 3016 ipIfStatsInHdrErrors); 3017 ip_drop_input("ipIfStatsInHdrErrors", 3018 mp, ill); 3019 freemsg(mp); 3020 return (-1); 3021 case IP6OPT_TYPE_ICMP: 3022 BUMP_MIB(ill->ill_ip_mib, 3023 ipIfStatsInHdrErrors); 3024 
ip_drop_input("ipIfStatsInHdrErrors", 3025 mp, ill); 3026 icmp_param_problem_v6(mp, 3027 ICMP6_PARAMPROB_OPTION, 3028 (uint32_t)(optptr - 3029 (uint8_t *)ip6h), 3030 B_FALSE, ira); 3031 return (-1); 3032 case IP6OPT_TYPE_FORCEICMP: 3033 BUMP_MIB(ill->ill_ip_mib, 3034 ipIfStatsInHdrErrors); 3035 ip_drop_input("ipIfStatsInHdrErrors", 3036 mp, ill); 3037 icmp_param_problem_v6(mp, 3038 ICMP6_PARAMPROB_OPTION, 3039 (uint32_t)(optptr - 3040 (uint8_t *)ip6h), 3041 B_TRUE, ira); 3042 return (-1); 3043 default: 3044 ASSERT(0); 3045 } 3046 } 3047 } 3048 optlen -= optused; 3049 optptr += optused; 3050 } 3051 return (ret); 3052 3053 bad_opt: 3054 /* Determine which zone should send error */ 3055 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3056 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 3057 (uint32_t)(optptr - (uint8_t *)ip6h), 3058 B_FALSE, ira); 3059 return (-1); 3060 } 3061 3062 /* 3063 * Process a routing header that is not yet empty. 3064 * Because of RFC 5095, we now reject all route headers. 3065 */ 3066 void 3067 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 3068 ip_recv_attr_t *ira) 3069 { 3070 ill_t *ill = ira->ira_ill; 3071 ip_stack_t *ipst = ill->ill_ipst; 3072 3073 ASSERT(rth->ip6r_segleft != 0); 3074 3075 if (!ipst->ips_ipv6_forward_src_routed) { 3076 /* XXX Check for source routed out same interface? */ 3077 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 3078 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 3079 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 3080 freemsg(mp); 3081 return; 3082 } 3083 3084 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3085 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3086 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 3087 B_FALSE, ira); 3088 } 3089 3090 /* 3091 * Read side put procedure for IPv6 module. 
3092 */ 3093 void 3094 ip_rput_v6(queue_t *q, mblk_t *mp) 3095 { 3096 ill_t *ill; 3097 3098 ill = (ill_t *)q->q_ptr; 3099 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { 3100 union DL_primitives *dl; 3101 3102 dl = (union DL_primitives *)mp->b_rptr; 3103 /* 3104 * Things are opening or closing - only accept DLPI 3105 * ack messages. If the stream is closing and ip_wsrv 3106 * has completed, ip_close is out of the qwait, but has 3107 * not yet completed qprocsoff. Don't proceed any further 3108 * because the ill has been cleaned up and things hanging 3109 * off the ill have been freed. 3110 */ 3111 if ((mp->b_datap->db_type != M_PCPROTO) || 3112 (dl->dl_primitive == DL_UNITDATA_IND)) { 3113 inet_freemsg(mp); 3114 return; 3115 } 3116 } 3117 if (DB_TYPE(mp) == M_DATA) { 3118 struct mac_header_info_s mhi; 3119 3120 ip_mdata_to_mhi(ill, mp, &mhi); 3121 ip_input_v6(ill, NULL, mp, &mhi); 3122 } else { 3123 ip_rput_notdata(ill, mp); 3124 } 3125 } 3126 3127 /* 3128 * Walk through the IPv6 packet in mp and see if there's an AH header 3129 * in it. See if the AH header needs to get done before other headers in 3130 * the packet. (Worker function for ipsec_early_ah_v6().) 3131 */ 3132 #define IPSEC_HDR_DONT_PROCESS 0 3133 #define IPSEC_HDR_PROCESS 1 3134 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 3135 static int 3136 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 3137 { 3138 uint_t length; 3139 uint_t ehdrlen; 3140 uint8_t *whereptr; 3141 uint8_t *endptr; 3142 uint8_t *nexthdrp; 3143 ip6_dest_t *desthdr; 3144 ip6_rthdr_t *rthdr; 3145 ip6_t *ip6h; 3146 3147 /* 3148 * For now just pullup everything. In general, the less pullups, 3149 * the better, but there's so much squirrelling through anyway, 3150 * it's just easier this way. 
3151 */ 3152 if (!pullupmsg(mp, -1)) { 3153 return (IPSEC_MEMORY_ERROR); 3154 } 3155 3156 ip6h = (ip6_t *)mp->b_rptr; 3157 length = IPV6_HDR_LEN; 3158 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3159 endptr = mp->b_wptr; 3160 3161 /* 3162 * We can't just use the argument nexthdr in the place 3163 * of nexthdrp becaue we don't dereference nexthdrp 3164 * till we confirm whether it is a valid address. 3165 */ 3166 nexthdrp = &ip6h->ip6_nxt; 3167 while (whereptr < endptr) { 3168 /* Is there enough left for len + nexthdr? */ 3169 if (whereptr + MIN_EHDR_LEN > endptr) 3170 return (IPSEC_MEMORY_ERROR); 3171 3172 switch (*nexthdrp) { 3173 case IPPROTO_HOPOPTS: 3174 case IPPROTO_DSTOPTS: 3175 /* Assumes the headers are identical for hbh and dst */ 3176 desthdr = (ip6_dest_t *)whereptr; 3177 ehdrlen = 8 * (desthdr->ip6d_len + 1); 3178 if ((uchar_t *)desthdr + ehdrlen > endptr) 3179 return (IPSEC_MEMORY_ERROR); 3180 /* 3181 * Return DONT_PROCESS because the destination 3182 * options header may be for each hop in a 3183 * routing-header, and we only want AH if we're 3184 * finished with routing headers. 3185 */ 3186 if (*nexthdrp == IPPROTO_DSTOPTS) 3187 return (IPSEC_HDR_DONT_PROCESS); 3188 nexthdrp = &desthdr->ip6d_nxt; 3189 break; 3190 case IPPROTO_ROUTING: 3191 rthdr = (ip6_rthdr_t *)whereptr; 3192 3193 /* 3194 * If there's more hops left on the routing header, 3195 * return now with DON'T PROCESS. 
3196 */ 3197 if (rthdr->ip6r_segleft > 0) 3198 return (IPSEC_HDR_DONT_PROCESS); 3199 3200 ehdrlen = 8 * (rthdr->ip6r_len + 1); 3201 if ((uchar_t *)rthdr + ehdrlen > endptr) 3202 return (IPSEC_MEMORY_ERROR); 3203 nexthdrp = &rthdr->ip6r_nxt; 3204 break; 3205 case IPPROTO_FRAGMENT: 3206 /* Wait for reassembly */ 3207 return (IPSEC_HDR_DONT_PROCESS); 3208 case IPPROTO_AH: 3209 *nexthdr = IPPROTO_AH; 3210 return (IPSEC_HDR_PROCESS); 3211 case IPPROTO_NONE: 3212 /* No next header means we're finished */ 3213 default: 3214 return (IPSEC_HDR_DONT_PROCESS); 3215 } 3216 length += ehdrlen; 3217 whereptr += ehdrlen; 3218 } 3219 /* 3220 * Malformed/truncated packet. 3221 */ 3222 return (IPSEC_MEMORY_ERROR); 3223 } 3224 3225 /* 3226 * Path for AH if options are present. 3227 * Returns NULL if the mblk was consumed. 3228 * 3229 * Sometimes AH needs to be done before other IPv6 headers for security 3230 * reasons. This function (and its ipsec_needs_processing_v6() above) 3231 * indicates if that is so, and fans out to the appropriate IPsec protocol 3232 * for the datagram passed in. 3233 */ 3234 mblk_t * 3235 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira) 3236 { 3237 uint8_t nexthdr; 3238 ah_t *ah; 3239 ill_t *ill = ira->ira_ill; 3240 ip_stack_t *ipst = ill->ill_ipst; 3241 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3242 3243 switch (ipsec_needs_processing_v6(mp, &nexthdr)) { 3244 case IPSEC_MEMORY_ERROR: 3245 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3246 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3247 freemsg(mp); 3248 return (NULL); 3249 case IPSEC_HDR_DONT_PROCESS: 3250 return (mp); 3251 } 3252 3253 /* Default means send it to AH! 
*/ 3254 ASSERT(nexthdr == IPPROTO_AH); 3255 3256 if (!ipsec_loaded(ipss)) { 3257 ip_proto_not_sup(mp, ira); 3258 return (NULL); 3259 } 3260 3261 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 3262 if (mp == NULL) 3263 return (NULL); 3264 ASSERT(ah != NULL); 3265 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3266 ASSERT(ira->ira_ipsec_ah_sa != NULL); 3267 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 3268 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira); 3269 3270 if (mp == NULL) { 3271 /* 3272 * Either it failed or is pending. In the former case 3273 * ipIfStatsInDiscards was increased. 3274 */ 3275 return (NULL); 3276 } 3277 3278 /* we're done with IPsec processing, send it up */ 3279 ip_input_post_ipsec(mp, ira); 3280 return (NULL); 3281 } 3282 3283 /* 3284 * Reassemble fragment. 3285 * When it returns a completed message the first mblk will only contain 3286 * the headers prior to the fragment header, with the nexthdr value updated 3287 * to be the header after the fragment header. 3288 */ 3289 mblk_t * 3290 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h, 3291 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira) 3292 { 3293 uint32_t ident = ntohl(fraghdr->ip6f_ident); 3294 uint16_t offset; 3295 boolean_t more_frags; 3296 uint8_t nexthdr = fraghdr->ip6f_nxt; 3297 in6_addr_t *v6dst_ptr; 3298 in6_addr_t *v6src_ptr; 3299 uint_t end; 3300 uint_t hdr_length; 3301 size_t count; 3302 ipf_t *ipf; 3303 ipf_t **ipfp; 3304 ipfb_t *ipfb; 3305 mblk_t *mp1; 3306 uint8_t ecn_info = 0; 3307 size_t msg_len; 3308 mblk_t *tail_mp; 3309 mblk_t *t_mp; 3310 boolean_t pruned = B_FALSE; 3311 uint32_t sum_val; 3312 uint16_t sum_flags; 3313 ill_t *ill = ira->ira_ill; 3314 ip_stack_t *ipst = ill->ill_ipst; 3315 uint_t prev_nexthdr_offset; 3316 uint8_t prev_nexthdr; 3317 uint8_t *ptr; 3318 uint32_t packet_size; 3319 3320 /* 3321 * We utilize hardware computed checksum info only for UDP since 3322 * IP fragmentation is a normal occurence for the protocol. 
In 3323 * addition, checksum offload support for IP fragments carrying 3324 * UDP payload is commonly implemented across network adapters. 3325 */ 3326 ASSERT(ira->ira_rill != NULL); 3327 if (nexthdr == IPPROTO_UDP && dohwcksum && 3328 ILL_HCKSUM_CAPABLE(ira->ira_rill) && 3329 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 3330 mblk_t *mp1 = mp->b_cont; 3331 int32_t len; 3332 3333 /* Record checksum information from the packet */ 3334 sum_val = (uint32_t)DB_CKSUM16(mp); 3335 sum_flags = DB_CKSUMFLAGS(mp); 3336 3337 /* fragmented payload offset from beginning of mblk */ 3338 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 3339 3340 if ((sum_flags & HCK_PARTIALCKSUM) && 3341 (mp1 == NULL || mp1->b_cont == NULL) && 3342 offset >= DB_CKSUMSTART(mp) && 3343 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) { 3344 uint32_t adj; 3345 /* 3346 * Partial checksum has been calculated by hardware 3347 * and attached to the packet; in addition, any 3348 * prepended extraneous data is even byte aligned. 3349 * If any such data exists, we adjust the checksum; 3350 * this would also handle any postpended data. 3351 */ 3352 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 3353 mp, mp1, len, adj); 3354 3355 /* One's complement subtract extraneous checksum */ 3356 if (adj >= sum_val) 3357 sum_val = ~(adj - sum_val) & 0xFFFF; 3358 else 3359 sum_val -= adj; 3360 } 3361 } else { 3362 sum_val = 0; 3363 sum_flags = 0; 3364 } 3365 3366 /* Clear hardware checksumming flag */ 3367 DB_CKSUMFLAGS(mp) = 0; 3368 3369 /* 3370 * Determine the offset (from the begining of the IP header) 3371 * of the nexthdr value which has IPPROTO_FRAGMENT. We use 3372 * this when removing the fragment header from the packet. 3373 * This packet consists of the IPv6 header, a potential 3374 * hop-by-hop options header, a potential pre-routing-header 3375 * destination options header, and a potential routing header. 
3376 */ 3377 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 3378 prev_nexthdr = ip6h->ip6_nxt; 3379 ptr = (uint8_t *)&ip6h[1]; 3380 3381 if (prev_nexthdr == IPPROTO_HOPOPTS) { 3382 ip6_hbh_t *hbh_hdr; 3383 uint_t hdr_len; 3384 3385 hbh_hdr = (ip6_hbh_t *)ptr; 3386 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 3387 prev_nexthdr = hbh_hdr->ip6h_nxt; 3388 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 3389 - (uint8_t *)ip6h; 3390 ptr += hdr_len; 3391 } 3392 if (prev_nexthdr == IPPROTO_DSTOPTS) { 3393 ip6_dest_t *dest_hdr; 3394 uint_t hdr_len; 3395 3396 dest_hdr = (ip6_dest_t *)ptr; 3397 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 3398 prev_nexthdr = dest_hdr->ip6d_nxt; 3399 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 3400 - (uint8_t *)ip6h; 3401 ptr += hdr_len; 3402 } 3403 if (prev_nexthdr == IPPROTO_ROUTING) { 3404 ip6_rthdr_t *rthdr; 3405 uint_t hdr_len; 3406 3407 rthdr = (ip6_rthdr_t *)ptr; 3408 prev_nexthdr = rthdr->ip6r_nxt; 3409 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 3410 - (uint8_t *)ip6h; 3411 hdr_len = 8 * (rthdr->ip6r_len + 1); 3412 ptr += hdr_len; 3413 } 3414 if (prev_nexthdr != IPPROTO_FRAGMENT) { 3415 /* Can't handle other headers before the fragment header */ 3416 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3417 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3418 freemsg(mp); 3419 return (NULL); 3420 } 3421 3422 /* 3423 * Note: Fragment offset in header is in 8-octet units. 3424 * Clearing least significant 3 bits not only extracts 3425 * it but also gets it in units of octets. 3426 */ 3427 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 3428 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 3429 3430 /* 3431 * Is the more frags flag on and the payload length not a multiple 3432 * of eight? 
3433 */ 3434 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 3435 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3436 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3437 (uint32_t)((char *)&ip6h->ip6_plen - 3438 (char *)ip6h), B_FALSE, ira); 3439 return (NULL); 3440 } 3441 3442 v6src_ptr = &ip6h->ip6_src; 3443 v6dst_ptr = &ip6h->ip6_dst; 3444 end = remlen; 3445 3446 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 3447 end += offset; 3448 3449 /* 3450 * Would fragment cause reassembled packet to have a payload length 3451 * greater than IP_MAXPACKET - the max payload size? 3452 */ 3453 if (end > IP_MAXPACKET) { 3454 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3455 ip_drop_input("Reassembled packet too large", mp, ill); 3456 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3457 (uint32_t)((char *)&fraghdr->ip6f_offlg - 3458 (char *)ip6h), B_FALSE, ira); 3459 return (NULL); 3460 } 3461 3462 /* 3463 * This packet just has one fragment. Reassembly not 3464 * needed. 3465 */ 3466 if (!more_frags && offset == 0) { 3467 goto reass_done; 3468 } 3469 3470 /* 3471 * Drop the fragmented as early as possible, if 3472 * we don't have resource(s) to re-assemble. 3473 */ 3474 if (ipst->ips_ip_reass_queue_bytes == 0) { 3475 freemsg(mp); 3476 return (NULL); 3477 } 3478 3479 /* Record the ECN field info. */ 3480 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 3481 /* 3482 * If this is not the first fragment, dump the unfragmentable 3483 * portion of the packet. 3484 */ 3485 if (offset) 3486 mp->b_rptr = (uchar_t *)&fraghdr[1]; 3487 3488 /* 3489 * Fragmentation reassembly. Each ILL has a hash table for 3490 * queueing packets undergoing reassembly for all IPIFs 3491 * associated with the ILL. The hash is based on the packet 3492 * IP ident field. The ILL frag hash table was allocated 3493 * as a timer block at the time the ILL was created. Whenever 3494 * there is anything on the reassembly queue, the timer will 3495 * be running. 
3496 */ 3497 /* Handle vnic loopback of fragments */ 3498 if (mp->b_datap->db_ref > 2) 3499 msg_len = 0; 3500 else 3501 msg_len = MBLKSIZE(mp); 3502 3503 tail_mp = mp; 3504 while (tail_mp->b_cont != NULL) { 3505 tail_mp = tail_mp->b_cont; 3506 if (tail_mp->b_datap->db_ref <= 2) 3507 msg_len += MBLKSIZE(tail_mp); 3508 } 3509 /* 3510 * If the reassembly list for this ILL will get too big 3511 * prune it. 3512 */ 3513 3514 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 3515 ipst->ips_ip_reass_queue_bytes) { 3516 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len, 3517 uint_t, ill->ill_frag_count, 3518 uint_t, ipst->ips_ip_reass_queue_bytes); 3519 ill_frag_prune(ill, 3520 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 3521 (ipst->ips_ip_reass_queue_bytes - msg_len)); 3522 pruned = B_TRUE; 3523 } 3524 3525 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 3526 mutex_enter(&ipfb->ipfb_lock); 3527 3528 ipfp = &ipfb->ipfb_ipf; 3529 /* Try to find an existing fragment queue for this packet. */ 3530 for (;;) { 3531 ipf = ipfp[0]; 3532 if (ipf) { 3533 /* 3534 * It has to match on ident, source address, and 3535 * dest address. 3536 */ 3537 if (ipf->ipf_ident == ident && 3538 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 3539 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 3540 3541 /* 3542 * If we have received too many 3543 * duplicate fragments for this packet 3544 * free it. 3545 */ 3546 if (ipf->ipf_num_dups > ip_max_frag_dups) { 3547 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3548 freemsg(mp); 3549 mutex_exit(&ipfb->ipfb_lock); 3550 return (NULL); 3551 } 3552 3553 break; 3554 } 3555 ipfp = &ipf->ipf_hash_next; 3556 continue; 3557 } 3558 3559 3560 /* 3561 * If we pruned the list, do we want to store this new 3562 * fragment?. We apply an optimization here based on the 3563 * fact that most fragments will be received in order. 3564 * So if the offset of this incoming fragment is zero, 3565 * it is the first fragment of a new packet. 
We will 3566 * keep it. Otherwise drop the fragment, as we have 3567 * probably pruned the packet already (since the 3568 * packet cannot be found). 3569 */ 3570 3571 if (pruned && offset != 0) { 3572 mutex_exit(&ipfb->ipfb_lock); 3573 freemsg(mp); 3574 return (NULL); 3575 } 3576 3577 /* New guy. Allocate a frag message. */ 3578 mp1 = allocb(sizeof (*ipf), BPRI_MED); 3579 if (!mp1) { 3580 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3581 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3582 freemsg(mp); 3583 partial_reass_done: 3584 mutex_exit(&ipfb->ipfb_lock); 3585 return (NULL); 3586 } 3587 3588 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 3589 /* 3590 * Too many fragmented packets in this hash bucket. 3591 * Free the oldest. 3592 */ 3593 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 3594 } 3595 3596 mp1->b_cont = mp; 3597 3598 /* Initialize the fragment header. */ 3599 ipf = (ipf_t *)mp1->b_rptr; 3600 ipf->ipf_mp = mp1; 3601 ipf->ipf_ptphn = ipfp; 3602 ipfp[0] = ipf; 3603 ipf->ipf_hash_next = NULL; 3604 ipf->ipf_ident = ident; 3605 ipf->ipf_v6src = *v6src_ptr; 3606 ipf->ipf_v6dst = *v6dst_ptr; 3607 /* Record reassembly start time. */ 3608 ipf->ipf_timestamp = gethrestime_sec(); 3609 /* Record ipf generation and account for frag header */ 3610 ipf->ipf_gen = ill->ill_ipf_gen++; 3611 ipf->ipf_count = MBLKSIZE(mp1); 3612 ipf->ipf_protocol = nexthdr; 3613 ipf->ipf_nf_hdr_len = 0; 3614 ipf->ipf_prev_nexthdr_offset = 0; 3615 ipf->ipf_last_frag_seen = B_FALSE; 3616 ipf->ipf_ecn = ecn_info; 3617 ipf->ipf_num_dups = 0; 3618 ipfb->ipfb_frag_pkts++; 3619 ipf->ipf_checksum = 0; 3620 ipf->ipf_checksum_flags = 0; 3621 3622 /* Store checksum value in fragment header */ 3623 if (sum_flags != 0) { 3624 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3625 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3626 ipf->ipf_checksum = sum_val; 3627 ipf->ipf_checksum_flags = sum_flags; 3628 } 3629 3630 /* 3631 * We handle reassembly two ways. 
In the easy case, 3632 * where all the fragments show up in order, we do 3633 * minimal bookkeeping, and just clip new pieces on 3634 * the end. If we ever see a hole, then we go off 3635 * to ip_reassemble which has to mark the pieces and 3636 * keep track of the number of holes, etc. Obviously, 3637 * the point of having both mechanisms is so we can 3638 * handle the easy case as efficiently as possible. 3639 */ 3640 if (offset == 0) { 3641 /* Easy case, in-order reassembly so far. */ 3642 /* Update the byte count */ 3643 ipf->ipf_count += msg_len; 3644 ipf->ipf_tail_mp = tail_mp; 3645 /* 3646 * Keep track of next expected offset in 3647 * ipf_end. 3648 */ 3649 ipf->ipf_end = end; 3650 ipf->ipf_nf_hdr_len = hdr_length; 3651 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset; 3652 } else { 3653 /* Hard case, hole at the beginning. */ 3654 ipf->ipf_tail_mp = NULL; 3655 /* 3656 * ipf_end == 0 means that we have given up 3657 * on easy reassembly. 3658 */ 3659 ipf->ipf_end = 0; 3660 3661 /* Forget checksum offload from now on */ 3662 ipf->ipf_checksum_flags = 0; 3663 3664 /* 3665 * ipf_hole_cnt is set by ip_reassemble. 3666 * ipf_count is updated by ip_reassemble. 3667 * No need to check for return value here 3668 * as we don't expect reassembly to complete or 3669 * fail for the first fragment itself. 3670 */ 3671 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 3672 msg_len); 3673 } 3674 /* Update per ipfb and ill byte counts */ 3675 ipfb->ipfb_count += ipf->ipf_count; 3676 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3677 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 3678 /* If the frag timer wasn't already going, start it. 
*/ 3679 mutex_enter(&ill->ill_lock); 3680 ill_frag_timer_start(ill); 3681 mutex_exit(&ill->ill_lock); 3682 goto partial_reass_done; 3683 } 3684 3685 /* 3686 * If the packet's flag has changed (it could be coming up 3687 * from an interface different than the previous, therefore 3688 * possibly different checksum capability), then forget about 3689 * any stored checksum states. Otherwise add the value to 3690 * the existing one stored in the fragment header. 3691 */ 3692 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 3693 sum_val += ipf->ipf_checksum; 3694 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3695 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3696 ipf->ipf_checksum = sum_val; 3697 } else if (ipf->ipf_checksum_flags != 0) { 3698 /* Forget checksum offload from now on */ 3699 ipf->ipf_checksum_flags = 0; 3700 } 3701 3702 /* 3703 * We have a new piece of a datagram which is already being 3704 * reassembled. Update the ECN info if all IP fragments 3705 * are ECN capable. If there is one which is not, clear 3706 * all the info. If there is at least one which has CE 3707 * code point, IP needs to report that up to transport. 3708 */ 3709 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 3710 if (ecn_info == IPH_ECN_CE) 3711 ipf->ipf_ecn = IPH_ECN_CE; 3712 } else { 3713 ipf->ipf_ecn = IPH_ECN_NECT; 3714 } 3715 3716 if (offset && ipf->ipf_end == offset) { 3717 /* The new fragment fits at the end */ 3718 ipf->ipf_tail_mp->b_cont = mp; 3719 /* Update the byte count */ 3720 ipf->ipf_count += msg_len; 3721 /* Update per ipfb and ill byte counts */ 3722 ipfb->ipfb_count += msg_len; 3723 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3724 atomic_add_32(&ill->ill_frag_count, msg_len); 3725 if (more_frags) { 3726 /* More to come. */ 3727 ipf->ipf_end = end; 3728 ipf->ipf_tail_mp = tail_mp; 3729 goto partial_reass_done; 3730 } 3731 } else { 3732 /* 3733 * Go do the hard cases. 3734 * Call ip_reassemble(). 
3735 */ 3736 int ret; 3737 3738 if (offset == 0) { 3739 if (ipf->ipf_prev_nexthdr_offset == 0) { 3740 ipf->ipf_nf_hdr_len = hdr_length; 3741 ipf->ipf_prev_nexthdr_offset = 3742 prev_nexthdr_offset; 3743 } 3744 } 3745 /* Save current byte count */ 3746 count = ipf->ipf_count; 3747 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 3748 3749 /* Count of bytes added and subtracted (freeb()ed) */ 3750 count = ipf->ipf_count - count; 3751 if (count) { 3752 /* Update per ipfb and ill byte counts */ 3753 ipfb->ipfb_count += count; 3754 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3755 atomic_add_32(&ill->ill_frag_count, count); 3756 } 3757 if (ret == IP_REASS_PARTIAL) { 3758 goto partial_reass_done; 3759 } else if (ret == IP_REASS_FAILED) { 3760 /* Reassembly failed. Free up all resources */ 3761 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3762 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 3763 IP_REASS_SET_START(t_mp, 0); 3764 IP_REASS_SET_END(t_mp, 0); 3765 } 3766 freemsg(mp); 3767 goto partial_reass_done; 3768 } 3769 3770 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 3771 } 3772 /* 3773 * We have completed reassembly. Unhook the frag header from 3774 * the reassembly list. 3775 * 3776 * Grab the unfragmentable header length next header value out 3777 * of the first fragment 3778 */ 3779 ASSERT(ipf->ipf_nf_hdr_len != 0); 3780 hdr_length = ipf->ipf_nf_hdr_len; 3781 3782 /* 3783 * Before we free the frag header, record the ECN info 3784 * to report back to the transport. 
3785 */ 3786 ecn_info = ipf->ipf_ecn; 3787 3788 /* 3789 * Store the nextheader field in the header preceding the fragment 3790 * header 3791 */ 3792 nexthdr = ipf->ipf_protocol; 3793 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 3794 ipfp = ipf->ipf_ptphn; 3795 3796 /* We need to supply these to caller */ 3797 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 3798 sum_val = ipf->ipf_checksum; 3799 else 3800 sum_val = 0; 3801 3802 mp1 = ipf->ipf_mp; 3803 count = ipf->ipf_count; 3804 ipf = ipf->ipf_hash_next; 3805 if (ipf) 3806 ipf->ipf_ptphn = ipfp; 3807 ipfp[0] = ipf; 3808 atomic_add_32(&ill->ill_frag_count, -count); 3809 ASSERT(ipfb->ipfb_count >= count); 3810 ipfb->ipfb_count -= count; 3811 ipfb->ipfb_frag_pkts--; 3812 mutex_exit(&ipfb->ipfb_lock); 3813 /* Ditch the frag header. */ 3814 mp = mp1->b_cont; 3815 freeb(mp1); 3816 3817 /* 3818 * Make sure the packet is good by doing some sanity 3819 * check. If bad we can silentely drop the packet. 3820 */ 3821 reass_done: 3822 if (hdr_length < sizeof (ip6_frag_t)) { 3823 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3824 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3825 ip1dbg(("ip_input_fragment_v6: bad packet\n")); 3826 freemsg(mp); 3827 return (NULL); 3828 } 3829 3830 /* 3831 * Remove the fragment header from the initial header by 3832 * splitting the mblk into the non-fragmentable header and 3833 * everthing after the fragment extension header. This has the 3834 * side effect of putting all the headers that need destination 3835 * processing into the b_cont block-- on return this fact is 3836 * used in order to avoid having to look at the extensions 3837 * already processed. 3838 * 3839 * Note that this code assumes that the unfragmentable portion 3840 * of the header is in the first mblk and increments 3841 * the read pointer past it. If this assumption is broken 3842 * this code fails badly. 
3843 */ 3844 if (mp->b_rptr + hdr_length != mp->b_wptr) { 3845 mblk_t *nmp; 3846 3847 if (!(nmp = dupb(mp))) { 3848 ip1dbg(("ip_input_fragment_v6: dupb failed\n")); 3849 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3850 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3851 freemsg(mp); 3852 return (NULL); 3853 } 3854 nmp->b_cont = mp->b_cont; 3855 mp->b_cont = nmp; 3856 nmp->b_rptr += hdr_length; 3857 } 3858 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 3859 3860 ip6h = (ip6_t *)mp->b_rptr; 3861 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr; 3862 3863 /* Restore original IP length in header. */ 3864 packet_size = msgdsize(mp); 3865 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN)); 3866 /* Record the ECN info. */ 3867 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 3868 ip6h->ip6_vcf |= htonl(ecn_info << 20); 3869 3870 /* Update the receive attributes */ 3871 ira->ira_pktlen = packet_size; 3872 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t); 3873 ira->ira_protocol = nexthdr; 3874 3875 /* Reassembly is successful; set checksum information in packet */ 3876 DB_CKSUM16(mp) = (uint16_t)sum_val; 3877 DB_CKSUMFLAGS(mp) = sum_flags; 3878 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length; 3879 3880 return (mp); 3881 } 3882 3883 /* 3884 * Given an mblk and a ptr, find the destination address in an IPv6 routing 3885 * header. 
3886 */ 3887 static in6_addr_t 3888 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3889 { 3890 ip6_rthdr0_t *rt0; 3891 int segleft, numaddr; 3892 in6_addr_t *ap, rv = oldrv; 3893 3894 rt0 = (ip6_rthdr0_t *)whereptr; 3895 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3896 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3897 uint8_t *, whereptr); 3898 return (rv); 3899 } 3900 segleft = rt0->ip6r0_segleft; 3901 numaddr = rt0->ip6r0_len / 2; 3902 3903 if ((rt0->ip6r0_len & 0x1) || 3904 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3905 (segleft > rt0->ip6r0_len / 2)) { 3906 /* 3907 * Corrupt packet. Either the routing header length is odd 3908 * (can't happen) or mismatched compared to the packet, or the 3909 * number of addresses is. Return what we can. This will 3910 * only be a problem on forwarded packets that get squeezed 3911 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3912 */ 3913 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3914 whereptr); 3915 return (rv); 3916 } 3917 3918 if (segleft != 0) { 3919 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3920 rv = ap[numaddr - 1]; 3921 } 3922 3923 return (rv); 3924 } 3925 3926 /* 3927 * Walk through the options to see if there is a routing header. 3928 * If present get the destination which is the last address of 3929 * the option. 3930 * mp needs to be provided in cases when the extension headers might span 3931 * b_cont; mp is never modified by this function. 3932 */ 3933 in6_addr_t 3934 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3935 { 3936 const mblk_t *current_mp = mp; 3937 uint8_t nexthdr; 3938 uint8_t *whereptr; 3939 int ehdrlen; 3940 in6_addr_t rv; 3941 3942 whereptr = (uint8_t *)ip6h; 3943 ehdrlen = sizeof (ip6_t); 3944 3945 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3946 ASSERT(mp == NULL || 3947 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3948 3949 rv = ip6h->ip6_dst; 3950 nexthdr = ip6h->ip6_nxt; 3951 if (is_fragment != NULL) 3952 *is_fragment = B_FALSE; 3953 3954 /* 3955 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3956 * no extension headers will be split across mblks. 3957 */ 3958 3959 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3960 nexthdr == IPPROTO_ROUTING) { 3961 if (nexthdr == IPPROTO_ROUTING) 3962 rv = pluck_out_dst(current_mp, whereptr, rv); 3963 3964 /* 3965 * All IPv6 extension headers have the next-header in byte 3966 * 0, and the (length - 8) in 8-byte-words. 3967 */ 3968 while (current_mp != NULL && 3969 whereptr + ehdrlen >= current_mp->b_wptr) { 3970 ehdrlen -= (current_mp->b_wptr - whereptr); 3971 current_mp = current_mp->b_cont; 3972 if (current_mp == NULL) { 3973 /* Bad packet. Return what we can. */ 3974 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3975 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3976 goto done; 3977 } 3978 whereptr = current_mp->b_rptr; 3979 } 3980 whereptr += ehdrlen; 3981 3982 nexthdr = *whereptr; 3983 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3984 ehdrlen = (*(whereptr + 1) + 1) * 8; 3985 } 3986 3987 done: 3988 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3989 *is_fragment = B_TRUE; 3990 return (rv); 3991 } 3992 3993 /* 3994 * ip_source_routed_v6: 3995 * This function is called by redirect code (called from ip_input_v6) to 3996 * know whether this packet is source routed through this node i.e 3997 * whether this node (router) is part of the journey. This 3998 * function is called under two cases : 3999 * 4000 * case 1 : Routing header was processed by this node and 4001 * ip_process_rthdr replaced ip6_dst with the next hop 4002 * and we are forwarding the packet to the next hop. 
4003 * 4004 * case 2 : Routing header was not processed by this node and we 4005 * are just forwarding the packet. 4006 * 4007 * For case (1) we don't want to send redirects. For case(2) we 4008 * want to send redirects. 4009 */ 4010 static boolean_t 4011 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 4012 { 4013 uint8_t nexthdr; 4014 in6_addr_t *addrptr; 4015 ip6_rthdr0_t *rthdr; 4016 uint8_t numaddr; 4017 ip6_hbh_t *hbhhdr; 4018 uint_t ehdrlen; 4019 uint8_t *byteptr; 4020 4021 ip2dbg(("ip_source_routed_v6\n")); 4022 nexthdr = ip6h->ip6_nxt; 4023 ehdrlen = IPV6_HDR_LEN; 4024 4025 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 4026 while (nexthdr == IPPROTO_HOPOPTS || 4027 nexthdr == IPPROTO_DSTOPTS) { 4028 byteptr = (uint8_t *)ip6h + ehdrlen; 4029 /* 4030 * Check if we have already processed 4031 * packets or we are just a forwarding 4032 * router which only pulled up msgs up 4033 * to IPV6HDR and one HBH ext header 4034 */ 4035 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4036 ip2dbg(("ip_source_routed_v6: Extension" 4037 " headers not processed\n")); 4038 return (B_FALSE); 4039 } 4040 hbhhdr = (ip6_hbh_t *)byteptr; 4041 nexthdr = hbhhdr->ip6h_nxt; 4042 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 4043 } 4044 switch (nexthdr) { 4045 case IPPROTO_ROUTING: 4046 byteptr = (uint8_t *)ip6h + ehdrlen; 4047 /* 4048 * If for some reason, we haven't pulled up 4049 * the routing hdr data mblk, then we must 4050 * not have processed it at all. So for sure 4051 * we are not part of the source routed journey. 4052 */ 4053 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4054 ip2dbg(("ip_source_routed_v6: Routing" 4055 " header not processed\n")); 4056 return (B_FALSE); 4057 } 4058 rthdr = (ip6_rthdr0_t *)byteptr; 4059 /* 4060 * Either we are an intermediate router or the 4061 * last hop before destination and we have 4062 * already processed the routing header. 
4063 * If segment_left is greater than or equal to zero, 4064 * then we must be the (numaddr - segleft) entry 4065 * of the routing header. Although ip6r0_segleft 4066 * is a unit8_t variable, we still check for zero 4067 * or greater value, if in case the data type 4068 * is changed someday in future. 4069 */ 4070 if (rthdr->ip6r0_segleft > 0 || 4071 rthdr->ip6r0_segleft == 0) { 4072 numaddr = rthdr->ip6r0_len / 2; 4073 addrptr = (in6_addr_t *)((char *)rthdr + 4074 sizeof (*rthdr)); 4075 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 4076 if (addrptr != NULL) { 4077 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 4078 return (B_TRUE); 4079 ip1dbg(("ip_source_routed_v6: Not local\n")); 4080 } 4081 } 4082 /* FALLTHRU */ 4083 default: 4084 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 4085 return (B_FALSE); 4086 } 4087 } 4088 4089 /* 4090 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 4091 * We have not optimized this in terms of number of mblks 4092 * allocated. For instance, for each fragment sent we always allocate a 4093 * mblk to hold the IPv6 header and fragment header. 4094 * 4095 * Assumes that all the extension headers are contained in the first mblk 4096 * and that the fragment header has has already been added by calling 4097 * ip_fraghdr_add_v6. 
4098 */ 4099 int 4100 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4101 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4102 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4103 { 4104 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4105 ip6_t *fip6h; 4106 mblk_t *hmp; 4107 mblk_t *hmp0; 4108 mblk_t *dmp; 4109 ip6_frag_t *fraghdr; 4110 size_t unfragmentable_len; 4111 size_t mlen; 4112 size_t max_chunk; 4113 uint16_t off_flags; 4114 uint16_t offset = 0; 4115 ill_t *ill = nce->nce_ill; 4116 uint8_t nexthdr; 4117 uint8_t *ptr; 4118 ip_stack_t *ipst = ill->ill_ipst; 4119 uint_t priority = mp->b_band; 4120 int error = 0; 4121 4122 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4123 if (max_frag == 0) { 4124 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4125 ip_drop_output("FragFails: zero max_frag", mp, ill); 4126 freemsg(mp); 4127 return (EINVAL); 4128 } 4129 4130 /* 4131 * Caller should have added fraghdr_t to pkt_len, and also 4132 * updated ip6_plen. 4133 */ 4134 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4135 ASSERT(msgdsize(mp) == pkt_len); 4136 4137 /* 4138 * Determine the length of the unfragmentable portion of this 4139 * datagram. This consists of the IPv6 header, a potential 4140 * hop-by-hop options header, a potential pre-routing-header 4141 * destination options header, and a potential routing header. 
4142 */ 4143 nexthdr = ip6h->ip6_nxt; 4144 ptr = (uint8_t *)&ip6h[1]; 4145 4146 if (nexthdr == IPPROTO_HOPOPTS) { 4147 ip6_hbh_t *hbh_hdr; 4148 uint_t hdr_len; 4149 4150 hbh_hdr = (ip6_hbh_t *)ptr; 4151 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4152 nexthdr = hbh_hdr->ip6h_nxt; 4153 ptr += hdr_len; 4154 } 4155 if (nexthdr == IPPROTO_DSTOPTS) { 4156 ip6_dest_t *dest_hdr; 4157 uint_t hdr_len; 4158 4159 dest_hdr = (ip6_dest_t *)ptr; 4160 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4161 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4162 nexthdr = dest_hdr->ip6d_nxt; 4163 ptr += hdr_len; 4164 } 4165 } 4166 if (nexthdr == IPPROTO_ROUTING) { 4167 ip6_rthdr_t *rthdr; 4168 uint_t hdr_len; 4169 4170 rthdr = (ip6_rthdr_t *)ptr; 4171 nexthdr = rthdr->ip6r_nxt; 4172 hdr_len = 8 * (rthdr->ip6r_len + 1); 4173 ptr += hdr_len; 4174 } 4175 if (nexthdr != IPPROTO_FRAGMENT) { 4176 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4177 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4178 freemsg(mp); 4179 return (EINVAL); 4180 } 4181 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4182 unfragmentable_len += sizeof (ip6_frag_t); 4183 4184 max_chunk = (max_frag - unfragmentable_len) & ~7; 4185 4186 /* 4187 * Allocate an mblk with enough room for the link-layer 4188 * header and the unfragmentable part of the datagram, which includes 4189 * the fragment header. This (or a copy) will be used as the 4190 * first mblk for each fragment we send. 4191 */ 4192 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4193 if (hmp == NULL) { 4194 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4195 ip_drop_output("FragFails: no hmp", mp, ill); 4196 freemsg(mp); 4197 return (ENOBUFS); 4198 } 4199 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4200 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4201 4202 fip6h = (ip6_t *)hmp->b_rptr; 4203 bcopy(ip6h, fip6h, unfragmentable_len); 4204 4205 /* 4206 * pkt_len is set to the total length of the fragmentable data in this 4207 * datagram. 
For each fragment sent, we will decrement pkt_len 4208 * by the amount of fragmentable data sent in that fragment 4209 * until len reaches zero. 4210 */ 4211 pkt_len -= unfragmentable_len; 4212 4213 /* 4214 * Move read ptr past unfragmentable portion, we don't want this part 4215 * of the data in our fragments. 4216 */ 4217 mp->b_rptr += unfragmentable_len; 4218 if (mp->b_rptr == mp->b_wptr) { 4219 mblk_t *mp1 = mp->b_cont; 4220 freeb(mp); 4221 mp = mp1; 4222 } 4223 4224 while (pkt_len != 0) { 4225 mlen = MIN(pkt_len, max_chunk); 4226 pkt_len -= mlen; 4227 if (pkt_len != 0) { 4228 /* Not last */ 4229 hmp0 = copyb(hmp); 4230 if (hmp0 == NULL) { 4231 BUMP_MIB(ill->ill_ip_mib, 4232 ipIfStatsOutFragFails); 4233 ip_drop_output("FragFails: copyb failed", 4234 mp, ill); 4235 freeb(hmp); 4236 freemsg(mp); 4237 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4238 return (ENOBUFS); 4239 } 4240 off_flags = IP6F_MORE_FRAG; 4241 } else { 4242 /* Last fragment */ 4243 hmp0 = hmp; 4244 hmp = NULL; 4245 off_flags = 0; 4246 } 4247 fip6h = (ip6_t *)(hmp0->b_rptr); 4248 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4249 sizeof (ip6_frag_t)); 4250 4251 fip6h->ip6_plen = htons((uint16_t)(mlen + 4252 unfragmentable_len - IPV6_HDR_LEN)); 4253 /* 4254 * Note: Optimization alert. 4255 * In IPv6 (and IPv4) protocol header, Fragment Offset 4256 * ("offset") is 13 bits wide and in 8-octet units. 4257 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4258 * it occupies the most significant 13 bits. 4259 * (least significant 13 bits in IPv4). 4260 * We do not do any shifts here. Not shifting is same effect 4261 * as taking offset value in octet units, dividing by 8 and 4262 * then shifting 3 bits left to line it up in place in proper 4263 * place protocol header. 
4264 */ 4265 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4266 4267 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4268 /* mp has already been freed by ip_carve_mp() */ 4269 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4270 ip_drop_output("FragFails: could not carve mp", 4271 hmp0, ill); 4272 if (hmp != NULL) 4273 freeb(hmp); 4274 freeb(hmp0); 4275 ip1dbg(("ip_carve_mp: failed\n")); 4276 return (ENOBUFS); 4277 } 4278 hmp0->b_cont = dmp; 4279 /* Get the priority marking, if any */ 4280 hmp0->b_band = priority; 4281 4282 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4283 4284 error = postfragfn(hmp0, nce, ixaflags, 4285 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4286 ixa_cookie); 4287 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4288 /* No point in sending the other fragments */ 4289 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4290 ip_drop_output("FragFails: postfragfn failed", 4291 hmp, ill); 4292 freeb(hmp); 4293 freemsg(mp); 4294 return (error); 4295 } 4296 /* No need to redo state machine in loop */ 4297 ixaflags &= ~IXAF_REACH_CONF; 4298 4299 offset += mlen; 4300 } 4301 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4302 return (error); 4303 } 4304 4305 /* 4306 * Add a fragment header to an IPv6 packet. 4307 * Assumes that all the extension headers are contained in the first mblk. 4308 * 4309 * The fragment header is inserted after an hop-by-hop options header 4310 * and after [an optional destinations header followed by] a routing header. 4311 */ 4312 mblk_t * 4313 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4314 { 4315 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4316 ip6_t *fip6h; 4317 mblk_t *hmp; 4318 ip6_frag_t *fraghdr; 4319 size_t unfragmentable_len; 4320 uint8_t nexthdr; 4321 uint_t prev_nexthdr_offset; 4322 uint8_t *ptr; 4323 uint_t priority = mp->b_band; 4324 ip_stack_t *ipst = ixa->ixa_ipst; 4325 4326 /* 4327 * Determine the length of the unfragmentable portion of this 4328 * datagram. 
This consists of the IPv6 header, a potential 4329 * hop-by-hop options header, a potential pre-routing-header 4330 * destination options header, and a potential routing header. 4331 */ 4332 nexthdr = ip6h->ip6_nxt; 4333 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4334 ptr = (uint8_t *)&ip6h[1]; 4335 4336 if (nexthdr == IPPROTO_HOPOPTS) { 4337 ip6_hbh_t *hbh_hdr; 4338 uint_t hdr_len; 4339 4340 hbh_hdr = (ip6_hbh_t *)ptr; 4341 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4342 nexthdr = hbh_hdr->ip6h_nxt; 4343 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4344 - (uint8_t *)ip6h; 4345 ptr += hdr_len; 4346 } 4347 if (nexthdr == IPPROTO_DSTOPTS) { 4348 ip6_dest_t *dest_hdr; 4349 uint_t hdr_len; 4350 4351 dest_hdr = (ip6_dest_t *)ptr; 4352 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4353 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4354 nexthdr = dest_hdr->ip6d_nxt; 4355 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4356 - (uint8_t *)ip6h; 4357 ptr += hdr_len; 4358 } 4359 } 4360 if (nexthdr == IPPROTO_ROUTING) { 4361 ip6_rthdr_t *rthdr; 4362 uint_t hdr_len; 4363 4364 rthdr = (ip6_rthdr_t *)ptr; 4365 nexthdr = rthdr->ip6r_nxt; 4366 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4367 - (uint8_t *)ip6h; 4368 hdr_len = 8 * (rthdr->ip6r_len + 1); 4369 ptr += hdr_len; 4370 } 4371 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4372 4373 /* 4374 * Allocate an mblk with enough room for the link-layer 4375 * header, the unfragmentable part of the datagram, and the 4376 * fragment header. 
4377 */ 4378 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4379 ipst->ips_ip_wroff_extra, mp); 4380 if (hmp == NULL) { 4381 ill_t *ill = ixa->ixa_nce->nce_ill; 4382 4383 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4384 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4385 freemsg(mp); 4386 return (NULL); 4387 } 4388 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4389 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4390 4391 fip6h = (ip6_t *)hmp->b_rptr; 4392 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4393 4394 bcopy(ip6h, fip6h, unfragmentable_len); 4395 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4396 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4397 4398 fraghdr->ip6f_nxt = nexthdr; 4399 fraghdr->ip6f_reserved = 0; 4400 fraghdr->ip6f_offlg = 0; 4401 fraghdr->ip6f_ident = htonl(ident); 4402 4403 /* Get the priority marking, if any */ 4404 hmp->b_band = priority; 4405 4406 /* 4407 * Move read ptr past unfragmentable portion, we don't want this part 4408 * of the data in our fragments. 4409 */ 4410 mp->b_rptr += unfragmentable_len; 4411 hmp->b_cont = mp; 4412 return (hmp); 4413 } 4414 4415 /* 4416 * Determine if the ill and multicast aspects of that packets 4417 * "matches" the conn. 4418 */ 4419 boolean_t 4420 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4421 { 4422 ill_t *ill = ira->ira_rill; 4423 zoneid_t zoneid = ira->ira_zoneid; 4424 uint_t in_ifindex; 4425 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4426 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4427 4428 /* 4429 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4430 * scopeid. This is used to limit 4431 * unicast and multicast reception to conn_incoming_ifindex. 4432 * conn_wantpacket_v6 is called both for unicast and 4433 * multicast packets. 
4434 */ 4435 in_ifindex = connp->conn_incoming_ifindex; 4436 4437 /* mpathd can bind to the under IPMP interface, which we allow */ 4438 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4439 if (!IS_UNDER_IPMP(ill)) 4440 return (B_FALSE); 4441 4442 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4443 return (B_FALSE); 4444 } 4445 4446 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4447 return (B_FALSE); 4448 4449 if (!(ira->ira_flags & IRAF_MULTICAST)) 4450 return (B_TRUE); 4451 4452 if (connp->conn_multi_router) 4453 return (B_TRUE); 4454 4455 if (ira->ira_protocol == IPPROTO_RSVP) 4456 return (B_TRUE); 4457 4458 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4459 ira->ira_ill)); 4460 } 4461 4462 /* 4463 * pr_addr_dbg function provides the needed buffer space to call 4464 * inet_ntop() function's 3rd argument. This function should be 4465 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4466 * stack buffer space in it's own stack frame. This function uses 4467 * a buffer from it's own stack and prints the information. 4468 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4469 * 4470 * Note: This function can call inet_ntop() once. 4471 */ 4472 void 4473 pr_addr_dbg(char *fmt1, int af, const void *addr) 4474 { 4475 char buf[INET6_ADDRSTRLEN]; 4476 4477 if (fmt1 == NULL) { 4478 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4479 return; 4480 } 4481 4482 /* 4483 * This does not compare debug level and just prints 4484 * out. Thus it is the responsibility of the caller 4485 * to check the appropriate debug-level before calling 4486 * this function. 4487 */ 4488 if (ip_debug > 0) { 4489 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4490 } 4491 4492 4493 } 4494 4495 4496 /* 4497 * Return the length in bytes of the IPv6 headers (base header 4498 * extension headers) that will be needed based on the 4499 * ip_pkt_t structure passed by the caller. 
4500 * 4501 * The returned length does not include the length of the upper level 4502 * protocol (ULP) header. 4503 */ 4504 int 4505 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4506 { 4507 int len; 4508 4509 len = IPV6_HDR_LEN; 4510 4511 /* 4512 * If there's a security label here, then we ignore any hop-by-hop 4513 * options the user may try to set. 4514 */ 4515 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4516 uint_t hopoptslen; 4517 /* 4518 * Note that ipp_label_len_v6 is just the option - not 4519 * the hopopts extension header. It also needs to be padded 4520 * to a multiple of 8 bytes. 4521 */ 4522 ASSERT(ipp->ipp_label_len_v6 != 0); 4523 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4524 hopoptslen = (hopoptslen + 7)/8 * 8; 4525 len += hopoptslen; 4526 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4527 ASSERT(ipp->ipp_hopoptslen != 0); 4528 len += ipp->ipp_hopoptslen; 4529 } 4530 4531 /* 4532 * En-route destination options 4533 * Only do them if there's a routing header as well 4534 */ 4535 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4536 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4537 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4538 len += ipp->ipp_rthdrdstoptslen; 4539 } 4540 if (ipp->ipp_fields & IPPF_RTHDR) { 4541 ASSERT(ipp->ipp_rthdrlen != 0); 4542 len += ipp->ipp_rthdrlen; 4543 } 4544 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4545 ASSERT(ipp->ipp_dstoptslen != 0); 4546 len += ipp->ipp_dstoptslen; 4547 } 4548 return (len); 4549 } 4550 4551 /* 4552 * All-purpose routine to build a header chain of an IPv6 header 4553 * followed by any required extension headers and a proto header. 4554 * 4555 * The caller has to set the source and destination address as well as 4556 * ip6_plen. The caller has to massage any routing header and compensate 4557 * for the ULP pseudo-header checksum due to the source route. 4558 * 4559 * The extension headers will all be fully filled in. 
4560 */ 4561 void 4562 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4563 uint8_t protocol, uint32_t flowinfo) 4564 { 4565 uint8_t *nxthdr_ptr; 4566 uint8_t *cp; 4567 ip6_t *ip6h = (ip6_t *)buf; 4568 4569 /* Initialize IPv6 header */ 4570 ip6h->ip6_vcf = 4571 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4572 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4573 4574 if (ipp->ipp_fields & IPPF_TCLASS) { 4575 /* Overrides the class part of flowinfo */ 4576 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4577 ipp->ipp_tclass); 4578 } 4579 4580 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4581 ip6h->ip6_hops = ipp->ipp_hoplimit; 4582 else 4583 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4584 4585 if ((ipp->ipp_fields & IPPF_ADDR) && 4586 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4587 ip6h->ip6_src = ipp->ipp_addr; 4588 4589 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4590 cp = (uint8_t *)&ip6h[1]; 4591 /* 4592 * Here's where we have to start stringing together 4593 * any extension headers in the right order: 4594 * Hop-by-hop, destination, routing, and final destination opts. 4595 */ 4596 /* 4597 * If there's a security label here, then we ignore any hop-by-hop 4598 * options the user may try to set. 4599 */ 4600 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4601 /* 4602 * Hop-by-hop options with the label. 4603 * Note that ipp_label_v6 is just the option - not 4604 * the hopopts extension header. It also needs to be padded 4605 * to a multiple of 8 bytes. 
4606 */ 4607 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4608 uint_t hopoptslen; 4609 uint_t padlen; 4610 4611 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4612 hopoptslen = (padlen + 7)/8 * 8; 4613 padlen = hopoptslen - padlen; 4614 4615 *nxthdr_ptr = IPPROTO_HOPOPTS; 4616 nxthdr_ptr = &hbh->ip6h_nxt; 4617 hbh->ip6h_len = hopoptslen/8 - 1; 4618 cp += sizeof (ip6_hbh_t); 4619 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4620 cp += ipp->ipp_label_len_v6; 4621 4622 ASSERT(padlen <= 7); 4623 switch (padlen) { 4624 case 0: 4625 break; 4626 case 1: 4627 cp[0] = IP6OPT_PAD1; 4628 break; 4629 default: 4630 cp[0] = IP6OPT_PADN; 4631 cp[1] = padlen - 2; 4632 bzero(&cp[2], padlen - 2); 4633 break; 4634 } 4635 cp += padlen; 4636 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4637 /* Hop-by-hop options */ 4638 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4639 4640 *nxthdr_ptr = IPPROTO_HOPOPTS; 4641 nxthdr_ptr = &hbh->ip6h_nxt; 4642 4643 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4644 cp += ipp->ipp_hopoptslen; 4645 } 4646 /* 4647 * En-route destination options 4648 * Only do them if there's a routing header as well 4649 */ 4650 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4651 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4652 ip6_dest_t *dst = (ip6_dest_t *)cp; 4653 4654 *nxthdr_ptr = IPPROTO_DSTOPTS; 4655 nxthdr_ptr = &dst->ip6d_nxt; 4656 4657 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4658 cp += ipp->ipp_rthdrdstoptslen; 4659 } 4660 /* 4661 * Routing header next 4662 */ 4663 if (ipp->ipp_fields & IPPF_RTHDR) { 4664 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4665 4666 *nxthdr_ptr = IPPROTO_ROUTING; 4667 nxthdr_ptr = &rt->ip6r_nxt; 4668 4669 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4670 cp += ipp->ipp_rthdrlen; 4671 } 4672 /* 4673 * Do ultimate destination options 4674 */ 4675 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4676 ip6_dest_t *dest = (ip6_dest_t *)cp; 4677 4678 *nxthdr_ptr = IPPROTO_DSTOPTS; 4679 nxthdr_ptr = &dest->ip6d_nxt; 4680 4681 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4682 cp += ipp->ipp_dstoptslen; 4683 } 4684 /* 4685 * Now set the last header pointer to the proto passed in 4686 */ 4687 *nxthdr_ptr = protocol; 4688 ASSERT((int)(cp - buf) == buf_len); 4689 } 4690 4691 /* 4692 * Return a pointer to the routing header extension header 4693 * in the IPv6 header(s) chain passed in. 4694 * If none found, return NULL 4695 * Assumes that all extension headers are in same mblk as the v6 header 4696 */ 4697 ip6_rthdr_t * 4698 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4699 { 4700 ip6_dest_t *desthdr; 4701 ip6_frag_t *fraghdr; 4702 uint_t hdrlen; 4703 uint8_t nexthdr; 4704 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4705 4706 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4707 return ((ip6_rthdr_t *)ptr); 4708 4709 /* 4710 * The routing header will precede all extension headers 4711 * other than the hop-by-hop and destination options 4712 * extension headers, so if we see anything other than those, 4713 * we're done and didn't find it. 4714 * We could see a destination options header alone but no 4715 * routing header, in which case we'll return NULL as soon as 4716 * we see anything after that. 4717 * Hop-by-hop and destination option headers are identical, 4718 * so we can use either one we want as a template. 4719 */ 4720 nexthdr = ip6h->ip6_nxt; 4721 while (ptr < endptr) { 4722 /* Is there enough left for len + nexthdr? 
*/ 4723 if (ptr + MIN_EHDR_LEN > endptr) 4724 return (NULL); 4725 4726 switch (nexthdr) { 4727 case IPPROTO_HOPOPTS: 4728 case IPPROTO_DSTOPTS: 4729 /* Assumes the headers are identical for hbh and dst */ 4730 desthdr = (ip6_dest_t *)ptr; 4731 hdrlen = 8 * (desthdr->ip6d_len + 1); 4732 nexthdr = desthdr->ip6d_nxt; 4733 break; 4734 4735 case IPPROTO_ROUTING: 4736 return ((ip6_rthdr_t *)ptr); 4737 4738 case IPPROTO_FRAGMENT: 4739 fraghdr = (ip6_frag_t *)ptr; 4740 hdrlen = sizeof (ip6_frag_t); 4741 nexthdr = fraghdr->ip6f_nxt; 4742 break; 4743 4744 default: 4745 return (NULL); 4746 } 4747 ptr += hdrlen; 4748 } 4749 return (NULL); 4750 } 4751 4752 /* 4753 * Called for source-routed packets originating on this node. 4754 * Manipulates the original routing header by moving every entry up 4755 * one slot, placing the first entry in the v6 header's v6_dst field, 4756 * and placing the ultimate destination in the routing header's last 4757 * slot. 4758 * 4759 * Returns the checksum diference between the ultimate destination 4760 * (last hop in the routing header when the packet is sent) and 4761 * the first hop (ip6_dst when the packet is sent) 4762 */ 4763 /* ARGSUSED2 */ 4764 uint32_t 4765 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4766 { 4767 uint_t numaddr; 4768 uint_t i; 4769 in6_addr_t *addrptr; 4770 in6_addr_t tmp; 4771 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4772 uint32_t cksm; 4773 uint32_t addrsum = 0; 4774 uint16_t *ptr; 4775 4776 /* 4777 * Perform any processing needed for source routing. 4778 * We know that all extension headers will be in the same mblk 4779 * as the IPv6 header. 4780 */ 4781 4782 /* 4783 * If no segments left in header, or the header length field is zero, 4784 * don't move hop addresses around; 4785 * Checksum difference is zero. 
4786 */ 4787 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4788 return (0); 4789 4790 ptr = (uint16_t *)&ip6h->ip6_dst; 4791 cksm = 0; 4792 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4793 cksm += ptr[i]; 4794 } 4795 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4796 4797 /* 4798 * Here's where the fun begins - we have to 4799 * move all addresses up one spot, take the 4800 * first hop and make it our first ip6_dst, 4801 * and place the ultimate destination in the 4802 * newly-opened last slot. 4803 */ 4804 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4805 numaddr = rthdr->ip6r0_len / 2; 4806 tmp = *addrptr; 4807 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4808 *addrptr = addrptr[1]; 4809 } 4810 *addrptr = ip6h->ip6_dst; 4811 ip6h->ip6_dst = tmp; 4812 4813 /* 4814 * From the checksummed ultimate destination subtract the checksummed 4815 * current ip6_dst (the first hop address). Return that number. 4816 * (In the v4 case, the second part of this is done in each routine 4817 * that calls ip_massage_options(). We do it all in this one place 4818 * for v6). 
4819 */ 4820 ptr = (uint16_t *)&ip6h->ip6_dst; 4821 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4822 addrsum += ptr[i]; 4823 } 4824 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4825 if ((int)cksm < 0) 4826 cksm--; 4827 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4828 4829 return (cksm); 4830 } 4831 4832 void 4833 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4834 { 4835 kstat_t *ksp; 4836 4837 ip6_stat_t template = { 4838 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4839 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4840 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4841 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4842 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4843 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4844 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4845 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4846 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4847 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4848 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4849 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4850 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4851 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4852 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4853 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4854 }; 4855 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4856 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4857 KSTAT_FLAG_VIRTUAL, stackid); 4858 4859 if (ksp == NULL) 4860 return (NULL); 4861 4862 bcopy(&template, ip6_statisticsp, sizeof (template)); 4863 ksp->ks_data = (void *)ip6_statisticsp; 4864 ksp->ks_private = (void *)(uintptr_t)stackid; 4865 4866 kstat_install(ksp); 4867 return (ksp); 4868 } 4869 4870 void 4871 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4872 { 4873 if (ksp != NULL) { 4874 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4875 kstat_delete_netstack(ksp, stackid); 4876 } 4877 } 4878 4879 /* 4880 * The following two functions set and get 
the value for the 4881 * IPV6_SRC_PREFERENCES socket option. 4882 */ 4883 int 4884 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4885 { 4886 /* 4887 * We only support preferences that are covered by 4888 * IPV6_PREFER_SRC_MASK. 4889 */ 4890 if (prefs & ~IPV6_PREFER_SRC_MASK) 4891 return (EINVAL); 4892 4893 /* 4894 * Look for conflicting preferences or default preferences. If 4895 * both bits of a related pair are clear, the application wants the 4896 * system's default value for that pair. Both bits in a pair can't 4897 * be set. 4898 */ 4899 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4900 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4901 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4902 IPV6_PREFER_SRC_MIPMASK) { 4903 return (EINVAL); 4904 } 4905 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4906 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4907 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4908 IPV6_PREFER_SRC_TMPMASK) { 4909 return (EINVAL); 4910 } 4911 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4912 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4913 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4914 IPV6_PREFER_SRC_CGAMASK) { 4915 return (EINVAL); 4916 } 4917 4918 ixa->ixa_src_preferences = prefs; 4919 return (0); 4920 } 4921 4922 size_t 4923 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4924 { 4925 *val = ixa->ixa_src_preferences; 4926 return (sizeof (ixa->ixa_src_preferences)); 4927 } 4928 4929 /* 4930 * Get the size of the IP options (including the IP headers size) 4931 * without including the AH header's size. If till_ah is B_FALSE, 4932 * and if AH header is present, dest options beyond AH header will 4933 * also be included in the returned size. 
4934 */ 4935 int 4936 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4937 { 4938 ip6_t *ip6h; 4939 uint8_t nexthdr; 4940 uint8_t *whereptr; 4941 ip6_hbh_t *hbhhdr; 4942 ip6_dest_t *dsthdr; 4943 ip6_rthdr_t *rthdr; 4944 int ehdrlen; 4945 int size; 4946 ah_t *ah; 4947 4948 ip6h = (ip6_t *)mp->b_rptr; 4949 size = IPV6_HDR_LEN; 4950 nexthdr = ip6h->ip6_nxt; 4951 whereptr = (uint8_t *)&ip6h[1]; 4952 for (;;) { 4953 /* Assume IP has already stripped it */ 4954 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4955 switch (nexthdr) { 4956 case IPPROTO_HOPOPTS: 4957 hbhhdr = (ip6_hbh_t *)whereptr; 4958 nexthdr = hbhhdr->ip6h_nxt; 4959 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4960 break; 4961 case IPPROTO_DSTOPTS: 4962 dsthdr = (ip6_dest_t *)whereptr; 4963 nexthdr = dsthdr->ip6d_nxt; 4964 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4965 break; 4966 case IPPROTO_ROUTING: 4967 rthdr = (ip6_rthdr_t *)whereptr; 4968 nexthdr = rthdr->ip6r_nxt; 4969 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4970 break; 4971 default : 4972 if (till_ah) { 4973 ASSERT(nexthdr == IPPROTO_AH); 4974 return (size); 4975 } 4976 /* 4977 * If we don't have a AH header to traverse, 4978 * return now. This happens normally for 4979 * outbound datagrams where we have not inserted 4980 * the AH header. 4981 */ 4982 if (nexthdr != IPPROTO_AH) { 4983 return (size); 4984 } 4985 4986 /* 4987 * We don't include the AH header's size 4988 * to be symmetrical with other cases where 4989 * we either don't have a AH header (outbound) 4990 * or peek into the AH header yet (inbound and 4991 * not pulled up yet). 4992 */ 4993 ah = (ah_t *)whereptr; 4994 nexthdr = ah->ah_nexthdr; 4995 ehdrlen = (ah->ah_length << 2) + 8; 4996 4997 if (nexthdr == IPPROTO_DSTOPTS) { 4998 if (whereptr + ehdrlen >= mp->b_wptr) { 4999 /* 5000 * The destination options header 5001 * is not part of the first mblk. 
5002 */ 5003 whereptr = mp->b_cont->b_rptr; 5004 } else { 5005 whereptr += ehdrlen; 5006 } 5007 5008 dsthdr = (ip6_dest_t *)whereptr; 5009 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 5010 size += ehdrlen; 5011 } 5012 return (size); 5013 } 5014 whereptr += ehdrlen; 5015 size += ehdrlen; 5016 } 5017 } 5018 5019 /* 5020 * Utility routine that checks if `v6srcp' is a valid address on underlying 5021 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 5022 * associated with `v6srcp' on success. NOTE: if this is not called from 5023 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 5024 * group during or after this lookup. 5025 */ 5026 boolean_t 5027 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 5028 { 5029 ipif_t *ipif; 5030 5031 5032 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 5033 if (ipif != NULL) { 5034 if (ipifp != NULL) 5035 *ipifp = ipif; 5036 else 5037 ipif_refrele(ipif); 5038 return (B_TRUE); 5039 } 5040 5041 if (ip_debug > 2) { 5042 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 5043 "src %s\n", AF_INET6, v6srcp); 5044 } 5045 return (B_FALSE); 5046 } 5047