1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/policy.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/iptun/iptun_impl.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/rawip_impl.h> 98 #include <inet/rts_impl.h> 99 #include <sys/squeue_impl.h> 100 #include <sys/squeue.h> 101 102 #include <sys/tsol/label.h> 103 #include <sys/tsol/tnet.h> 104 105 /* Temporary; for CR 6451644 
work-around */ 106 #include <sys/ethernet.h> 107 108 /* 109 * Naming conventions: 110 * These rules should be judiciously applied 111 * if there is a need to identify something as IPv6 versus IPv4 112 * IPv6 functions will end with _v6 in the ip module. 113 * IPv6 functions will end with _ipv6 in the transport modules. 114 * IPv6 macros: 115 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 116 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 117 * And then there are ..V4_PART_OF_V6. 118 * The intent is that macros in the ip module end with _V6. 119 * IPv6 global variables will start with ipv6_ 120 * IPv6 structures will start with ipv6 121 * IPv6 defined constants should start with IPV6_ 122 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 123 */ 124 125 /* 126 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 127 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 128 * from IANA. This mechanism will remain in effect until an official 129 * number is obtained. 
130 */ 131 uchar_t ip6opt_ls; 132 133 const in6_addr_t ipv6_all_ones = 134 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 135 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 136 137 #ifdef _BIG_ENDIAN 138 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 139 #else /* _BIG_ENDIAN */ 140 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 141 #endif /* _BIG_ENDIAN */ 142 143 #ifdef _BIG_ENDIAN 144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 145 #else /* _BIG_ENDIAN */ 146 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 147 #endif /* _BIG_ENDIAN */ 148 149 #ifdef _BIG_ENDIAN 150 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 151 #else /* _BIG_ENDIAN */ 152 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 153 #endif /* _BIG_ENDIAN */ 154 155 #ifdef _BIG_ENDIAN 156 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 157 #else /* _BIG_ENDIAN */ 158 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 159 #endif /* _BIG_ENDIAN */ 160 161 #ifdef _BIG_ENDIAN 162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 163 #else /* _BIG_ENDIAN */ 164 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 165 #endif /* _BIG_ENDIAN */ 166 167 #ifdef _BIG_ENDIAN 168 const in6_addr_t ipv6_solicited_node_mcast = 169 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 170 #else /* _BIG_ENDIAN */ 171 const in6_addr_t ipv6_solicited_node_mcast = 172 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 173 #endif /* _BIG_ENDIAN */ 174 175 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *); 176 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *); 177 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *, 178 ip_recv_attr_t *); 179 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *, 180 ip_recv_attr_t *); 181 static void 
icmp_send_redirect_v6(mblk_t *, in6_addr_t *, 182 in6_addr_t *, ip_recv_attr_t *); 183 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *, 184 ip_recv_attr_t *); 185 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 186 187 /* 188 * icmp_inbound_v6 deals with ICMP messages that are handled by IP. 189 * If the ICMP message is consumed by IP, i.e., it should not be delivered 190 * to any IPPROTO_ICMP raw sockets, then it returns NULL. 191 * Likewise, if the ICMP error is misformed (too short, etc), then it 192 * returns NULL. The caller uses this to determine whether or not to send 193 * to raw sockets. 194 * 195 * All error messages are passed to the matching transport stream. 196 * 197 * See comment for icmp_inbound_v4() on how IPsec is handled. 198 */ 199 mblk_t * 200 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira) 201 { 202 icmp6_t *icmp6; 203 ip6_t *ip6h; /* Outer header */ 204 int ip_hdr_length; /* Outer header length */ 205 boolean_t interested; 206 ill_t *ill = ira->ira_ill; 207 ip_stack_t *ipst = ill->ill_ipst; 208 mblk_t *mp_ret = NULL; 209 210 ip6h = (ip6_t *)mp->b_rptr; 211 212 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 213 214 /* Make sure ira_l2src is set for ndp_input */ 215 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 216 ip_setl2src(mp, ira, ira->ira_rill); 217 218 ip_hdr_length = ira->ira_ip_hdr_length; 219 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 220 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 221 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 222 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 223 freemsg(mp); 224 return (NULL); 225 } 226 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 227 if (ip6h == NULL) { 228 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 229 freemsg(mp); 230 return (NULL); 231 } 232 } 233 234 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 235 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6); 236 
ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 237 icmp6->icmp6_code)); 238 239 /* 240 * We will set "interested" to "true" if we should pass a copy to 241 * the transport i.e., if it is an error message. 242 */ 243 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 244 245 switch (icmp6->icmp6_type) { 246 case ICMP6_DST_UNREACH: 247 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 248 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 249 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 250 break; 251 252 case ICMP6_TIME_EXCEEDED: 253 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 254 break; 255 256 case ICMP6_PARAM_PROB: 257 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 258 break; 259 260 case ICMP6_PACKET_TOO_BIG: 261 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs); 262 break; 263 264 case ICMP6_ECHO_REQUEST: 265 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 266 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 267 !ipst->ips_ipv6_resp_echo_mcast) 268 break; 269 270 /* 271 * We must have exclusive use of the mblk to convert it to 272 * a response. 273 * If not, we copy it. 
274 */ 275 if (mp->b_datap->db_ref > 1) { 276 mblk_t *mp1; 277 278 mp1 = copymsg(mp); 279 if (mp1 == NULL) { 280 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 281 ip_drop_input("ipIfStatsInDiscards - copymsg", 282 mp, ill); 283 freemsg(mp); 284 return (NULL); 285 } 286 freemsg(mp); 287 mp = mp1; 288 ip6h = (ip6_t *)mp->b_rptr; 289 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 290 } 291 292 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 293 icmp_send_reply_v6(mp, ip6h, icmp6, ira); 294 return (NULL); 295 296 case ICMP6_ECHO_REPLY: 297 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 298 break; 299 300 case ND_ROUTER_SOLICIT: 301 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 302 break; 303 304 case ND_ROUTER_ADVERT: 305 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 306 break; 307 308 case ND_NEIGHBOR_SOLICIT: 309 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 310 ndp_input(mp, ira); 311 return (NULL); 312 313 case ND_NEIGHBOR_ADVERT: 314 BUMP_MIB(ill->ill_icmp6_mib, 315 ipv6IfIcmpInNeighborAdvertisements); 316 ndp_input(mp, ira); 317 return (NULL); 318 319 case ND_REDIRECT: 320 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 321 322 if (ipst->ips_ipv6_ignore_redirect) 323 break; 324 325 /* We now allow a RAW socket to receive this. */ 326 interested = B_TRUE; 327 break; 328 329 /* 330 * The next three icmp messages will be handled by MLD. 331 * Pass all valid MLD packets up to any process(es) 332 * listening on a raw ICMP socket. 333 */ 334 case MLD_LISTENER_QUERY: 335 case MLD_LISTENER_REPORT: 336 case MLD_LISTENER_REDUCTION: 337 mp = mld_input(mp, ira); 338 return (mp); 339 default: 340 break; 341 } 342 /* 343 * See if there is an ICMP client to avoid an extra copymsg/freemsg 344 * if there isn't one. 345 */ 346 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) { 347 /* If there is an ICMP client and we want one too, copy it. 
*/ 348 349 if (!interested) { 350 /* Caller will deliver to RAW sockets */ 351 return (mp); 352 } 353 mp_ret = copymsg(mp); 354 if (mp_ret == NULL) { 355 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 356 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 357 } 358 } else if (!interested) { 359 /* Neither we nor raw sockets are interested. Drop packet now */ 360 freemsg(mp); 361 return (NULL); 362 } 363 364 /* 365 * ICMP error or redirect packet. Make sure we have enough of 366 * the header and that db_ref == 1 since we might end up modifying 367 * the packet. 368 */ 369 if (mp->b_cont != NULL) { 370 if (ip_pullup(mp, -1, ira) == NULL) { 371 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 372 ip_drop_input("ipIfStatsInDiscards - ip_pullup", 373 mp, ill); 374 freemsg(mp); 375 return (mp_ret); 376 } 377 } 378 379 if (mp->b_datap->db_ref > 1) { 380 mblk_t *mp1; 381 382 mp1 = copymsg(mp); 383 if (mp1 == NULL) { 384 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 385 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 386 freemsg(mp); 387 return (mp_ret); 388 } 389 freemsg(mp); 390 mp = mp1; 391 } 392 393 /* 394 * In case mp has changed, verify the message before any further 395 * processes. 396 */ 397 ip6h = (ip6_t *)mp->b_rptr; 398 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 399 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 400 freemsg(mp); 401 return (mp_ret); 402 } 403 404 switch (icmp6->icmp6_type) { 405 case ND_REDIRECT: 406 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira); 407 break; 408 case ICMP6_PACKET_TOO_BIG: 409 /* Update DCE and adjust MTU is icmp header if needed */ 410 icmp_inbound_too_big_v6(icmp6, ira); 411 /* FALLTHRU */ 412 default: 413 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 414 break; 415 } 416 417 return (mp_ret); 418 } 419 420 /* 421 * Send an ICMP echo reply. 422 * The caller has already updated the payload part of the packet. 
423 * We handle the ICMP checksum, IP source address selection and feed 424 * the packet into ip_output_simple. 425 */ 426 static void 427 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 428 ip_recv_attr_t *ira) 429 { 430 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 431 ill_t *ill = ira->ira_ill; 432 ip_stack_t *ipst = ill->ill_ipst; 433 ip_xmit_attr_t ixas; 434 in6_addr_t origsrc; 435 436 /* 437 * Remove any extension headers (do not reverse a source route) 438 * and clear the flow id (keep traffic class for now). 439 */ 440 if (ip_hdr_length != IPV6_HDR_LEN) { 441 int i; 442 443 for (i = 0; i < IPV6_HDR_LEN; i++) { 444 mp->b_rptr[ip_hdr_length - i - 1] = 445 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 446 } 447 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 448 ip6h = (ip6_t *)mp->b_rptr; 449 ip6h->ip6_nxt = IPPROTO_ICMPV6; 450 i = ntohs(ip6h->ip6_plen); 451 i -= (ip_hdr_length - IPV6_HDR_LEN); 452 ip6h->ip6_plen = htons(i); 453 ip_hdr_length = IPV6_HDR_LEN; 454 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 455 } 456 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 457 458 /* Reverse the source and destination addresses. */ 459 origsrc = ip6h->ip6_src; 460 ip6h->ip6_src = ip6h->ip6_dst; 461 ip6h->ip6_dst = origsrc; 462 463 /* set the hop limit */ 464 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 465 466 /* 467 * Prepare for checksum by putting icmp length in the icmp 468 * checksum field. 
The checksum is calculated in ip_output 469 */ 470 icmp6->icmp6_cksum = ip6h->ip6_plen; 471 472 bzero(&ixas, sizeof (ixas)); 473 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 474 ixas.ixa_zoneid = ira->ira_zoneid; 475 ixas.ixa_cred = kcred; 476 ixas.ixa_cpid = NOPID; 477 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 478 ixas.ixa_ifindex = 0; 479 ixas.ixa_ipst = ipst; 480 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 481 482 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 483 /* 484 * This packet should go out the same way as it 485 * came in i.e in clear, independent of the IPsec 486 * policy for transmitting packets. 487 */ 488 ixas.ixa_flags |= IXAF_NO_IPSEC; 489 } else { 490 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 491 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 492 /* Note: mp already consumed and ip_drop_packet done */ 493 return; 494 } 495 } 496 497 /* Was the destination (now source) link-local? Send out same group */ 498 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 499 ixas.ixa_flags |= IXAF_SCOPEID_SET; 500 if (IS_UNDER_IPMP(ill)) 501 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 502 else 503 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 504 } 505 506 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 507 /* 508 * Not one or our addresses (IRE_LOCALs), thus we let 509 * ip_output_simple pick the source. 510 */ 511 ip6h->ip6_src = ipv6_all_zeros; 512 ixas.ixa_flags |= IXAF_SET_SOURCE; 513 } 514 515 /* Should we send using dce_pmtu? */ 516 if (ipst->ips_ipv6_icmp_return_pmtu) 517 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 518 519 (void) ip_output_simple(mp, &ixas); 520 ixa_cleanup(&ixas); 521 522 } 523 524 /* 525 * Verify the ICMP messages for either for ICMP error or redirect packet. 526 * The caller should have fully pulled up the message. If it's a redirect 527 * packet, only basic checks on IP header will be done; otherwise, verify 528 * the packet by looking at the included ULP header. 
529 * 530 * Called before icmp_inbound_error_fanout_v6 is called. 531 */ 532 static boolean_t 533 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 534 { 535 ill_t *ill = ira->ira_ill; 536 uint16_t hdr_length; 537 uint8_t *nexthdrp; 538 uint8_t nexthdr; 539 ip_stack_t *ipst = ill->ill_ipst; 540 conn_t *connp; 541 ip6_t *ip6h; /* Inner header */ 542 543 ip6h = (ip6_t *)&icmp6[1]; 544 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 545 goto truncated; 546 547 if (icmp6->icmp6_type == ND_REDIRECT) { 548 hdr_length = sizeof (nd_redirect_t); 549 } else { 550 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 551 goto discard_pkt; 552 hdr_length = IPV6_HDR_LEN; 553 } 554 555 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 556 goto truncated; 557 558 /* 559 * Stop here for ICMP_REDIRECT. 560 */ 561 if (icmp6->icmp6_type == ND_REDIRECT) 562 return (B_TRUE); 563 564 /* 565 * ICMP errors only. 566 */ 567 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 568 goto discard_pkt; 569 nexthdr = *nexthdrp; 570 571 /* Try to pass the ICMP message to clients who need it */ 572 switch (nexthdr) { 573 case IPPROTO_UDP: 574 /* 575 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 576 * transport header. 577 */ 578 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 579 mp->b_wptr) 580 goto truncated; 581 break; 582 case IPPROTO_TCP: { 583 tcpha_t *tcpha; 584 585 /* 586 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 587 * transport header. 588 */ 589 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 590 mp->b_wptr) 591 goto truncated; 592 593 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 594 /* 595 * With IPMP we need to match across group, which we do 596 * since we have the upper ill from ira_ill. 
597 */ 598 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 599 ill->ill_phyint->phyint_ifindex, ipst); 600 if (connp == NULL) 601 goto discard_pkt; 602 603 if ((connp->conn_verifyicmp != NULL) && 604 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 605 CONN_DEC_REF(connp); 606 goto discard_pkt; 607 } 608 CONN_DEC_REF(connp); 609 break; 610 } 611 case IPPROTO_SCTP: 612 /* 613 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 614 * transport header. 615 */ 616 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 617 mp->b_wptr) 618 goto truncated; 619 break; 620 case IPPROTO_ESP: 621 case IPPROTO_AH: 622 break; 623 case IPPROTO_ENCAP: 624 case IPPROTO_IPV6: { 625 /* Look for self-encapsulated packets that caused an error */ 626 ip6_t *in_ip6h; 627 628 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 629 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 630 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 631 goto truncated; 632 break; 633 } 634 default: 635 break; 636 } 637 638 return (B_TRUE); 639 640 discard_pkt: 641 /* Bogus ICMP error. */ 642 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 643 return (B_FALSE); 644 645 truncated: 646 /* We pulled up everthing already. Must be truncated */ 647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 648 return (B_FALSE); 649 } 650 651 /* 652 * Process received IPv6 ICMP Packet too big. 653 * The caller is responsible for validating the packet before passing it in 654 * and also to fanout the ICMP error to any matching transport conns. Assumes 655 * the message has been fully pulled up. 656 * 657 * Before getting here, the caller has called icmp_inbound_verify_v6() 658 * that should have verified with ULP to prevent undoing the changes we're 659 * going to make to DCE. For example, TCP might have verified that the packet 660 * which generated error is in the send window. 
661 * 662 * In some cases modified this MTU in the ICMP header packet; the caller 663 * should pass to the matching ULP after this returns. 664 */ 665 static void 666 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira) 667 { 668 uint32_t mtu; 669 dce_t *dce; 670 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */ 671 ip_stack_t *ipst = ill->ill_ipst; 672 int old_max_frag; 673 in6_addr_t final_dst; 674 ip6_t *ip6h; /* Inner IP header */ 675 676 /* Caller has already pulled up everything. */ 677 ip6h = (ip6_t *)&icmp6[1]; 678 final_dst = ip_get_dst_v6(ip6h, NULL, NULL); 679 680 /* 681 * For link local destinations matching simply on address is not 682 * sufficient. Same link local addresses for different ILL's is 683 * possible. 684 */ 685 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) { 686 dce = dce_lookup_and_add_v6(&final_dst, 687 ill->ill_phyint->phyint_ifindex, ipst); 688 } else { 689 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst); 690 } 691 if (dce == NULL) { 692 /* Couldn't add a unique one - ENOMEM */ 693 if (ip_debug > 2) { 694 /* ip1dbg */ 695 pr_addr_dbg("icmp_inbound_too_big_v6:" 696 "no dce for dst %s\n", AF_INET6, 697 &final_dst); 698 } 699 return; 700 } 701 702 mtu = ntohl(icmp6->icmp6_mtu); 703 704 mutex_enter(&dce->dce_lock); 705 if (dce->dce_flags & DCEF_PMTU) 706 old_max_frag = dce->dce_pmtu; 707 else 708 old_max_frag = ill->ill_mtu; 709 710 if (mtu < IPV6_MIN_MTU) { 711 ip1dbg(("Received mtu less than IPv6 " 712 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 713 mtu = IPV6_MIN_MTU; 714 /* 715 * If an mtu less than IPv6 min mtu is received, 716 * we must include a fragment header in 717 * subsequent packets. 718 */ 719 dce->dce_flags |= DCEF_TOO_SMALL_PMTU; 720 } else { 721 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU; 722 } 723 ip1dbg(("Received mtu from router: %d\n", mtu)); 724 dce->dce_pmtu = MIN(old_max_frag, mtu); 725 726 /* Prepare to send the new max frag size for the ULP. 
*/ 727 if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) { 728 /* 729 * If we need a fragment header in every packet 730 * (above case or multirouting), make sure the 731 * ULP takes it into account when computing the 732 * payload size. 733 */ 734 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t)); 735 } else { 736 icmp6->icmp6_mtu = htonl(dce->dce_pmtu); 737 } 738 /* We now have a PMTU for sure */ 739 dce->dce_flags |= DCEF_PMTU; 740 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 741 mutex_exit(&dce->dce_lock); 742 /* 743 * After dropping the lock the new value is visible to everyone. 744 * Then we bump the generation number so any cached values reinspect 745 * the dce_t. 746 */ 747 dce_increment_generation(dce); 748 dce_refrele(dce); 749 } 750 751 /* 752 * Fanout received ICMPv6 error packets to the transports. 753 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 754 * 755 * The caller must have called icmp_inbound_verify_v6. 756 */ 757 void 758 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 759 { 760 uint16_t *up; /* Pointer to ports in ULP header */ 761 uint32_t ports; /* reversed ports for fanout */ 762 ip6_t rip6h; /* With reversed addresses */ 763 ip6_t *ip6h; /* Inner IP header */ 764 uint16_t hdr_length; /* Inner IP header length */ 765 uint8_t *nexthdrp; 766 uint8_t nexthdr; 767 tcpha_t *tcpha; 768 conn_t *connp; 769 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */ 770 ip_stack_t *ipst = ill->ill_ipst; 771 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 772 773 /* Caller has already pulled up everything. 
*/ 774 ip6h = (ip6_t *)&icmp6[1]; 775 ASSERT(mp->b_cont == NULL); 776 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 777 778 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 779 goto drop_pkt; 780 nexthdr = *nexthdrp; 781 ira->ira_protocol = nexthdr; 782 783 /* 784 * We need a separate IP header with the source and destination 785 * addresses reversed to do fanout/classification because the ip6h in 786 * the ICMPv6 error is in the form we sent it out. 787 */ 788 rip6h.ip6_src = ip6h->ip6_dst; 789 rip6h.ip6_dst = ip6h->ip6_src; 790 rip6h.ip6_nxt = nexthdr; 791 792 /* Try to pass the ICMP message to clients who need it */ 793 switch (nexthdr) { 794 case IPPROTO_UDP: { 795 /* Attempt to find a client stream based on port. */ 796 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 797 798 /* Note that we send error to all matches. */ 799 ira->ira_flags |= IRAF_ICMP_ERROR; 800 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira); 801 ira->ira_flags &= ~IRAF_ICMP_ERROR; 802 return; 803 } 804 case IPPROTO_TCP: { 805 /* 806 * Attempt to find a client stream based on port. 807 * Note that we do a reverse lookup since the header is 808 * in the form we sent it out. 809 */ 810 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 811 /* 812 * With IPMP we need to match across group, which we do 813 * since we have the upper ill from ira_ill. 
814 */ 815 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 816 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 817 if (connp == NULL) { 818 goto drop_pkt; 819 } 820 821 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 822 (ira->ira_flags & IRAF_IPSEC_SECURE)) { 823 mp = ipsec_check_inbound_policy(mp, connp, 824 NULL, ip6h, ira); 825 if (mp == NULL) { 826 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 827 /* Note that mp is NULL */ 828 ip_drop_input("ipIfStatsInDiscards", mp, ill); 829 CONN_DEC_REF(connp); 830 return; 831 } 832 } 833 834 ira->ira_flags |= IRAF_ICMP_ERROR; 835 if (IPCL_IS_TCP(connp)) { 836 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 837 connp->conn_recvicmp, connp, ira, SQ_FILL, 838 SQTAG_TCP6_INPUT_ICMP_ERR); 839 } else { 840 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 841 ill_t *rill = ira->ira_rill; 842 843 ira->ira_ill = ira->ira_rill = NULL; 844 (connp->conn_recv)(connp, mp, NULL, ira); 845 CONN_DEC_REF(connp); 846 ira->ira_ill = ill; 847 ira->ira_rill = rill; 848 } 849 ira->ira_flags &= ~IRAF_ICMP_ERROR; 850 return; 851 852 } 853 case IPPROTO_SCTP: 854 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 855 /* Find a SCTP client stream for this packet. */ 856 ((uint16_t *)&ports)[0] = up[1]; 857 ((uint16_t *)&ports)[1] = up[0]; 858 859 ira->ira_flags |= IRAF_ICMP_ERROR; 860 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira); 861 ira->ira_flags &= ~IRAF_ICMP_ERROR; 862 return; 863 864 case IPPROTO_ESP: 865 case IPPROTO_AH: 866 if (!ipsec_loaded(ipss)) { 867 ip_proto_not_sup(mp, ira); 868 return; 869 } 870 871 if (nexthdr == IPPROTO_ESP) 872 mp = ipsecesp_icmp_error(mp, ira); 873 else 874 mp = ipsecah_icmp_error(mp, ira); 875 if (mp == NULL) 876 return; 877 878 /* Just in case ipsec didn't preserve the NULL b_cont */ 879 if (mp->b_cont != NULL) { 880 if (!pullupmsg(mp, -1)) 881 goto drop_pkt; 882 } 883 884 /* 885 * If succesful, the mp has been modified to not include 886 * the ESP/AH header so we can fanout to the ULP's icmp 887 * error handler. 
888 */ 889 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN) 890 goto drop_pkt; 891 892 ip6h = (ip6_t *)mp->b_rptr; 893 /* Don't call hdr_length_v6() unless you have to. */ 894 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 895 hdr_length = ip_hdr_length_v6(mp, ip6h); 896 else 897 hdr_length = IPV6_HDR_LEN; 898 899 /* Verify the modified message before any further processes. */ 900 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 901 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 902 freemsg(mp); 903 return; 904 } 905 906 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 907 return; 908 909 case IPPROTO_IPV6: { 910 /* Look for self-encapsulated packets that caused an error */ 911 ip6_t *in_ip6h; 912 913 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 914 915 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) && 916 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) { 917 /* 918 * Self-encapsulated case. As in the ipv4 case, 919 * we need to strip the 2nd IP header. Since mp 920 * is already pulled-up, we can simply bcopy 921 * the 3rd header + data over the 2nd header. 922 */ 923 uint16_t unused_len; 924 925 /* 926 * Make sure we don't do recursion more than once. 927 */ 928 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h, 929 &unused_len, &nexthdrp) || 930 *nexthdrp == IPPROTO_IPV6) { 931 goto drop_pkt; 932 } 933 934 /* 935 * Copy the 3rd header + remaining data on top 936 * of the 2nd header. 937 */ 938 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h); 939 940 /* 941 * Subtract length of the 2nd header. 942 */ 943 mp->b_wptr -= hdr_length; 944 945 ip6h = (ip6_t *)mp->b_rptr; 946 /* Don't call hdr_length_v6() unless you have to. */ 947 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 948 hdr_length = ip_hdr_length_v6(mp, ip6h); 949 else 950 hdr_length = IPV6_HDR_LEN; 951 952 /* 953 * Verify the modified message before any further 954 * processes. 
955 */ 956 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 957 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 958 freemsg(mp); 959 return; 960 } 961 962 /* 963 * Now recurse, and see what I _really_ should be 964 * doing here. 965 */ 966 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 967 return; 968 } 969 /* FALLTHRU */ 970 } 971 case IPPROTO_ENCAP: 972 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src, 973 &rip6h.ip6_dst, ipst)) != NULL) { 974 ira->ira_flags |= IRAF_ICMP_ERROR; 975 connp->conn_recvicmp(connp, mp, NULL, ira); 976 CONN_DEC_REF(connp); 977 ira->ira_flags &= ~IRAF_ICMP_ERROR; 978 return; 979 } 980 /* 981 * No IP tunnel is interested, fallthrough and see 982 * if a raw socket will want it. 983 */ 984 /* FALLTHRU */ 985 default: 986 ira->ira_flags |= IRAF_ICMP_ERROR; 987 ASSERT(ira->ira_protocol == nexthdr); 988 ip_fanout_proto_v6(mp, &rip6h, ira); 989 ira->ira_flags &= ~IRAF_ICMP_ERROR; 990 return; 991 } 992 /* NOTREACHED */ 993 drop_pkt: 994 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 995 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 996 freemsg(mp); 997 } 998 999 /* 1000 * Process received IPv6 ICMP Redirect messages. 1001 * Assumes the caller has verified that the headers are in the pulled up mblk. 1002 * Consumes mp. 
1003 */ 1004 /* ARGSUSED */ 1005 static void 1006 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd, 1007 ip_recv_attr_t *ira) 1008 { 1009 ire_t *ire, *nire; 1010 ire_t *prev_ire = NULL; 1011 ire_t *redir_ire; 1012 in6_addr_t *src, *dst, *gateway; 1013 nd_opt_hdr_t *opt; 1014 nce_t *nce; 1015 int ncec_flags = 0; 1016 int err = 0; 1017 boolean_t redirect_to_router = B_FALSE; 1018 int len; 1019 int optlen; 1020 ill_t *ill = ira->ira_rill; 1021 ill_t *rill = ira->ira_rill; 1022 ip_stack_t *ipst = ill->ill_ipst; 1023 1024 /* 1025 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 1026 * and make it be the IPMP upper so avoid being confused by a packet 1027 * addressed to a unicast address on a different ill. 1028 */ 1029 if (IS_UNDER_IPMP(rill)) { 1030 rill = ipmp_ill_hold_ipmp_ill(rill); 1031 if (rill == NULL) { 1032 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1033 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill", 1034 mp, ill); 1035 freemsg(mp); 1036 return; 1037 } 1038 ASSERT(rill != ira->ira_rill); 1039 } 1040 1041 len = mp->b_wptr - (uchar_t *)rd; 1042 src = &ip6h->ip6_src; 1043 dst = &rd->nd_rd_dst; 1044 gateway = &rd->nd_rd_target; 1045 1046 /* Verify if it is a valid redirect */ 1047 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1048 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1049 (rd->nd_rd_code != 0) || 1050 (len < sizeof (nd_redirect_t)) || 1051 (IN6_IS_ADDR_V4MAPPED(dst)) || 1052 (IN6_IS_ADDR_MULTICAST(dst))) { 1053 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1054 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill); 1055 goto fail_redirect; 1056 } 1057 1058 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1059 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1060 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1061 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway", 1062 mp, ill); 1063 goto fail_redirect; 1064 } 1065 1066 optlen = len - sizeof (nd_redirect_t); 1067 if (optlen != 0) { 1068 if 
(!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) { 1069 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1070 ip_drop_input("ipv6IfIcmpInBadRedirects - options", 1071 mp, ill); 1072 goto fail_redirect; 1073 } 1074 } 1075 1076 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1077 redirect_to_router = B_TRUE; 1078 ncec_flags |= NCE_F_ISROUTER; 1079 } else { 1080 gateway = dst; /* Add nce for dst */ 1081 } 1082 1083 1084 /* 1085 * Verify that the IP source address of the redirect is 1086 * the same as the current first-hop router for the specified 1087 * ICMP destination address. 1088 * Also, Make sure we had a route for the dest in question and 1089 * that route was pointing to the old gateway (the source of the 1090 * redirect packet.) 1091 * We do longest match and then compare ire_gateway_addr_v6 below. 1092 */ 1093 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill, 1094 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL); 1095 1096 /* 1097 * Check that 1098 * the redirect was not from ourselves 1099 * old gateway is still directly reachable 1100 */ 1101 if (prev_ire == NULL || 1102 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) || 1103 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 1104 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) { 1105 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1106 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill); 1107 goto fail_redirect; 1108 } 1109 1110 ASSERT(prev_ire->ire_ill != NULL); 1111 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD) 1112 ncec_flags |= NCE_F_NONUD; 1113 1114 opt = (nd_opt_hdr_t *)&rd[1]; 1115 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1116 if (opt != NULL) { 1117 err = nce_lookup_then_add_v6(rill, 1118 (uchar_t *)&opt[1], /* Link layer address */ 1119 rill->ill_phys_addr_length, 1120 gateway, ncec_flags, ND_STALE, &nce); 1121 switch (err) { 1122 case 0: 1123 nce_refrele(nce); 1124 break; 1125 case EEXIST: 1126 /* 1127 * Check to see if link layer address has 
changed and 1128 * process the ncec_state accordingly. 1129 */ 1130 nce_process(nce->nce_common, 1131 (uchar_t *)&opt[1], 0, B_FALSE); 1132 nce_refrele(nce); 1133 break; 1134 default: 1135 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1136 err)); 1137 goto fail_redirect; 1138 } 1139 } 1140 if (redirect_to_router) { 1141 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1142 1143 /* 1144 * Create a Route Association. This will allow us to remember 1145 * a router told us to use the particular gateway. 1146 */ 1147 ire = ire_create_v6( 1148 dst, 1149 &ipv6_all_ones, /* mask */ 1150 gateway, /* gateway addr */ 1151 IRE_HOST, 1152 prev_ire->ire_ill, 1153 ALL_ZONES, 1154 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1155 NULL, 1156 ipst); 1157 } else { 1158 ipif_t *ipif; 1159 in6_addr_t gw; 1160 1161 /* 1162 * Just create an on link entry, i.e. interface route. 1163 * The gateway field is our link-local on the ill. 1164 */ 1165 mutex_enter(&rill->ill_lock); 1166 for (ipif = rill->ill_ipif; ipif != NULL; 1167 ipif = ipif->ipif_next) { 1168 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1169 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr)) 1170 break; 1171 } 1172 if (ipif == NULL) { 1173 /* We have no link-local address! 
*/ 1174 mutex_exit(&rill->ill_lock); 1175 goto fail_redirect; 1176 } 1177 gw = ipif->ipif_v6lcl_addr; 1178 mutex_exit(&rill->ill_lock); 1179 1180 ire = ire_create_v6( 1181 dst, /* gateway == dst */ 1182 &ipv6_all_ones, /* mask */ 1183 &gw, /* gateway addr */ 1184 rill->ill_net_type, /* IF_[NO]RESOLVER */ 1185 prev_ire->ire_ill, 1186 ALL_ZONES, 1187 (RTF_DYNAMIC | RTF_HOST), 1188 NULL, 1189 ipst); 1190 } 1191 1192 if (ire == NULL) 1193 goto fail_redirect; 1194 1195 nire = ire_add(ire); 1196 /* Check if it was a duplicate entry */ 1197 if (nire != NULL && nire != ire) { 1198 ASSERT(nire->ire_identical_ref > 1); 1199 ire_delete(nire); 1200 ire_refrele(nire); 1201 nire = NULL; 1202 } 1203 ire = nire; 1204 if (ire != NULL) { 1205 ire_refrele(ire); /* Held in ire_add */ 1206 1207 /* tell routing sockets that we received a redirect */ 1208 ip_rts_change_v6(RTM_REDIRECT, 1209 &rd->nd_rd_dst, 1210 &rd->nd_rd_target, 1211 &ipv6_all_ones, 0, src, 1212 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1213 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1214 1215 /* 1216 * Delete any existing IRE_HOST type ires for this destination. 1217 * This together with the added IRE has the effect of 1218 * modifying an existing redirect. 1219 */ 1220 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1221 prev_ire->ire_ill, ALL_ZONES, NULL, 1222 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst, 1223 NULL); 1224 1225 if (redir_ire != NULL) { 1226 if (redir_ire->ire_flags & RTF_DYNAMIC) 1227 ire_delete(redir_ire); 1228 ire_refrele(redir_ire); 1229 } 1230 } 1231 1232 ire_refrele(prev_ire); 1233 prev_ire = NULL; 1234 1235 fail_redirect: 1236 if (prev_ire != NULL) 1237 ire_refrele(prev_ire); 1238 freemsg(mp); 1239 if (rill != ira->ira_rill) 1240 ill_refrele(rill); 1241 } 1242 1243 /* 1244 * Build and ship an IPv6 ICMP message using the packet data in mp, 1245 * and the ICMP header pointed to by "stuff". (May be called as 1246 * writer.) 
1247 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1248 * verify that an icmp error packet can be sent. 1249 * 1250 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1251 * source address (see above function). 1252 */ 1253 static void 1254 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1255 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1256 { 1257 ip6_t *ip6h; 1258 in6_addr_t v6dst; 1259 size_t len_needed; 1260 size_t msg_len; 1261 mblk_t *mp1; 1262 icmp6_t *icmp6; 1263 in6_addr_t v6src; 1264 ill_t *ill = ira->ira_ill; 1265 ip_stack_t *ipst = ill->ill_ipst; 1266 ip_xmit_attr_t ixas; 1267 1268 ip6h = (ip6_t *)mp->b_rptr; 1269 1270 bzero(&ixas, sizeof (ixas)); 1271 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1272 ixas.ixa_zoneid = ira->ira_zoneid; 1273 ixas.ixa_ifindex = 0; 1274 ixas.ixa_ipst = ipst; 1275 ixas.ixa_cred = kcred; 1276 ixas.ixa_cpid = NOPID; 1277 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1278 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1279 1280 /* 1281 * If the source of the original packet was link-local, then 1282 * make sure we send on the same ill (group) as we received it on. 1283 */ 1284 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1285 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1286 if (IS_UNDER_IPMP(ill)) 1287 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1288 else 1289 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1290 } 1291 1292 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1293 /* 1294 * Apply IPsec based on how IPsec was applied to 1295 * the packet that had the error. 1296 * 1297 * If it was an outbound packet that caused the ICMP 1298 * error, then the caller will have setup the IRA 1299 * appropriately. 1300 */ 1301 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1302 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1303 /* Note: mp already consumed and ip_drop_packet done */ 1304 return; 1305 } 1306 } else { 1307 /* 1308 * This is in clear. 
The icmp message we are building 1309 * here should go out in clear, independent of our policy. 1310 */ 1311 ixas.ixa_flags |= IXAF_NO_IPSEC; 1312 } 1313 1314 /* 1315 * If the caller specified the source we use that. 1316 * Otherwise, if the packet was for one of our unicast addresses, make 1317 * sure we respond with that as the source. Otherwise 1318 * have ip_output_simple pick the source address. 1319 */ 1320 if (v6src_ptr != NULL) { 1321 v6src = *v6src_ptr; 1322 } else { 1323 ire_t *ire; 1324 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1325 1326 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1327 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1328 match_flags |= MATCH_IRE_ILL; 1329 1330 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1331 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1332 match_flags, 0, ipst, NULL); 1333 if (ire != NULL) { 1334 v6src = ip6h->ip6_dst; 1335 ire_refrele(ire); 1336 } else { 1337 v6src = ipv6_all_zeros; 1338 ixas.ixa_flags |= IXAF_SET_SOURCE; 1339 } 1340 } 1341 v6dst = ip6h->ip6_src; 1342 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1343 msg_len = msgdsize(mp); 1344 if (msg_len > len_needed) { 1345 if (!adjmsg(mp, len_needed - msg_len)) { 1346 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1347 freemsg(mp); 1348 return; 1349 } 1350 msg_len = len_needed; 1351 } 1352 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1353 if (mp1 == NULL) { 1354 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1355 freemsg(mp); 1356 return; 1357 } 1358 mp1->b_cont = mp; 1359 mp = mp1; 1360 1361 /* 1362 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1363 * node generates be accepted in peace by all on-host destinations. 1364 * If we do NOT assume that all on-host destinations trust 1365 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1366 * (Look for IXAF_TRUSTED_ICMP). 
1367 */ 1368 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1369 1370 ip6h = (ip6_t *)mp->b_rptr; 1371 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1372 1373 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1374 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1375 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1376 ip6h->ip6_dst = v6dst; 1377 ip6h->ip6_src = v6src; 1378 msg_len += IPV6_HDR_LEN + len; 1379 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1380 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1381 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1382 } 1383 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1384 icmp6 = (icmp6_t *)&ip6h[1]; 1385 bcopy(stuff, (char *)icmp6, len); 1386 /* 1387 * Prepare for checksum by putting icmp length in the icmp 1388 * checksum field. The checksum is calculated in ip_output_wire_v6. 1389 */ 1390 icmp6->icmp6_cksum = ip6h->ip6_plen; 1391 if (icmp6->icmp6_type == ND_REDIRECT) { 1392 ip6h->ip6_hops = IPV6_MAX_HOPS; 1393 } 1394 1395 (void) ip_output_simple(mp, &ixas); 1396 ixa_cleanup(&ixas); 1397 } 1398 1399 /* 1400 * Update the output mib when ICMPv6 packets are sent. 
1401 */ 1402 void 1403 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1404 { 1405 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1406 1407 switch (icmp6->icmp6_type) { 1408 case ICMP6_DST_UNREACH: 1409 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1410 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1412 break; 1413 1414 case ICMP6_TIME_EXCEEDED: 1415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1416 break; 1417 1418 case ICMP6_PARAM_PROB: 1419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1420 break; 1421 1422 case ICMP6_PACKET_TOO_BIG: 1423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1424 break; 1425 1426 case ICMP6_ECHO_REQUEST: 1427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1428 break; 1429 1430 case ICMP6_ECHO_REPLY: 1431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1432 break; 1433 1434 case ND_ROUTER_SOLICIT: 1435 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1436 break; 1437 1438 case ND_ROUTER_ADVERT: 1439 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1440 break; 1441 1442 case ND_NEIGHBOR_SOLICIT: 1443 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1444 break; 1445 1446 case ND_NEIGHBOR_ADVERT: 1447 BUMP_MIB(ill->ill_icmp6_mib, 1448 ipv6IfIcmpOutNeighborAdvertisements); 1449 break; 1450 1451 case ND_REDIRECT: 1452 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1453 break; 1454 1455 case MLD_LISTENER_QUERY: 1456 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1457 break; 1458 1459 case MLD_LISTENER_REPORT: 1460 case MLD_V2_LISTENER_REPORT: 1461 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1462 break; 1463 1464 case MLD_LISTENER_REDUCTION: 1465 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1466 break; 1467 } 1468 } 1469 1470 /* 1471 * Check if it is ok to send an ICMPv6 error packet in 1472 * response to the IP packet in mp. 
1473 * Free the message and return null if no 1474 * ICMP error packet should be sent. 1475 */ 1476 static mblk_t * 1477 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1478 { 1479 ill_t *ill = ira->ira_ill; 1480 ip_stack_t *ipst = ill->ill_ipst; 1481 boolean_t llbcast; 1482 ip6_t *ip6h; 1483 1484 if (!mp) 1485 return (NULL); 1486 1487 /* We view multicast and broadcast as the same.. */ 1488 llbcast = (ira->ira_flags & 1489 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1490 ip6h = (ip6_t *)mp->b_rptr; 1491 1492 /* Check if source address uniquely identifies the host */ 1493 1494 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1495 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1496 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1497 freemsg(mp); 1498 return (NULL); 1499 } 1500 1501 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1502 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1503 icmp6_t *icmp6; 1504 1505 if (mp->b_wptr - mp->b_rptr < len_needed) { 1506 if (!pullupmsg(mp, len_needed)) { 1507 BUMP_MIB(ill->ill_icmp6_mib, 1508 ipv6IfIcmpInErrors); 1509 freemsg(mp); 1510 return (NULL); 1511 } 1512 ip6h = (ip6_t *)mp->b_rptr; 1513 } 1514 icmp6 = (icmp6_t *)&ip6h[1]; 1515 /* Explicitly do not generate errors in response to redirects */ 1516 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1517 icmp6->icmp6_type == ND_REDIRECT) { 1518 freemsg(mp); 1519 return (NULL); 1520 } 1521 } 1522 /* 1523 * Check that the destination is not multicast and that the packet 1524 * was not sent on link layer broadcast or multicast. (Exception 1525 * is Packet too big message as per the draft - when mcast_ok is set.) 1526 */ 1527 if (!mcast_ok && 1528 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1529 freemsg(mp); 1530 return (NULL); 1531 } 1532 /* 1533 * If this is a labeled system, then check to see if we're allowed to 1534 * send a response to this particular sender. If not, then just drop. 
1535 */ 1536 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1537 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1538 freemsg(mp); 1539 return (NULL); 1540 } 1541 1542 if (icmp_err_rate_limit(ipst)) { 1543 /* 1544 * Only send ICMP error packets every so often. 1545 * This should be done on a per port/source basis, 1546 * but for now this will suffice. 1547 */ 1548 freemsg(mp); 1549 return (NULL); 1550 } 1551 return (mp); 1552 } 1553 1554 /* 1555 * Called when a packet was sent out the same link that it arrived on. 1556 * Check if it is ok to send a redirect and then send it. 1557 */ 1558 void 1559 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1560 ip_recv_attr_t *ira) 1561 { 1562 ill_t *ill = ira->ira_ill; 1563 ip_stack_t *ipst = ill->ill_ipst; 1564 in6_addr_t *v6targ; 1565 ire_t *src_ire_v6 = NULL; 1566 mblk_t *mp1; 1567 ire_t *nhop_ire = NULL; 1568 1569 /* 1570 * Don't send a redirect when forwarding a source 1571 * routed packet. 1572 */ 1573 if (ip_source_routed_v6(ip6h, mp, ipst)) 1574 return; 1575 1576 if (ire->ire_type & IRE_ONLINK) { 1577 /* Target is directly connected */ 1578 v6targ = &ip6h->ip6_dst; 1579 } else { 1580 /* Determine the most specific IRE used to send the packets */ 1581 nhop_ire = ire_nexthop(ire); 1582 if (nhop_ire == NULL) 1583 return; 1584 1585 /* 1586 * We won't send redirects to a router 1587 * that doesn't have a link local 1588 * address, but will forward. 
1589 */ 1590 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1591 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1592 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1593 ire_refrele(nhop_ire); 1594 return; 1595 } 1596 v6targ = &nhop_ire->ire_addr_v6; 1597 } 1598 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1599 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1600 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1601 1602 if (src_ire_v6 == NULL) { 1603 if (nhop_ire != NULL) 1604 ire_refrele(nhop_ire); 1605 return; 1606 } 1607 1608 /* 1609 * The source is directly connected. 1610 */ 1611 mp1 = copymsg(mp); 1612 if (mp1 != NULL) 1613 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1614 1615 if (nhop_ire != NULL) 1616 ire_refrele(nhop_ire); 1617 ire_refrele(src_ire_v6); 1618 } 1619 1620 /* 1621 * Generate an ICMPv6 redirect message. 1622 * Include target link layer address option if it exits. 1623 * Always include redirect header. 1624 */ 1625 static void 1626 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1627 ip_recv_attr_t *ira) 1628 { 1629 nd_redirect_t *rd; 1630 nd_opt_rd_hdr_t *rdh; 1631 uchar_t *buf; 1632 ncec_t *ncec = NULL; 1633 nd_opt_hdr_t *opt; 1634 int len; 1635 int ll_opt_len = 0; 1636 int max_redir_hdr_data_len; 1637 int pkt_len; 1638 in6_addr_t *srcp; 1639 ill_t *ill; 1640 boolean_t need_refrele; 1641 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1642 1643 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1644 if (mp == NULL) 1645 return; 1646 1647 if (IS_UNDER_IPMP(ira->ira_ill)) { 1648 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1649 if (ill == NULL) { 1650 ill = ira->ira_ill; 1651 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1652 ip_drop_output("no IPMP ill for sending redirect", 1653 mp, ill); 1654 freemsg(mp); 1655 return; 1656 } 1657 need_refrele = B_TRUE; 1658 } else { 1659 ill = ira->ira_ill; 1660 need_refrele = B_FALSE; 1661 } 1662 1663 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1664 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1665 ncec->ncec_lladdr != NULL) { 1666 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1667 ill->ill_phys_addr_length + 7)/8 * 8; 1668 } 1669 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1670 ASSERT(len % 4 == 0); 1671 buf = kmem_alloc(len, KM_NOSLEEP); 1672 if (buf == NULL) { 1673 if (ncec != NULL) 1674 ncec_refrele(ncec); 1675 if (need_refrele) 1676 ill_refrele(ill); 1677 freemsg(mp); 1678 return; 1679 } 1680 1681 rd = (nd_redirect_t *)buf; 1682 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1683 rd->nd_rd_code = 0; 1684 rd->nd_rd_reserved = 0; 1685 rd->nd_rd_target = *targetp; 1686 rd->nd_rd_dst = *dest; 1687 1688 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1689 if (ncec != NULL && ll_opt_len != 0) { 1690 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1691 opt->nd_opt_len = ll_opt_len/8; 1692 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1693 ill->ill_phys_addr_length); 1694 } 1695 if (ncec != NULL) 1696 ncec_refrele(ncec); 1697 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1698 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1699 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1700 max_redir_hdr_data_len = 1701 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1702 pkt_len = msgdsize(mp); 1703 /* Make sure mp is 8 byte aligned */ 1704 if (pkt_len > max_redir_hdr_data_len) { 1705 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1706 sizeof (nd_opt_rd_hdr_t))/8; 1707 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1708 } else { 1709 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1710 (void) adjmsg(mp, -(pkt_len % 8)); 1711 } 1712 rdh->nd_opt_rh_reserved1 = 0; 1713 rdh->nd_opt_rh_reserved2 = 0; 1714 /* ipif_v6lcl_addr contains the link-local source address */ 1715 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1716 1717 /* Redirects sent by router, and router is global zone */ 1718 ASSERT(ira->ira_zoneid == ALL_ZONES); 1719 
ira->ira_zoneid = GLOBAL_ZONEID; 1720 icmp_pkt_v6(mp, buf, len, srcp, ira); 1721 kmem_free(buf, len); 1722 if (need_refrele) 1723 ill_refrele(ill); 1724 } 1725 1726 1727 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1728 void 1729 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1730 ip_recv_attr_t *ira) 1731 { 1732 icmp6_t icmp6; 1733 1734 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1735 if (mp == NULL) 1736 return; 1737 1738 bzero(&icmp6, sizeof (icmp6_t)); 1739 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1740 icmp6.icmp6_code = code; 1741 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1742 } 1743 1744 /* 1745 * Generate an ICMP unreachable message. 1746 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1747 * constructed by the caller. 1748 */ 1749 void 1750 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1751 ip_recv_attr_t *ira) 1752 { 1753 icmp6_t icmp6; 1754 1755 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1756 if (mp == NULL) 1757 return; 1758 1759 bzero(&icmp6, sizeof (icmp6_t)); 1760 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1761 icmp6.icmp6_code = code; 1762 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1763 } 1764 1765 /* 1766 * Generate an ICMP pkt too big message. 1767 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1768 * constructed by the caller. 1769 */ 1770 void 1771 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1772 ip_recv_attr_t *ira) 1773 { 1774 icmp6_t icmp6; 1775 1776 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1777 if (mp == NULL) 1778 return; 1779 1780 bzero(&icmp6, sizeof (icmp6_t)); 1781 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1782 icmp6.icmp6_code = 0; 1783 icmp6.icmp6_mtu = htonl(mtu); 1784 1785 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1786 } 1787 1788 /* 1789 * Generate an ICMP parameter problem message. (May be called as writer.) 
1790 * 'offset' is the offset from the beginning of the packet in error. 1791 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1792 * constructed by the caller. 1793 */ 1794 static void 1795 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1796 boolean_t mcast_ok, ip_recv_attr_t *ira) 1797 { 1798 icmp6_t icmp6; 1799 1800 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1801 if (mp == NULL) 1802 return; 1803 1804 bzero((char *)&icmp6, sizeof (icmp6_t)); 1805 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1806 icmp6.icmp6_code = code; 1807 icmp6.icmp6_pptr = htonl(offset); 1808 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1809 } 1810 1811 void 1812 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1813 ip_recv_attr_t *ira) 1814 { 1815 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1816 uint16_t hdr_length; 1817 uint8_t *nexthdrp; 1818 uint32_t offset; 1819 ill_t *ill = ira->ira_ill; 1820 1821 /* Determine the offset of the bad nexthdr value */ 1822 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1823 /* Malformed packet */ 1824 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1825 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1826 freemsg(mp); 1827 return; 1828 } 1829 1830 offset = nexthdrp - mp->b_rptr; 1831 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1832 mcast_ok, ira); 1833 } 1834 1835 /* 1836 * Verify whether or not the IP address is a valid local address. 1837 * Could be a unicast, including one for a down interface. 1838 * If allow_mcbc then a multicast or broadcast address is also 1839 * acceptable. 1840 * 1841 * In the case of a multicast address, however, the 1842 * upper protocol is expected to reset the src address 1843 * to zero when we return IPVL_MCAST so that 1844 * no packets are emitted with multicast address as 1845 * source address. 
1846 * The addresses valid for bind are: 1847 * (1) - in6addr_any 1848 * (2) - IP address of an UP interface 1849 * (3) - IP address of a DOWN interface 1850 * (4) - a multicast address. In this case 1851 * the conn will only receive packets destined to 1852 * the specified multicast address. Note: the 1853 * application still has to issue an 1854 * IPV6_JOIN_GROUP socket option. 1855 * 1856 * In all the above cases, the bound address must be valid in the current zone. 1857 * When the address is loopback or multicast, there might be many matching IREs 1858 * so bind has to look up based on the zone. 1859 */ 1860 ip_laddr_t 1861 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1862 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1863 { 1864 ire_t *src_ire; 1865 uint_t match_flags; 1866 ill_t *ill = NULL; 1867 1868 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1869 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1870 1871 match_flags = MATCH_IRE_ZONEONLY; 1872 if (scopeid != 0) { 1873 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1874 if (ill == NULL) 1875 return (IPVL_BAD); 1876 match_flags |= MATCH_IRE_ILL; 1877 } 1878 1879 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1880 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1881 if (ill != NULL) 1882 ill_refrele(ill); 1883 1884 /* 1885 * If an address other than in6addr_any is requested, 1886 * we verify that it is a valid address for bind 1887 * Note: Following code is in if-else-if form for 1888 * readability compared to a condition check. 1889 */ 1890 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1891 /* 1892 * (2) Bind to address of local UP interface 1893 */ 1894 ire_refrele(src_ire); 1895 return (IPVL_UNICAST_UP); 1896 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1897 /* (4) bind to multicast address. 
*/ 1898 if (src_ire != NULL) 1899 ire_refrele(src_ire); 1900 1901 /* 1902 * Note: caller should take IPV6_MULTICAST_IF 1903 * into account when selecting a real source address. 1904 */ 1905 if (allow_mcbc) 1906 return (IPVL_MCAST); 1907 else 1908 return (IPVL_BAD); 1909 } else { 1910 ipif_t *ipif; 1911 1912 /* 1913 * (3) Bind to address of local DOWN interface? 1914 * (ipif_lookup_addr() looks up all interfaces 1915 * but we do not get here for UP interfaces 1916 * - case (2) above) 1917 */ 1918 if (src_ire != NULL) 1919 ire_refrele(src_ire); 1920 1921 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1922 if (ipif == NULL) 1923 return (IPVL_BAD); 1924 1925 /* Not a useful source? */ 1926 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1927 ipif_refrele(ipif); 1928 return (IPVL_BAD); 1929 } 1930 ipif_refrele(ipif); 1931 return (IPVL_UNICAST_DOWN); 1932 } 1933 } 1934 1935 /* 1936 * Verify that both the source and destination addresses are valid. If 1937 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1938 * i.e. have no route to it. Protocols like TCP want to verify destination 1939 * reachability, while tunnels do not. 1940 * 1941 * Determine the route, the interface, and (optionally) the source address 1942 * to use to reach a given destination. 1943 * Note that we allow connect to broadcast and multicast addresses when 1944 * IPDF_ALLOW_MCBC is set. 1945 * first_hop and dst_addr are normally the same, but if source routing 1946 * they will differ; in that case the first_hop is what we'll use for the 1947 * routing lookup but the dce and label checks will be done on dst_addr, 1948 * 1949 * If uinfo is set, then we fill in the best available information 1950 * we have for the destination. This is based on (in priority order) any 1951 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1952 * ill_mtu. 1953 * 1954 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1955 * always do the label check on dst_addr. 1956 * 1957 * Assumes that the caller has set ixa_scopeid for link-local communication. 1958 */ 1959 int 1960 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1961 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1962 uint32_t flags, uint_t mac_mode) 1963 { 1964 ire_t *ire; 1965 int error = 0; 1966 in6_addr_t setsrc; /* RTF_SETSRC */ 1967 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1968 ip_stack_t *ipst = ixa->ixa_ipst; 1969 dce_t *dce; 1970 uint_t pmtu; 1971 uint_t ifindex; 1972 uint_t generation; 1973 nce_t *nce; 1974 ill_t *ill = NULL; 1975 boolean_t multirt = B_FALSE; 1976 1977 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1978 1979 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1980 1981 /* 1982 * We never send to zero; the ULPs map it to the loopback address. 1983 * We can't allow it since we use zero to mean unitialized in some 1984 * places. 1985 */ 1986 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1987 1988 if (is_system_labeled()) { 1989 ts_label_t *tsl = NULL; 1990 1991 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 1992 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 1993 if (error != 0) 1994 return (error); 1995 if (tsl != NULL) { 1996 /* Update the label */ 1997 ip_xmit_attr_replace_tsl(ixa, tsl); 1998 } 1999 } 2000 2001 setsrc = ipv6_all_zeros; 2002 /* 2003 * Select a route; For IPMP interfaces, we would only select 2004 * a "hidden" route (i.e., going through a specific under_ill) 2005 * if ixa_ifindex has been specified. 2006 */ 2007 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation, 2008 &setsrc, &error, &multirt); 2009 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2010 if (error != 0) 2011 goto bad_addr; 2012 2013 /* 2014 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2015 * If IPDF_VERIFY_DST is set, the destination must be reachable. 
2016 * Otherwise the destination needn't be reachable. 2017 * 2018 * If we match on a reject or black hole, then we've got a 2019 * local failure. May as well fail out the connect() attempt, 2020 * since it's never going to succeed. 2021 */ 2022 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2023 /* 2024 * If we're verifying destination reachability, we always want 2025 * to complain here. 2026 * 2027 * If we're not verifying destination reachability but the 2028 * destination has a route, we still want to fail on the 2029 * temporary address and broadcast address tests. 2030 * 2031 * In both cases do we let the code continue so some reasonable 2032 * information is returned to the caller. That enables the 2033 * caller to use (and even cache) the IRE. conn_ip_ouput will 2034 * use the generation mismatch path to check for the unreachable 2035 * case thereby avoiding any specific check in the main path. 2036 */ 2037 ASSERT(generation == IRE_GENERATION_VERIFY); 2038 if (flags & IPDF_VERIFY_DST) { 2039 /* 2040 * Set errno but continue to set up ixa_ire to be 2041 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2042 * That allows callers to use ip_output to get an 2043 * ICMP error back. 2044 */ 2045 if (!(ire->ire_type & IRE_HOST)) 2046 error = ENETUNREACH; 2047 else 2048 error = EHOSTUNREACH; 2049 } 2050 } 2051 2052 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2053 !(flags & IPDF_ALLOW_MCBC)) { 2054 ire_refrele(ire); 2055 ire = ire_reject(ipst, B_FALSE); 2056 generation = IRE_GENERATION_VERIFY; 2057 error = ENETUNREACH; 2058 } 2059 2060 /* Cache things */ 2061 if (ixa->ixa_ire != NULL) 2062 ire_refrele_notr(ixa->ixa_ire); 2063 #ifdef DEBUG 2064 ire_refhold_notr(ire); 2065 ire_refrele(ire); 2066 #endif 2067 ixa->ixa_ire = ire; 2068 ixa->ixa_ire_generation = generation; 2069 2070 /* 2071 * For multicast with multirt we have a flag passed back from 2072 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2073 * possible multicast address. 
2074 * We also need a flag for multicast since we can't check 2075 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2076 */ 2077 if (multirt) { 2078 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2079 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2080 } else { 2081 ixa->ixa_postfragfn = ire->ire_postfragfn; 2082 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2083 } 2084 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2085 /* Get an nce to cache. */ 2086 nce = ire_to_nce(ire, NULL, firsthop); 2087 if (nce == NULL) { 2088 /* Allocation failure? */ 2089 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2090 } else { 2091 if (ixa->ixa_nce != NULL) 2092 nce_refrele(ixa->ixa_nce); 2093 ixa->ixa_nce = nce; 2094 } 2095 } 2096 2097 /* 2098 * If the source address is a loopback address, the 2099 * destination had best be local or multicast. 2100 * If we are sending to an IRE_LOCAL using a loopback source then 2101 * it had better be the same zoneid. 2102 */ 2103 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2104 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2105 ire = NULL; /* Stored in ixa_ire */ 2106 error = EADDRNOTAVAIL; 2107 goto bad_addr; 2108 } 2109 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2110 ire = NULL; /* Stored in ixa_ire */ 2111 error = EADDRNOTAVAIL; 2112 goto bad_addr; 2113 } 2114 } 2115 2116 /* 2117 * Does the caller want us to pick a source address? 2118 */ 2119 if (flags & IPDF_SELECT_SRC) { 2120 in6_addr_t src_addr; 2121 2122 /* 2123 * We use use ire_nexthop_ill to avoid the under ipmp 2124 * interface for source address selection. Note that for ipmp 2125 * probe packets, ixa_ifindex would have been specified, and 2126 * the ip_select_route() invocation would have picked an ire 2127 * will ire_ill pointing at an under interface. 
2128 */ 2129 ill = ire_nexthop_ill(ire); 2130 2131 /* If unreachable we have no ill but need some source */ 2132 if (ill == NULL) { 2133 src_addr = ipv6_loopback; 2134 /* Make sure we look for a better source address */ 2135 generation = SRC_GENERATION_VERIFY; 2136 } else { 2137 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2138 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2139 &src_addr, &generation, NULL); 2140 if (error != 0) { 2141 ire = NULL; /* Stored in ixa_ire */ 2142 goto bad_addr; 2143 } 2144 } 2145 2146 /* 2147 * We allow the source address to to down. 2148 * However, we check that we don't use the loopback address 2149 * as a source when sending out on the wire. 2150 */ 2151 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2152 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2153 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2154 ire = NULL; /* Stored in ixa_ire */ 2155 error = EADDRNOTAVAIL; 2156 goto bad_addr; 2157 } 2158 2159 *src_addrp = src_addr; 2160 ixa->ixa_src_generation = generation; 2161 } 2162 2163 /* 2164 * Make sure we don't leave an unreachable ixa_nce in place 2165 * since ip_select_route is used when we unplumb i.e., remove 2166 * references on ixa_ire, ixa_nce, and ixa_dce. 
2167 */ 2168 nce = ixa->ixa_nce; 2169 if (nce != NULL && nce->nce_is_condemned) { 2170 nce_refrele(nce); 2171 ixa->ixa_nce = NULL; 2172 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2173 } 2174 2175 2176 ifindex = 0; 2177 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2178 /* If we are creating a DCE we'd better have an ifindex */ 2179 if (ill != NULL) 2180 ifindex = ill->ill_phyint->phyint_ifindex; 2181 else 2182 flags &= ~IPDF_UNIQUE_DCE; 2183 } 2184 2185 if (flags & IPDF_UNIQUE_DCE) { 2186 /* Fallback to the default dce if allocation fails */ 2187 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2188 if (dce != NULL) { 2189 generation = dce->dce_generation; 2190 } else { 2191 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2192 &generation); 2193 } 2194 } else { 2195 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2196 } 2197 ASSERT(dce != NULL); 2198 if (ixa->ixa_dce != NULL) 2199 dce_refrele_notr(ixa->ixa_dce); 2200 #ifdef DEBUG 2201 dce_refhold_notr(dce); 2202 dce_refrele(dce); 2203 #endif 2204 ixa->ixa_dce = dce; 2205 ixa->ixa_dce_generation = generation; 2206 2207 /* 2208 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2209 * multicast. But pmtu discovery is only enabled for connected 2210 * sockets in general. 2211 */ 2212 2213 /* 2214 * Set initial value for fragmentation limit. Either conn_ip_output 2215 * or ULP might updates it when there are routing changes. 2216 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2217 */ 2218 pmtu = ip_get_pmtu(ixa); 2219 ixa->ixa_fragsize = pmtu; 2220 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2221 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2222 ixa->ixa_pmtu = pmtu; 2223 2224 /* 2225 * Extract information useful for some transports. 2226 * First we look for DCE metrics. Then we take what we have in 2227 * the metrics in the route, where the offlink is used if we have 2228 * one. 
2229 */ 2230 if (uinfo != NULL) { 2231 bzero(uinfo, sizeof (*uinfo)); 2232 2233 if (dce->dce_flags & DCEF_UINFO) 2234 *uinfo = dce->dce_uinfo; 2235 2236 rts_merge_metrics(uinfo, &ire->ire_metrics); 2237 2238 /* Allow ire_metrics to decrease the path MTU from above */ 2239 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2240 uinfo->iulp_mtu = pmtu; 2241 2242 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2243 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2244 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2245 } 2246 2247 if (ill != NULL) 2248 ill_refrele(ill); 2249 2250 return (error); 2251 2252 bad_addr: 2253 if (ire != NULL) 2254 ire_refrele(ire); 2255 2256 if (ill != NULL) 2257 ill_refrele(ill); 2258 2259 /* 2260 * Make sure we don't leave an unreachable ixa_nce in place 2261 * since ip_select_route is used when we unplumb i.e., remove 2262 * references on ixa_ire, ixa_nce, and ixa_dce. 2263 */ 2264 nce = ixa->ixa_nce; 2265 if (nce != NULL && nce->nce_is_condemned) { 2266 nce_refrele(nce); 2267 ixa->ixa_nce = NULL; 2268 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2269 } 2270 2271 return (error); 2272 } 2273 2274 /* 2275 * Handle protocols with which IP is less intimate. There 2276 * can be more than one stream bound to a particular 2277 * protocol. When this is the case, normally each one gets a copy 2278 * of any incoming packets. 2279 * 2280 * Zones notes: 2281 * Packets will be distributed to conns in all zones. This is really only 2282 * useful for ICMPv6 as only applications in the global zone can create raw 2283 * sockets for other protocols. 
2284 */ 2285 void 2286 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2287 { 2288 mblk_t *mp1; 2289 in6_addr_t laddr = ip6h->ip6_dst; 2290 conn_t *connp, *first_connp, *next_connp; 2291 connf_t *connfp; 2292 ill_t *ill = ira->ira_ill; 2293 ip_stack_t *ipst = ill->ill_ipst; 2294 2295 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2296 mutex_enter(&connfp->connf_lock); 2297 connp = connfp->connf_head; 2298 for (connp = connfp->connf_head; connp != NULL; 2299 connp = connp->conn_next) { 2300 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2301 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2302 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2303 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2304 break; 2305 } 2306 2307 if (connp == NULL) { 2308 /* 2309 * No one bound to this port. Is 2310 * there a client that wants all 2311 * unclaimed datagrams? 2312 */ 2313 mutex_exit(&connfp->connf_lock); 2314 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2315 ICMP6_PARAMPROB_NEXTHEADER, ira); 2316 return; 2317 } 2318 2319 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2320 2321 CONN_INC_REF(connp); 2322 first_connp = connp; 2323 2324 /* 2325 * XXX: Fix the multiple protocol listeners case. We should not 2326 * be walking the conn->conn_next list here. 
2327 */ 2328 connp = connp->conn_next; 2329 for (;;) { 2330 while (connp != NULL) { 2331 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2332 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2333 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2334 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2335 ira, connp))) 2336 break; 2337 connp = connp->conn_next; 2338 } 2339 2340 if (connp == NULL) { 2341 /* No more interested clients */ 2342 connp = first_connp; 2343 break; 2344 } 2345 if (((mp1 = dupmsg(mp)) == NULL) && 2346 ((mp1 = copymsg(mp)) == NULL)) { 2347 /* Memory allocation failed */ 2348 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2349 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2350 connp = first_connp; 2351 break; 2352 } 2353 2354 CONN_INC_REF(connp); 2355 mutex_exit(&connfp->connf_lock); 2356 2357 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2358 ira); 2359 2360 mutex_enter(&connfp->connf_lock); 2361 /* Follow the next pointer before releasing the conn. */ 2362 next_connp = connp->conn_next; 2363 CONN_DEC_REF(connp); 2364 connp = next_connp; 2365 } 2366 2367 /* Last one. Send it upstream. */ 2368 mutex_exit(&connfp->connf_lock); 2369 2370 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2371 2372 CONN_DEC_REF(connp); 2373 } 2374 2375 /* 2376 * Called when it is conceptually a ULP that would sent the packet 2377 * e.g., port unreachable and nexthdr unknown. Check that the packet 2378 * would have passed the IPsec global policy before sending the error. 2379 * 2380 * Send an ICMP error after patching up the packet appropriately. 2381 * Uses ip_drop_input and bumps the appropriate MIB. 2382 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2383 */ 2384 void 2385 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2386 ip_recv_attr_t *ira) 2387 { 2388 ip6_t *ip6h; 2389 boolean_t secure; 2390 ill_t *ill = ira->ira_ill; 2391 ip_stack_t *ipst = ill->ill_ipst; 2392 netstack_t *ns = ipst->ips_netstack; 2393 ipsec_stack_t *ipss = ns->netstack_ipsec; 2394 2395 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2396 2397 /* 2398 * We are generating an icmp error for some inbound packet. 2399 * Called from all ip_fanout_(udp, tcp, proto) functions. 2400 * Before we generate an error, check with global policy 2401 * to see whether this is allowed to enter the system. As 2402 * there is no "conn", we are checking with global policy. 2403 */ 2404 ip6h = (ip6_t *)mp->b_rptr; 2405 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2406 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2407 if (mp == NULL) 2408 return; 2409 } 2410 2411 /* We never send errors for protocols that we do implement */ 2412 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2413 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2414 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2415 freemsg(mp); 2416 return; 2417 } 2418 2419 switch (icmp_type) { 2420 case ICMP6_DST_UNREACH: 2421 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2422 2423 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2424 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2425 2426 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2427 break; 2428 case ICMP6_PARAM_PROB: 2429 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2430 2431 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2432 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2433 2434 /* Let the system determine the offset for this one */ 2435 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2436 break; 2437 default: 2438 #ifdef DEBUG 2439 panic("ip_fanout_send_icmp_v6: wrong type"); 2440 /*NOTREACHED*/ 2441 #else 2442 freemsg(mp); 2443 break; 2444 #endif 2445 } 2446 } 2447 2448 /* 2449 * 
Fanout for UDP packets that are multicast or ICMP errors. 2450 * (Unicast fanout is handled in ip_input_v6.) 2451 * 2452 * If SO_REUSEADDR is set all multicast packets 2453 * will be delivered to all conns bound to the same port. 2454 * 2455 * Fanout for UDP packets. 2456 * The caller puts <fport, lport> in the ports parameter. 2457 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2458 * 2459 * If SO_REUSEADDR is set all multicast and broadcast packets 2460 * will be delivered to all conns bound to the same port. 2461 * 2462 * Zones notes: 2463 * Earlier in ip_input on a system with multiple shared-IP zones we 2464 * duplicate the multicast and broadcast packets and send them up 2465 * with each explicit zoneid that exists on that ill. 2466 * This means that here we can match the zoneid with SO_ALLZONES being special. 2467 */ 2468 void 2469 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2470 ip_recv_attr_t *ira) 2471 { 2472 in6_addr_t laddr; 2473 conn_t *connp; 2474 connf_t *connfp; 2475 in6_addr_t faddr; 2476 ill_t *ill = ira->ira_ill; 2477 ip_stack_t *ipst = ill->ill_ipst; 2478 2479 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2480 2481 laddr = ip6h->ip6_dst; 2482 faddr = ip6h->ip6_src; 2483 2484 /* Attempt to find a client stream based on destination port. 
*/ 2485 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2486 mutex_enter(&connfp->connf_lock); 2487 connp = connfp->connf_head; 2488 while (connp != NULL) { 2489 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2490 conn_wantpacket_v6(connp, ira, ip6h) && 2491 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2492 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2493 break; 2494 connp = connp->conn_next; 2495 } 2496 2497 if (connp == NULL) 2498 goto notfound; 2499 2500 CONN_INC_REF(connp); 2501 2502 if (connp->conn_reuseaddr) { 2503 conn_t *first_connp = connp; 2504 conn_t *next_connp; 2505 mblk_t *mp1; 2506 2507 connp = connp->conn_next; 2508 for (;;) { 2509 while (connp != NULL) { 2510 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2511 fport, faddr) && 2512 conn_wantpacket_v6(connp, ira, ip6h) && 2513 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2514 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2515 ira, connp))) 2516 break; 2517 connp = connp->conn_next; 2518 } 2519 if (connp == NULL) { 2520 /* No more interested clients */ 2521 connp = first_connp; 2522 break; 2523 } 2524 if (((mp1 = dupmsg(mp)) == NULL) && 2525 ((mp1 = copymsg(mp)) == NULL)) { 2526 /* Memory allocation failed */ 2527 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2528 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2529 connp = first_connp; 2530 break; 2531 } 2532 2533 CONN_INC_REF(connp); 2534 mutex_exit(&connfp->connf_lock); 2535 2536 IP6_STAT(ipst, ip6_udp_fanmb); 2537 ip_fanout_udp_conn(connp, mp1, NULL, 2538 (ip6_t *)mp1->b_rptr, ira); 2539 2540 mutex_enter(&connfp->connf_lock); 2541 /* Follow the next pointer before releasing the conn. */ 2542 next_connp = connp->conn_next; 2543 IP6_STAT(ipst, ip6_udp_fanmb); 2544 CONN_DEC_REF(connp); 2545 connp = next_connp; 2546 } 2547 } 2548 2549 /* Last one. Send it upstream. 
*/ 2550 mutex_exit(&connfp->connf_lock); 2551 2552 IP6_STAT(ipst, ip6_udp_fanmb); 2553 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2554 CONN_DEC_REF(connp); 2555 return; 2556 2557 notfound: 2558 mutex_exit(&connfp->connf_lock); 2559 /* 2560 * No one bound to this port. Is 2561 * there a client that wants all 2562 * unclaimed datagrams? 2563 */ 2564 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2565 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2566 ip_fanout_proto_v6(mp, ip6h, ira); 2567 } else { 2568 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2569 ICMP6_DST_UNREACH_NOPORT, ira); 2570 } 2571 } 2572 2573 /* 2574 * int ip_find_hdr_v6() 2575 * 2576 * This routine is used by the upper layer protocols, iptun, and IPsec: 2577 * - Set extension header pointers to appropriate locations 2578 * - Determine IPv6 header length and return it 2579 * - Return a pointer to the last nexthdr value 2580 * 2581 * The caller must initialize ipp_fields. 2582 * The upper layer protocols normally set label_separate which makes the 2583 * routine put the TX label in ipp_label_v6. If this is not set then 2584 * the hop-by-hop options including the label are placed in ipp_hopopts. 2585 * 2586 * NOTE: If multiple extension headers of the same type are present, 2587 * ip_find_hdr_v6() will set the respective extension header pointers 2588 * to the first one that it encounters in the IPv6 header. It also 2589 * skips fragment headers. This routine deals with malformed packets 2590 * of various sorts in which case the returned length is up to the 2591 * malformed part. 
2592 */ 2593 int 2594 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2595 uint8_t *nexthdrp) 2596 { 2597 uint_t length, ehdrlen; 2598 uint8_t nexthdr; 2599 uint8_t *whereptr, *endptr; 2600 ip6_dest_t *tmpdstopts; 2601 ip6_rthdr_t *tmprthdr; 2602 ip6_hbh_t *tmphopopts; 2603 ip6_frag_t *tmpfraghdr; 2604 2605 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2606 ipp->ipp_hoplimit = ip6h->ip6_hops; 2607 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2608 ipp->ipp_addr = ip6h->ip6_dst; 2609 2610 length = IPV6_HDR_LEN; 2611 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2612 endptr = mp->b_wptr; 2613 2614 nexthdr = ip6h->ip6_nxt; 2615 while (whereptr < endptr) { 2616 /* Is there enough left for len + nexthdr? */ 2617 if (whereptr + MIN_EHDR_LEN > endptr) 2618 goto done; 2619 2620 switch (nexthdr) { 2621 case IPPROTO_HOPOPTS: { 2622 /* We check for any CIPSO */ 2623 uchar_t *secopt; 2624 boolean_t hbh_needed; 2625 uchar_t *after_secopt; 2626 2627 tmphopopts = (ip6_hbh_t *)whereptr; 2628 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2629 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2630 goto done; 2631 nexthdr = tmphopopts->ip6h_nxt; 2632 2633 if (!label_separate) { 2634 secopt = NULL; 2635 after_secopt = whereptr; 2636 } else { 2637 /* 2638 * We have dropped packets with bad options in 2639 * ip6_input. No need to check return value 2640 * here. 
2641 */ 2642 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2643 &secopt, &after_secopt, &hbh_needed); 2644 } 2645 if (secopt != NULL && after_secopt - whereptr > 0) { 2646 ipp->ipp_fields |= IPPF_LABEL_V6; 2647 ipp->ipp_label_v6 = secopt; 2648 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2649 } else { 2650 ipp->ipp_label_len_v6 = 0; 2651 after_secopt = whereptr; 2652 hbh_needed = B_TRUE; 2653 } 2654 /* return only 1st hbh */ 2655 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2656 ipp->ipp_fields |= IPPF_HOPOPTS; 2657 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2658 ipp->ipp_hopoptslen = ehdrlen - 2659 ipp->ipp_label_len_v6; 2660 } 2661 break; 2662 } 2663 case IPPROTO_DSTOPTS: 2664 tmpdstopts = (ip6_dest_t *)whereptr; 2665 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2666 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2667 goto done; 2668 nexthdr = tmpdstopts->ip6d_nxt; 2669 /* 2670 * ipp_dstopts is set to the destination header after a 2671 * routing header. 2672 * Assume it is a post-rthdr destination header 2673 * and adjust when we find an rthdr. 2674 */ 2675 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2676 ipp->ipp_fields |= IPPF_DSTOPTS; 2677 ipp->ipp_dstopts = tmpdstopts; 2678 ipp->ipp_dstoptslen = ehdrlen; 2679 } 2680 break; 2681 case IPPROTO_ROUTING: 2682 tmprthdr = (ip6_rthdr_t *)whereptr; 2683 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2684 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2685 goto done; 2686 nexthdr = tmprthdr->ip6r_nxt; 2687 /* return only 1st rthdr */ 2688 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2689 ipp->ipp_fields |= IPPF_RTHDR; 2690 ipp->ipp_rthdr = tmprthdr; 2691 ipp->ipp_rthdrlen = ehdrlen; 2692 } 2693 /* 2694 * Make any destination header we've seen be a 2695 * pre-rthdr destination header. 
2696 */ 2697 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2698 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2699 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2700 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2701 ipp->ipp_dstopts = NULL; 2702 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2703 ipp->ipp_dstoptslen = 0; 2704 } 2705 break; 2706 case IPPROTO_FRAGMENT: 2707 tmpfraghdr = (ip6_frag_t *)whereptr; 2708 ehdrlen = sizeof (ip6_frag_t); 2709 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2710 goto done; 2711 nexthdr = tmpfraghdr->ip6f_nxt; 2712 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2713 ipp->ipp_fields |= IPPF_FRAGHDR; 2714 ipp->ipp_fraghdr = tmpfraghdr; 2715 ipp->ipp_fraghdrlen = ehdrlen; 2716 } 2717 break; 2718 case IPPROTO_NONE: 2719 default: 2720 goto done; 2721 } 2722 length += ehdrlen; 2723 whereptr += ehdrlen; 2724 } 2725 done: 2726 if (nexthdrp != NULL) 2727 *nexthdrp = nexthdr; 2728 return (length); 2729 } 2730 2731 /* 2732 * Try to determine where and what are the IPv6 header length and 2733 * pointer to nexthdr value for the upper layer protocol (or an 2734 * unknown next hdr). 2735 * 2736 * Parameters returns a pointer to the nexthdr value; 2737 * Must handle malformed packets of various sorts. 2738 * Function returns failure for malformed cases. 2739 */ 2740 boolean_t 2741 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 2742 uint8_t **nexthdrpp) 2743 { 2744 uint16_t length; 2745 uint_t ehdrlen; 2746 uint8_t *nexthdrp; 2747 uint8_t *whereptr; 2748 uint8_t *endptr; 2749 ip6_dest_t *desthdr; 2750 ip6_rthdr_t *rthdr; 2751 ip6_frag_t *fraghdr; 2752 2753 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 2754 length = IPV6_HDR_LEN; 2755 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2756 endptr = mp->b_wptr; 2757 2758 nexthdrp = &ip6h->ip6_nxt; 2759 while (whereptr < endptr) { 2760 /* Is there enough left for len + nexthdr? 
*/ 2761 if (whereptr + MIN_EHDR_LEN > endptr) 2762 break; 2763 2764 switch (*nexthdrp) { 2765 case IPPROTO_HOPOPTS: 2766 case IPPROTO_DSTOPTS: 2767 /* Assumes the headers are identical for hbh and dst */ 2768 desthdr = (ip6_dest_t *)whereptr; 2769 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2770 if ((uchar_t *)desthdr + ehdrlen > endptr) 2771 return (B_FALSE); 2772 nexthdrp = &desthdr->ip6d_nxt; 2773 break; 2774 case IPPROTO_ROUTING: 2775 rthdr = (ip6_rthdr_t *)whereptr; 2776 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2777 if ((uchar_t *)rthdr + ehdrlen > endptr) 2778 return (B_FALSE); 2779 nexthdrp = &rthdr->ip6r_nxt; 2780 break; 2781 case IPPROTO_FRAGMENT: 2782 fraghdr = (ip6_frag_t *)whereptr; 2783 ehdrlen = sizeof (ip6_frag_t); 2784 if ((uchar_t *)&fraghdr[1] > endptr) 2785 return (B_FALSE); 2786 nexthdrp = &fraghdr->ip6f_nxt; 2787 break; 2788 case IPPROTO_NONE: 2789 /* No next header means we're finished */ 2790 default: 2791 *hdr_length_ptr = length; 2792 *nexthdrpp = nexthdrp; 2793 return (B_TRUE); 2794 } 2795 length += ehdrlen; 2796 whereptr += ehdrlen; 2797 *hdr_length_ptr = length; 2798 *nexthdrpp = nexthdrp; 2799 } 2800 switch (*nexthdrp) { 2801 case IPPROTO_HOPOPTS: 2802 case IPPROTO_DSTOPTS: 2803 case IPPROTO_ROUTING: 2804 case IPPROTO_FRAGMENT: 2805 /* 2806 * If any know extension headers are still to be processed, 2807 * the packet's malformed (or at least all the IP header(s) are 2808 * not in the same mblk - and that should never happen. 2809 */ 2810 return (B_FALSE); 2811 2812 default: 2813 /* 2814 * If we get here, we know that all of the IP headers were in 2815 * the same mblk, even if the ULP header is in the next mblk. 2816 */ 2817 *hdr_length_ptr = length; 2818 *nexthdrpp = nexthdrp; 2819 return (B_TRUE); 2820 } 2821 } 2822 2823 /* 2824 * Return the length of the IPv6 related headers (including extension headers) 2825 * Returns a length even if the packet is malformed. 
2826 */ 2827 int 2828 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2829 { 2830 uint16_t hdr_len; 2831 uint8_t *nexthdrp; 2832 2833 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 2834 return (hdr_len); 2835 } 2836 2837 /* 2838 * Parse and process any hop-by-hop or destination options. 2839 * 2840 * Assumes that q is an ill read queue so that ICMP errors for link-local 2841 * destinations are sent out the correct interface. 2842 * 2843 * Returns -1 if there was an error and mp has been consumed. 2844 * Returns 0 if no special action is needed. 2845 * Returns 1 if the packet contained a router alert option for this node 2846 * which is verified to be "interesting/known" for our implementation. 2847 * 2848 * XXX Note: In future as more hbh or dest options are defined, 2849 * it may be better to have different routines for hbh and dest 2850 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2851 * may have same value in different namespaces. Or is it same namespace ?? 2852 * Current code checks for each opt_type (other than pads) if it is in 2853 * the expected nexthdr (hbh or dest) 2854 */ 2855 int 2856 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2857 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2858 { 2859 uint8_t opt_type; 2860 uint_t optused; 2861 int ret = 0; 2862 const char *errtype; 2863 ill_t *ill = ira->ira_ill; 2864 ip_stack_t *ipst = ill->ill_ipst; 2865 2866 while (optlen != 0) { 2867 opt_type = *optptr; 2868 if (opt_type == IP6OPT_PAD1) { 2869 optused = 1; 2870 } else { 2871 if (optlen < 2) 2872 goto bad_opt; 2873 errtype = "malformed"; 2874 if (opt_type == ip6opt_ls) { 2875 optused = 2 + optptr[1]; 2876 if (optused > optlen) 2877 goto bad_opt; 2878 } else switch (opt_type) { 2879 case IP6OPT_PADN: 2880 /* 2881 * Note:We don't verify that (N-2) pad octets 2882 * are zero as required by spec. Adhere to 2883 * "be liberal in what you accept..." 
part of 2884 * implementation philosophy (RFC791,RFC1122) 2885 */ 2886 optused = 2 + optptr[1]; 2887 if (optused > optlen) 2888 goto bad_opt; 2889 break; 2890 2891 case IP6OPT_JUMBO: 2892 if (hdr_type != IPPROTO_HOPOPTS) 2893 goto opt_error; 2894 goto opt_error; /* XXX Not implemented! */ 2895 2896 case IP6OPT_ROUTER_ALERT: { 2897 struct ip6_opt_router *or; 2898 2899 if (hdr_type != IPPROTO_HOPOPTS) 2900 goto opt_error; 2901 optused = 2 + optptr[1]; 2902 if (optused > optlen) 2903 goto bad_opt; 2904 or = (struct ip6_opt_router *)optptr; 2905 /* Check total length and alignment */ 2906 if (optused != sizeof (*or) || 2907 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2908 goto opt_error; 2909 /* Check value */ 2910 switch (*((uint16_t *)or->ip6or_value)) { 2911 case IP6_ALERT_MLD: 2912 case IP6_ALERT_RSVP: 2913 ret = 1; 2914 } 2915 break; 2916 } 2917 case IP6OPT_HOME_ADDRESS: { 2918 /* 2919 * Minimal support for the home address option 2920 * (which is required by all IPv6 nodes). 2921 * Implement by just swapping the home address 2922 * and source address. 2923 * XXX Note: this has IPsec implications since 2924 * AH needs to take this into account. 2925 * Also, when IPsec is used we need to ensure 2926 * that this is only processed once 2927 * in the received packet (to avoid swapping 2928 * back and forth). 2929 * NOTE:This option processing is considered 2930 * to be unsafe and prone to a denial of 2931 * service attack. 2932 * The current processing is not safe even with 2933 * IPsec secured IP packets. Since the home 2934 * address option processing requirement still 2935 * is in the IETF draft and in the process of 2936 * being redefined for its usage, it has been 2937 * decided to turn off the option by default. 2938 * If this section of code needs to be executed, 2939 * ndd variable ip6_ignore_home_address_opt 2940 * should be set to 0 at the user's own risk. 
2941 */ 2942 struct ip6_opt_home_address *oh; 2943 in6_addr_t tmp; 2944 2945 if (ipst->ips_ipv6_ignore_home_address_opt) 2946 goto opt_error; 2947 2948 if (hdr_type != IPPROTO_DSTOPTS) 2949 goto opt_error; 2950 optused = 2 + optptr[1]; 2951 if (optused > optlen) 2952 goto bad_opt; 2953 2954 /* 2955 * We did this dest. opt the first time 2956 * around (i.e. before AH processing). 2957 * If we've done AH... stop now. 2958 */ 2959 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2960 ira->ira_ipsec_ah_sa != NULL) 2961 break; 2962 2963 oh = (struct ip6_opt_home_address *)optptr; 2964 /* Check total length and alignment */ 2965 if (optused < sizeof (*oh) || 2966 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2967 goto opt_error; 2968 /* Swap ip6_src and the home address */ 2969 tmp = ip6h->ip6_src; 2970 /* XXX Note: only 8 byte alignment option */ 2971 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2972 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2973 break; 2974 } 2975 2976 case IP6OPT_TUNNEL_LIMIT: 2977 if (hdr_type != IPPROTO_DSTOPTS) { 2978 goto opt_error; 2979 } 2980 optused = 2 + optptr[1]; 2981 if (optused > optlen) { 2982 goto bad_opt; 2983 } 2984 if (optused != 3) { 2985 goto opt_error; 2986 } 2987 break; 2988 2989 default: 2990 errtype = "unknown"; 2991 /* FALLTHROUGH */ 2992 opt_error: 2993 /* Determine which zone should send error */ 2994 switch (IP6OPT_TYPE(opt_type)) { 2995 case IP6OPT_TYPE_SKIP: 2996 optused = 2 + optptr[1]; 2997 if (optused > optlen) 2998 goto bad_opt; 2999 ip1dbg(("ip_process_options_v6: %s " 3000 "opt 0x%x skipped\n", 3001 errtype, opt_type)); 3002 break; 3003 case IP6OPT_TYPE_DISCARD: 3004 ip1dbg(("ip_process_options_v6: %s " 3005 "opt 0x%x; packet dropped\n", 3006 errtype, opt_type)); 3007 BUMP_MIB(ill->ill_ip_mib, 3008 ipIfStatsInHdrErrors); 3009 ip_drop_input("ipIfStatsInHdrErrors", 3010 mp, ill); 3011 freemsg(mp); 3012 return (-1); 3013 case IP6OPT_TYPE_ICMP: 3014 BUMP_MIB(ill->ill_ip_mib, 3015 ipIfStatsInHdrErrors); 3016 
ip_drop_input("ipIfStatsInHdrErrors", 3017 mp, ill); 3018 icmp_param_problem_v6(mp, 3019 ICMP6_PARAMPROB_OPTION, 3020 (uint32_t)(optptr - 3021 (uint8_t *)ip6h), 3022 B_FALSE, ira); 3023 return (-1); 3024 case IP6OPT_TYPE_FORCEICMP: 3025 BUMP_MIB(ill->ill_ip_mib, 3026 ipIfStatsInHdrErrors); 3027 ip_drop_input("ipIfStatsInHdrErrors", 3028 mp, ill); 3029 icmp_param_problem_v6(mp, 3030 ICMP6_PARAMPROB_OPTION, 3031 (uint32_t)(optptr - 3032 (uint8_t *)ip6h), 3033 B_TRUE, ira); 3034 return (-1); 3035 default: 3036 ASSERT(0); 3037 } 3038 } 3039 } 3040 optlen -= optused; 3041 optptr += optused; 3042 } 3043 return (ret); 3044 3045 bad_opt: 3046 /* Determine which zone should send error */ 3047 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3048 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 3049 (uint32_t)(optptr - (uint8_t *)ip6h), 3050 B_FALSE, ira); 3051 return (-1); 3052 } 3053 3054 /* 3055 * Process a routing header that is not yet empty. 3056 * Because of RFC 5095, we now reject all route headers. 3057 */ 3058 void 3059 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 3060 ip_recv_attr_t *ira) 3061 { 3062 ill_t *ill = ira->ira_ill; 3063 ip_stack_t *ipst = ill->ill_ipst; 3064 3065 ASSERT(rth->ip6r_segleft != 0); 3066 3067 if (!ipst->ips_ipv6_forward_src_routed) { 3068 /* XXX Check for source routed out same interface? */ 3069 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 3070 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 3071 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 3072 freemsg(mp); 3073 return; 3074 } 3075 3076 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3077 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3078 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 3079 B_FALSE, ira); 3080 } 3081 3082 /* 3083 * Read side put procedure for IPv6 module. 
3084 */ 3085 void 3086 ip_rput_v6(queue_t *q, mblk_t *mp) 3087 { 3088 ill_t *ill; 3089 3090 ill = (ill_t *)q->q_ptr; 3091 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { 3092 union DL_primitives *dl; 3093 3094 dl = (union DL_primitives *)mp->b_rptr; 3095 /* 3096 * Things are opening or closing - only accept DLPI 3097 * ack messages. If the stream is closing and ip_wsrv 3098 * has completed, ip_close is out of the qwait, but has 3099 * not yet completed qprocsoff. Don't proceed any further 3100 * because the ill has been cleaned up and things hanging 3101 * off the ill have been freed. 3102 */ 3103 if ((mp->b_datap->db_type != M_PCPROTO) || 3104 (dl->dl_primitive == DL_UNITDATA_IND)) { 3105 inet_freemsg(mp); 3106 return; 3107 } 3108 } 3109 if (DB_TYPE(mp) == M_DATA) { 3110 struct mac_header_info_s mhi; 3111 3112 ip_mdata_to_mhi(ill, mp, &mhi); 3113 ip_input_v6(ill, NULL, mp, &mhi); 3114 } else { 3115 ip_rput_notdata(ill, mp); 3116 } 3117 } 3118 3119 /* 3120 * Walk through the IPv6 packet in mp and see if there's an AH header 3121 * in it. See if the AH header needs to get done before other headers in 3122 * the packet. (Worker function for ipsec_early_ah_v6().) 3123 */ 3124 #define IPSEC_HDR_DONT_PROCESS 0 3125 #define IPSEC_HDR_PROCESS 1 3126 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 3127 static int 3128 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 3129 { 3130 uint_t length; 3131 uint_t ehdrlen; 3132 uint8_t *whereptr; 3133 uint8_t *endptr; 3134 uint8_t *nexthdrp; 3135 ip6_dest_t *desthdr; 3136 ip6_rthdr_t *rthdr; 3137 ip6_t *ip6h; 3138 3139 /* 3140 * For now just pullup everything. In general, the less pullups, 3141 * the better, but there's so much squirrelling through anyway, 3142 * it's just easier this way. 
3143 */ 3144 if (!pullupmsg(mp, -1)) { 3145 return (IPSEC_MEMORY_ERROR); 3146 } 3147 3148 ip6h = (ip6_t *)mp->b_rptr; 3149 length = IPV6_HDR_LEN; 3150 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3151 endptr = mp->b_wptr; 3152 3153 /* 3154 * We can't just use the argument nexthdr in the place 3155 * of nexthdrp becaue we don't dereference nexthdrp 3156 * till we confirm whether it is a valid address. 3157 */ 3158 nexthdrp = &ip6h->ip6_nxt; 3159 while (whereptr < endptr) { 3160 /* Is there enough left for len + nexthdr? */ 3161 if (whereptr + MIN_EHDR_LEN > endptr) 3162 return (IPSEC_MEMORY_ERROR); 3163 3164 switch (*nexthdrp) { 3165 case IPPROTO_HOPOPTS: 3166 case IPPROTO_DSTOPTS: 3167 /* Assumes the headers are identical for hbh and dst */ 3168 desthdr = (ip6_dest_t *)whereptr; 3169 ehdrlen = 8 * (desthdr->ip6d_len + 1); 3170 if ((uchar_t *)desthdr + ehdrlen > endptr) 3171 return (IPSEC_MEMORY_ERROR); 3172 /* 3173 * Return DONT_PROCESS because the destination 3174 * options header may be for each hop in a 3175 * routing-header, and we only want AH if we're 3176 * finished with routing headers. 3177 */ 3178 if (*nexthdrp == IPPROTO_DSTOPTS) 3179 return (IPSEC_HDR_DONT_PROCESS); 3180 nexthdrp = &desthdr->ip6d_nxt; 3181 break; 3182 case IPPROTO_ROUTING: 3183 rthdr = (ip6_rthdr_t *)whereptr; 3184 3185 /* 3186 * If there's more hops left on the routing header, 3187 * return now with DON'T PROCESS. 
3188 */ 3189 if (rthdr->ip6r_segleft > 0) 3190 return (IPSEC_HDR_DONT_PROCESS); 3191 3192 ehdrlen = 8 * (rthdr->ip6r_len + 1); 3193 if ((uchar_t *)rthdr + ehdrlen > endptr) 3194 return (IPSEC_MEMORY_ERROR); 3195 nexthdrp = &rthdr->ip6r_nxt; 3196 break; 3197 case IPPROTO_FRAGMENT: 3198 /* Wait for reassembly */ 3199 return (IPSEC_HDR_DONT_PROCESS); 3200 case IPPROTO_AH: 3201 *nexthdr = IPPROTO_AH; 3202 return (IPSEC_HDR_PROCESS); 3203 case IPPROTO_NONE: 3204 /* No next header means we're finished */ 3205 default: 3206 return (IPSEC_HDR_DONT_PROCESS); 3207 } 3208 length += ehdrlen; 3209 whereptr += ehdrlen; 3210 } 3211 /* 3212 * Malformed/truncated packet. 3213 */ 3214 return (IPSEC_MEMORY_ERROR); 3215 } 3216 3217 /* 3218 * Path for AH if options are present. 3219 * Returns NULL if the mblk was consumed. 3220 * 3221 * Sometimes AH needs to be done before other IPv6 headers for security 3222 * reasons. This function (and its ipsec_needs_processing_v6() above) 3223 * indicates if that is so, and fans out to the appropriate IPsec protocol 3224 * for the datagram passed in. 3225 */ 3226 mblk_t * 3227 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira) 3228 { 3229 uint8_t nexthdr; 3230 ah_t *ah; 3231 ill_t *ill = ira->ira_ill; 3232 ip_stack_t *ipst = ill->ill_ipst; 3233 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3234 3235 switch (ipsec_needs_processing_v6(mp, &nexthdr)) { 3236 case IPSEC_MEMORY_ERROR: 3237 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3238 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3239 freemsg(mp); 3240 return (NULL); 3241 case IPSEC_HDR_DONT_PROCESS: 3242 return (mp); 3243 } 3244 3245 /* Default means send it to AH! 
*/ 3246 ASSERT(nexthdr == IPPROTO_AH); 3247 3248 if (!ipsec_loaded(ipss)) { 3249 ip_proto_not_sup(mp, ira); 3250 return (NULL); 3251 } 3252 3253 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 3254 if (mp == NULL) 3255 return (NULL); 3256 ASSERT(ah != NULL); 3257 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3258 ASSERT(ira->ira_ipsec_ah_sa != NULL); 3259 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 3260 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira); 3261 3262 if (mp == NULL) { 3263 /* 3264 * Either it failed or is pending. In the former case 3265 * ipIfStatsInDiscards was increased. 3266 */ 3267 return (NULL); 3268 } 3269 3270 /* we're done with IPsec processing, send it up */ 3271 ip_input_post_ipsec(mp, ira); 3272 return (NULL); 3273 } 3274 3275 /* 3276 * Reassemble fragment. 3277 * When it returns a completed message the first mblk will only contain 3278 * the headers prior to the fragment header, with the nexthdr value updated 3279 * to be the header after the fragment header. 3280 */ 3281 mblk_t * 3282 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h, 3283 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira) 3284 { 3285 uint32_t ident = ntohl(fraghdr->ip6f_ident); 3286 uint16_t offset; 3287 boolean_t more_frags; 3288 uint8_t nexthdr = fraghdr->ip6f_nxt; 3289 in6_addr_t *v6dst_ptr; 3290 in6_addr_t *v6src_ptr; 3291 uint_t end; 3292 uint_t hdr_length; 3293 size_t count; 3294 ipf_t *ipf; 3295 ipf_t **ipfp; 3296 ipfb_t *ipfb; 3297 mblk_t *mp1; 3298 uint8_t ecn_info = 0; 3299 size_t msg_len; 3300 mblk_t *tail_mp; 3301 mblk_t *t_mp; 3302 boolean_t pruned = B_FALSE; 3303 uint32_t sum_val; 3304 uint16_t sum_flags; 3305 ill_t *ill = ira->ira_ill; 3306 ip_stack_t *ipst = ill->ill_ipst; 3307 uint_t prev_nexthdr_offset; 3308 uint8_t prev_nexthdr; 3309 uint8_t *ptr; 3310 uint32_t packet_size; 3311 3312 /* 3313 * We utilize hardware computed checksum info only for UDP since 3314 * IP fragmentation is a normal occurence for the protocol. 
In 3315 * addition, checksum offload support for IP fragments carrying 3316 * UDP payload is commonly implemented across network adapters. 3317 */ 3318 ASSERT(ira->ira_rill != NULL); 3319 if (nexthdr == IPPROTO_UDP && dohwcksum && 3320 ILL_HCKSUM_CAPABLE(ira->ira_rill) && 3321 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 3322 mblk_t *mp1 = mp->b_cont; 3323 int32_t len; 3324 3325 /* Record checksum information from the packet */ 3326 sum_val = (uint32_t)DB_CKSUM16(mp); 3327 sum_flags = DB_CKSUMFLAGS(mp); 3328 3329 /* fragmented payload offset from beginning of mblk */ 3330 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 3331 3332 if ((sum_flags & HCK_PARTIALCKSUM) && 3333 (mp1 == NULL || mp1->b_cont == NULL) && 3334 offset >= DB_CKSUMSTART(mp) && 3335 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) { 3336 uint32_t adj; 3337 /* 3338 * Partial checksum has been calculated by hardware 3339 * and attached to the packet; in addition, any 3340 * prepended extraneous data is even byte aligned. 3341 * If any such data exists, we adjust the checksum; 3342 * this would also handle any postpended data. 3343 */ 3344 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 3345 mp, mp1, len, adj); 3346 3347 /* One's complement subtract extraneous checksum */ 3348 if (adj >= sum_val) 3349 sum_val = ~(adj - sum_val) & 0xFFFF; 3350 else 3351 sum_val -= adj; 3352 } 3353 } else { 3354 sum_val = 0; 3355 sum_flags = 0; 3356 } 3357 3358 /* Clear hardware checksumming flag */ 3359 DB_CKSUMFLAGS(mp) = 0; 3360 3361 /* 3362 * Determine the offset (from the begining of the IP header) 3363 * of the nexthdr value which has IPPROTO_FRAGMENT. We use 3364 * this when removing the fragment header from the packet. 3365 * This packet consists of the IPv6 header, a potential 3366 * hop-by-hop options header, a potential pre-routing-header 3367 * destination options header, and a potential routing header. 
3368 */ 3369 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 3370 prev_nexthdr = ip6h->ip6_nxt; 3371 ptr = (uint8_t *)&ip6h[1]; 3372 3373 if (prev_nexthdr == IPPROTO_HOPOPTS) { 3374 ip6_hbh_t *hbh_hdr; 3375 uint_t hdr_len; 3376 3377 hbh_hdr = (ip6_hbh_t *)ptr; 3378 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 3379 prev_nexthdr = hbh_hdr->ip6h_nxt; 3380 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 3381 - (uint8_t *)ip6h; 3382 ptr += hdr_len; 3383 } 3384 if (prev_nexthdr == IPPROTO_DSTOPTS) { 3385 ip6_dest_t *dest_hdr; 3386 uint_t hdr_len; 3387 3388 dest_hdr = (ip6_dest_t *)ptr; 3389 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 3390 prev_nexthdr = dest_hdr->ip6d_nxt; 3391 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 3392 - (uint8_t *)ip6h; 3393 ptr += hdr_len; 3394 } 3395 if (prev_nexthdr == IPPROTO_ROUTING) { 3396 ip6_rthdr_t *rthdr; 3397 uint_t hdr_len; 3398 3399 rthdr = (ip6_rthdr_t *)ptr; 3400 prev_nexthdr = rthdr->ip6r_nxt; 3401 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 3402 - (uint8_t *)ip6h; 3403 hdr_len = 8 * (rthdr->ip6r_len + 1); 3404 ptr += hdr_len; 3405 } 3406 if (prev_nexthdr != IPPROTO_FRAGMENT) { 3407 /* Can't handle other headers before the fragment header */ 3408 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3409 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3410 freemsg(mp); 3411 return (NULL); 3412 } 3413 3414 /* 3415 * Note: Fragment offset in header is in 8-octet units. 3416 * Clearing least significant 3 bits not only extracts 3417 * it but also gets it in units of octets. 3418 */ 3419 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 3420 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 3421 3422 /* 3423 * Is the more frags flag on and the payload length not a multiple 3424 * of eight? 
3425 */ 3426 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 3427 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3428 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3429 (uint32_t)((char *)&ip6h->ip6_plen - 3430 (char *)ip6h), B_FALSE, ira); 3431 return (NULL); 3432 } 3433 3434 v6src_ptr = &ip6h->ip6_src; 3435 v6dst_ptr = &ip6h->ip6_dst; 3436 end = remlen; 3437 3438 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 3439 end += offset; 3440 3441 /* 3442 * Would fragment cause reassembled packet to have a payload length 3443 * greater than IP_MAXPACKET - the max payload size? 3444 */ 3445 if (end > IP_MAXPACKET) { 3446 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3447 ip_drop_input("Reassembled packet too large", mp, ill); 3448 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3449 (uint32_t)((char *)&fraghdr->ip6f_offlg - 3450 (char *)ip6h), B_FALSE, ira); 3451 return (NULL); 3452 } 3453 3454 /* 3455 * This packet just has one fragment. Reassembly not 3456 * needed. 3457 */ 3458 if (!more_frags && offset == 0) { 3459 goto reass_done; 3460 } 3461 3462 /* 3463 * Drop the fragmented as early as possible, if 3464 * we don't have resource(s) to re-assemble. 3465 */ 3466 if (ipst->ips_ip_reass_queue_bytes == 0) { 3467 freemsg(mp); 3468 return (NULL); 3469 } 3470 3471 /* Record the ECN field info. */ 3472 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 3473 /* 3474 * If this is not the first fragment, dump the unfragmentable 3475 * portion of the packet. 3476 */ 3477 if (offset) 3478 mp->b_rptr = (uchar_t *)&fraghdr[1]; 3479 3480 /* 3481 * Fragmentation reassembly. Each ILL has a hash table for 3482 * queueing packets undergoing reassembly for all IPIFs 3483 * associated with the ILL. The hash is based on the packet 3484 * IP ident field. The ILL frag hash table was allocated 3485 * as a timer block at the time the ILL was created. Whenever 3486 * there is anything on the reassembly queue, the timer will 3487 * be running. 
3488 */ 3489 /* Handle vnic loopback of fragments */ 3490 if (mp->b_datap->db_ref > 2) 3491 msg_len = 0; 3492 else 3493 msg_len = MBLKSIZE(mp); 3494 3495 tail_mp = mp; 3496 while (tail_mp->b_cont != NULL) { 3497 tail_mp = tail_mp->b_cont; 3498 if (tail_mp->b_datap->db_ref <= 2) 3499 msg_len += MBLKSIZE(tail_mp); 3500 } 3501 /* 3502 * If the reassembly list for this ILL will get too big 3503 * prune it. 3504 */ 3505 3506 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 3507 ipst->ips_ip_reass_queue_bytes) { 3508 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len, 3509 uint_t, ill->ill_frag_count, 3510 uint_t, ipst->ips_ip_reass_queue_bytes); 3511 ill_frag_prune(ill, 3512 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 3513 (ipst->ips_ip_reass_queue_bytes - msg_len)); 3514 pruned = B_TRUE; 3515 } 3516 3517 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 3518 mutex_enter(&ipfb->ipfb_lock); 3519 3520 ipfp = &ipfb->ipfb_ipf; 3521 /* Try to find an existing fragment queue for this packet. */ 3522 for (;;) { 3523 ipf = ipfp[0]; 3524 if (ipf) { 3525 /* 3526 * It has to match on ident, source address, and 3527 * dest address. 3528 */ 3529 if (ipf->ipf_ident == ident && 3530 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 3531 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 3532 3533 /* 3534 * If we have received too many 3535 * duplicate fragments for this packet 3536 * free it. 3537 */ 3538 if (ipf->ipf_num_dups > ip_max_frag_dups) { 3539 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3540 freemsg(mp); 3541 mutex_exit(&ipfb->ipfb_lock); 3542 return (NULL); 3543 } 3544 3545 break; 3546 } 3547 ipfp = &ipf->ipf_hash_next; 3548 continue; 3549 } 3550 3551 3552 /* 3553 * If we pruned the list, do we want to store this new 3554 * fragment?. We apply an optimization here based on the 3555 * fact that most fragments will be received in order. 3556 * So if the offset of this incoming fragment is zero, 3557 * it is the first fragment of a new packet. 
We will 3558 * keep it. Otherwise drop the fragment, as we have 3559 * probably pruned the packet already (since the 3560 * packet cannot be found). 3561 */ 3562 3563 if (pruned && offset != 0) { 3564 mutex_exit(&ipfb->ipfb_lock); 3565 freemsg(mp); 3566 return (NULL); 3567 } 3568 3569 /* New guy. Allocate a frag message. */ 3570 mp1 = allocb(sizeof (*ipf), BPRI_MED); 3571 if (!mp1) { 3572 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3573 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3574 freemsg(mp); 3575 partial_reass_done: 3576 mutex_exit(&ipfb->ipfb_lock); 3577 return (NULL); 3578 } 3579 3580 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 3581 /* 3582 * Too many fragmented packets in this hash bucket. 3583 * Free the oldest. 3584 */ 3585 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 3586 } 3587 3588 mp1->b_cont = mp; 3589 3590 /* Initialize the fragment header. */ 3591 ipf = (ipf_t *)mp1->b_rptr; 3592 ipf->ipf_mp = mp1; 3593 ipf->ipf_ptphn = ipfp; 3594 ipfp[0] = ipf; 3595 ipf->ipf_hash_next = NULL; 3596 ipf->ipf_ident = ident; 3597 ipf->ipf_v6src = *v6src_ptr; 3598 ipf->ipf_v6dst = *v6dst_ptr; 3599 /* Record reassembly start time. */ 3600 ipf->ipf_timestamp = gethrestime_sec(); 3601 /* Record ipf generation and account for frag header */ 3602 ipf->ipf_gen = ill->ill_ipf_gen++; 3603 ipf->ipf_count = MBLKSIZE(mp1); 3604 ipf->ipf_protocol = nexthdr; 3605 ipf->ipf_nf_hdr_len = 0; 3606 ipf->ipf_prev_nexthdr_offset = 0; 3607 ipf->ipf_last_frag_seen = B_FALSE; 3608 ipf->ipf_ecn = ecn_info; 3609 ipf->ipf_num_dups = 0; 3610 ipfb->ipfb_frag_pkts++; 3611 ipf->ipf_checksum = 0; 3612 ipf->ipf_checksum_flags = 0; 3613 3614 /* Store checksum value in fragment header */ 3615 if (sum_flags != 0) { 3616 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3617 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3618 ipf->ipf_checksum = sum_val; 3619 ipf->ipf_checksum_flags = sum_flags; 3620 } 3621 3622 /* 3623 * We handle reassembly two ways. 
In the easy case, 3624 * where all the fragments show up in order, we do 3625 * minimal bookkeeping, and just clip new pieces on 3626 * the end. If we ever see a hole, then we go off 3627 * to ip_reassemble which has to mark the pieces and 3628 * keep track of the number of holes, etc. Obviously, 3629 * the point of having both mechanisms is so we can 3630 * handle the easy case as efficiently as possible. 3631 */ 3632 if (offset == 0) { 3633 /* Easy case, in-order reassembly so far. */ 3634 /* Update the byte count */ 3635 ipf->ipf_count += msg_len; 3636 ipf->ipf_tail_mp = tail_mp; 3637 /* 3638 * Keep track of next expected offset in 3639 * ipf_end. 3640 */ 3641 ipf->ipf_end = end; 3642 ipf->ipf_nf_hdr_len = hdr_length; 3643 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset; 3644 } else { 3645 /* Hard case, hole at the beginning. */ 3646 ipf->ipf_tail_mp = NULL; 3647 /* 3648 * ipf_end == 0 means that we have given up 3649 * on easy reassembly. 3650 */ 3651 ipf->ipf_end = 0; 3652 3653 /* Forget checksum offload from now on */ 3654 ipf->ipf_checksum_flags = 0; 3655 3656 /* 3657 * ipf_hole_cnt is set by ip_reassemble. 3658 * ipf_count is updated by ip_reassemble. 3659 * No need to check for return value here 3660 * as we don't expect reassembly to complete or 3661 * fail for the first fragment itself. 3662 */ 3663 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 3664 msg_len); 3665 } 3666 /* Update per ipfb and ill byte counts */ 3667 ipfb->ipfb_count += ipf->ipf_count; 3668 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3669 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 3670 /* If the frag timer wasn't already going, start it. 
*/ 3671 mutex_enter(&ill->ill_lock); 3672 ill_frag_timer_start(ill); 3673 mutex_exit(&ill->ill_lock); 3674 goto partial_reass_done; 3675 } 3676 3677 /* 3678 * If the packet's flag has changed (it could be coming up 3679 * from an interface different than the previous, therefore 3680 * possibly different checksum capability), then forget about 3681 * any stored checksum states. Otherwise add the value to 3682 * the existing one stored in the fragment header. 3683 */ 3684 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 3685 sum_val += ipf->ipf_checksum; 3686 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3687 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3688 ipf->ipf_checksum = sum_val; 3689 } else if (ipf->ipf_checksum_flags != 0) { 3690 /* Forget checksum offload from now on */ 3691 ipf->ipf_checksum_flags = 0; 3692 } 3693 3694 /* 3695 * We have a new piece of a datagram which is already being 3696 * reassembled. Update the ECN info if all IP fragments 3697 * are ECN capable. If there is one which is not, clear 3698 * all the info. If there is at least one which has CE 3699 * code point, IP needs to report that up to transport. 3700 */ 3701 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 3702 if (ecn_info == IPH_ECN_CE) 3703 ipf->ipf_ecn = IPH_ECN_CE; 3704 } else { 3705 ipf->ipf_ecn = IPH_ECN_NECT; 3706 } 3707 3708 if (offset && ipf->ipf_end == offset) { 3709 /* The new fragment fits at the end */ 3710 ipf->ipf_tail_mp->b_cont = mp; 3711 /* Update the byte count */ 3712 ipf->ipf_count += msg_len; 3713 /* Update per ipfb and ill byte counts */ 3714 ipfb->ipfb_count += msg_len; 3715 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3716 atomic_add_32(&ill->ill_frag_count, msg_len); 3717 if (more_frags) { 3718 /* More to come. */ 3719 ipf->ipf_end = end; 3720 ipf->ipf_tail_mp = tail_mp; 3721 goto partial_reass_done; 3722 } 3723 } else { 3724 /* 3725 * Go do the hard cases. 3726 * Call ip_reassemble(). 
3727 */ 3728 int ret; 3729 3730 if (offset == 0) { 3731 if (ipf->ipf_prev_nexthdr_offset == 0) { 3732 ipf->ipf_nf_hdr_len = hdr_length; 3733 ipf->ipf_prev_nexthdr_offset = 3734 prev_nexthdr_offset; 3735 } 3736 } 3737 /* Save current byte count */ 3738 count = ipf->ipf_count; 3739 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 3740 3741 /* Count of bytes added and subtracted (freeb()ed) */ 3742 count = ipf->ipf_count - count; 3743 if (count) { 3744 /* Update per ipfb and ill byte counts */ 3745 ipfb->ipfb_count += count; 3746 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3747 atomic_add_32(&ill->ill_frag_count, count); 3748 } 3749 if (ret == IP_REASS_PARTIAL) { 3750 goto partial_reass_done; 3751 } else if (ret == IP_REASS_FAILED) { 3752 /* Reassembly failed. Free up all resources */ 3753 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3754 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 3755 IP_REASS_SET_START(t_mp, 0); 3756 IP_REASS_SET_END(t_mp, 0); 3757 } 3758 freemsg(mp); 3759 goto partial_reass_done; 3760 } 3761 3762 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 3763 } 3764 /* 3765 * We have completed reassembly. Unhook the frag header from 3766 * the reassembly list. 3767 * 3768 * Grab the unfragmentable header length next header value out 3769 * of the first fragment 3770 */ 3771 ASSERT(ipf->ipf_nf_hdr_len != 0); 3772 hdr_length = ipf->ipf_nf_hdr_len; 3773 3774 /* 3775 * Before we free the frag header, record the ECN info 3776 * to report back to the transport. 
3777 */ 3778 ecn_info = ipf->ipf_ecn; 3779 3780 /* 3781 * Store the nextheader field in the header preceding the fragment 3782 * header 3783 */ 3784 nexthdr = ipf->ipf_protocol; 3785 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 3786 ipfp = ipf->ipf_ptphn; 3787 3788 /* We need to supply these to caller */ 3789 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 3790 sum_val = ipf->ipf_checksum; 3791 else 3792 sum_val = 0; 3793 3794 mp1 = ipf->ipf_mp; 3795 count = ipf->ipf_count; 3796 ipf = ipf->ipf_hash_next; 3797 if (ipf) 3798 ipf->ipf_ptphn = ipfp; 3799 ipfp[0] = ipf; 3800 atomic_add_32(&ill->ill_frag_count, -count); 3801 ASSERT(ipfb->ipfb_count >= count); 3802 ipfb->ipfb_count -= count; 3803 ipfb->ipfb_frag_pkts--; 3804 mutex_exit(&ipfb->ipfb_lock); 3805 /* Ditch the frag header. */ 3806 mp = mp1->b_cont; 3807 freeb(mp1); 3808 3809 /* 3810 * Make sure the packet is good by doing some sanity 3811 * check. If bad we can silentely drop the packet. 3812 */ 3813 reass_done: 3814 if (hdr_length < sizeof (ip6_frag_t)) { 3815 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3816 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3817 ip1dbg(("ip_input_fragment_v6: bad packet\n")); 3818 freemsg(mp); 3819 return (NULL); 3820 } 3821 3822 /* 3823 * Remove the fragment header from the initial header by 3824 * splitting the mblk into the non-fragmentable header and 3825 * everthing after the fragment extension header. This has the 3826 * side effect of putting all the headers that need destination 3827 * processing into the b_cont block-- on return this fact is 3828 * used in order to avoid having to look at the extensions 3829 * already processed. 3830 * 3831 * Note that this code assumes that the unfragmentable portion 3832 * of the header is in the first mblk and increments 3833 * the read pointer past it. If this assumption is broken 3834 * this code fails badly. 
3835 */ 3836 if (mp->b_rptr + hdr_length != mp->b_wptr) { 3837 mblk_t *nmp; 3838 3839 if (!(nmp = dupb(mp))) { 3840 ip1dbg(("ip_input_fragment_v6: dupb failed\n")); 3841 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3842 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3843 freemsg(mp); 3844 return (NULL); 3845 } 3846 nmp->b_cont = mp->b_cont; 3847 mp->b_cont = nmp; 3848 nmp->b_rptr += hdr_length; 3849 } 3850 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 3851 3852 ip6h = (ip6_t *)mp->b_rptr; 3853 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr; 3854 3855 /* Restore original IP length in header. */ 3856 packet_size = msgdsize(mp); 3857 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN)); 3858 /* Record the ECN info. */ 3859 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 3860 ip6h->ip6_vcf |= htonl(ecn_info << 20); 3861 3862 /* Update the receive attributes */ 3863 ira->ira_pktlen = packet_size; 3864 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t); 3865 ira->ira_protocol = nexthdr; 3866 3867 /* Reassembly is successful; set checksum information in packet */ 3868 DB_CKSUM16(mp) = (uint16_t)sum_val; 3869 DB_CKSUMFLAGS(mp) = sum_flags; 3870 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length; 3871 3872 return (mp); 3873 } 3874 3875 /* 3876 * Given an mblk and a ptr, find the destination address in an IPv6 routing 3877 * header. 
3878 */ 3879 static in6_addr_t 3880 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3881 { 3882 ip6_rthdr0_t *rt0; 3883 int segleft, numaddr; 3884 in6_addr_t *ap, rv = oldrv; 3885 3886 rt0 = (ip6_rthdr0_t *)whereptr; 3887 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3888 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3889 uint8_t *, whereptr); 3890 return (rv); 3891 } 3892 segleft = rt0->ip6r0_segleft; 3893 numaddr = rt0->ip6r0_len / 2; 3894 3895 if ((rt0->ip6r0_len & 0x1) || 3896 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3897 (segleft > rt0->ip6r0_len / 2)) { 3898 /* 3899 * Corrupt packet. Either the routing header length is odd 3900 * (can't happen) or mismatched compared to the packet, or the 3901 * number of addresses is. Return what we can. This will 3902 * only be a problem on forwarded packets that get squeezed 3903 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3904 */ 3905 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3906 whereptr); 3907 return (rv); 3908 } 3909 3910 if (segleft != 0) { 3911 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3912 rv = ap[numaddr - 1]; 3913 } 3914 3915 return (rv); 3916 } 3917 3918 /* 3919 * Walk through the options to see if there is a routing header. 3920 * If present get the destination which is the last address of 3921 * the option. 3922 * mp needs to be provided in cases when the extension headers might span 3923 * b_cont; mp is never modified by this function. 3924 */ 3925 in6_addr_t 3926 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3927 { 3928 const mblk_t *current_mp = mp; 3929 uint8_t nexthdr; 3930 uint8_t *whereptr; 3931 int ehdrlen; 3932 in6_addr_t rv; 3933 3934 whereptr = (uint8_t *)ip6h; 3935 ehdrlen = sizeof (ip6_t); 3936 3937 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3938 ASSERT(mp == NULL || 3939 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3940 3941 rv = ip6h->ip6_dst; 3942 nexthdr = ip6h->ip6_nxt; 3943 if (is_fragment != NULL) 3944 *is_fragment = B_FALSE; 3945 3946 /* 3947 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3948 * no extension headers will be split across mblks. 3949 */ 3950 3951 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3952 nexthdr == IPPROTO_ROUTING) { 3953 if (nexthdr == IPPROTO_ROUTING) 3954 rv = pluck_out_dst(current_mp, whereptr, rv); 3955 3956 /* 3957 * All IPv6 extension headers have the next-header in byte 3958 * 0, and the (length - 8) in 8-byte-words. 3959 */ 3960 while (current_mp != NULL && 3961 whereptr + ehdrlen >= current_mp->b_wptr) { 3962 ehdrlen -= (current_mp->b_wptr - whereptr); 3963 current_mp = current_mp->b_cont; 3964 if (current_mp == NULL) { 3965 /* Bad packet. Return what we can. */ 3966 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3967 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3968 goto done; 3969 } 3970 whereptr = current_mp->b_rptr; 3971 } 3972 whereptr += ehdrlen; 3973 3974 nexthdr = *whereptr; 3975 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3976 ehdrlen = (*(whereptr + 1) + 1) * 8; 3977 } 3978 3979 done: 3980 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3981 *is_fragment = B_TRUE; 3982 return (rv); 3983 } 3984 3985 /* 3986 * ip_source_routed_v6: 3987 * This function is called by redirect code (called from ip_input_v6) to 3988 * know whether this packet is source routed through this node i.e 3989 * whether this node (router) is part of the journey. This 3990 * function is called under two cases : 3991 * 3992 * case 1 : Routing header was processed by this node and 3993 * ip_process_rthdr replaced ip6_dst with the next hop 3994 * and we are forwarding the packet to the next hop. 
3995 * 3996 * case 2 : Routing header was not processed by this node and we 3997 * are just forwarding the packet. 3998 * 3999 * For case (1) we don't want to send redirects. For case(2) we 4000 * want to send redirects. 4001 */ 4002 static boolean_t 4003 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 4004 { 4005 uint8_t nexthdr; 4006 in6_addr_t *addrptr; 4007 ip6_rthdr0_t *rthdr; 4008 uint8_t numaddr; 4009 ip6_hbh_t *hbhhdr; 4010 uint_t ehdrlen; 4011 uint8_t *byteptr; 4012 4013 ip2dbg(("ip_source_routed_v6\n")); 4014 nexthdr = ip6h->ip6_nxt; 4015 ehdrlen = IPV6_HDR_LEN; 4016 4017 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 4018 while (nexthdr == IPPROTO_HOPOPTS || 4019 nexthdr == IPPROTO_DSTOPTS) { 4020 byteptr = (uint8_t *)ip6h + ehdrlen; 4021 /* 4022 * Check if we have already processed 4023 * packets or we are just a forwarding 4024 * router which only pulled up msgs up 4025 * to IPV6HDR and one HBH ext header 4026 */ 4027 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4028 ip2dbg(("ip_source_routed_v6: Extension" 4029 " headers not processed\n")); 4030 return (B_FALSE); 4031 } 4032 hbhhdr = (ip6_hbh_t *)byteptr; 4033 nexthdr = hbhhdr->ip6h_nxt; 4034 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 4035 } 4036 switch (nexthdr) { 4037 case IPPROTO_ROUTING: 4038 byteptr = (uint8_t *)ip6h + ehdrlen; 4039 /* 4040 * If for some reason, we haven't pulled up 4041 * the routing hdr data mblk, then we must 4042 * not have processed it at all. So for sure 4043 * we are not part of the source routed journey. 4044 */ 4045 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4046 ip2dbg(("ip_source_routed_v6: Routing" 4047 " header not processed\n")); 4048 return (B_FALSE); 4049 } 4050 rthdr = (ip6_rthdr0_t *)byteptr; 4051 /* 4052 * Either we are an intermediate router or the 4053 * last hop before destination and we have 4054 * already processed the routing header. 
4055 * If segment_left is greater than or equal to zero, 4056 * then we must be the (numaddr - segleft) entry 4057 * of the routing header. Although ip6r0_segleft 4058 * is a unit8_t variable, we still check for zero 4059 * or greater value, if in case the data type 4060 * is changed someday in future. 4061 */ 4062 if (rthdr->ip6r0_segleft > 0 || 4063 rthdr->ip6r0_segleft == 0) { 4064 numaddr = rthdr->ip6r0_len / 2; 4065 addrptr = (in6_addr_t *)((char *)rthdr + 4066 sizeof (*rthdr)); 4067 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 4068 if (addrptr != NULL) { 4069 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 4070 return (B_TRUE); 4071 ip1dbg(("ip_source_routed_v6: Not local\n")); 4072 } 4073 } 4074 /* FALLTHRU */ 4075 default: 4076 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 4077 return (B_FALSE); 4078 } 4079 } 4080 4081 /* 4082 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 4083 * We have not optimized this in terms of number of mblks 4084 * allocated. For instance, for each fragment sent we always allocate a 4085 * mblk to hold the IPv6 header and fragment header. 4086 * 4087 * Assumes that all the extension headers are contained in the first mblk 4088 * and that the fragment header has has already been added by calling 4089 * ip_fraghdr_add_v6. 
4090 */ 4091 int 4092 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4093 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4094 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4095 { 4096 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4097 ip6_t *fip6h; 4098 mblk_t *hmp; 4099 mblk_t *hmp0; 4100 mblk_t *dmp; 4101 ip6_frag_t *fraghdr; 4102 size_t unfragmentable_len; 4103 size_t mlen; 4104 size_t max_chunk; 4105 uint16_t off_flags; 4106 uint16_t offset = 0; 4107 ill_t *ill = nce->nce_ill; 4108 uint8_t nexthdr; 4109 uint8_t *ptr; 4110 ip_stack_t *ipst = ill->ill_ipst; 4111 uint_t priority = mp->b_band; 4112 int error = 0; 4113 4114 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4115 if (max_frag == 0) { 4116 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4117 ip_drop_output("FragFails: zero max_frag", mp, ill); 4118 freemsg(mp); 4119 return (EINVAL); 4120 } 4121 4122 /* 4123 * Caller should have added fraghdr_t to pkt_len, and also 4124 * updated ip6_plen. 4125 */ 4126 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4127 ASSERT(msgdsize(mp) == pkt_len); 4128 4129 /* 4130 * Determine the length of the unfragmentable portion of this 4131 * datagram. This consists of the IPv6 header, a potential 4132 * hop-by-hop options header, a potential pre-routing-header 4133 * destination options header, and a potential routing header. 
4134 */ 4135 nexthdr = ip6h->ip6_nxt; 4136 ptr = (uint8_t *)&ip6h[1]; 4137 4138 if (nexthdr == IPPROTO_HOPOPTS) { 4139 ip6_hbh_t *hbh_hdr; 4140 uint_t hdr_len; 4141 4142 hbh_hdr = (ip6_hbh_t *)ptr; 4143 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4144 nexthdr = hbh_hdr->ip6h_nxt; 4145 ptr += hdr_len; 4146 } 4147 if (nexthdr == IPPROTO_DSTOPTS) { 4148 ip6_dest_t *dest_hdr; 4149 uint_t hdr_len; 4150 4151 dest_hdr = (ip6_dest_t *)ptr; 4152 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4153 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4154 nexthdr = dest_hdr->ip6d_nxt; 4155 ptr += hdr_len; 4156 } 4157 } 4158 if (nexthdr == IPPROTO_ROUTING) { 4159 ip6_rthdr_t *rthdr; 4160 uint_t hdr_len; 4161 4162 rthdr = (ip6_rthdr_t *)ptr; 4163 nexthdr = rthdr->ip6r_nxt; 4164 hdr_len = 8 * (rthdr->ip6r_len + 1); 4165 ptr += hdr_len; 4166 } 4167 if (nexthdr != IPPROTO_FRAGMENT) { 4168 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4169 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4170 freemsg(mp); 4171 return (EINVAL); 4172 } 4173 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4174 unfragmentable_len += sizeof (ip6_frag_t); 4175 4176 max_chunk = (max_frag - unfragmentable_len) & ~7; 4177 4178 /* 4179 * Allocate an mblk with enough room for the link-layer 4180 * header and the unfragmentable part of the datagram, which includes 4181 * the fragment header. This (or a copy) will be used as the 4182 * first mblk for each fragment we send. 4183 */ 4184 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4185 if (hmp == NULL) { 4186 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4187 ip_drop_output("FragFails: no hmp", mp, ill); 4188 freemsg(mp); 4189 return (ENOBUFS); 4190 } 4191 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4192 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4193 4194 fip6h = (ip6_t *)hmp->b_rptr; 4195 bcopy(ip6h, fip6h, unfragmentable_len); 4196 4197 /* 4198 * pkt_len is set to the total length of the fragmentable data in this 4199 * datagram. 
For each fragment sent, we will decrement pkt_len 4200 * by the amount of fragmentable data sent in that fragment 4201 * until len reaches zero. 4202 */ 4203 pkt_len -= unfragmentable_len; 4204 4205 /* 4206 * Move read ptr past unfragmentable portion, we don't want this part 4207 * of the data in our fragments. 4208 */ 4209 mp->b_rptr += unfragmentable_len; 4210 if (mp->b_rptr == mp->b_wptr) { 4211 mblk_t *mp1 = mp->b_cont; 4212 freeb(mp); 4213 mp = mp1; 4214 } 4215 4216 while (pkt_len != 0) { 4217 mlen = MIN(pkt_len, max_chunk); 4218 pkt_len -= mlen; 4219 if (pkt_len != 0) { 4220 /* Not last */ 4221 hmp0 = copyb(hmp); 4222 if (hmp0 == NULL) { 4223 BUMP_MIB(ill->ill_ip_mib, 4224 ipIfStatsOutFragFails); 4225 ip_drop_output("FragFails: copyb failed", 4226 mp, ill); 4227 freeb(hmp); 4228 freemsg(mp); 4229 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4230 return (ENOBUFS); 4231 } 4232 off_flags = IP6F_MORE_FRAG; 4233 } else { 4234 /* Last fragment */ 4235 hmp0 = hmp; 4236 hmp = NULL; 4237 off_flags = 0; 4238 } 4239 fip6h = (ip6_t *)(hmp0->b_rptr); 4240 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4241 sizeof (ip6_frag_t)); 4242 4243 fip6h->ip6_plen = htons((uint16_t)(mlen + 4244 unfragmentable_len - IPV6_HDR_LEN)); 4245 /* 4246 * Note: Optimization alert. 4247 * In IPv6 (and IPv4) protocol header, Fragment Offset 4248 * ("offset") is 13 bits wide and in 8-octet units. 4249 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4250 * it occupies the most significant 13 bits. 4251 * (least significant 13 bits in IPv4). 4252 * We do not do any shifts here. Not shifting is same effect 4253 * as taking offset value in octet units, dividing by 8 and 4254 * then shifting 3 bits left to line it up in place in proper 4255 * place protocol header. 
4256 */ 4257 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4258 4259 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4260 /* mp has already been freed by ip_carve_mp() */ 4261 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4262 ip_drop_output("FragFails: could not carve mp", 4263 hmp0, ill); 4264 if (hmp != NULL) 4265 freeb(hmp); 4266 freeb(hmp0); 4267 ip1dbg(("ip_carve_mp: failed\n")); 4268 return (ENOBUFS); 4269 } 4270 hmp0->b_cont = dmp; 4271 /* Get the priority marking, if any */ 4272 hmp0->b_band = priority; 4273 4274 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4275 4276 error = postfragfn(hmp0, nce, ixaflags, 4277 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4278 ixa_cookie); 4279 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4280 /* No point in sending the other fragments */ 4281 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4282 ip_drop_output("FragFails: postfragfn failed", 4283 hmp, ill); 4284 freeb(hmp); 4285 freemsg(mp); 4286 return (error); 4287 } 4288 /* No need to redo state machine in loop */ 4289 ixaflags &= ~IXAF_REACH_CONF; 4290 4291 offset += mlen; 4292 } 4293 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4294 return (error); 4295 } 4296 4297 /* 4298 * Add a fragment header to an IPv6 packet. 4299 * Assumes that all the extension headers are contained in the first mblk. 4300 * 4301 * The fragment header is inserted after an hop-by-hop options header 4302 * and after [an optional destinations header followed by] a routing header. 4303 */ 4304 mblk_t * 4305 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4306 { 4307 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4308 ip6_t *fip6h; 4309 mblk_t *hmp; 4310 ip6_frag_t *fraghdr; 4311 size_t unfragmentable_len; 4312 uint8_t nexthdr; 4313 uint_t prev_nexthdr_offset; 4314 uint8_t *ptr; 4315 uint_t priority = mp->b_band; 4316 ip_stack_t *ipst = ixa->ixa_ipst; 4317 4318 /* 4319 * Determine the length of the unfragmentable portion of this 4320 * datagram. 
This consists of the IPv6 header, a potential 4321 * hop-by-hop options header, a potential pre-routing-header 4322 * destination options header, and a potential routing header. 4323 */ 4324 nexthdr = ip6h->ip6_nxt; 4325 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4326 ptr = (uint8_t *)&ip6h[1]; 4327 4328 if (nexthdr == IPPROTO_HOPOPTS) { 4329 ip6_hbh_t *hbh_hdr; 4330 uint_t hdr_len; 4331 4332 hbh_hdr = (ip6_hbh_t *)ptr; 4333 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4334 nexthdr = hbh_hdr->ip6h_nxt; 4335 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4336 - (uint8_t *)ip6h; 4337 ptr += hdr_len; 4338 } 4339 if (nexthdr == IPPROTO_DSTOPTS) { 4340 ip6_dest_t *dest_hdr; 4341 uint_t hdr_len; 4342 4343 dest_hdr = (ip6_dest_t *)ptr; 4344 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4345 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4346 nexthdr = dest_hdr->ip6d_nxt; 4347 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4348 - (uint8_t *)ip6h; 4349 ptr += hdr_len; 4350 } 4351 } 4352 if (nexthdr == IPPROTO_ROUTING) { 4353 ip6_rthdr_t *rthdr; 4354 uint_t hdr_len; 4355 4356 rthdr = (ip6_rthdr_t *)ptr; 4357 nexthdr = rthdr->ip6r_nxt; 4358 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4359 - (uint8_t *)ip6h; 4360 hdr_len = 8 * (rthdr->ip6r_len + 1); 4361 ptr += hdr_len; 4362 } 4363 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4364 4365 /* 4366 * Allocate an mblk with enough room for the link-layer 4367 * header, the unfragmentable part of the datagram, and the 4368 * fragment header. 
4369 */ 4370 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4371 ipst->ips_ip_wroff_extra, mp); 4372 if (hmp == NULL) { 4373 ill_t *ill = ixa->ixa_nce->nce_ill; 4374 4375 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4376 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4377 freemsg(mp); 4378 return (NULL); 4379 } 4380 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4381 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4382 4383 fip6h = (ip6_t *)hmp->b_rptr; 4384 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4385 4386 bcopy(ip6h, fip6h, unfragmentable_len); 4387 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4388 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4389 4390 fraghdr->ip6f_nxt = nexthdr; 4391 fraghdr->ip6f_reserved = 0; 4392 fraghdr->ip6f_offlg = 0; 4393 fraghdr->ip6f_ident = htonl(ident); 4394 4395 /* Get the priority marking, if any */ 4396 hmp->b_band = priority; 4397 4398 /* 4399 * Move read ptr past unfragmentable portion, we don't want this part 4400 * of the data in our fragments. 4401 */ 4402 mp->b_rptr += unfragmentable_len; 4403 hmp->b_cont = mp; 4404 return (hmp); 4405 } 4406 4407 /* 4408 * Determine if the ill and multicast aspects of that packets 4409 * "matches" the conn. 4410 */ 4411 boolean_t 4412 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4413 { 4414 ill_t *ill = ira->ira_rill; 4415 zoneid_t zoneid = ira->ira_zoneid; 4416 uint_t in_ifindex; 4417 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4418 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4419 4420 /* 4421 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4422 * scopeid. This is used to limit 4423 * unicast and multicast reception to conn_incoming_ifindex. 4424 * conn_wantpacket_v6 is called both for unicast and 4425 * multicast packets. 
4426 */ 4427 in_ifindex = connp->conn_incoming_ifindex; 4428 4429 /* mpathd can bind to the under IPMP interface, which we allow */ 4430 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4431 if (!IS_UNDER_IPMP(ill)) 4432 return (B_FALSE); 4433 4434 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4435 return (B_FALSE); 4436 } 4437 4438 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4439 return (B_FALSE); 4440 4441 if (!(ira->ira_flags & IRAF_MULTICAST)) 4442 return (B_TRUE); 4443 4444 if (connp->conn_multi_router) 4445 return (B_TRUE); 4446 4447 if (ira->ira_protocol == IPPROTO_RSVP) 4448 return (B_TRUE); 4449 4450 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4451 ira->ira_ill)); 4452 } 4453 4454 /* 4455 * pr_addr_dbg function provides the needed buffer space to call 4456 * inet_ntop() function's 3rd argument. This function should be 4457 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4458 * stack buffer space in it's own stack frame. This function uses 4459 * a buffer from it's own stack and prints the information. 4460 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4461 * 4462 * Note: This function can call inet_ntop() once. 4463 */ 4464 void 4465 pr_addr_dbg(char *fmt1, int af, const void *addr) 4466 { 4467 char buf[INET6_ADDRSTRLEN]; 4468 4469 if (fmt1 == NULL) { 4470 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4471 return; 4472 } 4473 4474 /* 4475 * This does not compare debug level and just prints 4476 * out. Thus it is the responsibility of the caller 4477 * to check the appropriate debug-level before calling 4478 * this function. 4479 */ 4480 if (ip_debug > 0) { 4481 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4482 } 4483 4484 4485 } 4486 4487 4488 /* 4489 * Return the length in bytes of the IPv6 headers (base header 4490 * extension headers) that will be needed based on the 4491 * ip_pkt_t structure passed by the caller. 
4492 * 4493 * The returned length does not include the length of the upper level 4494 * protocol (ULP) header. 4495 */ 4496 int 4497 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4498 { 4499 int len; 4500 4501 len = IPV6_HDR_LEN; 4502 4503 /* 4504 * If there's a security label here, then we ignore any hop-by-hop 4505 * options the user may try to set. 4506 */ 4507 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4508 uint_t hopoptslen; 4509 /* 4510 * Note that ipp_label_len_v6 is just the option - not 4511 * the hopopts extension header. It also needs to be padded 4512 * to a multiple of 8 bytes. 4513 */ 4514 ASSERT(ipp->ipp_label_len_v6 != 0); 4515 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4516 hopoptslen = (hopoptslen + 7)/8 * 8; 4517 len += hopoptslen; 4518 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4519 ASSERT(ipp->ipp_hopoptslen != 0); 4520 len += ipp->ipp_hopoptslen; 4521 } 4522 4523 /* 4524 * En-route destination options 4525 * Only do them if there's a routing header as well 4526 */ 4527 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4528 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4529 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4530 len += ipp->ipp_rthdrdstoptslen; 4531 } 4532 if (ipp->ipp_fields & IPPF_RTHDR) { 4533 ASSERT(ipp->ipp_rthdrlen != 0); 4534 len += ipp->ipp_rthdrlen; 4535 } 4536 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4537 ASSERT(ipp->ipp_dstoptslen != 0); 4538 len += ipp->ipp_dstoptslen; 4539 } 4540 return (len); 4541 } 4542 4543 /* 4544 * All-purpose routine to build a header chain of an IPv6 header 4545 * followed by any required extension headers and a proto header. 4546 * 4547 * The caller has to set the source and destination address as well as 4548 * ip6_plen. The caller has to massage any routing header and compensate 4549 * for the ULP pseudo-header checksum due to the source route. 4550 * 4551 * The extension headers will all be fully filled in. 
4552 */ 4553 void 4554 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4555 uint8_t protocol, uint32_t flowinfo) 4556 { 4557 uint8_t *nxthdr_ptr; 4558 uint8_t *cp; 4559 ip6_t *ip6h = (ip6_t *)buf; 4560 4561 /* Initialize IPv6 header */ 4562 ip6h->ip6_vcf = 4563 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4564 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4565 4566 if (ipp->ipp_fields & IPPF_TCLASS) { 4567 /* Overrides the class part of flowinfo */ 4568 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4569 ipp->ipp_tclass); 4570 } 4571 4572 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4573 ip6h->ip6_hops = ipp->ipp_hoplimit; 4574 else 4575 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4576 4577 if ((ipp->ipp_fields & IPPF_ADDR) && 4578 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4579 ip6h->ip6_src = ipp->ipp_addr; 4580 4581 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4582 cp = (uint8_t *)&ip6h[1]; 4583 /* 4584 * Here's where we have to start stringing together 4585 * any extension headers in the right order: 4586 * Hop-by-hop, destination, routing, and final destination opts. 4587 */ 4588 /* 4589 * If there's a security label here, then we ignore any hop-by-hop 4590 * options the user may try to set. 4591 */ 4592 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4593 /* 4594 * Hop-by-hop options with the label. 4595 * Note that ipp_label_v6 is just the option - not 4596 * the hopopts extension header. It also needs to be padded 4597 * to a multiple of 8 bytes. 
4598 */ 4599 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4600 uint_t hopoptslen; 4601 uint_t padlen; 4602 4603 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4604 hopoptslen = (padlen + 7)/8 * 8; 4605 padlen = hopoptslen - padlen; 4606 4607 *nxthdr_ptr = IPPROTO_HOPOPTS; 4608 nxthdr_ptr = &hbh->ip6h_nxt; 4609 hbh->ip6h_len = hopoptslen/8 - 1; 4610 cp += sizeof (ip6_hbh_t); 4611 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4612 cp += ipp->ipp_label_len_v6; 4613 4614 ASSERT(padlen <= 7); 4615 switch (padlen) { 4616 case 0: 4617 break; 4618 case 1: 4619 cp[0] = IP6OPT_PAD1; 4620 break; 4621 default: 4622 cp[0] = IP6OPT_PADN; 4623 cp[1] = padlen - 2; 4624 bzero(&cp[2], padlen - 2); 4625 break; 4626 } 4627 cp += padlen; 4628 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4629 /* Hop-by-hop options */ 4630 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4631 4632 *nxthdr_ptr = IPPROTO_HOPOPTS; 4633 nxthdr_ptr = &hbh->ip6h_nxt; 4634 4635 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4636 cp += ipp->ipp_hopoptslen; 4637 } 4638 /* 4639 * En-route destination options 4640 * Only do them if there's a routing header as well 4641 */ 4642 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4643 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4644 ip6_dest_t *dst = (ip6_dest_t *)cp; 4645 4646 *nxthdr_ptr = IPPROTO_DSTOPTS; 4647 nxthdr_ptr = &dst->ip6d_nxt; 4648 4649 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4650 cp += ipp->ipp_rthdrdstoptslen; 4651 } 4652 /* 4653 * Routing header next 4654 */ 4655 if (ipp->ipp_fields & IPPF_RTHDR) { 4656 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4657 4658 *nxthdr_ptr = IPPROTO_ROUTING; 4659 nxthdr_ptr = &rt->ip6r_nxt; 4660 4661 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4662 cp += ipp->ipp_rthdrlen; 4663 } 4664 /* 4665 * Do ultimate destination options 4666 */ 4667 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4668 ip6_dest_t *dest = (ip6_dest_t *)cp; 4669 4670 *nxthdr_ptr = IPPROTO_DSTOPTS; 4671 nxthdr_ptr = &dest->ip6d_nxt; 4672 4673 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4674 cp += ipp->ipp_dstoptslen; 4675 } 4676 /* 4677 * Now set the last header pointer to the proto passed in 4678 */ 4679 *nxthdr_ptr = protocol; 4680 ASSERT((int)(cp - buf) == buf_len); 4681 } 4682 4683 /* 4684 * Return a pointer to the routing header extension header 4685 * in the IPv6 header(s) chain passed in. 4686 * If none found, return NULL 4687 * Assumes that all extension headers are in same mblk as the v6 header 4688 */ 4689 ip6_rthdr_t * 4690 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4691 { 4692 ip6_dest_t *desthdr; 4693 ip6_frag_t *fraghdr; 4694 uint_t hdrlen; 4695 uint8_t nexthdr; 4696 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4697 4698 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4699 return ((ip6_rthdr_t *)ptr); 4700 4701 /* 4702 * The routing header will precede all extension headers 4703 * other than the hop-by-hop and destination options 4704 * extension headers, so if we see anything other than those, 4705 * we're done and didn't find it. 4706 * We could see a destination options header alone but no 4707 * routing header, in which case we'll return NULL as soon as 4708 * we see anything after that. 4709 * Hop-by-hop and destination option headers are identical, 4710 * so we can use either one we want as a template. 4711 */ 4712 nexthdr = ip6h->ip6_nxt; 4713 while (ptr < endptr) { 4714 /* Is there enough left for len + nexthdr? 
*/ 4715 if (ptr + MIN_EHDR_LEN > endptr) 4716 return (NULL); 4717 4718 switch (nexthdr) { 4719 case IPPROTO_HOPOPTS: 4720 case IPPROTO_DSTOPTS: 4721 /* Assumes the headers are identical for hbh and dst */ 4722 desthdr = (ip6_dest_t *)ptr; 4723 hdrlen = 8 * (desthdr->ip6d_len + 1); 4724 nexthdr = desthdr->ip6d_nxt; 4725 break; 4726 4727 case IPPROTO_ROUTING: 4728 return ((ip6_rthdr_t *)ptr); 4729 4730 case IPPROTO_FRAGMENT: 4731 fraghdr = (ip6_frag_t *)ptr; 4732 hdrlen = sizeof (ip6_frag_t); 4733 nexthdr = fraghdr->ip6f_nxt; 4734 break; 4735 4736 default: 4737 return (NULL); 4738 } 4739 ptr += hdrlen; 4740 } 4741 return (NULL); 4742 } 4743 4744 /* 4745 * Called for source-routed packets originating on this node. 4746 * Manipulates the original routing header by moving every entry up 4747 * one slot, placing the first entry in the v6 header's v6_dst field, 4748 * and placing the ultimate destination in the routing header's last 4749 * slot. 4750 * 4751 * Returns the checksum diference between the ultimate destination 4752 * (last hop in the routing header when the packet is sent) and 4753 * the first hop (ip6_dst when the packet is sent) 4754 */ 4755 /* ARGSUSED2 */ 4756 uint32_t 4757 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4758 { 4759 uint_t numaddr; 4760 uint_t i; 4761 in6_addr_t *addrptr; 4762 in6_addr_t tmp; 4763 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4764 uint32_t cksm; 4765 uint32_t addrsum = 0; 4766 uint16_t *ptr; 4767 4768 /* 4769 * Perform any processing needed for source routing. 4770 * We know that all extension headers will be in the same mblk 4771 * as the IPv6 header. 4772 */ 4773 4774 /* 4775 * If no segments left in header, or the header length field is zero, 4776 * don't move hop addresses around; 4777 * Checksum difference is zero. 
4778 */ 4779 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4780 return (0); 4781 4782 ptr = (uint16_t *)&ip6h->ip6_dst; 4783 cksm = 0; 4784 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4785 cksm += ptr[i]; 4786 } 4787 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4788 4789 /* 4790 * Here's where the fun begins - we have to 4791 * move all addresses up one spot, take the 4792 * first hop and make it our first ip6_dst, 4793 * and place the ultimate destination in the 4794 * newly-opened last slot. 4795 */ 4796 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4797 numaddr = rthdr->ip6r0_len / 2; 4798 tmp = *addrptr; 4799 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4800 *addrptr = addrptr[1]; 4801 } 4802 *addrptr = ip6h->ip6_dst; 4803 ip6h->ip6_dst = tmp; 4804 4805 /* 4806 * From the checksummed ultimate destination subtract the checksummed 4807 * current ip6_dst (the first hop address). Return that number. 4808 * (In the v4 case, the second part of this is done in each routine 4809 * that calls ip_massage_options(). We do it all in this one place 4810 * for v6). 
4811 */ 4812 ptr = (uint16_t *)&ip6h->ip6_dst; 4813 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4814 addrsum += ptr[i]; 4815 } 4816 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4817 if ((int)cksm < 0) 4818 cksm--; 4819 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4820 4821 return (cksm); 4822 } 4823 4824 void 4825 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4826 { 4827 kstat_t *ksp; 4828 4829 ip6_stat_t template = { 4830 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4831 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4832 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4833 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4834 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4835 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4836 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4837 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4838 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4839 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4840 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4841 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4842 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4843 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4844 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4845 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4846 }; 4847 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4848 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4849 KSTAT_FLAG_VIRTUAL, stackid); 4850 4851 if (ksp == NULL) 4852 return (NULL); 4853 4854 bcopy(&template, ip6_statisticsp, sizeof (template)); 4855 ksp->ks_data = (void *)ip6_statisticsp; 4856 ksp->ks_private = (void *)(uintptr_t)stackid; 4857 4858 kstat_install(ksp); 4859 return (ksp); 4860 } 4861 4862 void 4863 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4864 { 4865 if (ksp != NULL) { 4866 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4867 kstat_delete_netstack(ksp, stackid); 4868 } 4869 } 4870 4871 /* 4872 * The following two functions set and get 
the value for the 4873 * IPV6_SRC_PREFERENCES socket option. 4874 */ 4875 int 4876 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4877 { 4878 /* 4879 * We only support preferences that are covered by 4880 * IPV6_PREFER_SRC_MASK. 4881 */ 4882 if (prefs & ~IPV6_PREFER_SRC_MASK) 4883 return (EINVAL); 4884 4885 /* 4886 * Look for conflicting preferences or default preferences. If 4887 * both bits of a related pair are clear, the application wants the 4888 * system's default value for that pair. Both bits in a pair can't 4889 * be set. 4890 */ 4891 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4892 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4893 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4894 IPV6_PREFER_SRC_MIPMASK) { 4895 return (EINVAL); 4896 } 4897 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4898 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4899 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4900 IPV6_PREFER_SRC_TMPMASK) { 4901 return (EINVAL); 4902 } 4903 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4904 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4905 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4906 IPV6_PREFER_SRC_CGAMASK) { 4907 return (EINVAL); 4908 } 4909 4910 ixa->ixa_src_preferences = prefs; 4911 return (0); 4912 } 4913 4914 size_t 4915 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4916 { 4917 *val = ixa->ixa_src_preferences; 4918 return (sizeof (ixa->ixa_src_preferences)); 4919 } 4920 4921 /* 4922 * Get the size of the IP options (including the IP headers size) 4923 * without including the AH header's size. If till_ah is B_FALSE, 4924 * and if AH header is present, dest options beyond AH header will 4925 * also be included in the returned size. 
4926 */ 4927 int 4928 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4929 { 4930 ip6_t *ip6h; 4931 uint8_t nexthdr; 4932 uint8_t *whereptr; 4933 ip6_hbh_t *hbhhdr; 4934 ip6_dest_t *dsthdr; 4935 ip6_rthdr_t *rthdr; 4936 int ehdrlen; 4937 int size; 4938 ah_t *ah; 4939 4940 ip6h = (ip6_t *)mp->b_rptr; 4941 size = IPV6_HDR_LEN; 4942 nexthdr = ip6h->ip6_nxt; 4943 whereptr = (uint8_t *)&ip6h[1]; 4944 for (;;) { 4945 /* Assume IP has already stripped it */ 4946 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4947 switch (nexthdr) { 4948 case IPPROTO_HOPOPTS: 4949 hbhhdr = (ip6_hbh_t *)whereptr; 4950 nexthdr = hbhhdr->ip6h_nxt; 4951 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4952 break; 4953 case IPPROTO_DSTOPTS: 4954 dsthdr = (ip6_dest_t *)whereptr; 4955 nexthdr = dsthdr->ip6d_nxt; 4956 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4957 break; 4958 case IPPROTO_ROUTING: 4959 rthdr = (ip6_rthdr_t *)whereptr; 4960 nexthdr = rthdr->ip6r_nxt; 4961 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4962 break; 4963 default : 4964 if (till_ah) { 4965 ASSERT(nexthdr == IPPROTO_AH); 4966 return (size); 4967 } 4968 /* 4969 * If we don't have a AH header to traverse, 4970 * return now. This happens normally for 4971 * outbound datagrams where we have not inserted 4972 * the AH header. 4973 */ 4974 if (nexthdr != IPPROTO_AH) { 4975 return (size); 4976 } 4977 4978 /* 4979 * We don't include the AH header's size 4980 * to be symmetrical with other cases where 4981 * we either don't have a AH header (outbound) 4982 * or peek into the AH header yet (inbound and 4983 * not pulled up yet). 4984 */ 4985 ah = (ah_t *)whereptr; 4986 nexthdr = ah->ah_nexthdr; 4987 ehdrlen = (ah->ah_length << 2) + 8; 4988 4989 if (nexthdr == IPPROTO_DSTOPTS) { 4990 if (whereptr + ehdrlen >= mp->b_wptr) { 4991 /* 4992 * The destination options header 4993 * is not part of the first mblk. 
4994 */ 4995 whereptr = mp->b_cont->b_rptr; 4996 } else { 4997 whereptr += ehdrlen; 4998 } 4999 5000 dsthdr = (ip6_dest_t *)whereptr; 5001 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 5002 size += ehdrlen; 5003 } 5004 return (size); 5005 } 5006 whereptr += ehdrlen; 5007 size += ehdrlen; 5008 } 5009 } 5010 5011 /* 5012 * Utility routine that checks if `v6srcp' is a valid address on underlying 5013 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 5014 * associated with `v6srcp' on success. NOTE: if this is not called from 5015 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 5016 * group during or after this lookup. 5017 */ 5018 boolean_t 5019 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 5020 { 5021 ipif_t *ipif; 5022 5023 5024 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 5025 if (ipif != NULL) { 5026 if (ipifp != NULL) 5027 *ipifp = ipif; 5028 else 5029 ipif_refrele(ipif); 5030 return (B_TRUE); 5031 } 5032 5033 if (ip_debug > 2) { 5034 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 5035 "src %s\n", AF_INET6, v6srcp); 5036 } 5037 return (B_FALSE); 5038 } 5039