1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 1990 Mentat Inc. 24 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. 
25 */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/sysmacros.h> 32 #include <sys/strsun.h> 33 #include <sys/strlog.h> 34 #include <sys/strsubr.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/debug.h> 41 #include <sys/sdt.h> 42 #include <sys/kobj.h> 43 #include <sys/zone.h> 44 #include <sys/neti.h> 45 #include <sys/hook.h> 46 47 #include <sys/kmem.h> 48 #include <sys/systm.h> 49 #include <sys/param.h> 50 #include <sys/socket.h> 51 #include <sys/vtrace.h> 52 #include <sys/isa_defs.h> 53 #include <sys/atomic.h> 54 #include <sys/policy.h> 55 #include <sys/mac.h> 56 #include <net/if.h> 57 #include <net/if_types.h> 58 #include <net/route.h> 59 #include <net/if_dl.h> 60 #include <sys/sockio.h> 61 #include <netinet/in.h> 62 #include <netinet/ip6.h> 63 #include <netinet/icmp6.h> 64 #include <netinet/sctp.h> 65 66 #include <inet/common.h> 67 #include <inet/mi.h> 68 #include <inet/optcom.h> 69 #include <inet/mib2.h> 70 #include <inet/nd.h> 71 #include <inet/arp.h> 72 73 #include <inet/ip.h> 74 #include <inet/ip_impl.h> 75 #include <inet/ip6.h> 76 #include <inet/ip6_asp.h> 77 #include <inet/tcp.h> 78 #include <inet/tcp_impl.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipp_common.h> 81 82 #include <inet/ip_multi.h> 83 #include <inet/ip_if.h> 84 #include <inet/ip_ire.h> 85 #include <inet/ip_rts.h> 86 #include <inet/ip_ndp.h> 87 #include <net/pfkeyv2.h> 88 #include <inet/sadb.h> 89 #include <inet/ipsec_impl.h> 90 #include <inet/iptun/iptun_impl.h> 91 #include <inet/sctp_ip.h> 92 #include <sys/pattr.h> 93 #include <inet/ipclassifier.h> 94 #include <inet/ipsecah.h> 95 #include <inet/rawip_impl.h> 96 #include <inet/rts_impl.h> 97 #include <sys/squeue_impl.h> 98 #include <sys/squeue.h> 99 100 #include <sys/tsol/label.h> 101 #include <sys/tsol/tnet.h> 102 103 /* Temporary; for CR 6451644 
work-around */ 104 #include <sys/ethernet.h> 105 106 /* 107 * Naming conventions: 108 * These rules should be judiciously applied 109 * if there is a need to identify something as IPv6 versus IPv4 110 * IPv6 functions will end with _v6 in the ip module. 111 * IPv6 functions will end with _ipv6 in the transport modules. 112 * IPv6 macros: 113 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 114 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 115 * And then there are ..V4_PART_OF_V6. 116 * The intent is that macros in the ip module end with _V6. 117 * IPv6 global variables will start with ipv6_ 118 * IPv6 structures will start with ipv6 119 * IPv6 defined constants should start with IPV6_ 120 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 121 */ 122 123 /* 124 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 125 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 126 * from IANA. This mechanism will remain in effect until an official 127 * number is obtained. 
128 */ 129 uchar_t ip6opt_ls; 130 131 const in6_addr_t ipv6_all_ones = 132 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 133 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 134 135 #ifdef _BIG_ENDIAN 136 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 137 #else /* _BIG_ENDIAN */ 138 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 139 #endif /* _BIG_ENDIAN */ 140 141 #ifdef _BIG_ENDIAN 142 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 143 #else /* _BIG_ENDIAN */ 144 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 145 #endif /* _BIG_ENDIAN */ 146 147 #ifdef _BIG_ENDIAN 148 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 149 #else /* _BIG_ENDIAN */ 150 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 151 #endif /* _BIG_ENDIAN */ 152 153 #ifdef _BIG_ENDIAN 154 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 155 #else /* _BIG_ENDIAN */ 156 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 157 #endif /* _BIG_ENDIAN */ 158 159 #ifdef _BIG_ENDIAN 160 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 161 #else /* _BIG_ENDIAN */ 162 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 163 #endif /* _BIG_ENDIAN */ 164 165 #ifdef _BIG_ENDIAN 166 const in6_addr_t ipv6_solicited_node_mcast = 167 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 168 #else /* _BIG_ENDIAN */ 169 const in6_addr_t ipv6_solicited_node_mcast = 170 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 171 #endif /* _BIG_ENDIAN */ 172 173 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *); 174 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *); 175 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *, 176 ip_recv_attr_t *); 177 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *, 178 ip_recv_attr_t *); 179 static void 
icmp_send_redirect_v6(mblk_t *, in6_addr_t *, 180 in6_addr_t *, ip_recv_attr_t *); 181 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *, 182 ip_recv_attr_t *); 183 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 184 185 /* 186 * icmp_inbound_v6 deals with ICMP messages that are handled by IP. 187 * If the ICMP message is consumed by IP, i.e., it should not be delivered 188 * to any IPPROTO_ICMP raw sockets, then it returns NULL. 189 * Likewise, if the ICMP error is misformed (too short, etc), then it 190 * returns NULL. The caller uses this to determine whether or not to send 191 * to raw sockets. 192 * 193 * All error messages are passed to the matching transport stream. 194 * 195 * See comment for icmp_inbound_v4() on how IPsec is handled. 196 */ 197 mblk_t * 198 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira) 199 { 200 icmp6_t *icmp6; 201 ip6_t *ip6h; /* Outer header */ 202 int ip_hdr_length; /* Outer header length */ 203 boolean_t interested; 204 ill_t *ill = ira->ira_ill; 205 ip_stack_t *ipst = ill->ill_ipst; 206 mblk_t *mp_ret = NULL; 207 208 ip6h = (ip6_t *)mp->b_rptr; 209 210 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 211 212 /* Check for Martian packets */ 213 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 214 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 215 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill); 216 freemsg(mp); 217 return (NULL); 218 } 219 220 /* Make sure ira_l2src is set for ndp_input */ 221 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 222 ip_setl2src(mp, ira, ira->ira_rill); 223 224 ip_hdr_length = ira->ira_ip_hdr_length; 225 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 226 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 227 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 228 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 229 freemsg(mp); 230 return (NULL); 231 } 232 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 233 if (ip6h == NULL) { 234 
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 235 freemsg(mp); 236 return (NULL); 237 } 238 } 239 240 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 241 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6); 242 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 243 icmp6->icmp6_code)); 244 245 /* 246 * We will set "interested" to "true" if we should pass a copy to 247 * the transport i.e., if it is an error message. 248 */ 249 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 250 251 switch (icmp6->icmp6_type) { 252 case ICMP6_DST_UNREACH: 253 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 254 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 255 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 256 break; 257 258 case ICMP6_TIME_EXCEEDED: 259 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 260 break; 261 262 case ICMP6_PARAM_PROB: 263 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 264 break; 265 266 case ICMP6_PACKET_TOO_BIG: 267 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs); 268 break; 269 270 case ICMP6_ECHO_REQUEST: 271 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 272 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 273 !ipst->ips_ipv6_resp_echo_mcast) 274 break; 275 276 /* 277 * We must have exclusive use of the mblk to convert it to 278 * a response. 279 * If not, we copy it. 
280 */ 281 if (mp->b_datap->db_ref > 1) { 282 mblk_t *mp1; 283 284 mp1 = copymsg(mp); 285 if (mp1 == NULL) { 286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 287 ip_drop_input("ipIfStatsInDiscards - copymsg", 288 mp, ill); 289 freemsg(mp); 290 return (NULL); 291 } 292 freemsg(mp); 293 mp = mp1; 294 ip6h = (ip6_t *)mp->b_rptr; 295 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 296 } 297 298 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 299 icmp_send_reply_v6(mp, ip6h, icmp6, ira); 300 return (NULL); 301 302 case ICMP6_ECHO_REPLY: 303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 304 break; 305 306 case ND_ROUTER_SOLICIT: 307 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 308 break; 309 310 case ND_ROUTER_ADVERT: 311 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 312 break; 313 314 case ND_NEIGHBOR_SOLICIT: 315 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 316 ndp_input(mp, ira); 317 return (NULL); 318 319 case ND_NEIGHBOR_ADVERT: 320 BUMP_MIB(ill->ill_icmp6_mib, 321 ipv6IfIcmpInNeighborAdvertisements); 322 ndp_input(mp, ira); 323 return (NULL); 324 325 case ND_REDIRECT: 326 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 327 328 if (ipst->ips_ipv6_ignore_redirect) 329 break; 330 331 /* We now allow a RAW socket to receive this. */ 332 interested = B_TRUE; 333 break; 334 335 /* 336 * The next three icmp messages will be handled by MLD. 337 * Pass all valid MLD packets up to any process(es) 338 * listening on a raw ICMP socket. 339 */ 340 case MLD_LISTENER_QUERY: 341 case MLD_LISTENER_REPORT: 342 case MLD_LISTENER_REDUCTION: 343 mp = mld_input(mp, ira); 344 return (mp); 345 default: 346 break; 347 } 348 /* 349 * See if there is an ICMP client to avoid an extra copymsg/freemsg 350 * if there isn't one. 351 */ 352 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) { 353 /* If there is an ICMP client and we want one too, copy it. 
*/ 354 355 if (!interested) { 356 /* Caller will deliver to RAW sockets */ 357 return (mp); 358 } 359 mp_ret = copymsg(mp); 360 if (mp_ret == NULL) { 361 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 362 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 363 } 364 } else if (!interested) { 365 /* Neither we nor raw sockets are interested. Drop packet now */ 366 freemsg(mp); 367 return (NULL); 368 } 369 370 /* 371 * ICMP error or redirect packet. Make sure we have enough of 372 * the header and that db_ref == 1 since we might end up modifying 373 * the packet. 374 */ 375 if (mp->b_cont != NULL) { 376 if (ip_pullup(mp, -1, ira) == NULL) { 377 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 378 ip_drop_input("ipIfStatsInDiscards - ip_pullup", 379 mp, ill); 380 freemsg(mp); 381 return (mp_ret); 382 } 383 } 384 385 if (mp->b_datap->db_ref > 1) { 386 mblk_t *mp1; 387 388 mp1 = copymsg(mp); 389 if (mp1 == NULL) { 390 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 391 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 392 freemsg(mp); 393 return (mp_ret); 394 } 395 freemsg(mp); 396 mp = mp1; 397 } 398 399 /* 400 * In case mp has changed, verify the message before any further 401 * processes. 402 */ 403 ip6h = (ip6_t *)mp->b_rptr; 404 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 405 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 406 freemsg(mp); 407 return (mp_ret); 408 } 409 410 switch (icmp6->icmp6_type) { 411 case ND_REDIRECT: 412 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira); 413 break; 414 case ICMP6_PACKET_TOO_BIG: 415 /* Update DCE and adjust MTU is icmp header if needed */ 416 icmp_inbound_too_big_v6(icmp6, ira); 417 /* FALLTHRU */ 418 default: 419 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 420 break; 421 } 422 423 return (mp_ret); 424 } 425 426 /* 427 * Send an ICMP echo reply. 428 * The caller has already updated the payload part of the packet. 
429 * We handle the ICMP checksum, IP source address selection and feed 430 * the packet into ip_output_simple. 431 */ 432 static void 433 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 434 ip_recv_attr_t *ira) 435 { 436 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 437 ill_t *ill = ira->ira_ill; 438 ip_stack_t *ipst = ill->ill_ipst; 439 ip_xmit_attr_t ixas; 440 in6_addr_t origsrc; 441 442 /* 443 * Remove any extension headers (do not reverse a source route) 444 * and clear the flow id (keep traffic class for now). 445 */ 446 if (ip_hdr_length != IPV6_HDR_LEN) { 447 int i; 448 449 for (i = 0; i < IPV6_HDR_LEN; i++) { 450 mp->b_rptr[ip_hdr_length - i - 1] = 451 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 452 } 453 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 454 ip6h = (ip6_t *)mp->b_rptr; 455 ip6h->ip6_nxt = IPPROTO_ICMPV6; 456 i = ntohs(ip6h->ip6_plen); 457 i -= (ip_hdr_length - IPV6_HDR_LEN); 458 ip6h->ip6_plen = htons(i); 459 ip_hdr_length = IPV6_HDR_LEN; 460 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 461 } 462 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 463 464 /* Reverse the source and destination addresses. */ 465 origsrc = ip6h->ip6_src; 466 ip6h->ip6_src = ip6h->ip6_dst; 467 ip6h->ip6_dst = origsrc; 468 469 /* set the hop limit */ 470 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 471 472 /* 473 * Prepare for checksum by putting icmp length in the icmp 474 * checksum field. 
The checksum is calculated in ip_output 475 */ 476 icmp6->icmp6_cksum = ip6h->ip6_plen; 477 478 bzero(&ixas, sizeof (ixas)); 479 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 480 ixas.ixa_zoneid = ira->ira_zoneid; 481 ixas.ixa_cred = kcred; 482 ixas.ixa_cpid = NOPID; 483 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 484 ixas.ixa_ifindex = 0; 485 ixas.ixa_ipst = ipst; 486 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 487 488 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 489 /* 490 * This packet should go out the same way as it 491 * came in i.e in clear, independent of the IPsec 492 * policy for transmitting packets. 493 */ 494 ixas.ixa_flags |= IXAF_NO_IPSEC; 495 } else { 496 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 497 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 498 /* Note: mp already consumed and ip_drop_packet done */ 499 return; 500 } 501 } 502 503 /* Was the destination (now source) link-local? Send out same group */ 504 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 505 ixas.ixa_flags |= IXAF_SCOPEID_SET; 506 if (IS_UNDER_IPMP(ill)) 507 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 508 else 509 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 510 } 511 512 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 513 /* 514 * Not one or our addresses (IRE_LOCALs), thus we let 515 * ip_output_simple pick the source. 516 */ 517 ip6h->ip6_src = ipv6_all_zeros; 518 ixas.ixa_flags |= IXAF_SET_SOURCE; 519 } 520 521 /* Should we send using dce_pmtu? */ 522 if (ipst->ips_ipv6_icmp_return_pmtu) 523 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 524 525 (void) ip_output_simple(mp, &ixas); 526 ixa_cleanup(&ixas); 527 528 } 529 530 /* 531 * Verify the ICMP messages for either for ICMP error or redirect packet. 532 * The caller should have fully pulled up the message. If it's a redirect 533 * packet, only basic checks on IP header will be done; otherwise, verify 534 * the packet by looking at the included ULP header. 
535 * 536 * Called before icmp_inbound_error_fanout_v6 is called. 537 */ 538 static boolean_t 539 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 540 { 541 ill_t *ill = ira->ira_ill; 542 uint16_t hdr_length; 543 uint8_t *nexthdrp; 544 uint8_t nexthdr; 545 ip_stack_t *ipst = ill->ill_ipst; 546 conn_t *connp; 547 ip6_t *ip6h; /* Inner header */ 548 549 ip6h = (ip6_t *)&icmp6[1]; 550 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 551 goto truncated; 552 553 if (icmp6->icmp6_type == ND_REDIRECT) { 554 hdr_length = sizeof (nd_redirect_t); 555 } else { 556 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 557 goto discard_pkt; 558 hdr_length = IPV6_HDR_LEN; 559 } 560 561 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 562 goto truncated; 563 564 /* 565 * Stop here for ICMP_REDIRECT. 566 */ 567 if (icmp6->icmp6_type == ND_REDIRECT) 568 return (B_TRUE); 569 570 /* 571 * ICMP errors only. 572 */ 573 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 574 goto discard_pkt; 575 nexthdr = *nexthdrp; 576 577 /* Try to pass the ICMP message to clients who need it */ 578 switch (nexthdr) { 579 case IPPROTO_UDP: 580 /* 581 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 582 * transport header. 583 */ 584 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 585 mp->b_wptr) 586 goto truncated; 587 break; 588 case IPPROTO_TCP: { 589 tcpha_t *tcpha; 590 591 /* 592 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 593 * transport header. 594 */ 595 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 596 mp->b_wptr) 597 goto truncated; 598 599 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 600 /* 601 * With IPMP we need to match across group, which we do 602 * since we have the upper ill from ira_ill. 
603 */ 604 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 605 ill->ill_phyint->phyint_ifindex, ipst); 606 if (connp == NULL) 607 goto discard_pkt; 608 609 if ((connp->conn_verifyicmp != NULL) && 610 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 611 CONN_DEC_REF(connp); 612 goto discard_pkt; 613 } 614 CONN_DEC_REF(connp); 615 break; 616 } 617 case IPPROTO_SCTP: 618 /* 619 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 620 * transport header. 621 */ 622 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 623 mp->b_wptr) 624 goto truncated; 625 break; 626 case IPPROTO_ESP: 627 case IPPROTO_AH: 628 break; 629 case IPPROTO_ENCAP: 630 case IPPROTO_IPV6: { 631 /* Look for self-encapsulated packets that caused an error */ 632 ip6_t *in_ip6h; 633 634 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 635 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 636 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 637 goto truncated; 638 break; 639 } 640 default: 641 break; 642 } 643 644 return (B_TRUE); 645 646 discard_pkt: 647 /* Bogus ICMP error. */ 648 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 649 return (B_FALSE); 650 651 truncated: 652 /* We pulled up everthing already. Must be truncated */ 653 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 654 return (B_FALSE); 655 } 656 657 /* 658 * Process received IPv6 ICMP Packet too big. 659 * The caller is responsible for validating the packet before passing it in 660 * and also to fanout the ICMP error to any matching transport conns. Assumes 661 * the message has been fully pulled up. 662 * 663 * Before getting here, the caller has called icmp_inbound_verify_v6() 664 * that should have verified with ULP to prevent undoing the changes we're 665 * going to make to DCE. For example, TCP might have verified that the packet 666 * which generated error is in the send window. 
667 * 668 * In some cases modified this MTU in the ICMP header packet; the caller 669 * should pass to the matching ULP after this returns. 670 */ 671 static void 672 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira) 673 { 674 uint32_t mtu; 675 dce_t *dce; 676 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */ 677 ip_stack_t *ipst = ill->ill_ipst; 678 int old_max_frag; 679 in6_addr_t final_dst; 680 ip6_t *ip6h; /* Inner IP header */ 681 682 /* Caller has already pulled up everything. */ 683 ip6h = (ip6_t *)&icmp6[1]; 684 final_dst = ip_get_dst_v6(ip6h, NULL, NULL); 685 686 mtu = ntohl(icmp6->icmp6_mtu); 687 if (mtu < IPV6_MIN_MTU) { 688 /* 689 * RFC 8021 suggests to ignore messages where mtu is 690 * less than the IPv6 minimum. 691 */ 692 ip1dbg(("Received mtu less than IPv6 " 693 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 694 DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu); 695 return; 696 } 697 698 /* 699 * For link local destinations matching simply on address is not 700 * sufficient. Same link local addresses for different ILL's is 701 * possible. 
702 */ 703 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) { 704 dce = dce_lookup_and_add_v6(&final_dst, 705 ill->ill_phyint->phyint_ifindex, ipst); 706 } else { 707 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst); 708 } 709 if (dce == NULL) { 710 /* Couldn't add a unique one - ENOMEM */ 711 if (ip_debug > 2) { 712 /* ip1dbg */ 713 pr_addr_dbg("icmp_inbound_too_big_v6:" 714 "no dce for dst %s\n", AF_INET6, 715 &final_dst); 716 } 717 return; 718 } 719 720 mutex_enter(&dce->dce_lock); 721 if (dce->dce_flags & DCEF_PMTU) 722 old_max_frag = dce->dce_pmtu; 723 else if (IN6_IS_ADDR_MULTICAST(&final_dst)) 724 old_max_frag = ill->ill_mc_mtu; 725 else 726 old_max_frag = ill->ill_mtu; 727 728 ip1dbg(("Received mtu from router: %d\n", mtu)); 729 DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu); 730 dce->dce_pmtu = MIN(old_max_frag, mtu); 731 icmp6->icmp6_mtu = htonl(dce->dce_pmtu); 732 733 /* We now have a PMTU for sure */ 734 dce->dce_flags |= DCEF_PMTU; 735 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 736 737 mutex_exit(&dce->dce_lock); 738 /* 739 * After dropping the lock the new value is visible to everyone. 740 * Then we bump the generation number so any cached values reinspect 741 * the dce_t. 742 */ 743 dce_increment_generation(dce); 744 dce_refrele(dce); 745 } 746 747 /* 748 * Fanout received ICMPv6 error packets to the transports. 749 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 750 * 751 * The caller must have called icmp_inbound_verify_v6. 
752 */ 753 void 754 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 755 { 756 uint16_t *up; /* Pointer to ports in ULP header */ 757 uint32_t ports; /* reversed ports for fanout */ 758 ip6_t rip6h; /* With reversed addresses */ 759 ip6_t *ip6h; /* Inner IP header */ 760 uint16_t hdr_length; /* Inner IP header length */ 761 uint8_t *nexthdrp; 762 uint8_t nexthdr; 763 tcpha_t *tcpha; 764 conn_t *connp; 765 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */ 766 ip_stack_t *ipst = ill->ill_ipst; 767 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 768 769 /* Caller has already pulled up everything. */ 770 ip6h = (ip6_t *)&icmp6[1]; 771 ASSERT(mp->b_cont == NULL); 772 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 773 774 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 775 goto drop_pkt; 776 nexthdr = *nexthdrp; 777 ira->ira_protocol = nexthdr; 778 779 /* 780 * We need a separate IP header with the source and destination 781 * addresses reversed to do fanout/classification because the ip6h in 782 * the ICMPv6 error is in the form we sent it out. 783 */ 784 rip6h.ip6_src = ip6h->ip6_dst; 785 rip6h.ip6_dst = ip6h->ip6_src; 786 rip6h.ip6_nxt = nexthdr; 787 788 /* Try to pass the ICMP message to clients who need it */ 789 switch (nexthdr) { 790 case IPPROTO_UDP: { 791 /* Attempt to find a client stream based on port. */ 792 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 793 794 /* Note that we send error to all matches. */ 795 ira->ira_flags |= IRAF_ICMP_ERROR; 796 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira); 797 ira->ira_flags &= ~IRAF_ICMP_ERROR; 798 return; 799 } 800 case IPPROTO_TCP: { 801 /* 802 * Attempt to find a client stream based on port. 803 * Note that we do a reverse lookup since the header is 804 * in the form we sent it out. 
805 */ 806 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 807 /* 808 * With IPMP we need to match across group, which we do 809 * since we have the upper ill from ira_ill. 810 */ 811 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 812 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 813 if (connp == NULL) { 814 goto drop_pkt; 815 } 816 817 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 818 (ira->ira_flags & IRAF_IPSEC_SECURE)) { 819 mp = ipsec_check_inbound_policy(mp, connp, 820 NULL, ip6h, ira); 821 if (mp == NULL) { 822 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 823 /* Note that mp is NULL */ 824 ip_drop_input("ipIfStatsInDiscards", mp, ill); 825 CONN_DEC_REF(connp); 826 return; 827 } 828 } 829 830 ira->ira_flags |= IRAF_ICMP_ERROR; 831 if (IPCL_IS_TCP(connp)) { 832 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 833 connp->conn_recvicmp, connp, ira, SQ_FILL, 834 SQTAG_TCP6_INPUT_ICMP_ERR); 835 } else { 836 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 837 ill_t *rill = ira->ira_rill; 838 839 ira->ira_ill = ira->ira_rill = NULL; 840 (connp->conn_recv)(connp, mp, NULL, ira); 841 CONN_DEC_REF(connp); 842 ira->ira_ill = ill; 843 ira->ira_rill = rill; 844 } 845 ira->ira_flags &= ~IRAF_ICMP_ERROR; 846 return; 847 848 } 849 case IPPROTO_SCTP: 850 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 851 /* Find a SCTP client stream for this packet. 
*/ 852 ((uint16_t *)&ports)[0] = up[1]; 853 ((uint16_t *)&ports)[1] = up[0]; 854 855 ira->ira_flags |= IRAF_ICMP_ERROR; 856 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira); 857 ira->ira_flags &= ~IRAF_ICMP_ERROR; 858 return; 859 860 case IPPROTO_ESP: 861 case IPPROTO_AH: 862 if (!ipsec_loaded(ipss)) { 863 ip_proto_not_sup(mp, ira); 864 return; 865 } 866 867 if (nexthdr == IPPROTO_ESP) 868 mp = ipsecesp_icmp_error(mp, ira); 869 else 870 mp = ipsecah_icmp_error(mp, ira); 871 if (mp == NULL) 872 return; 873 874 /* Just in case ipsec didn't preserve the NULL b_cont */ 875 if (mp->b_cont != NULL) { 876 if (!pullupmsg(mp, -1)) 877 goto drop_pkt; 878 } 879 880 /* 881 * If succesful, the mp has been modified to not include 882 * the ESP/AH header so we can fanout to the ULP's icmp 883 * error handler. 884 */ 885 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN) 886 goto drop_pkt; 887 888 ip6h = (ip6_t *)mp->b_rptr; 889 /* Don't call hdr_length_v6() unless you have to. */ 890 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 891 hdr_length = ip_hdr_length_v6(mp, ip6h); 892 else 893 hdr_length = IPV6_HDR_LEN; 894 895 /* Verify the modified message before any further processes. */ 896 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 897 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 898 freemsg(mp); 899 return; 900 } 901 902 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 903 return; 904 905 case IPPROTO_IPV6: { 906 /* Look for self-encapsulated packets that caused an error */ 907 ip6_t *in_ip6h; 908 909 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 910 911 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) && 912 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) { 913 /* 914 * Self-encapsulated case. As in the ipv4 case, 915 * we need to strip the 2nd IP header. Since mp 916 * is already pulled-up, we can simply bcopy 917 * the 3rd header + data over the 2nd header. 918 */ 919 uint16_t unused_len; 920 921 /* 922 * Make sure we don't do recursion more than once. 
923 */ 924 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h, 925 &unused_len, &nexthdrp) || 926 *nexthdrp == IPPROTO_IPV6) { 927 goto drop_pkt; 928 } 929 930 /* 931 * Copy the 3rd header + remaining data on top 932 * of the 2nd header. 933 */ 934 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h); 935 936 /* 937 * Subtract length of the 2nd header. 938 */ 939 mp->b_wptr -= hdr_length; 940 941 ip6h = (ip6_t *)mp->b_rptr; 942 /* Don't call hdr_length_v6() unless you have to. */ 943 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 944 hdr_length = ip_hdr_length_v6(mp, ip6h); 945 else 946 hdr_length = IPV6_HDR_LEN; 947 948 /* 949 * Verify the modified message before any further 950 * processes. 951 */ 952 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 953 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 954 freemsg(mp); 955 return; 956 } 957 958 /* 959 * Now recurse, and see what I _really_ should be 960 * doing here. 961 */ 962 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 963 return; 964 } 965 /* FALLTHRU */ 966 } 967 case IPPROTO_ENCAP: 968 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src, 969 &rip6h.ip6_dst, ipst)) != NULL) { 970 ira->ira_flags |= IRAF_ICMP_ERROR; 971 connp->conn_recvicmp(connp, mp, NULL, ira); 972 CONN_DEC_REF(connp); 973 ira->ira_flags &= ~IRAF_ICMP_ERROR; 974 return; 975 } 976 /* 977 * No IP tunnel is interested, fallthrough and see 978 * if a raw socket will want it. 979 */ 980 /* FALLTHRU */ 981 default: 982 ira->ira_flags |= IRAF_ICMP_ERROR; 983 ASSERT(ira->ira_protocol == nexthdr); 984 ip_fanout_proto_v6(mp, &rip6h, ira); 985 ira->ira_flags &= ~IRAF_ICMP_ERROR; 986 return; 987 } 988 /* NOTREACHED */ 989 drop_pkt: 990 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 991 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 992 freemsg(mp); 993 } 994 995 /* 996 * Process received IPv6 ICMP Redirect messages. 997 * Assumes the caller has verified that the headers are in the pulled up mblk. 998 * Consumes mp. 
999 */ 1000 /* ARGSUSED */ 1001 static void 1002 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd, 1003 ip_recv_attr_t *ira) 1004 { 1005 ire_t *ire, *nire; 1006 ire_t *prev_ire = NULL; 1007 ire_t *redir_ire; 1008 in6_addr_t *src, *dst, *gateway; 1009 nd_opt_hdr_t *opt; 1010 nce_t *nce; 1011 int ncec_flags = 0; 1012 int err = 0; 1013 boolean_t redirect_to_router = B_FALSE; 1014 int len; 1015 int optlen; 1016 ill_t *ill = ira->ira_rill; 1017 ill_t *rill = ira->ira_rill; 1018 ip_stack_t *ipst = ill->ill_ipst; 1019 1020 /* 1021 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 1022 * and make it be the IPMP upper so avoid being confused by a packet 1023 * addressed to a unicast address on a different ill. 1024 */ 1025 if (IS_UNDER_IPMP(rill)) { 1026 rill = ipmp_ill_hold_ipmp_ill(rill); 1027 if (rill == NULL) { 1028 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1029 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill", 1030 mp, ill); 1031 freemsg(mp); 1032 return; 1033 } 1034 ASSERT(rill != ira->ira_rill); 1035 } 1036 1037 len = mp->b_wptr - (uchar_t *)rd; 1038 src = &ip6h->ip6_src; 1039 dst = &rd->nd_rd_dst; 1040 gateway = &rd->nd_rd_target; 1041 1042 /* Verify if it is a valid redirect */ 1043 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1044 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1045 (rd->nd_rd_code != 0) || 1046 (len < sizeof (nd_redirect_t)) || 1047 (IN6_IS_ADDR_V4MAPPED(dst)) || 1048 (IN6_IS_ADDR_MULTICAST(dst))) { 1049 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1050 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill); 1051 goto fail_redirect; 1052 } 1053 1054 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1055 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1056 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1057 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway", 1058 mp, ill); 1059 goto fail_redirect; 1060 } 1061 1062 optlen = len - sizeof (nd_redirect_t); 1063 if (optlen != 0) { 1064 if 
(!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) { 1065 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1066 ip_drop_input("ipv6IfIcmpInBadRedirects - options", 1067 mp, ill); 1068 goto fail_redirect; 1069 } 1070 } 1071 1072 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1073 redirect_to_router = B_TRUE; 1074 ncec_flags |= NCE_F_ISROUTER; 1075 } else { 1076 gateway = dst; /* Add nce for dst */ 1077 } 1078 1079 1080 /* 1081 * Verify that the IP source address of the redirect is 1082 * the same as the current first-hop router for the specified 1083 * ICMP destination address. 1084 * Also, Make sure we had a route for the dest in question and 1085 * that route was pointing to the old gateway (the source of the 1086 * redirect packet.) 1087 * We do longest match and then compare ire_gateway_addr_v6 below. 1088 */ 1089 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill, 1090 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL); 1091 1092 /* 1093 * Check that 1094 * the redirect was not from ourselves 1095 * old gateway is still directly reachable 1096 */ 1097 if (prev_ire == NULL || 1098 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) || 1099 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 1100 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) { 1101 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1102 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill); 1103 goto fail_redirect; 1104 } 1105 1106 ASSERT(prev_ire->ire_ill != NULL); 1107 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD) 1108 ncec_flags |= NCE_F_NONUD; 1109 1110 opt = (nd_opt_hdr_t *)&rd[1]; 1111 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1112 if (opt != NULL) { 1113 err = nce_lookup_then_add_v6(rill, 1114 (uchar_t *)&opt[1], /* Link layer address */ 1115 rill->ill_phys_addr_length, 1116 gateway, ncec_flags, ND_STALE, &nce); 1117 switch (err) { 1118 case 0: 1119 nce_refrele(nce); 1120 break; 1121 case EEXIST: 1122 /* 1123 * Check to see if link layer address has 
changed and 1124 * process the ncec_state accordingly. 1125 */ 1126 nce_process(nce->nce_common, 1127 (uchar_t *)&opt[1], 0, B_FALSE); 1128 nce_refrele(nce); 1129 break; 1130 default: 1131 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1132 err)); 1133 goto fail_redirect; 1134 } 1135 } 1136 if (redirect_to_router) { 1137 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1138 1139 /* 1140 * Create a Route Association. This will allow us to remember 1141 * a router told us to use the particular gateway. 1142 */ 1143 ire = ire_create_v6( 1144 dst, 1145 &ipv6_all_ones, /* mask */ 1146 gateway, /* gateway addr */ 1147 IRE_HOST, 1148 prev_ire->ire_ill, 1149 ALL_ZONES, 1150 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1151 NULL, 1152 ipst); 1153 } else { 1154 ipif_t *ipif; 1155 in6_addr_t gw; 1156 1157 /* 1158 * Just create an on link entry, i.e. interface route. 1159 * The gateway field is our link-local on the ill. 1160 */ 1161 mutex_enter(&rill->ill_lock); 1162 for (ipif = rill->ill_ipif; ipif != NULL; 1163 ipif = ipif->ipif_next) { 1164 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1165 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr)) 1166 break; 1167 } 1168 if (ipif == NULL) { 1169 /* We have no link-local address! 
*/ 1170 mutex_exit(&rill->ill_lock); 1171 goto fail_redirect; 1172 } 1173 gw = ipif->ipif_v6lcl_addr; 1174 mutex_exit(&rill->ill_lock); 1175 1176 ire = ire_create_v6( 1177 dst, /* gateway == dst */ 1178 &ipv6_all_ones, /* mask */ 1179 &gw, /* gateway addr */ 1180 rill->ill_net_type, /* IF_[NO]RESOLVER */ 1181 prev_ire->ire_ill, 1182 ALL_ZONES, 1183 (RTF_DYNAMIC | RTF_HOST), 1184 NULL, 1185 ipst); 1186 } 1187 1188 if (ire == NULL) 1189 goto fail_redirect; 1190 1191 nire = ire_add(ire); 1192 /* Check if it was a duplicate entry */ 1193 if (nire != NULL && nire != ire) { 1194 ASSERT(nire->ire_identical_ref > 1); 1195 ire_delete(nire); 1196 ire_refrele(nire); 1197 nire = NULL; 1198 } 1199 ire = nire; 1200 if (ire != NULL) { 1201 ire_refrele(ire); /* Held in ire_add */ 1202 1203 /* tell routing sockets that we received a redirect */ 1204 ip_rts_change_v6(RTM_REDIRECT, 1205 &rd->nd_rd_dst, 1206 &rd->nd_rd_target, 1207 &ipv6_all_ones, 0, src, 1208 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1209 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1210 1211 /* 1212 * Delete any existing IRE_HOST type ires for this destination. 1213 * This together with the added IRE has the effect of 1214 * modifying an existing redirect. 1215 */ 1216 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1217 prev_ire->ire_ill, ALL_ZONES, NULL, 1218 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst, 1219 NULL); 1220 1221 if (redir_ire != NULL) { 1222 if (redir_ire->ire_flags & RTF_DYNAMIC) 1223 ire_delete(redir_ire); 1224 ire_refrele(redir_ire); 1225 } 1226 } 1227 1228 ire_refrele(prev_ire); 1229 prev_ire = NULL; 1230 1231 fail_redirect: 1232 if (prev_ire != NULL) 1233 ire_refrele(prev_ire); 1234 freemsg(mp); 1235 if (rill != ira->ira_rill) 1236 ill_refrele(rill); 1237 } 1238 1239 /* 1240 * Build and ship an IPv6 ICMP message using the packet data in mp, 1241 * and the ICMP header pointed to by "stuff". (May be called as 1242 * writer.) 
1243 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1244 * verify that an icmp error packet can be sent. 1245 * 1246 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1247 * source address (see above function). 1248 */ 1249 static void 1250 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1251 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1252 { 1253 ip6_t *ip6h; 1254 in6_addr_t v6dst; 1255 size_t len_needed; 1256 size_t msg_len; 1257 mblk_t *mp1; 1258 icmp6_t *icmp6; 1259 in6_addr_t v6src; 1260 ill_t *ill = ira->ira_ill; 1261 ip_stack_t *ipst = ill->ill_ipst; 1262 ip_xmit_attr_t ixas; 1263 1264 ip6h = (ip6_t *)mp->b_rptr; 1265 1266 bzero(&ixas, sizeof (ixas)); 1267 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1268 ixas.ixa_zoneid = ira->ira_zoneid; 1269 ixas.ixa_ifindex = 0; 1270 ixas.ixa_ipst = ipst; 1271 ixas.ixa_cred = kcred; 1272 ixas.ixa_cpid = NOPID; 1273 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1274 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1275 1276 /* 1277 * If the source of the original packet was link-local, then 1278 * make sure we send on the same ill (group) as we received it on. 1279 */ 1280 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1281 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1282 if (IS_UNDER_IPMP(ill)) 1283 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1284 else 1285 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1286 } 1287 1288 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1289 /* 1290 * Apply IPsec based on how IPsec was applied to 1291 * the packet that had the error. 1292 * 1293 * If it was an outbound packet that caused the ICMP 1294 * error, then the caller will have setup the IRA 1295 * appropriately. 1296 */ 1297 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1298 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1299 /* Note: mp already consumed and ip_drop_packet done */ 1300 return; 1301 } 1302 } else { 1303 /* 1304 * This is in clear. 
The icmp message we are building 1305 * here should go out in clear, independent of our policy. 1306 */ 1307 ixas.ixa_flags |= IXAF_NO_IPSEC; 1308 } 1309 1310 /* 1311 * If the caller specified the source we use that. 1312 * Otherwise, if the packet was for one of our unicast addresses, make 1313 * sure we respond with that as the source. Otherwise 1314 * have ip_output_simple pick the source address. 1315 */ 1316 if (v6src_ptr != NULL) { 1317 v6src = *v6src_ptr; 1318 } else { 1319 ire_t *ire; 1320 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1321 1322 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1323 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1324 match_flags |= MATCH_IRE_ILL; 1325 1326 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1327 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1328 match_flags, 0, ipst, NULL); 1329 if (ire != NULL) { 1330 v6src = ip6h->ip6_dst; 1331 ire_refrele(ire); 1332 } else { 1333 v6src = ipv6_all_zeros; 1334 ixas.ixa_flags |= IXAF_SET_SOURCE; 1335 } 1336 } 1337 v6dst = ip6h->ip6_src; 1338 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1339 msg_len = msgdsize(mp); 1340 if (msg_len > len_needed) { 1341 if (!adjmsg(mp, len_needed - msg_len)) { 1342 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1343 freemsg(mp); 1344 return; 1345 } 1346 msg_len = len_needed; 1347 } 1348 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1349 if (mp1 == NULL) { 1350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1351 freemsg(mp); 1352 return; 1353 } 1354 mp1->b_cont = mp; 1355 mp = mp1; 1356 1357 /* 1358 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1359 * node generates be accepted in peace by all on-host destinations. 1360 * If we do NOT assume that all on-host destinations trust 1361 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1362 * (Look for IXAF_TRUSTED_ICMP). 
1363 */ 1364 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1365 1366 ip6h = (ip6_t *)mp->b_rptr; 1367 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1368 1369 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1370 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1371 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1372 ip6h->ip6_dst = v6dst; 1373 ip6h->ip6_src = v6src; 1374 msg_len += IPV6_HDR_LEN + len; 1375 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1376 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1377 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1378 } 1379 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1380 icmp6 = (icmp6_t *)&ip6h[1]; 1381 bcopy(stuff, (char *)icmp6, len); 1382 /* 1383 * Prepare for checksum by putting icmp length in the icmp 1384 * checksum field. The checksum is calculated in ip_output_wire_v6. 1385 */ 1386 icmp6->icmp6_cksum = ip6h->ip6_plen; 1387 if (icmp6->icmp6_type == ND_REDIRECT) { 1388 ip6h->ip6_hops = IPV6_MAX_HOPS; 1389 } 1390 1391 (void) ip_output_simple(mp, &ixas); 1392 ixa_cleanup(&ixas); 1393 } 1394 1395 /* 1396 * Update the output mib when ICMPv6 packets are sent. 
1397 */ 1398 void 1399 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1400 { 1401 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1402 1403 switch (icmp6->icmp6_type) { 1404 case ICMP6_DST_UNREACH: 1405 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1406 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1407 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1408 break; 1409 1410 case ICMP6_TIME_EXCEEDED: 1411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1412 break; 1413 1414 case ICMP6_PARAM_PROB: 1415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1416 break; 1417 1418 case ICMP6_PACKET_TOO_BIG: 1419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1420 break; 1421 1422 case ICMP6_ECHO_REQUEST: 1423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1424 break; 1425 1426 case ICMP6_ECHO_REPLY: 1427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1428 break; 1429 1430 case ND_ROUTER_SOLICIT: 1431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1432 break; 1433 1434 case ND_ROUTER_ADVERT: 1435 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1436 break; 1437 1438 case ND_NEIGHBOR_SOLICIT: 1439 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1440 break; 1441 1442 case ND_NEIGHBOR_ADVERT: 1443 BUMP_MIB(ill->ill_icmp6_mib, 1444 ipv6IfIcmpOutNeighborAdvertisements); 1445 break; 1446 1447 case ND_REDIRECT: 1448 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1449 break; 1450 1451 case MLD_LISTENER_QUERY: 1452 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1453 break; 1454 1455 case MLD_LISTENER_REPORT: 1456 case MLD_V2_LISTENER_REPORT: 1457 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1458 break; 1459 1460 case MLD_LISTENER_REDUCTION: 1461 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1462 break; 1463 } 1464 } 1465 1466 /* 1467 * Check if it is ok to send an ICMPv6 error packet in 1468 * response to the IP packet in mp. 
1469 * Free the message and return null if no 1470 * ICMP error packet should be sent. 1471 */ 1472 static mblk_t * 1473 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1474 { 1475 ill_t *ill = ira->ira_ill; 1476 ip_stack_t *ipst = ill->ill_ipst; 1477 boolean_t llbcast; 1478 ip6_t *ip6h; 1479 1480 if (!mp) 1481 return (NULL); 1482 1483 /* We view multicast and broadcast as the same.. */ 1484 llbcast = (ira->ira_flags & 1485 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1486 ip6h = (ip6_t *)mp->b_rptr; 1487 1488 /* Check if source address uniquely identifies the host */ 1489 1490 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1491 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1492 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1493 freemsg(mp); 1494 return (NULL); 1495 } 1496 1497 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1498 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1499 icmp6_t *icmp6; 1500 1501 if (mp->b_wptr - mp->b_rptr < len_needed) { 1502 if (!pullupmsg(mp, len_needed)) { 1503 BUMP_MIB(ill->ill_icmp6_mib, 1504 ipv6IfIcmpInErrors); 1505 freemsg(mp); 1506 return (NULL); 1507 } 1508 ip6h = (ip6_t *)mp->b_rptr; 1509 } 1510 icmp6 = (icmp6_t *)&ip6h[1]; 1511 /* Explicitly do not generate errors in response to redirects */ 1512 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1513 icmp6->icmp6_type == ND_REDIRECT) { 1514 freemsg(mp); 1515 return (NULL); 1516 } 1517 } 1518 /* 1519 * Check that the destination is not multicast and that the packet 1520 * was not sent on link layer broadcast or multicast. (Exception 1521 * is Packet too big message as per the draft - when mcast_ok is set.) 1522 */ 1523 if (!mcast_ok && 1524 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1525 freemsg(mp); 1526 return (NULL); 1527 } 1528 /* 1529 * If this is a labeled system, then check to see if we're allowed to 1530 * send a response to this particular sender. If not, then just drop. 
1531 */ 1532 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1533 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1534 freemsg(mp); 1535 return (NULL); 1536 } 1537 1538 if (icmp_err_rate_limit(ipst)) { 1539 /* 1540 * Only send ICMP error packets every so often. 1541 * This should be done on a per port/source basis, 1542 * but for now this will suffice. 1543 */ 1544 freemsg(mp); 1545 return (NULL); 1546 } 1547 return (mp); 1548 } 1549 1550 /* 1551 * Called when a packet was sent out the same link that it arrived on. 1552 * Check if it is ok to send a redirect and then send it. 1553 */ 1554 void 1555 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1556 ip_recv_attr_t *ira) 1557 { 1558 ill_t *ill = ira->ira_ill; 1559 ip_stack_t *ipst = ill->ill_ipst; 1560 in6_addr_t *v6targ; 1561 ire_t *src_ire_v6 = NULL; 1562 mblk_t *mp1; 1563 ire_t *nhop_ire = NULL; 1564 1565 /* 1566 * Don't send a redirect when forwarding a source 1567 * routed packet. 1568 */ 1569 if (ip_source_routed_v6(ip6h, mp, ipst)) 1570 return; 1571 1572 if (ire->ire_type & IRE_ONLINK) { 1573 /* Target is directly connected */ 1574 v6targ = &ip6h->ip6_dst; 1575 } else { 1576 /* Determine the most specific IRE used to send the packets */ 1577 nhop_ire = ire_nexthop(ire); 1578 if (nhop_ire == NULL) 1579 return; 1580 1581 /* 1582 * We won't send redirects to a router 1583 * that doesn't have a link local 1584 * address, but will forward. 
1585 */ 1586 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1587 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1588 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1589 ire_refrele(nhop_ire); 1590 return; 1591 } 1592 v6targ = &nhop_ire->ire_addr_v6; 1593 } 1594 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1595 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1596 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1597 1598 if (src_ire_v6 == NULL) { 1599 if (nhop_ire != NULL) 1600 ire_refrele(nhop_ire); 1601 return; 1602 } 1603 1604 /* 1605 * The source is directly connected. 1606 */ 1607 mp1 = copymsg(mp); 1608 if (mp1 != NULL) 1609 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1610 1611 if (nhop_ire != NULL) 1612 ire_refrele(nhop_ire); 1613 ire_refrele(src_ire_v6); 1614 } 1615 1616 /* 1617 * Generate an ICMPv6 redirect message. 1618 * Include target link layer address option if it exits. 1619 * Always include redirect header. 1620 */ 1621 static void 1622 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1623 ip_recv_attr_t *ira) 1624 { 1625 nd_redirect_t *rd; 1626 nd_opt_rd_hdr_t *rdh; 1627 uchar_t *buf; 1628 ncec_t *ncec = NULL; 1629 nd_opt_hdr_t *opt; 1630 int len; 1631 int ll_opt_len = 0; 1632 int max_redir_hdr_data_len; 1633 int pkt_len; 1634 in6_addr_t *srcp; 1635 ill_t *ill; 1636 boolean_t need_refrele; 1637 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1638 1639 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1640 if (mp == NULL) 1641 return; 1642 1643 if (IS_UNDER_IPMP(ira->ira_ill)) { 1644 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1645 if (ill == NULL) { 1646 ill = ira->ira_ill; 1647 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1648 ip_drop_output("no IPMP ill for sending redirect", 1649 mp, ill); 1650 freemsg(mp); 1651 return; 1652 } 1653 need_refrele = B_TRUE; 1654 } else { 1655 ill = ira->ira_ill; 1656 need_refrele = B_FALSE; 1657 } 1658 1659 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1660 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1661 ncec->ncec_lladdr != NULL) { 1662 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1663 ill->ill_phys_addr_length + 7)/8 * 8; 1664 } 1665 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1666 ASSERT(len % 4 == 0); 1667 buf = kmem_alloc(len, KM_NOSLEEP); 1668 if (buf == NULL) { 1669 if (ncec != NULL) 1670 ncec_refrele(ncec); 1671 if (need_refrele) 1672 ill_refrele(ill); 1673 freemsg(mp); 1674 return; 1675 } 1676 1677 rd = (nd_redirect_t *)buf; 1678 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1679 rd->nd_rd_code = 0; 1680 rd->nd_rd_reserved = 0; 1681 rd->nd_rd_target = *targetp; 1682 rd->nd_rd_dst = *dest; 1683 1684 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1685 if (ncec != NULL && ll_opt_len != 0) { 1686 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1687 opt->nd_opt_len = ll_opt_len/8; 1688 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1689 ill->ill_phys_addr_length); 1690 } 1691 if (ncec != NULL) 1692 ncec_refrele(ncec); 1693 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1694 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1695 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1696 max_redir_hdr_data_len = 1697 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1698 pkt_len = msgdsize(mp); 1699 /* Make sure mp is 8 byte aligned */ 1700 if (pkt_len > max_redir_hdr_data_len) { 1701 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1702 sizeof (nd_opt_rd_hdr_t))/8; 1703 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1704 } else { 1705 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1706 (void) adjmsg(mp, -(pkt_len % 8)); 1707 } 1708 rdh->nd_opt_rh_reserved1 = 0; 1709 rdh->nd_opt_rh_reserved2 = 0; 1710 /* ipif_v6lcl_addr contains the link-local source address */ 1711 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1712 1713 /* Redirects sent by router, and router is global zone */ 1714 ASSERT(ira->ira_zoneid == ALL_ZONES); 1715 
ira->ira_zoneid = GLOBAL_ZONEID; 1716 icmp_pkt_v6(mp, buf, len, srcp, ira); 1717 kmem_free(buf, len); 1718 if (need_refrele) 1719 ill_refrele(ill); 1720 } 1721 1722 1723 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1724 void 1725 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1726 ip_recv_attr_t *ira) 1727 { 1728 icmp6_t icmp6; 1729 1730 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1731 if (mp == NULL) 1732 return; 1733 1734 bzero(&icmp6, sizeof (icmp6_t)); 1735 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1736 icmp6.icmp6_code = code; 1737 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1738 } 1739 1740 /* 1741 * Generate an ICMP unreachable message. 1742 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1743 * constructed by the caller. 1744 */ 1745 void 1746 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1747 ip_recv_attr_t *ira) 1748 { 1749 icmp6_t icmp6; 1750 1751 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1752 if (mp == NULL) 1753 return; 1754 1755 bzero(&icmp6, sizeof (icmp6_t)); 1756 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1757 icmp6.icmp6_code = code; 1758 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1759 } 1760 1761 /* 1762 * Generate an ICMP pkt too big message. 1763 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1764 * constructed by the caller. 1765 */ 1766 void 1767 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1768 ip_recv_attr_t *ira) 1769 { 1770 icmp6_t icmp6; 1771 1772 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1773 if (mp == NULL) 1774 return; 1775 1776 bzero(&icmp6, sizeof (icmp6_t)); 1777 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1778 icmp6.icmp6_code = 0; 1779 icmp6.icmp6_mtu = htonl(mtu); 1780 1781 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1782 } 1783 1784 /* 1785 * Generate an ICMP parameter problem message. (May be called as writer.) 
1786 * 'offset' is the offset from the beginning of the packet in error. 1787 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1788 * constructed by the caller. 1789 */ 1790 static void 1791 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1792 boolean_t mcast_ok, ip_recv_attr_t *ira) 1793 { 1794 icmp6_t icmp6; 1795 1796 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1797 if (mp == NULL) 1798 return; 1799 1800 bzero((char *)&icmp6, sizeof (icmp6_t)); 1801 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1802 icmp6.icmp6_code = code; 1803 icmp6.icmp6_pptr = htonl(offset); 1804 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1805 } 1806 1807 void 1808 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1809 ip_recv_attr_t *ira) 1810 { 1811 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1812 uint16_t hdr_length; 1813 uint8_t *nexthdrp; 1814 uint32_t offset; 1815 ill_t *ill = ira->ira_ill; 1816 1817 /* Determine the offset of the bad nexthdr value */ 1818 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1819 /* Malformed packet */ 1820 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1821 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1822 freemsg(mp); 1823 return; 1824 } 1825 1826 offset = nexthdrp - mp->b_rptr; 1827 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1828 mcast_ok, ira); 1829 } 1830 1831 /* 1832 * Verify whether or not the IP address is a valid local address. 1833 * Could be a unicast, including one for a down interface. 1834 * If allow_mcbc then a multicast or broadcast address is also 1835 * acceptable. 1836 * 1837 * In the case of a multicast address, however, the 1838 * upper protocol is expected to reset the src address 1839 * to zero when we return IPVL_MCAST so that 1840 * no packets are emitted with multicast address as 1841 * source address. 
1842 * The addresses valid for bind are: 1843 * (1) - in6addr_any 1844 * (2) - IP address of an UP interface 1845 * (3) - IP address of a DOWN interface 1846 * (4) - a multicast address. In this case 1847 * the conn will only receive packets destined to 1848 * the specified multicast address. Note: the 1849 * application still has to issue an 1850 * IPV6_JOIN_GROUP socket option. 1851 * 1852 * In all the above cases, the bound address must be valid in the current zone. 1853 * When the address is loopback or multicast, there might be many matching IREs 1854 * so bind has to look up based on the zone. 1855 */ 1856 ip_laddr_t 1857 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1858 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1859 { 1860 ire_t *src_ire; 1861 uint_t match_flags; 1862 ill_t *ill = NULL; 1863 1864 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1865 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1866 1867 match_flags = MATCH_IRE_ZONEONLY; 1868 if (scopeid != 0) { 1869 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1870 if (ill == NULL) 1871 return (IPVL_BAD); 1872 match_flags |= MATCH_IRE_ILL; 1873 } 1874 1875 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1876 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1877 if (ill != NULL) 1878 ill_refrele(ill); 1879 1880 /* 1881 * If an address other than in6addr_any is requested, 1882 * we verify that it is a valid address for bind 1883 * Note: Following code is in if-else-if form for 1884 * readability compared to a condition check. 1885 */ 1886 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1887 /* 1888 * (2) Bind to address of local UP interface 1889 */ 1890 ire_refrele(src_ire); 1891 return (IPVL_UNICAST_UP); 1892 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1893 /* (4) bind to multicast address. 
*/ 1894 if (src_ire != NULL) 1895 ire_refrele(src_ire); 1896 1897 /* 1898 * Note: caller should take IPV6_MULTICAST_IF 1899 * into account when selecting a real source address. 1900 */ 1901 if (allow_mcbc) 1902 return (IPVL_MCAST); 1903 else 1904 return (IPVL_BAD); 1905 } else { 1906 ipif_t *ipif; 1907 1908 /* 1909 * (3) Bind to address of local DOWN interface? 1910 * (ipif_lookup_addr() looks up all interfaces 1911 * but we do not get here for UP interfaces 1912 * - case (2) above) 1913 */ 1914 if (src_ire != NULL) 1915 ire_refrele(src_ire); 1916 1917 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1918 if (ipif == NULL) 1919 return (IPVL_BAD); 1920 1921 /* Not a useful source? */ 1922 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1923 ipif_refrele(ipif); 1924 return (IPVL_BAD); 1925 } 1926 ipif_refrele(ipif); 1927 return (IPVL_UNICAST_DOWN); 1928 } 1929 } 1930 1931 /* 1932 * Verify that both the source and destination addresses are valid. If 1933 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1934 * i.e. have no route to it. Protocols like TCP want to verify destination 1935 * reachability, while tunnels do not. 1936 * 1937 * Determine the route, the interface, and (optionally) the source address 1938 * to use to reach a given destination. 1939 * Note that we allow connect to broadcast and multicast addresses when 1940 * IPDF_ALLOW_MCBC is set. 1941 * first_hop and dst_addr are normally the same, but if source routing 1942 * they will differ; in that case the first_hop is what we'll use for the 1943 * routing lookup but the dce and label checks will be done on dst_addr, 1944 * 1945 * If uinfo is set, then we fill in the best available information 1946 * we have for the destination. This is based on (in priority order) any 1947 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1948 * ill_mtu/ill_mc_mtu. 1949 * 1950 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1951 * always do the label check on dst_addr. 1952 * 1953 * Assumes that the caller has set ixa_scopeid for link-local communication. 1954 */ 1955 int 1956 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1957 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1958 uint32_t flags, uint_t mac_mode) 1959 { 1960 ire_t *ire; 1961 int error = 0; 1962 in6_addr_t setsrc; /* RTF_SETSRC */ 1963 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1964 ip_stack_t *ipst = ixa->ixa_ipst; 1965 dce_t *dce; 1966 uint_t pmtu; 1967 uint_t ifindex; 1968 uint_t generation; 1969 nce_t *nce; 1970 ill_t *ill = NULL; 1971 boolean_t multirt = B_FALSE; 1972 1973 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1974 1975 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1976 1977 /* 1978 * We never send to zero; the ULPs map it to the loopback address. 1979 * We can't allow it since we use zero to mean unitialized in some 1980 * places. 1981 */ 1982 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1983 1984 if (is_system_labeled()) { 1985 ts_label_t *tsl = NULL; 1986 1987 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 1988 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 1989 if (error != 0) 1990 return (error); 1991 if (tsl != NULL) { 1992 /* Update the label */ 1993 ip_xmit_attr_replace_tsl(ixa, tsl); 1994 } 1995 } 1996 1997 setsrc = ipv6_all_zeros; 1998 /* 1999 * Select a route; For IPMP interfaces, we would only select 2000 * a "hidden" route (i.e., going through a specific under_ill) 2001 * if ixa_ifindex has been specified. 2002 */ 2003 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation, 2004 &setsrc, &error, &multirt); 2005 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2006 if (error != 0) 2007 goto bad_addr; 2008 2009 /* 2010 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2011 * If IPDF_VERIFY_DST is set, the destination must be reachable. 
2012 * Otherwise the destination needn't be reachable. 2013 * 2014 * If we match on a reject or black hole, then we've got a 2015 * local failure. May as well fail out the connect() attempt, 2016 * since it's never going to succeed. 2017 */ 2018 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2019 /* 2020 * If we're verifying destination reachability, we always want 2021 * to complain here. 2022 * 2023 * If we're not verifying destination reachability but the 2024 * destination has a route, we still want to fail on the 2025 * temporary address and broadcast address tests. 2026 * 2027 * In both cases do we let the code continue so some reasonable 2028 * information is returned to the caller. That enables the 2029 * caller to use (and even cache) the IRE. conn_ip_ouput will 2030 * use the generation mismatch path to check for the unreachable 2031 * case thereby avoiding any specific check in the main path. 2032 */ 2033 ASSERT(generation == IRE_GENERATION_VERIFY); 2034 if (flags & IPDF_VERIFY_DST) { 2035 /* 2036 * Set errno but continue to set up ixa_ire to be 2037 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2038 * That allows callers to use ip_output to get an 2039 * ICMP error back. 2040 */ 2041 if (!(ire->ire_type & IRE_HOST)) 2042 error = ENETUNREACH; 2043 else 2044 error = EHOSTUNREACH; 2045 } 2046 } 2047 2048 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2049 !(flags & IPDF_ALLOW_MCBC)) { 2050 ire_refrele(ire); 2051 ire = ire_reject(ipst, B_FALSE); 2052 generation = IRE_GENERATION_VERIFY; 2053 error = ENETUNREACH; 2054 } 2055 2056 /* Cache things */ 2057 if (ixa->ixa_ire != NULL) 2058 ire_refrele_notr(ixa->ixa_ire); 2059 #ifdef DEBUG 2060 ire_refhold_notr(ire); 2061 ire_refrele(ire); 2062 #endif 2063 ixa->ixa_ire = ire; 2064 ixa->ixa_ire_generation = generation; 2065 2066 /* 2067 * Ensure that ixa_dce is always set any time that ixa_ire is set, 2068 * since some callers will send a packet to conn_ip_output() even if 2069 * there's an error. 
2070 */ 2071 ifindex = 0; 2072 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2073 /* If we are creating a DCE we'd better have an ifindex */ 2074 if (ill != NULL) 2075 ifindex = ill->ill_phyint->phyint_ifindex; 2076 else 2077 flags &= ~IPDF_UNIQUE_DCE; 2078 } 2079 2080 if (flags & IPDF_UNIQUE_DCE) { 2081 /* Fallback to the default dce if allocation fails */ 2082 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2083 if (dce != NULL) { 2084 generation = dce->dce_generation; 2085 } else { 2086 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2087 &generation); 2088 } 2089 } else { 2090 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2091 } 2092 ASSERT(dce != NULL); 2093 if (ixa->ixa_dce != NULL) 2094 dce_refrele_notr(ixa->ixa_dce); 2095 #ifdef DEBUG 2096 dce_refhold_notr(dce); 2097 dce_refrele(dce); 2098 #endif 2099 ixa->ixa_dce = dce; 2100 ixa->ixa_dce_generation = generation; 2101 2102 2103 /* 2104 * For multicast with multirt we have a flag passed back from 2105 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2106 * possible multicast address. 2107 * We also need a flag for multicast since we can't check 2108 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2109 */ 2110 if (multirt) { 2111 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2112 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2113 } else { 2114 ixa->ixa_postfragfn = ire->ire_postfragfn; 2115 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2116 } 2117 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2118 /* Get an nce to cache. */ 2119 nce = ire_to_nce(ire, NULL, firsthop); 2120 if (nce == NULL) { 2121 /* Allocation failure? */ 2122 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2123 } else { 2124 if (ixa->ixa_nce != NULL) 2125 nce_refrele(ixa->ixa_nce); 2126 ixa->ixa_nce = nce; 2127 } 2128 } 2129 2130 /* 2131 * If the source address is a loopback address, the 2132 * destination had best be local or multicast. 
2133 * If we are sending to an IRE_LOCAL using a loopback source then 2134 * it had better be the same zoneid. 2135 */ 2136 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2137 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2138 ire = NULL; /* Stored in ixa_ire */ 2139 error = EADDRNOTAVAIL; 2140 goto bad_addr; 2141 } 2142 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2143 ire = NULL; /* Stored in ixa_ire */ 2144 error = EADDRNOTAVAIL; 2145 goto bad_addr; 2146 } 2147 } 2148 2149 /* 2150 * Does the caller want us to pick a source address? 2151 */ 2152 if (flags & IPDF_SELECT_SRC) { 2153 in6_addr_t src_addr; 2154 2155 /* 2156 * We use use ire_nexthop_ill to avoid the under ipmp 2157 * interface for source address selection. Note that for ipmp 2158 * probe packets, ixa_ifindex would have been specified, and 2159 * the ip_select_route() invocation would have picked an ire 2160 * will ire_ill pointing at an under interface. 2161 */ 2162 ill = ire_nexthop_ill(ire); 2163 2164 /* If unreachable we have no ill but need some source */ 2165 if (ill == NULL) { 2166 src_addr = ipv6_loopback; 2167 /* Make sure we look for a better source address */ 2168 generation = SRC_GENERATION_VERIFY; 2169 } else { 2170 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2171 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2172 &src_addr, &generation, NULL); 2173 if (error != 0) { 2174 ire = NULL; /* Stored in ixa_ire */ 2175 goto bad_addr; 2176 } 2177 } 2178 2179 /* 2180 * We allow the source address to to down. 2181 * However, we check that we don't use the loopback address 2182 * as a source when sending out on the wire. 
2183 */ 2184 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2185 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2186 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2187 ire = NULL; /* Stored in ixa_ire */ 2188 error = EADDRNOTAVAIL; 2189 goto bad_addr; 2190 } 2191 2192 *src_addrp = src_addr; 2193 ixa->ixa_src_generation = generation; 2194 } 2195 2196 /* 2197 * Make sure we don't leave an unreachable ixa_nce in place 2198 * since ip_select_route is used when we unplumb i.e., remove 2199 * references on ixa_ire, ixa_nce, and ixa_dce. 2200 */ 2201 nce = ixa->ixa_nce; 2202 if (nce != NULL && nce->nce_is_condemned) { 2203 nce_refrele(nce); 2204 ixa->ixa_nce = NULL; 2205 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2206 } 2207 2208 /* 2209 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2210 * multicast. But pmtu discovery is only enabled for connected 2211 * sockets in general. 2212 */ 2213 2214 /* 2215 * Set initial value for fragmentation limit. Either conn_ip_output 2216 * or ULP might updates it when there are routing changes. 2217 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2218 */ 2219 pmtu = ip_get_pmtu(ixa); 2220 ixa->ixa_fragsize = pmtu; 2221 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2222 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2223 ixa->ixa_pmtu = pmtu; 2224 2225 /* 2226 * Extract information useful for some transports. 2227 * First we look for DCE metrics. Then we take what we have in 2228 * the metrics in the route, where the offlink is used if we have 2229 * one. 
2230 */ 2231 if (uinfo != NULL) { 2232 bzero(uinfo, sizeof (*uinfo)); 2233 2234 if (dce->dce_flags & DCEF_UINFO) 2235 *uinfo = dce->dce_uinfo; 2236 2237 rts_merge_metrics(uinfo, &ire->ire_metrics); 2238 2239 /* Allow ire_metrics to decrease the path MTU from above */ 2240 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2241 uinfo->iulp_mtu = pmtu; 2242 2243 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2244 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2245 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2246 } 2247 2248 if (ill != NULL) 2249 ill_refrele(ill); 2250 2251 return (error); 2252 2253 bad_addr: 2254 if (ire != NULL) 2255 ire_refrele(ire); 2256 2257 if (ill != NULL) 2258 ill_refrele(ill); 2259 2260 /* 2261 * Make sure we don't leave an unreachable ixa_nce in place 2262 * since ip_select_route is used when we unplumb i.e., remove 2263 * references on ixa_ire, ixa_nce, and ixa_dce. 2264 */ 2265 nce = ixa->ixa_nce; 2266 if (nce != NULL && nce->nce_is_condemned) { 2267 nce_refrele(nce); 2268 ixa->ixa_nce = NULL; 2269 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2270 } 2271 2272 return (error); 2273 } 2274 2275 /* 2276 * Handle protocols with which IP is less intimate. There 2277 * can be more than one stream bound to a particular 2278 * protocol. When this is the case, normally each one gets a copy 2279 * of any incoming packets. 2280 * 2281 * Zones notes: 2282 * Packets will be distributed to conns in all zones. This is really only 2283 * useful for ICMPv6 as only applications in the global zone can create raw 2284 * sockets for other protocols. 
2285 */ 2286 void 2287 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2288 { 2289 mblk_t *mp1; 2290 in6_addr_t laddr = ip6h->ip6_dst; 2291 conn_t *connp, *first_connp, *next_connp; 2292 connf_t *connfp; 2293 ill_t *ill = ira->ira_ill; 2294 ip_stack_t *ipst = ill->ill_ipst; 2295 2296 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2297 mutex_enter(&connfp->connf_lock); 2298 connp = connfp->connf_head; 2299 for (connp = connfp->connf_head; connp != NULL; 2300 connp = connp->conn_next) { 2301 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2302 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2303 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2304 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2305 break; 2306 } 2307 2308 if (connp == NULL) { 2309 /* 2310 * No one bound to this port. Is 2311 * there a client that wants all 2312 * unclaimed datagrams? 2313 */ 2314 mutex_exit(&connfp->connf_lock); 2315 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2316 ICMP6_PARAMPROB_NEXTHEADER, ira); 2317 return; 2318 } 2319 2320 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2321 2322 CONN_INC_REF(connp); 2323 first_connp = connp; 2324 2325 /* 2326 * XXX: Fix the multiple protocol listeners case. We should not 2327 * be walking the conn->conn_next list here. 
2328 */ 2329 connp = connp->conn_next; 2330 for (;;) { 2331 while (connp != NULL) { 2332 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2333 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2334 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2335 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2336 ira, connp))) 2337 break; 2338 connp = connp->conn_next; 2339 } 2340 2341 if (connp == NULL) { 2342 /* No more interested clients */ 2343 connp = first_connp; 2344 break; 2345 } 2346 if (((mp1 = dupmsg(mp)) == NULL) && 2347 ((mp1 = copymsg(mp)) == NULL)) { 2348 /* Memory allocation failed */ 2349 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2350 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2351 connp = first_connp; 2352 break; 2353 } 2354 2355 CONN_INC_REF(connp); 2356 mutex_exit(&connfp->connf_lock); 2357 2358 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2359 ira); 2360 2361 mutex_enter(&connfp->connf_lock); 2362 /* Follow the next pointer before releasing the conn. */ 2363 next_connp = connp->conn_next; 2364 CONN_DEC_REF(connp); 2365 connp = next_connp; 2366 } 2367 2368 /* Last one. Send it upstream. */ 2369 mutex_exit(&connfp->connf_lock); 2370 2371 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2372 2373 CONN_DEC_REF(connp); 2374 } 2375 2376 /* 2377 * Called when it is conceptually a ULP that would sent the packet 2378 * e.g., port unreachable and nexthdr unknown. Check that the packet 2379 * would have passed the IPsec global policy before sending the error. 2380 * 2381 * Send an ICMP error after patching up the packet appropriately. 2382 * Uses ip_drop_input and bumps the appropriate MIB. 2383 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2384 */ 2385 void 2386 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2387 ip_recv_attr_t *ira) 2388 { 2389 ip6_t *ip6h; 2390 boolean_t secure; 2391 ill_t *ill = ira->ira_ill; 2392 ip_stack_t *ipst = ill->ill_ipst; 2393 netstack_t *ns = ipst->ips_netstack; 2394 ipsec_stack_t *ipss = ns->netstack_ipsec; 2395 2396 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2397 2398 /* 2399 * We are generating an icmp error for some inbound packet. 2400 * Called from all ip_fanout_(udp, tcp, proto) functions. 2401 * Before we generate an error, check with global policy 2402 * to see whether this is allowed to enter the system. As 2403 * there is no "conn", we are checking with global policy. 2404 */ 2405 ip6h = (ip6_t *)mp->b_rptr; 2406 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2407 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2408 if (mp == NULL) 2409 return; 2410 } 2411 2412 /* We never send errors for protocols that we do implement */ 2413 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2414 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2415 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2416 freemsg(mp); 2417 return; 2418 } 2419 2420 switch (icmp_type) { 2421 case ICMP6_DST_UNREACH: 2422 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2423 2424 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2425 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2426 2427 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2428 break; 2429 case ICMP6_PARAM_PROB: 2430 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2431 2432 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2433 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2434 2435 /* Let the system determine the offset for this one */ 2436 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2437 break; 2438 default: 2439 #ifdef DEBUG 2440 panic("ip_fanout_send_icmp_v6: wrong type"); 2441 /*NOTREACHED*/ 2442 #else 2443 freemsg(mp); 2444 break; 2445 #endif 2446 } 2447 } 2448 2449 /* 2450 * 
Fanout for UDP packets that are multicast or ICMP errors. 2451 * (Unicast fanout is handled in ip_input_v6.) 2452 * 2453 * If SO_REUSEADDR is set all multicast packets 2454 * will be delivered to all conns bound to the same port. 2455 * 2456 * Fanout for UDP packets. 2457 * The caller puts <fport, lport> in the ports parameter. 2458 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2459 * 2460 * If SO_REUSEADDR is set all multicast and broadcast packets 2461 * will be delivered to all conns bound to the same port. 2462 * 2463 * Zones notes: 2464 * Earlier in ip_input on a system with multiple shared-IP zones we 2465 * duplicate the multicast and broadcast packets and send them up 2466 * with each explicit zoneid that exists on that ill. 2467 * This means that here we can match the zoneid with SO_ALLZONES being special. 2468 */ 2469 void 2470 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2471 ip_recv_attr_t *ira) 2472 { 2473 in6_addr_t laddr; 2474 conn_t *connp; 2475 connf_t *connfp; 2476 in6_addr_t faddr; 2477 ill_t *ill = ira->ira_ill; 2478 ip_stack_t *ipst = ill->ill_ipst; 2479 2480 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2481 2482 laddr = ip6h->ip6_dst; 2483 faddr = ip6h->ip6_src; 2484 2485 /* Attempt to find a client stream based on destination port. 
*/ 2486 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2487 mutex_enter(&connfp->connf_lock); 2488 connp = connfp->connf_head; 2489 while (connp != NULL) { 2490 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2491 conn_wantpacket_v6(connp, ira, ip6h) && 2492 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2493 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2494 break; 2495 connp = connp->conn_next; 2496 } 2497 2498 if (connp == NULL) 2499 goto notfound; 2500 2501 CONN_INC_REF(connp); 2502 2503 if (connp->conn_reuseaddr) { 2504 conn_t *first_connp = connp; 2505 conn_t *next_connp; 2506 mblk_t *mp1; 2507 2508 connp = connp->conn_next; 2509 for (;;) { 2510 while (connp != NULL) { 2511 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2512 fport, faddr) && 2513 conn_wantpacket_v6(connp, ira, ip6h) && 2514 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2515 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2516 ira, connp))) 2517 break; 2518 connp = connp->conn_next; 2519 } 2520 if (connp == NULL) { 2521 /* No more interested clients */ 2522 connp = first_connp; 2523 break; 2524 } 2525 if (((mp1 = dupmsg(mp)) == NULL) && 2526 ((mp1 = copymsg(mp)) == NULL)) { 2527 /* Memory allocation failed */ 2528 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2529 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2530 connp = first_connp; 2531 break; 2532 } 2533 2534 CONN_INC_REF(connp); 2535 mutex_exit(&connfp->connf_lock); 2536 2537 IP6_STAT(ipst, ip6_udp_fanmb); 2538 ip_fanout_udp_conn(connp, mp1, NULL, 2539 (ip6_t *)mp1->b_rptr, ira); 2540 2541 mutex_enter(&connfp->connf_lock); 2542 /* Follow the next pointer before releasing the conn. */ 2543 next_connp = connp->conn_next; 2544 IP6_STAT(ipst, ip6_udp_fanmb); 2545 CONN_DEC_REF(connp); 2546 connp = next_connp; 2547 } 2548 } 2549 2550 /* Last one. Send it upstream. 
*/ 2551 mutex_exit(&connfp->connf_lock); 2552 2553 IP6_STAT(ipst, ip6_udp_fanmb); 2554 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2555 CONN_DEC_REF(connp); 2556 return; 2557 2558 notfound: 2559 mutex_exit(&connfp->connf_lock); 2560 /* 2561 * No one bound to this port. Is 2562 * there a client that wants all 2563 * unclaimed datagrams? 2564 */ 2565 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2566 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2567 ip_fanout_proto_v6(mp, ip6h, ira); 2568 } else { 2569 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2570 ICMP6_DST_UNREACH_NOPORT, ira); 2571 } 2572 } 2573 2574 /* 2575 * int ip_find_hdr_v6() 2576 * 2577 * This routine is used by the upper layer protocols, iptun, and IPsec: 2578 * - Set extension header pointers to appropriate locations 2579 * - Determine IPv6 header length and return it 2580 * - Return a pointer to the last nexthdr value 2581 * 2582 * The caller must initialize ipp_fields. 2583 * The upper layer protocols normally set label_separate which makes the 2584 * routine put the TX label in ipp_label_v6. If this is not set then 2585 * the hop-by-hop options including the label are placed in ipp_hopopts. 2586 * 2587 * NOTE: If multiple extension headers of the same type are present, 2588 * ip_find_hdr_v6() will set the respective extension header pointers 2589 * to the first one that it encounters in the IPv6 header. It also 2590 * skips fragment headers. This routine deals with malformed packets 2591 * of various sorts in which case the returned length is up to the 2592 * malformed part. 
2593 */ 2594 int 2595 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2596 uint8_t *nexthdrp) 2597 { 2598 uint_t length, ehdrlen; 2599 uint8_t nexthdr; 2600 uint8_t *whereptr, *endptr; 2601 ip6_dest_t *tmpdstopts; 2602 ip6_rthdr_t *tmprthdr; 2603 ip6_hbh_t *tmphopopts; 2604 ip6_frag_t *tmpfraghdr; 2605 2606 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2607 ipp->ipp_hoplimit = ip6h->ip6_hops; 2608 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2609 ipp->ipp_addr = ip6h->ip6_dst; 2610 2611 length = IPV6_HDR_LEN; 2612 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2613 endptr = mp->b_wptr; 2614 2615 nexthdr = ip6h->ip6_nxt; 2616 while (whereptr < endptr) { 2617 /* Is there enough left for len + nexthdr? */ 2618 if (whereptr + MIN_EHDR_LEN > endptr) 2619 goto done; 2620 2621 switch (nexthdr) { 2622 case IPPROTO_HOPOPTS: { 2623 /* We check for any CIPSO */ 2624 uchar_t *secopt; 2625 boolean_t hbh_needed; 2626 uchar_t *after_secopt; 2627 2628 tmphopopts = (ip6_hbh_t *)whereptr; 2629 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2630 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2631 goto done; 2632 nexthdr = tmphopopts->ip6h_nxt; 2633 2634 if (!label_separate) { 2635 secopt = NULL; 2636 after_secopt = whereptr; 2637 } else { 2638 /* 2639 * We have dropped packets with bad options in 2640 * ip6_input. No need to check return value 2641 * here. 
2642 */ 2643 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2644 &secopt, &after_secopt, &hbh_needed); 2645 } 2646 if (secopt != NULL && after_secopt - whereptr > 0) { 2647 ipp->ipp_fields |= IPPF_LABEL_V6; 2648 ipp->ipp_label_v6 = secopt; 2649 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2650 } else { 2651 ipp->ipp_label_len_v6 = 0; 2652 after_secopt = whereptr; 2653 hbh_needed = B_TRUE; 2654 } 2655 /* return only 1st hbh */ 2656 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2657 ipp->ipp_fields |= IPPF_HOPOPTS; 2658 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2659 ipp->ipp_hopoptslen = ehdrlen - 2660 ipp->ipp_label_len_v6; 2661 } 2662 break; 2663 } 2664 case IPPROTO_DSTOPTS: 2665 tmpdstopts = (ip6_dest_t *)whereptr; 2666 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2667 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2668 goto done; 2669 nexthdr = tmpdstopts->ip6d_nxt; 2670 /* 2671 * ipp_dstopts is set to the destination header after a 2672 * routing header. 2673 * Assume it is a post-rthdr destination header 2674 * and adjust when we find an rthdr. 2675 */ 2676 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2677 ipp->ipp_fields |= IPPF_DSTOPTS; 2678 ipp->ipp_dstopts = tmpdstopts; 2679 ipp->ipp_dstoptslen = ehdrlen; 2680 } 2681 break; 2682 case IPPROTO_ROUTING: 2683 tmprthdr = (ip6_rthdr_t *)whereptr; 2684 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2685 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2686 goto done; 2687 nexthdr = tmprthdr->ip6r_nxt; 2688 /* return only 1st rthdr */ 2689 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2690 ipp->ipp_fields |= IPPF_RTHDR; 2691 ipp->ipp_rthdr = tmprthdr; 2692 ipp->ipp_rthdrlen = ehdrlen; 2693 } 2694 /* 2695 * Make any destination header we've seen be a 2696 * pre-rthdr destination header. 
2697 */ 2698 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2699 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2700 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2701 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2702 ipp->ipp_dstopts = NULL; 2703 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2704 ipp->ipp_dstoptslen = 0; 2705 } 2706 break; 2707 case IPPROTO_FRAGMENT: 2708 tmpfraghdr = (ip6_frag_t *)whereptr; 2709 ehdrlen = sizeof (ip6_frag_t); 2710 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2711 goto done; 2712 nexthdr = tmpfraghdr->ip6f_nxt; 2713 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2714 ipp->ipp_fields |= IPPF_FRAGHDR; 2715 ipp->ipp_fraghdr = tmpfraghdr; 2716 ipp->ipp_fraghdrlen = ehdrlen; 2717 } 2718 break; 2719 case IPPROTO_NONE: 2720 default: 2721 goto done; 2722 } 2723 length += ehdrlen; 2724 whereptr += ehdrlen; 2725 } 2726 done: 2727 if (nexthdrp != NULL) 2728 *nexthdrp = nexthdr; 2729 return (length); 2730 } 2731 2732 /* 2733 * Try to determine where and what are the IPv6 header length and 2734 * pointer to nexthdr value for the upper layer protocol (or an 2735 * unknown next hdr). 2736 * 2737 * Parameters returns a pointer to the nexthdr value; 2738 * Must handle malformed packets of various sorts. 2739 * Function returns failure for malformed cases. 2740 */ 2741 boolean_t 2742 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 2743 uint8_t **nexthdrpp) 2744 { 2745 uint16_t length; 2746 uint_t ehdrlen; 2747 uint8_t *nexthdrp; 2748 uint8_t *whereptr; 2749 uint8_t *endptr; 2750 ip6_dest_t *desthdr; 2751 ip6_rthdr_t *rthdr; 2752 ip6_frag_t *fraghdr; 2753 2754 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 2755 length = IPV6_HDR_LEN; 2756 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2757 endptr = mp->b_wptr; 2758 2759 nexthdrp = &ip6h->ip6_nxt; 2760 while (whereptr < endptr) { 2761 /* Is there enough left for len + nexthdr? 
*/ 2762 if (whereptr + MIN_EHDR_LEN > endptr) 2763 break; 2764 2765 switch (*nexthdrp) { 2766 case IPPROTO_HOPOPTS: 2767 case IPPROTO_DSTOPTS: 2768 /* Assumes the headers are identical for hbh and dst */ 2769 desthdr = (ip6_dest_t *)whereptr; 2770 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2771 if ((uchar_t *)desthdr + ehdrlen > endptr) 2772 return (B_FALSE); 2773 nexthdrp = &desthdr->ip6d_nxt; 2774 break; 2775 case IPPROTO_ROUTING: 2776 rthdr = (ip6_rthdr_t *)whereptr; 2777 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2778 if ((uchar_t *)rthdr + ehdrlen > endptr) 2779 return (B_FALSE); 2780 nexthdrp = &rthdr->ip6r_nxt; 2781 break; 2782 case IPPROTO_FRAGMENT: 2783 fraghdr = (ip6_frag_t *)whereptr; 2784 ehdrlen = sizeof (ip6_frag_t); 2785 if ((uchar_t *)&fraghdr[1] > endptr) 2786 return (B_FALSE); 2787 nexthdrp = &fraghdr->ip6f_nxt; 2788 break; 2789 case IPPROTO_NONE: 2790 /* No next header means we're finished */ 2791 default: 2792 *hdr_length_ptr = length; 2793 *nexthdrpp = nexthdrp; 2794 return (B_TRUE); 2795 } 2796 length += ehdrlen; 2797 whereptr += ehdrlen; 2798 *hdr_length_ptr = length; 2799 *nexthdrpp = nexthdrp; 2800 } 2801 switch (*nexthdrp) { 2802 case IPPROTO_HOPOPTS: 2803 case IPPROTO_DSTOPTS: 2804 case IPPROTO_ROUTING: 2805 case IPPROTO_FRAGMENT: 2806 /* 2807 * If any know extension headers are still to be processed, 2808 * the packet's malformed (or at least all the IP header(s) are 2809 * not in the same mblk - and that should never happen. 2810 */ 2811 return (B_FALSE); 2812 2813 default: 2814 /* 2815 * If we get here, we know that all of the IP headers were in 2816 * the same mblk, even if the ULP header is in the next mblk. 2817 */ 2818 *hdr_length_ptr = length; 2819 *nexthdrpp = nexthdrp; 2820 return (B_TRUE); 2821 } 2822 } 2823 2824 /* 2825 * Return the length of the IPv6 related headers (including extension headers) 2826 * Returns a length even if the packet is malformed. 
2827 */ 2828 int 2829 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2830 { 2831 uint16_t hdr_len; 2832 uint8_t *nexthdrp; 2833 2834 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 2835 return (hdr_len); 2836 } 2837 2838 /* 2839 * Parse and process any hop-by-hop or destination options. 2840 * 2841 * Assumes that q is an ill read queue so that ICMP errors for link-local 2842 * destinations are sent out the correct interface. 2843 * 2844 * Returns -1 if there was an error and mp has been consumed. 2845 * Returns 0 if no special action is needed. 2846 * Returns 1 if the packet contained a router alert option for this node 2847 * which is verified to be "interesting/known" for our implementation. 2848 * 2849 * XXX Note: In future as more hbh or dest options are defined, 2850 * it may be better to have different routines for hbh and dest 2851 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2852 * may have same value in different namespaces. Or is it same namespace ?? 2853 * Current code checks for each opt_type (other than pads) if it is in 2854 * the expected nexthdr (hbh or dest) 2855 */ 2856 int 2857 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2858 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2859 { 2860 uint8_t opt_type; 2861 uint_t optused; 2862 int ret = 0; 2863 const char *errtype; 2864 ill_t *ill = ira->ira_ill; 2865 ip_stack_t *ipst = ill->ill_ipst; 2866 2867 while (optlen != 0) { 2868 opt_type = *optptr; 2869 if (opt_type == IP6OPT_PAD1) { 2870 optused = 1; 2871 } else { 2872 if (optlen < 2) 2873 goto bad_opt; 2874 errtype = "malformed"; 2875 if (opt_type == ip6opt_ls) { 2876 optused = 2 + optptr[1]; 2877 if (optused > optlen) 2878 goto bad_opt; 2879 } else switch (opt_type) { 2880 case IP6OPT_PADN: 2881 /* 2882 * Note:We don't verify that (N-2) pad octets 2883 * are zero as required by spec. Adhere to 2884 * "be liberal in what you accept..." 
part of 2885 * implementation philosophy (RFC791,RFC1122) 2886 */ 2887 optused = 2 + optptr[1]; 2888 if (optused > optlen) 2889 goto bad_opt; 2890 break; 2891 2892 case IP6OPT_JUMBO: 2893 if (hdr_type != IPPROTO_HOPOPTS) 2894 goto opt_error; 2895 goto opt_error; /* XXX Not implemented! */ 2896 2897 case IP6OPT_ROUTER_ALERT: { 2898 struct ip6_opt_router *or; 2899 2900 if (hdr_type != IPPROTO_HOPOPTS) 2901 goto opt_error; 2902 optused = 2 + optptr[1]; 2903 if (optused > optlen) 2904 goto bad_opt; 2905 or = (struct ip6_opt_router *)optptr; 2906 /* Check total length and alignment */ 2907 if (optused != sizeof (*or) || 2908 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2909 goto opt_error; 2910 /* Check value */ 2911 switch (*((uint16_t *)or->ip6or_value)) { 2912 case IP6_ALERT_MLD: 2913 case IP6_ALERT_RSVP: 2914 ret = 1; 2915 } 2916 break; 2917 } 2918 case IP6OPT_HOME_ADDRESS: { 2919 /* 2920 * Minimal support for the home address option 2921 * (which is required by all IPv6 nodes). 2922 * Implement by just swapping the home address 2923 * and source address. 2924 * XXX Note: this has IPsec implications since 2925 * AH needs to take this into account. 2926 * Also, when IPsec is used we need to ensure 2927 * that this is only processed once 2928 * in the received packet (to avoid swapping 2929 * back and forth). 2930 * NOTE:This option processing is considered 2931 * to be unsafe and prone to a denial of 2932 * service attack. 2933 * The current processing is not safe even with 2934 * IPsec secured IP packets. Since the home 2935 * address option processing requirement still 2936 * is in the IETF draft and in the process of 2937 * being redefined for its usage, it has been 2938 * decided to turn off the option by default. 2939 * If this section of code needs to be executed, 2940 * ndd variable ip6_ignore_home_address_opt 2941 * should be set to 0 at the user's own risk. 
2942 */ 2943 struct ip6_opt_home_address *oh; 2944 in6_addr_t tmp; 2945 2946 if (ipst->ips_ipv6_ignore_home_address_opt) 2947 goto opt_error; 2948 2949 if (hdr_type != IPPROTO_DSTOPTS) 2950 goto opt_error; 2951 optused = 2 + optptr[1]; 2952 if (optused > optlen) 2953 goto bad_opt; 2954 2955 /* 2956 * We did this dest. opt the first time 2957 * around (i.e. before AH processing). 2958 * If we've done AH... stop now. 2959 */ 2960 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2961 ira->ira_ipsec_ah_sa != NULL) 2962 break; 2963 2964 oh = (struct ip6_opt_home_address *)optptr; 2965 /* Check total length and alignment */ 2966 if (optused < sizeof (*oh) || 2967 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2968 goto opt_error; 2969 /* Swap ip6_src and the home address */ 2970 tmp = ip6h->ip6_src; 2971 /* XXX Note: only 8 byte alignment option */ 2972 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2973 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2974 break; 2975 } 2976 2977 case IP6OPT_TUNNEL_LIMIT: 2978 if (hdr_type != IPPROTO_DSTOPTS) { 2979 goto opt_error; 2980 } 2981 optused = 2 + optptr[1]; 2982 if (optused > optlen) { 2983 goto bad_opt; 2984 } 2985 if (optused != 3) { 2986 goto opt_error; 2987 } 2988 break; 2989 2990 default: 2991 errtype = "unknown"; 2992 /* FALLTHROUGH */ 2993 opt_error: 2994 /* Determine which zone should send error */ 2995 switch (IP6OPT_TYPE(opt_type)) { 2996 case IP6OPT_TYPE_SKIP: 2997 optused = 2 + optptr[1]; 2998 if (optused > optlen) 2999 goto bad_opt; 3000 ip1dbg(("ip_process_options_v6: %s " 3001 "opt 0x%x skipped\n", 3002 errtype, opt_type)); 3003 break; 3004 case IP6OPT_TYPE_DISCARD: 3005 ip1dbg(("ip_process_options_v6: %s " 3006 "opt 0x%x; packet dropped\n", 3007 errtype, opt_type)); 3008 BUMP_MIB(ill->ill_ip_mib, 3009 ipIfStatsInHdrErrors); 3010 ip_drop_input("ipIfStatsInHdrErrors", 3011 mp, ill); 3012 freemsg(mp); 3013 return (-1); 3014 case IP6OPT_TYPE_ICMP: 3015 BUMP_MIB(ill->ill_ip_mib, 3016 ipIfStatsInHdrErrors); 3017 
ip_drop_input("ipIfStatsInHdrErrors", 3018 mp, ill); 3019 icmp_param_problem_v6(mp, 3020 ICMP6_PARAMPROB_OPTION, 3021 (uint32_t)(optptr - 3022 (uint8_t *)ip6h), 3023 B_FALSE, ira); 3024 return (-1); 3025 case IP6OPT_TYPE_FORCEICMP: 3026 BUMP_MIB(ill->ill_ip_mib, 3027 ipIfStatsInHdrErrors); 3028 ip_drop_input("ipIfStatsInHdrErrors", 3029 mp, ill); 3030 icmp_param_problem_v6(mp, 3031 ICMP6_PARAMPROB_OPTION, 3032 (uint32_t)(optptr - 3033 (uint8_t *)ip6h), 3034 B_TRUE, ira); 3035 return (-1); 3036 default: 3037 ASSERT(0); 3038 } 3039 } 3040 } 3041 optlen -= optused; 3042 optptr += optused; 3043 } 3044 return (ret); 3045 3046 bad_opt: 3047 /* Determine which zone should send error */ 3048 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3049 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 3050 (uint32_t)(optptr - (uint8_t *)ip6h), 3051 B_FALSE, ira); 3052 return (-1); 3053 } 3054 3055 /* 3056 * Process a routing header that is not yet empty. 3057 * Because of RFC 5095, we now reject all route headers. 3058 */ 3059 void 3060 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 3061 ip_recv_attr_t *ira) 3062 { 3063 ill_t *ill = ira->ira_ill; 3064 ip_stack_t *ipst = ill->ill_ipst; 3065 3066 ASSERT(rth->ip6r_segleft != 0); 3067 3068 if (!ipst->ips_ipv6_forward_src_routed) { 3069 /* XXX Check for source routed out same interface? */ 3070 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 3071 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 3072 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 3073 freemsg(mp); 3074 return; 3075 } 3076 3077 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3078 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3079 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 3080 B_FALSE, ira); 3081 } 3082 3083 /* 3084 * Read side put procedure for IPv6 module. 
3085 */ 3086 void 3087 ip_rput_v6(queue_t *q, mblk_t *mp) 3088 { 3089 ill_t *ill; 3090 3091 ill = (ill_t *)q->q_ptr; 3092 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { 3093 union DL_primitives *dl; 3094 3095 dl = (union DL_primitives *)mp->b_rptr; 3096 /* 3097 * Things are opening or closing - only accept DLPI 3098 * ack messages. If the stream is closing and ip_wsrv 3099 * has completed, ip_close is out of the qwait, but has 3100 * not yet completed qprocsoff. Don't proceed any further 3101 * because the ill has been cleaned up and things hanging 3102 * off the ill have been freed. 3103 */ 3104 if ((mp->b_datap->db_type != M_PCPROTO) || 3105 (dl->dl_primitive == DL_UNITDATA_IND)) { 3106 inet_freemsg(mp); 3107 return; 3108 } 3109 } 3110 if (DB_TYPE(mp) == M_DATA) { 3111 struct mac_header_info_s mhi; 3112 3113 ip_mdata_to_mhi(ill, mp, &mhi); 3114 ip_input_v6(ill, NULL, mp, &mhi); 3115 } else { 3116 ip_rput_notdata(ill, mp); 3117 } 3118 } 3119 3120 /* 3121 * Walk through the IPv6 packet in mp and see if there's an AH header 3122 * in it. See if the AH header needs to get done before other headers in 3123 * the packet. (Worker function for ipsec_early_ah_v6().) 3124 */ 3125 #define IPSEC_HDR_DONT_PROCESS 0 3126 #define IPSEC_HDR_PROCESS 1 3127 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 3128 static int 3129 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 3130 { 3131 uint_t length; 3132 uint_t ehdrlen; 3133 uint8_t *whereptr; 3134 uint8_t *endptr; 3135 uint8_t *nexthdrp; 3136 ip6_dest_t *desthdr; 3137 ip6_rthdr_t *rthdr; 3138 ip6_t *ip6h; 3139 3140 /* 3141 * For now just pullup everything. In general, the less pullups, 3142 * the better, but there's so much squirrelling through anyway, 3143 * it's just easier this way. 
3144 */ 3145 if (!pullupmsg(mp, -1)) { 3146 return (IPSEC_MEMORY_ERROR); 3147 } 3148 3149 ip6h = (ip6_t *)mp->b_rptr; 3150 length = IPV6_HDR_LEN; 3151 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3152 endptr = mp->b_wptr; 3153 3154 /* 3155 * We can't just use the argument nexthdr in the place 3156 * of nexthdrp becaue we don't dereference nexthdrp 3157 * till we confirm whether it is a valid address. 3158 */ 3159 nexthdrp = &ip6h->ip6_nxt; 3160 while (whereptr < endptr) { 3161 /* Is there enough left for len + nexthdr? */ 3162 if (whereptr + MIN_EHDR_LEN > endptr) 3163 return (IPSEC_MEMORY_ERROR); 3164 3165 switch (*nexthdrp) { 3166 case IPPROTO_HOPOPTS: 3167 case IPPROTO_DSTOPTS: 3168 /* Assumes the headers are identical for hbh and dst */ 3169 desthdr = (ip6_dest_t *)whereptr; 3170 ehdrlen = 8 * (desthdr->ip6d_len + 1); 3171 if ((uchar_t *)desthdr + ehdrlen > endptr) 3172 return (IPSEC_MEMORY_ERROR); 3173 /* 3174 * Return DONT_PROCESS because the destination 3175 * options header may be for each hop in a 3176 * routing-header, and we only want AH if we're 3177 * finished with routing headers. 3178 */ 3179 if (*nexthdrp == IPPROTO_DSTOPTS) 3180 return (IPSEC_HDR_DONT_PROCESS); 3181 nexthdrp = &desthdr->ip6d_nxt; 3182 break; 3183 case IPPROTO_ROUTING: 3184 rthdr = (ip6_rthdr_t *)whereptr; 3185 3186 /* 3187 * If there's more hops left on the routing header, 3188 * return now with DON'T PROCESS. 
3189 */ 3190 if (rthdr->ip6r_segleft > 0) 3191 return (IPSEC_HDR_DONT_PROCESS); 3192 3193 ehdrlen = 8 * (rthdr->ip6r_len + 1); 3194 if ((uchar_t *)rthdr + ehdrlen > endptr) 3195 return (IPSEC_MEMORY_ERROR); 3196 nexthdrp = &rthdr->ip6r_nxt; 3197 break; 3198 case IPPROTO_FRAGMENT: 3199 /* Wait for reassembly */ 3200 return (IPSEC_HDR_DONT_PROCESS); 3201 case IPPROTO_AH: 3202 *nexthdr = IPPROTO_AH; 3203 return (IPSEC_HDR_PROCESS); 3204 case IPPROTO_NONE: 3205 /* No next header means we're finished */ 3206 default: 3207 return (IPSEC_HDR_DONT_PROCESS); 3208 } 3209 length += ehdrlen; 3210 whereptr += ehdrlen; 3211 } 3212 /* 3213 * Malformed/truncated packet. 3214 */ 3215 return (IPSEC_MEMORY_ERROR); 3216 } 3217 3218 /* 3219 * Path for AH if options are present. 3220 * Returns NULL if the mblk was consumed. 3221 * 3222 * Sometimes AH needs to be done before other IPv6 headers for security 3223 * reasons. This function (and its ipsec_needs_processing_v6() above) 3224 * indicates if that is so, and fans out to the appropriate IPsec protocol 3225 * for the datagram passed in. 3226 */ 3227 mblk_t * 3228 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira) 3229 { 3230 uint8_t nexthdr; 3231 ah_t *ah; 3232 ill_t *ill = ira->ira_ill; 3233 ip_stack_t *ipst = ill->ill_ipst; 3234 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3235 3236 switch (ipsec_needs_processing_v6(mp, &nexthdr)) { 3237 case IPSEC_MEMORY_ERROR: 3238 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3239 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3240 freemsg(mp); 3241 return (NULL); 3242 case IPSEC_HDR_DONT_PROCESS: 3243 return (mp); 3244 } 3245 3246 /* Default means send it to AH! 
*/ 3247 ASSERT(nexthdr == IPPROTO_AH); 3248 3249 if (!ipsec_loaded(ipss)) { 3250 ip_proto_not_sup(mp, ira); 3251 return (NULL); 3252 } 3253 3254 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 3255 if (mp == NULL) 3256 return (NULL); 3257 ASSERT(ah != NULL); 3258 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3259 ASSERT(ira->ira_ipsec_ah_sa != NULL); 3260 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 3261 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira); 3262 3263 if (mp == NULL) { 3264 /* 3265 * Either it failed or is pending. In the former case 3266 * ipIfStatsInDiscards was increased. 3267 */ 3268 return (NULL); 3269 } 3270 3271 /* we're done with IPsec processing, send it up */ 3272 ip_input_post_ipsec(mp, ira); 3273 return (NULL); 3274 } 3275 3276 /* 3277 * Reassemble fragment. 3278 * When it returns a completed message the first mblk will only contain 3279 * the headers prior to the fragment header, with the nexthdr value updated 3280 * to be the header after the fragment header. 3281 */ 3282 mblk_t * 3283 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h, 3284 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira) 3285 { 3286 uint32_t ident = ntohl(fraghdr->ip6f_ident); 3287 uint16_t offset; 3288 boolean_t more_frags; 3289 uint8_t nexthdr = fraghdr->ip6f_nxt; 3290 in6_addr_t *v6dst_ptr; 3291 in6_addr_t *v6src_ptr; 3292 uint_t end; 3293 uint_t hdr_length; 3294 size_t count; 3295 ipf_t *ipf; 3296 ipf_t **ipfp; 3297 ipfb_t *ipfb; 3298 mblk_t *mp1; 3299 uint8_t ecn_info = 0; 3300 size_t msg_len; 3301 mblk_t *tail_mp; 3302 mblk_t *t_mp; 3303 boolean_t pruned = B_FALSE; 3304 uint32_t sum_val; 3305 uint16_t sum_flags; 3306 ill_t *ill = ira->ira_ill; 3307 ip_stack_t *ipst = ill->ill_ipst; 3308 uint_t prev_nexthdr_offset; 3309 uint8_t prev_nexthdr; 3310 uint8_t *ptr; 3311 uint32_t packet_size; 3312 3313 /* 3314 * We utilize hardware computed checksum info only for UDP since 3315 * IP fragmentation is a normal occurence for the protocol. 
In 3316 * addition, checksum offload support for IP fragments carrying 3317 * UDP payload is commonly implemented across network adapters. 3318 */ 3319 ASSERT(ira->ira_rill != NULL); 3320 if (nexthdr == IPPROTO_UDP && dohwcksum && 3321 ILL_HCKSUM_CAPABLE(ira->ira_rill) && 3322 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 3323 mblk_t *mp1 = mp->b_cont; 3324 int32_t len; 3325 3326 /* Record checksum information from the packet */ 3327 sum_val = (uint32_t)DB_CKSUM16(mp); 3328 sum_flags = DB_CKSUMFLAGS(mp); 3329 3330 /* fragmented payload offset from beginning of mblk */ 3331 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 3332 3333 if ((sum_flags & HCK_PARTIALCKSUM) && 3334 (mp1 == NULL || mp1->b_cont == NULL) && 3335 offset >= DB_CKSUMSTART(mp) && 3336 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) { 3337 uint32_t adj; 3338 /* 3339 * Partial checksum has been calculated by hardware 3340 * and attached to the packet; in addition, any 3341 * prepended extraneous data is even byte aligned. 3342 * If any such data exists, we adjust the checksum; 3343 * this would also handle any postpended data. 3344 */ 3345 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 3346 mp, mp1, len, adj); 3347 3348 /* One's complement subtract extraneous checksum */ 3349 if (adj >= sum_val) 3350 sum_val = ~(adj - sum_val) & 0xFFFF; 3351 else 3352 sum_val -= adj; 3353 } 3354 } else { 3355 sum_val = 0; 3356 sum_flags = 0; 3357 } 3358 3359 /* Clear hardware checksumming flag */ 3360 DB_CKSUMFLAGS(mp) = 0; 3361 3362 /* 3363 * Determine the offset (from the begining of the IP header) 3364 * of the nexthdr value which has IPPROTO_FRAGMENT. We use 3365 * this when removing the fragment header from the packet. 3366 * This packet consists of the IPv6 header, a potential 3367 * hop-by-hop options header, a potential pre-routing-header 3368 * destination options header, and a potential routing header. 
3369 */ 3370 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 3371 prev_nexthdr = ip6h->ip6_nxt; 3372 ptr = (uint8_t *)&ip6h[1]; 3373 3374 if (prev_nexthdr == IPPROTO_HOPOPTS) { 3375 ip6_hbh_t *hbh_hdr; 3376 uint_t hdr_len; 3377 3378 hbh_hdr = (ip6_hbh_t *)ptr; 3379 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 3380 prev_nexthdr = hbh_hdr->ip6h_nxt; 3381 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 3382 - (uint8_t *)ip6h; 3383 ptr += hdr_len; 3384 } 3385 if (prev_nexthdr == IPPROTO_DSTOPTS) { 3386 ip6_dest_t *dest_hdr; 3387 uint_t hdr_len; 3388 3389 dest_hdr = (ip6_dest_t *)ptr; 3390 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 3391 prev_nexthdr = dest_hdr->ip6d_nxt; 3392 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 3393 - (uint8_t *)ip6h; 3394 ptr += hdr_len; 3395 } 3396 if (prev_nexthdr == IPPROTO_ROUTING) { 3397 ip6_rthdr_t *rthdr; 3398 uint_t hdr_len; 3399 3400 rthdr = (ip6_rthdr_t *)ptr; 3401 prev_nexthdr = rthdr->ip6r_nxt; 3402 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 3403 - (uint8_t *)ip6h; 3404 hdr_len = 8 * (rthdr->ip6r_len + 1); 3405 ptr += hdr_len; 3406 } 3407 if (prev_nexthdr != IPPROTO_FRAGMENT) { 3408 /* Can't handle other headers before the fragment header */ 3409 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3410 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3411 freemsg(mp); 3412 return (NULL); 3413 } 3414 3415 /* 3416 * Note: Fragment offset in header is in 8-octet units. 3417 * Clearing least significant 3 bits not only extracts 3418 * it but also gets it in units of octets. 3419 */ 3420 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 3421 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 3422 3423 /* 3424 * Is the more frags flag on and the payload length not a multiple 3425 * of eight? 
3426 */ 3427 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 3428 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3429 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3430 (uint32_t)((char *)&ip6h->ip6_plen - 3431 (char *)ip6h), B_FALSE, ira); 3432 return (NULL); 3433 } 3434 3435 v6src_ptr = &ip6h->ip6_src; 3436 v6dst_ptr = &ip6h->ip6_dst; 3437 end = remlen; 3438 3439 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 3440 end += offset; 3441 3442 /* 3443 * Would fragment cause reassembled packet to have a payload length 3444 * greater than IP_MAXPACKET - the max payload size? 3445 */ 3446 if (end > IP_MAXPACKET) { 3447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3448 ip_drop_input("Reassembled packet too large", mp, ill); 3449 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3450 (uint32_t)((char *)&fraghdr->ip6f_offlg - 3451 (char *)ip6h), B_FALSE, ira); 3452 return (NULL); 3453 } 3454 3455 /* 3456 * This packet just has one fragment. Reassembly not 3457 * needed. 3458 */ 3459 if (!more_frags && offset == 0) { 3460 goto reass_done; 3461 } 3462 3463 /* 3464 * Drop the fragmented as early as possible, if 3465 * we don't have resource(s) to re-assemble. 3466 */ 3467 if (ipst->ips_ip_reass_queue_bytes == 0) { 3468 freemsg(mp); 3469 return (NULL); 3470 } 3471 3472 /* Record the ECN field info. */ 3473 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 3474 /* 3475 * If this is not the first fragment, dump the unfragmentable 3476 * portion of the packet. 3477 */ 3478 if (offset) 3479 mp->b_rptr = (uchar_t *)&fraghdr[1]; 3480 3481 /* 3482 * Fragmentation reassembly. Each ILL has a hash table for 3483 * queueing packets undergoing reassembly for all IPIFs 3484 * associated with the ILL. The hash is based on the packet 3485 * IP ident field. The ILL frag hash table was allocated 3486 * as a timer block at the time the ILL was created. Whenever 3487 * there is anything on the reassembly queue, the timer will 3488 * be running. 
3489 */ 3490 /* Handle vnic loopback of fragments */ 3491 if (mp->b_datap->db_ref > 2) 3492 msg_len = 0; 3493 else 3494 msg_len = MBLKSIZE(mp); 3495 3496 tail_mp = mp; 3497 while (tail_mp->b_cont != NULL) { 3498 tail_mp = tail_mp->b_cont; 3499 if (tail_mp->b_datap->db_ref <= 2) 3500 msg_len += MBLKSIZE(tail_mp); 3501 } 3502 /* 3503 * If the reassembly list for this ILL will get too big 3504 * prune it. 3505 */ 3506 3507 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 3508 ipst->ips_ip_reass_queue_bytes) { 3509 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len, 3510 uint_t, ill->ill_frag_count, 3511 uint_t, ipst->ips_ip_reass_queue_bytes); 3512 ill_frag_prune(ill, 3513 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 3514 (ipst->ips_ip_reass_queue_bytes - msg_len)); 3515 pruned = B_TRUE; 3516 } 3517 3518 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 3519 mutex_enter(&ipfb->ipfb_lock); 3520 3521 ipfp = &ipfb->ipfb_ipf; 3522 /* Try to find an existing fragment queue for this packet. */ 3523 for (;;) { 3524 ipf = ipfp[0]; 3525 if (ipf) { 3526 /* 3527 * It has to match on ident, source address, and 3528 * dest address. 3529 */ 3530 if (ipf->ipf_ident == ident && 3531 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 3532 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 3533 3534 /* 3535 * If we have received too many 3536 * duplicate fragments for this packet 3537 * free it. 3538 */ 3539 if (ipf->ipf_num_dups > ip_max_frag_dups) { 3540 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3541 freemsg(mp); 3542 mutex_exit(&ipfb->ipfb_lock); 3543 return (NULL); 3544 } 3545 3546 break; 3547 } 3548 ipfp = &ipf->ipf_hash_next; 3549 continue; 3550 } 3551 3552 3553 /* 3554 * If we pruned the list, do we want to store this new 3555 * fragment?. We apply an optimization here based on the 3556 * fact that most fragments will be received in order. 3557 * So if the offset of this incoming fragment is zero, 3558 * it is the first fragment of a new packet. 
We will 3559 * keep it. Otherwise drop the fragment, as we have 3560 * probably pruned the packet already (since the 3561 * packet cannot be found). 3562 */ 3563 3564 if (pruned && offset != 0) { 3565 mutex_exit(&ipfb->ipfb_lock); 3566 freemsg(mp); 3567 return (NULL); 3568 } 3569 3570 /* New guy. Allocate a frag message. */ 3571 mp1 = allocb(sizeof (*ipf), BPRI_MED); 3572 if (!mp1) { 3573 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3574 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3575 freemsg(mp); 3576 partial_reass_done: 3577 mutex_exit(&ipfb->ipfb_lock); 3578 return (NULL); 3579 } 3580 3581 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 3582 /* 3583 * Too many fragmented packets in this hash bucket. 3584 * Free the oldest. 3585 */ 3586 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 3587 } 3588 3589 mp1->b_cont = mp; 3590 3591 /* Initialize the fragment header. */ 3592 ipf = (ipf_t *)mp1->b_rptr; 3593 ipf->ipf_mp = mp1; 3594 ipf->ipf_ptphn = ipfp; 3595 ipfp[0] = ipf; 3596 ipf->ipf_hash_next = NULL; 3597 ipf->ipf_ident = ident; 3598 ipf->ipf_v6src = *v6src_ptr; 3599 ipf->ipf_v6dst = *v6dst_ptr; 3600 /* Record reassembly start time. */ 3601 ipf->ipf_timestamp = gethrestime_sec(); 3602 /* Record ipf generation and account for frag header */ 3603 ipf->ipf_gen = ill->ill_ipf_gen++; 3604 ipf->ipf_count = MBLKSIZE(mp1); 3605 ipf->ipf_protocol = nexthdr; 3606 ipf->ipf_nf_hdr_len = 0; 3607 ipf->ipf_prev_nexthdr_offset = 0; 3608 ipf->ipf_last_frag_seen = B_FALSE; 3609 ipf->ipf_ecn = ecn_info; 3610 ipf->ipf_num_dups = 0; 3611 ipfb->ipfb_frag_pkts++; 3612 ipf->ipf_checksum = 0; 3613 ipf->ipf_checksum_flags = 0; 3614 3615 /* Store checksum value in fragment header */ 3616 if (sum_flags != 0) { 3617 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3618 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3619 ipf->ipf_checksum = sum_val; 3620 ipf->ipf_checksum_flags = sum_flags; 3621 } 3622 3623 /* 3624 * We handle reassembly two ways. 
In the easy case, 3625 * where all the fragments show up in order, we do 3626 * minimal bookkeeping, and just clip new pieces on 3627 * the end. If we ever see a hole, then we go off 3628 * to ip_reassemble which has to mark the pieces and 3629 * keep track of the number of holes, etc. Obviously, 3630 * the point of having both mechanisms is so we can 3631 * handle the easy case as efficiently as possible. 3632 */ 3633 if (offset == 0) { 3634 /* Easy case, in-order reassembly so far. */ 3635 /* Update the byte count */ 3636 ipf->ipf_count += msg_len; 3637 ipf->ipf_tail_mp = tail_mp; 3638 /* 3639 * Keep track of next expected offset in 3640 * ipf_end. 3641 */ 3642 ipf->ipf_end = end; 3643 ipf->ipf_nf_hdr_len = hdr_length; 3644 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset; 3645 } else { 3646 /* Hard case, hole at the beginning. */ 3647 ipf->ipf_tail_mp = NULL; 3648 /* 3649 * ipf_end == 0 means that we have given up 3650 * on easy reassembly. 3651 */ 3652 ipf->ipf_end = 0; 3653 3654 /* Forget checksum offload from now on */ 3655 ipf->ipf_checksum_flags = 0; 3656 3657 /* 3658 * ipf_hole_cnt is set by ip_reassemble. 3659 * ipf_count is updated by ip_reassemble. 3660 * No need to check for return value here 3661 * as we don't expect reassembly to complete or 3662 * fail for the first fragment itself. 3663 */ 3664 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 3665 msg_len); 3666 } 3667 /* Update per ipfb and ill byte counts */ 3668 ipfb->ipfb_count += ipf->ipf_count; 3669 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3670 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 3671 /* If the frag timer wasn't already going, start it. 
*/ 3672 mutex_enter(&ill->ill_lock); 3673 ill_frag_timer_start(ill); 3674 mutex_exit(&ill->ill_lock); 3675 goto partial_reass_done; 3676 } 3677 3678 /* 3679 * If the packet's flag has changed (it could be coming up 3680 * from an interface different than the previous, therefore 3681 * possibly different checksum capability), then forget about 3682 * any stored checksum states. Otherwise add the value to 3683 * the existing one stored in the fragment header. 3684 */ 3685 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 3686 sum_val += ipf->ipf_checksum; 3687 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3688 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3689 ipf->ipf_checksum = sum_val; 3690 } else if (ipf->ipf_checksum_flags != 0) { 3691 /* Forget checksum offload from now on */ 3692 ipf->ipf_checksum_flags = 0; 3693 } 3694 3695 /* 3696 * We have a new piece of a datagram which is already being 3697 * reassembled. Update the ECN info if all IP fragments 3698 * are ECN capable. If there is one which is not, clear 3699 * all the info. If there is at least one which has CE 3700 * code point, IP needs to report that up to transport. 3701 */ 3702 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 3703 if (ecn_info == IPH_ECN_CE) 3704 ipf->ipf_ecn = IPH_ECN_CE; 3705 } else { 3706 ipf->ipf_ecn = IPH_ECN_NECT; 3707 } 3708 3709 if (offset && ipf->ipf_end == offset) { 3710 /* The new fragment fits at the end */ 3711 ipf->ipf_tail_mp->b_cont = mp; 3712 /* Update the byte count */ 3713 ipf->ipf_count += msg_len; 3714 /* Update per ipfb and ill byte counts */ 3715 ipfb->ipfb_count += msg_len; 3716 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3717 atomic_add_32(&ill->ill_frag_count, msg_len); 3718 if (more_frags) { 3719 /* More to come. */ 3720 ipf->ipf_end = end; 3721 ipf->ipf_tail_mp = tail_mp; 3722 goto partial_reass_done; 3723 } 3724 } else { 3725 /* 3726 * Go do the hard cases. 3727 * Call ip_reassemble(). 
3728 */ 3729 int ret; 3730 3731 if (offset == 0) { 3732 if (ipf->ipf_prev_nexthdr_offset == 0) { 3733 ipf->ipf_nf_hdr_len = hdr_length; 3734 ipf->ipf_prev_nexthdr_offset = 3735 prev_nexthdr_offset; 3736 } 3737 } 3738 /* Save current byte count */ 3739 count = ipf->ipf_count; 3740 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 3741 3742 /* Count of bytes added and subtracted (freeb()ed) */ 3743 count = ipf->ipf_count - count; 3744 if (count) { 3745 /* Update per ipfb and ill byte counts */ 3746 ipfb->ipfb_count += count; 3747 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3748 atomic_add_32(&ill->ill_frag_count, count); 3749 } 3750 if (ret == IP_REASS_PARTIAL) { 3751 goto partial_reass_done; 3752 } else if (ret == IP_REASS_FAILED) { 3753 /* Reassembly failed. Free up all resources */ 3754 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3755 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 3756 IP_REASS_SET_START(t_mp, 0); 3757 IP_REASS_SET_END(t_mp, 0); 3758 } 3759 freemsg(mp); 3760 goto partial_reass_done; 3761 } 3762 3763 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 3764 } 3765 /* 3766 * We have completed reassembly. Unhook the frag header from 3767 * the reassembly list. 3768 * 3769 * Grab the unfragmentable header length next header value out 3770 * of the first fragment 3771 */ 3772 ASSERT(ipf->ipf_nf_hdr_len != 0); 3773 hdr_length = ipf->ipf_nf_hdr_len; 3774 3775 /* 3776 * Before we free the frag header, record the ECN info 3777 * to report back to the transport. 
3778 */ 3779 ecn_info = ipf->ipf_ecn; 3780 3781 /* 3782 * Store the nextheader field in the header preceding the fragment 3783 * header 3784 */ 3785 nexthdr = ipf->ipf_protocol; 3786 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 3787 ipfp = ipf->ipf_ptphn; 3788 3789 /* We need to supply these to caller */ 3790 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 3791 sum_val = ipf->ipf_checksum; 3792 else 3793 sum_val = 0; 3794 3795 mp1 = ipf->ipf_mp; 3796 count = ipf->ipf_count; 3797 ipf = ipf->ipf_hash_next; 3798 if (ipf) 3799 ipf->ipf_ptphn = ipfp; 3800 ipfp[0] = ipf; 3801 atomic_add_32(&ill->ill_frag_count, -count); 3802 ASSERT(ipfb->ipfb_count >= count); 3803 ipfb->ipfb_count -= count; 3804 ipfb->ipfb_frag_pkts--; 3805 mutex_exit(&ipfb->ipfb_lock); 3806 /* Ditch the frag header. */ 3807 mp = mp1->b_cont; 3808 freeb(mp1); 3809 3810 /* 3811 * Make sure the packet is good by doing some sanity 3812 * check. If bad we can silentely drop the packet. 3813 */ 3814 reass_done: 3815 if (hdr_length < sizeof (ip6_frag_t)) { 3816 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3817 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3818 ip1dbg(("ip_input_fragment_v6: bad packet\n")); 3819 freemsg(mp); 3820 return (NULL); 3821 } 3822 3823 /* 3824 * Remove the fragment header from the initial header by 3825 * splitting the mblk into the non-fragmentable header and 3826 * everthing after the fragment extension header. This has the 3827 * side effect of putting all the headers that need destination 3828 * processing into the b_cont block-- on return this fact is 3829 * used in order to avoid having to look at the extensions 3830 * already processed. 3831 * 3832 * Note that this code assumes that the unfragmentable portion 3833 * of the header is in the first mblk and increments 3834 * the read pointer past it. If this assumption is broken 3835 * this code fails badly. 
3836 */ 3837 if (mp->b_rptr + hdr_length != mp->b_wptr) { 3838 mblk_t *nmp; 3839 3840 if (!(nmp = dupb(mp))) { 3841 ip1dbg(("ip_input_fragment_v6: dupb failed\n")); 3842 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3843 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3844 freemsg(mp); 3845 return (NULL); 3846 } 3847 nmp->b_cont = mp->b_cont; 3848 mp->b_cont = nmp; 3849 nmp->b_rptr += hdr_length; 3850 } 3851 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 3852 3853 ip6h = (ip6_t *)mp->b_rptr; 3854 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr; 3855 3856 /* Restore original IP length in header. */ 3857 packet_size = msgdsize(mp); 3858 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN)); 3859 /* Record the ECN info. */ 3860 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 3861 ip6h->ip6_vcf |= htonl(ecn_info << 20); 3862 3863 /* Update the receive attributes */ 3864 ira->ira_pktlen = packet_size; 3865 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t); 3866 ira->ira_protocol = nexthdr; 3867 3868 /* Reassembly is successful; set checksum information in packet */ 3869 DB_CKSUM16(mp) = (uint16_t)sum_val; 3870 DB_CKSUMFLAGS(mp) = sum_flags; 3871 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length; 3872 3873 return (mp); 3874 } 3875 3876 /* 3877 * Given an mblk and a ptr, find the destination address in an IPv6 routing 3878 * header. 
3879 */ 3880 static in6_addr_t 3881 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3882 { 3883 ip6_rthdr0_t *rt0; 3884 int segleft, numaddr; 3885 in6_addr_t *ap, rv = oldrv; 3886 3887 rt0 = (ip6_rthdr0_t *)whereptr; 3888 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3889 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3890 uint8_t *, whereptr); 3891 return (rv); 3892 } 3893 segleft = rt0->ip6r0_segleft; 3894 numaddr = rt0->ip6r0_len / 2; 3895 3896 if ((rt0->ip6r0_len & 0x1) || 3897 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3898 (segleft > rt0->ip6r0_len / 2)) { 3899 /* 3900 * Corrupt packet. Either the routing header length is odd 3901 * (can't happen) or mismatched compared to the packet, or the 3902 * number of addresses is. Return what we can. This will 3903 * only be a problem on forwarded packets that get squeezed 3904 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3905 */ 3906 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3907 whereptr); 3908 return (rv); 3909 } 3910 3911 if (segleft != 0) { 3912 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3913 rv = ap[numaddr - 1]; 3914 } 3915 3916 return (rv); 3917 } 3918 3919 /* 3920 * Walk through the options to see if there is a routing header. 3921 * If present get the destination which is the last address of 3922 * the option. 3923 * mp needs to be provided in cases when the extension headers might span 3924 * b_cont; mp is never modified by this function. 3925 */ 3926 in6_addr_t 3927 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3928 { 3929 const mblk_t *current_mp = mp; 3930 uint8_t nexthdr; 3931 uint8_t *whereptr; 3932 int ehdrlen; 3933 in6_addr_t rv; 3934 3935 whereptr = (uint8_t *)ip6h; 3936 ehdrlen = sizeof (ip6_t); 3937 3938 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3939 ASSERT(mp == NULL || 3940 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3941 3942 rv = ip6h->ip6_dst; 3943 nexthdr = ip6h->ip6_nxt; 3944 if (is_fragment != NULL) 3945 *is_fragment = B_FALSE; 3946 3947 /* 3948 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3949 * no extension headers will be split across mblks. 3950 */ 3951 3952 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3953 nexthdr == IPPROTO_ROUTING) { 3954 if (nexthdr == IPPROTO_ROUTING) 3955 rv = pluck_out_dst(current_mp, whereptr, rv); 3956 3957 /* 3958 * All IPv6 extension headers have the next-header in byte 3959 * 0, and the (length - 8) in 8-byte-words. 3960 */ 3961 while (current_mp != NULL && 3962 whereptr + ehdrlen >= current_mp->b_wptr) { 3963 ehdrlen -= (current_mp->b_wptr - whereptr); 3964 current_mp = current_mp->b_cont; 3965 if (current_mp == NULL) { 3966 /* Bad packet. Return what we can. */ 3967 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3968 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3969 goto done; 3970 } 3971 whereptr = current_mp->b_rptr; 3972 } 3973 whereptr += ehdrlen; 3974 3975 nexthdr = *whereptr; 3976 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3977 ehdrlen = (*(whereptr + 1) + 1) * 8; 3978 } 3979 3980 done: 3981 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3982 *is_fragment = B_TRUE; 3983 return (rv); 3984 } 3985 3986 /* 3987 * ip_source_routed_v6: 3988 * This function is called by redirect code (called from ip_input_v6) to 3989 * know whether this packet is source routed through this node i.e 3990 * whether this node (router) is part of the journey. This 3991 * function is called under two cases : 3992 * 3993 * case 1 : Routing header was processed by this node and 3994 * ip_process_rthdr replaced ip6_dst with the next hop 3995 * and we are forwarding the packet to the next hop. 
3996 * 3997 * case 2 : Routing header was not processed by this node and we 3998 * are just forwarding the packet. 3999 * 4000 * For case (1) we don't want to send redirects. For case(2) we 4001 * want to send redirects. 4002 */ 4003 static boolean_t 4004 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 4005 { 4006 uint8_t nexthdr; 4007 in6_addr_t *addrptr; 4008 ip6_rthdr0_t *rthdr; 4009 uint8_t numaddr; 4010 ip6_hbh_t *hbhhdr; 4011 uint_t ehdrlen; 4012 uint8_t *byteptr; 4013 4014 ip2dbg(("ip_source_routed_v6\n")); 4015 nexthdr = ip6h->ip6_nxt; 4016 ehdrlen = IPV6_HDR_LEN; 4017 4018 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 4019 while (nexthdr == IPPROTO_HOPOPTS || 4020 nexthdr == IPPROTO_DSTOPTS) { 4021 byteptr = (uint8_t *)ip6h + ehdrlen; 4022 /* 4023 * Check if we have already processed 4024 * packets or we are just a forwarding 4025 * router which only pulled up msgs up 4026 * to IPV6HDR and one HBH ext header 4027 */ 4028 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4029 ip2dbg(("ip_source_routed_v6: Extension" 4030 " headers not processed\n")); 4031 return (B_FALSE); 4032 } 4033 hbhhdr = (ip6_hbh_t *)byteptr; 4034 nexthdr = hbhhdr->ip6h_nxt; 4035 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 4036 } 4037 switch (nexthdr) { 4038 case IPPROTO_ROUTING: 4039 byteptr = (uint8_t *)ip6h + ehdrlen; 4040 /* 4041 * If for some reason, we haven't pulled up 4042 * the routing hdr data mblk, then we must 4043 * not have processed it at all. So for sure 4044 * we are not part of the source routed journey. 4045 */ 4046 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4047 ip2dbg(("ip_source_routed_v6: Routing" 4048 " header not processed\n")); 4049 return (B_FALSE); 4050 } 4051 rthdr = (ip6_rthdr0_t *)byteptr; 4052 /* 4053 * Either we are an intermediate router or the 4054 * last hop before destination and we have 4055 * already processed the routing header. 
4056 * If segment_left is greater than or equal to zero, 4057 * then we must be the (numaddr - segleft) entry 4058 * of the routing header. Although ip6r0_segleft 4059 * is a unit8_t variable, we still check for zero 4060 * or greater value, if in case the data type 4061 * is changed someday in future. 4062 */ 4063 if (rthdr->ip6r0_segleft > 0 || 4064 rthdr->ip6r0_segleft == 0) { 4065 numaddr = rthdr->ip6r0_len / 2; 4066 addrptr = (in6_addr_t *)((char *)rthdr + 4067 sizeof (*rthdr)); 4068 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 4069 if (addrptr != NULL) { 4070 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 4071 return (B_TRUE); 4072 ip1dbg(("ip_source_routed_v6: Not local\n")); 4073 } 4074 } 4075 /* FALLTHRU */ 4076 default: 4077 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 4078 return (B_FALSE); 4079 } 4080 } 4081 4082 /* 4083 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 4084 * We have not optimized this in terms of number of mblks 4085 * allocated. For instance, for each fragment sent we always allocate a 4086 * mblk to hold the IPv6 header and fragment header. 4087 * 4088 * Assumes that all the extension headers are contained in the first mblk 4089 * and that the fragment header has has already been added by calling 4090 * ip_fraghdr_add_v6. 
4091 */ 4092 int 4093 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4094 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4095 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4096 { 4097 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4098 ip6_t *fip6h; 4099 mblk_t *hmp; 4100 mblk_t *hmp0; 4101 mblk_t *dmp; 4102 ip6_frag_t *fraghdr; 4103 size_t unfragmentable_len; 4104 size_t mlen; 4105 size_t max_chunk; 4106 uint16_t off_flags; 4107 uint16_t offset = 0; 4108 ill_t *ill = nce->nce_ill; 4109 uint8_t nexthdr; 4110 uint8_t *ptr; 4111 ip_stack_t *ipst = ill->ill_ipst; 4112 uint_t priority = mp->b_band; 4113 int error = 0; 4114 4115 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4116 if (max_frag == 0) { 4117 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4118 ip_drop_output("FragFails: zero max_frag", mp, ill); 4119 freemsg(mp); 4120 return (EINVAL); 4121 } 4122 4123 /* 4124 * Caller should have added fraghdr_t to pkt_len, and also 4125 * updated ip6_plen. 4126 */ 4127 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4128 ASSERT(msgdsize(mp) == pkt_len); 4129 4130 /* 4131 * Determine the length of the unfragmentable portion of this 4132 * datagram. This consists of the IPv6 header, a potential 4133 * hop-by-hop options header, a potential pre-routing-header 4134 * destination options header, and a potential routing header. 
4135 */ 4136 nexthdr = ip6h->ip6_nxt; 4137 ptr = (uint8_t *)&ip6h[1]; 4138 4139 if (nexthdr == IPPROTO_HOPOPTS) { 4140 ip6_hbh_t *hbh_hdr; 4141 uint_t hdr_len; 4142 4143 hbh_hdr = (ip6_hbh_t *)ptr; 4144 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4145 nexthdr = hbh_hdr->ip6h_nxt; 4146 ptr += hdr_len; 4147 } 4148 if (nexthdr == IPPROTO_DSTOPTS) { 4149 ip6_dest_t *dest_hdr; 4150 uint_t hdr_len; 4151 4152 dest_hdr = (ip6_dest_t *)ptr; 4153 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4154 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4155 nexthdr = dest_hdr->ip6d_nxt; 4156 ptr += hdr_len; 4157 } 4158 } 4159 if (nexthdr == IPPROTO_ROUTING) { 4160 ip6_rthdr_t *rthdr; 4161 uint_t hdr_len; 4162 4163 rthdr = (ip6_rthdr_t *)ptr; 4164 nexthdr = rthdr->ip6r_nxt; 4165 hdr_len = 8 * (rthdr->ip6r_len + 1); 4166 ptr += hdr_len; 4167 } 4168 if (nexthdr != IPPROTO_FRAGMENT) { 4169 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4170 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4171 freemsg(mp); 4172 return (EINVAL); 4173 } 4174 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4175 unfragmentable_len += sizeof (ip6_frag_t); 4176 4177 max_chunk = (max_frag - unfragmentable_len) & ~7; 4178 4179 /* 4180 * Allocate an mblk with enough room for the link-layer 4181 * header and the unfragmentable part of the datagram, which includes 4182 * the fragment header. This (or a copy) will be used as the 4183 * first mblk for each fragment we send. 4184 */ 4185 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4186 if (hmp == NULL) { 4187 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4188 ip_drop_output("FragFails: no hmp", mp, ill); 4189 freemsg(mp); 4190 return (ENOBUFS); 4191 } 4192 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4193 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4194 4195 fip6h = (ip6_t *)hmp->b_rptr; 4196 bcopy(ip6h, fip6h, unfragmentable_len); 4197 4198 /* 4199 * pkt_len is set to the total length of the fragmentable data in this 4200 * datagram. 
For each fragment sent, we will decrement pkt_len 4201 * by the amount of fragmentable data sent in that fragment 4202 * until len reaches zero. 4203 */ 4204 pkt_len -= unfragmentable_len; 4205 4206 /* 4207 * Move read ptr past unfragmentable portion, we don't want this part 4208 * of the data in our fragments. 4209 */ 4210 mp->b_rptr += unfragmentable_len; 4211 if (mp->b_rptr == mp->b_wptr) { 4212 mblk_t *mp1 = mp->b_cont; 4213 freeb(mp); 4214 mp = mp1; 4215 } 4216 4217 while (pkt_len != 0) { 4218 mlen = MIN(pkt_len, max_chunk); 4219 pkt_len -= mlen; 4220 if (pkt_len != 0) { 4221 /* Not last */ 4222 hmp0 = copyb(hmp); 4223 if (hmp0 == NULL) { 4224 BUMP_MIB(ill->ill_ip_mib, 4225 ipIfStatsOutFragFails); 4226 ip_drop_output("FragFails: copyb failed", 4227 mp, ill); 4228 freeb(hmp); 4229 freemsg(mp); 4230 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4231 return (ENOBUFS); 4232 } 4233 off_flags = IP6F_MORE_FRAG; 4234 } else { 4235 /* Last fragment */ 4236 hmp0 = hmp; 4237 hmp = NULL; 4238 off_flags = 0; 4239 } 4240 fip6h = (ip6_t *)(hmp0->b_rptr); 4241 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4242 sizeof (ip6_frag_t)); 4243 4244 fip6h->ip6_plen = htons((uint16_t)(mlen + 4245 unfragmentable_len - IPV6_HDR_LEN)); 4246 /* 4247 * Note: Optimization alert. 4248 * In IPv6 (and IPv4) protocol header, Fragment Offset 4249 * ("offset") is 13 bits wide and in 8-octet units. 4250 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4251 * it occupies the most significant 13 bits. 4252 * (least significant 13 bits in IPv4). 4253 * We do not do any shifts here. Not shifting is same effect 4254 * as taking offset value in octet units, dividing by 8 and 4255 * then shifting 3 bits left to line it up in place in proper 4256 * place protocol header. 
4257 */ 4258 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4259 4260 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4261 /* mp has already been freed by ip_carve_mp() */ 4262 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4263 ip_drop_output("FragFails: could not carve mp", 4264 hmp0, ill); 4265 if (hmp != NULL) 4266 freeb(hmp); 4267 freeb(hmp0); 4268 ip1dbg(("ip_carve_mp: failed\n")); 4269 return (ENOBUFS); 4270 } 4271 hmp0->b_cont = dmp; 4272 /* Get the priority marking, if any */ 4273 hmp0->b_band = priority; 4274 4275 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4276 4277 error = postfragfn(hmp0, nce, ixaflags, 4278 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4279 ixa_cookie); 4280 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4281 /* No point in sending the other fragments */ 4282 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4283 ip_drop_output("FragFails: postfragfn failed", 4284 hmp, ill); 4285 freeb(hmp); 4286 freemsg(mp); 4287 return (error); 4288 } 4289 /* No need to redo state machine in loop */ 4290 ixaflags &= ~IXAF_REACH_CONF; 4291 4292 offset += mlen; 4293 } 4294 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4295 return (error); 4296 } 4297 4298 /* 4299 * Add a fragment header to an IPv6 packet. 4300 * Assumes that all the extension headers are contained in the first mblk. 4301 * 4302 * The fragment header is inserted after an hop-by-hop options header 4303 * and after [an optional destinations header followed by] a routing header. 4304 */ 4305 mblk_t * 4306 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4307 { 4308 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4309 ip6_t *fip6h; 4310 mblk_t *hmp; 4311 ip6_frag_t *fraghdr; 4312 size_t unfragmentable_len; 4313 uint8_t nexthdr; 4314 uint_t prev_nexthdr_offset; 4315 uint8_t *ptr; 4316 uint_t priority = mp->b_band; 4317 ip_stack_t *ipst = ixa->ixa_ipst; 4318 4319 /* 4320 * Determine the length of the unfragmentable portion of this 4321 * datagram. 
This consists of the IPv6 header, a potential 4322 * hop-by-hop options header, a potential pre-routing-header 4323 * destination options header, and a potential routing header. 4324 */ 4325 nexthdr = ip6h->ip6_nxt; 4326 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4327 ptr = (uint8_t *)&ip6h[1]; 4328 4329 if (nexthdr == IPPROTO_HOPOPTS) { 4330 ip6_hbh_t *hbh_hdr; 4331 uint_t hdr_len; 4332 4333 hbh_hdr = (ip6_hbh_t *)ptr; 4334 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4335 nexthdr = hbh_hdr->ip6h_nxt; 4336 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4337 - (uint8_t *)ip6h; 4338 ptr += hdr_len; 4339 } 4340 if (nexthdr == IPPROTO_DSTOPTS) { 4341 ip6_dest_t *dest_hdr; 4342 uint_t hdr_len; 4343 4344 dest_hdr = (ip6_dest_t *)ptr; 4345 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4346 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4347 nexthdr = dest_hdr->ip6d_nxt; 4348 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4349 - (uint8_t *)ip6h; 4350 ptr += hdr_len; 4351 } 4352 } 4353 if (nexthdr == IPPROTO_ROUTING) { 4354 ip6_rthdr_t *rthdr; 4355 uint_t hdr_len; 4356 4357 rthdr = (ip6_rthdr_t *)ptr; 4358 nexthdr = rthdr->ip6r_nxt; 4359 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4360 - (uint8_t *)ip6h; 4361 hdr_len = 8 * (rthdr->ip6r_len + 1); 4362 ptr += hdr_len; 4363 } 4364 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4365 4366 /* 4367 * Allocate an mblk with enough room for the link-layer 4368 * header, the unfragmentable part of the datagram, and the 4369 * fragment header. 
4370 */ 4371 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4372 ipst->ips_ip_wroff_extra, mp); 4373 if (hmp == NULL) { 4374 ill_t *ill = ixa->ixa_nce->nce_ill; 4375 4376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4377 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4378 freemsg(mp); 4379 return (NULL); 4380 } 4381 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4382 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4383 4384 fip6h = (ip6_t *)hmp->b_rptr; 4385 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4386 4387 bcopy(ip6h, fip6h, unfragmentable_len); 4388 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4389 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4390 4391 fraghdr->ip6f_nxt = nexthdr; 4392 fraghdr->ip6f_reserved = 0; 4393 fraghdr->ip6f_offlg = 0; 4394 fraghdr->ip6f_ident = htonl(ident); 4395 4396 /* Get the priority marking, if any */ 4397 hmp->b_band = priority; 4398 4399 /* 4400 * Move read ptr past unfragmentable portion, we don't want this part 4401 * of the data in our fragments. 4402 */ 4403 mp->b_rptr += unfragmentable_len; 4404 hmp->b_cont = mp; 4405 return (hmp); 4406 } 4407 4408 /* 4409 * Determine if the ill and multicast aspects of that packets 4410 * "matches" the conn. 4411 */ 4412 boolean_t 4413 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4414 { 4415 ill_t *ill = ira->ira_rill; 4416 zoneid_t zoneid = ira->ira_zoneid; 4417 uint_t in_ifindex; 4418 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4419 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4420 4421 /* 4422 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4423 * scopeid. This is used to limit 4424 * unicast and multicast reception to conn_incoming_ifindex. 4425 * conn_wantpacket_v6 is called both for unicast and 4426 * multicast packets. 
4427 */ 4428 in_ifindex = connp->conn_incoming_ifindex; 4429 4430 /* mpathd can bind to the under IPMP interface, which we allow */ 4431 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4432 if (!IS_UNDER_IPMP(ill)) 4433 return (B_FALSE); 4434 4435 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4436 return (B_FALSE); 4437 } 4438 4439 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4440 return (B_FALSE); 4441 4442 if (!(ira->ira_flags & IRAF_MULTICAST)) 4443 return (B_TRUE); 4444 4445 if (connp->conn_multi_router) 4446 return (B_TRUE); 4447 4448 if (ira->ira_protocol == IPPROTO_RSVP) 4449 return (B_TRUE); 4450 4451 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4452 ira->ira_ill)); 4453 } 4454 4455 /* 4456 * pr_addr_dbg function provides the needed buffer space to call 4457 * inet_ntop() function's 3rd argument. This function should be 4458 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4459 * stack buffer space in it's own stack frame. This function uses 4460 * a buffer from it's own stack and prints the information. 4461 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4462 * 4463 * Note: This function can call inet_ntop() once. 4464 */ 4465 void 4466 pr_addr_dbg(char *fmt1, int af, const void *addr) 4467 { 4468 char buf[INET6_ADDRSTRLEN]; 4469 4470 if (fmt1 == NULL) { 4471 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4472 return; 4473 } 4474 4475 /* 4476 * This does not compare debug level and just prints 4477 * out. Thus it is the responsibility of the caller 4478 * to check the appropriate debug-level before calling 4479 * this function. 4480 */ 4481 if (ip_debug > 0) { 4482 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4483 } 4484 4485 4486 } 4487 4488 4489 /* 4490 * Return the length in bytes of the IPv6 headers (base header 4491 * extension headers) that will be needed based on the 4492 * ip_pkt_t structure passed by the caller. 
4493 * 4494 * The returned length does not include the length of the upper level 4495 * protocol (ULP) header. 4496 */ 4497 int 4498 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4499 { 4500 int len; 4501 4502 len = IPV6_HDR_LEN; 4503 4504 /* 4505 * If there's a security label here, then we ignore any hop-by-hop 4506 * options the user may try to set. 4507 */ 4508 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4509 uint_t hopoptslen; 4510 /* 4511 * Note that ipp_label_len_v6 is just the option - not 4512 * the hopopts extension header. It also needs to be padded 4513 * to a multiple of 8 bytes. 4514 */ 4515 ASSERT(ipp->ipp_label_len_v6 != 0); 4516 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4517 hopoptslen = (hopoptslen + 7)/8 * 8; 4518 len += hopoptslen; 4519 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4520 ASSERT(ipp->ipp_hopoptslen != 0); 4521 len += ipp->ipp_hopoptslen; 4522 } 4523 4524 /* 4525 * En-route destination options 4526 * Only do them if there's a routing header as well 4527 */ 4528 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4529 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4530 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4531 len += ipp->ipp_rthdrdstoptslen; 4532 } 4533 if (ipp->ipp_fields & IPPF_RTHDR) { 4534 ASSERT(ipp->ipp_rthdrlen != 0); 4535 len += ipp->ipp_rthdrlen; 4536 } 4537 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4538 ASSERT(ipp->ipp_dstoptslen != 0); 4539 len += ipp->ipp_dstoptslen; 4540 } 4541 return (len); 4542 } 4543 4544 /* 4545 * All-purpose routine to build a header chain of an IPv6 header 4546 * followed by any required extension headers and a proto header. 4547 * 4548 * The caller has to set the source and destination address as well as 4549 * ip6_plen. The caller has to massage any routing header and compensate 4550 * for the ULP pseudo-header checksum due to the source route. 4551 * 4552 * The extension headers will all be fully filled in. 
4553 */ 4554 void 4555 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4556 uint8_t protocol, uint32_t flowinfo) 4557 { 4558 uint8_t *nxthdr_ptr; 4559 uint8_t *cp; 4560 ip6_t *ip6h = (ip6_t *)buf; 4561 4562 /* Initialize IPv6 header */ 4563 ip6h->ip6_vcf = 4564 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4565 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4566 4567 if (ipp->ipp_fields & IPPF_TCLASS) { 4568 /* Overrides the class part of flowinfo */ 4569 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4570 ipp->ipp_tclass); 4571 } 4572 4573 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4574 ip6h->ip6_hops = ipp->ipp_hoplimit; 4575 else 4576 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4577 4578 if ((ipp->ipp_fields & IPPF_ADDR) && 4579 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4580 ip6h->ip6_src = ipp->ipp_addr; 4581 4582 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4583 cp = (uint8_t *)&ip6h[1]; 4584 /* 4585 * Here's where we have to start stringing together 4586 * any extension headers in the right order: 4587 * Hop-by-hop, destination, routing, and final destination opts. 4588 */ 4589 /* 4590 * If there's a security label here, then we ignore any hop-by-hop 4591 * options the user may try to set. 4592 */ 4593 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4594 /* 4595 * Hop-by-hop options with the label. 4596 * Note that ipp_label_v6 is just the option - not 4597 * the hopopts extension header. It also needs to be padded 4598 * to a multiple of 8 bytes. 
4599 */ 4600 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4601 uint_t hopoptslen; 4602 uint_t padlen; 4603 4604 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4605 hopoptslen = (padlen + 7)/8 * 8; 4606 padlen = hopoptslen - padlen; 4607 4608 *nxthdr_ptr = IPPROTO_HOPOPTS; 4609 nxthdr_ptr = &hbh->ip6h_nxt; 4610 hbh->ip6h_len = hopoptslen/8 - 1; 4611 cp += sizeof (ip6_hbh_t); 4612 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4613 cp += ipp->ipp_label_len_v6; 4614 4615 ASSERT(padlen <= 7); 4616 switch (padlen) { 4617 case 0: 4618 break; 4619 case 1: 4620 cp[0] = IP6OPT_PAD1; 4621 break; 4622 default: 4623 cp[0] = IP6OPT_PADN; 4624 cp[1] = padlen - 2; 4625 bzero(&cp[2], padlen - 2); 4626 break; 4627 } 4628 cp += padlen; 4629 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4630 /* Hop-by-hop options */ 4631 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4632 4633 *nxthdr_ptr = IPPROTO_HOPOPTS; 4634 nxthdr_ptr = &hbh->ip6h_nxt; 4635 4636 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4637 cp += ipp->ipp_hopoptslen; 4638 } 4639 /* 4640 * En-route destination options 4641 * Only do them if there's a routing header as well 4642 */ 4643 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4644 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4645 ip6_dest_t *dst = (ip6_dest_t *)cp; 4646 4647 *nxthdr_ptr = IPPROTO_DSTOPTS; 4648 nxthdr_ptr = &dst->ip6d_nxt; 4649 4650 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4651 cp += ipp->ipp_rthdrdstoptslen; 4652 } 4653 /* 4654 * Routing header next 4655 */ 4656 if (ipp->ipp_fields & IPPF_RTHDR) { 4657 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4658 4659 *nxthdr_ptr = IPPROTO_ROUTING; 4660 nxthdr_ptr = &rt->ip6r_nxt; 4661 4662 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4663 cp += ipp->ipp_rthdrlen; 4664 } 4665 /* 4666 * Do ultimate destination options 4667 */ 4668 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4669 ip6_dest_t *dest = (ip6_dest_t *)cp; 4670 4671 *nxthdr_ptr = IPPROTO_DSTOPTS; 4672 nxthdr_ptr = &dest->ip6d_nxt; 4673 4674 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4675 cp += ipp->ipp_dstoptslen; 4676 } 4677 /* 4678 * Now set the last header pointer to the proto passed in 4679 */ 4680 *nxthdr_ptr = protocol; 4681 ASSERT((int)(cp - buf) == buf_len); 4682 } 4683 4684 /* 4685 * Return a pointer to the routing header extension header 4686 * in the IPv6 header(s) chain passed in. 4687 * If none found, return NULL 4688 * Assumes that all extension headers are in same mblk as the v6 header 4689 */ 4690 ip6_rthdr_t * 4691 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4692 { 4693 ip6_dest_t *desthdr; 4694 ip6_frag_t *fraghdr; 4695 uint_t hdrlen; 4696 uint8_t nexthdr; 4697 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4698 4699 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4700 return ((ip6_rthdr_t *)ptr); 4701 4702 /* 4703 * The routing header will precede all extension headers 4704 * other than the hop-by-hop and destination options 4705 * extension headers, so if we see anything other than those, 4706 * we're done and didn't find it. 4707 * We could see a destination options header alone but no 4708 * routing header, in which case we'll return NULL as soon as 4709 * we see anything after that. 4710 * Hop-by-hop and destination option headers are identical, 4711 * so we can use either one we want as a template. 4712 */ 4713 nexthdr = ip6h->ip6_nxt; 4714 while (ptr < endptr) { 4715 /* Is there enough left for len + nexthdr? 
*/ 4716 if (ptr + MIN_EHDR_LEN > endptr) 4717 return (NULL); 4718 4719 switch (nexthdr) { 4720 case IPPROTO_HOPOPTS: 4721 case IPPROTO_DSTOPTS: 4722 /* Assumes the headers are identical for hbh and dst */ 4723 desthdr = (ip6_dest_t *)ptr; 4724 hdrlen = 8 * (desthdr->ip6d_len + 1); 4725 nexthdr = desthdr->ip6d_nxt; 4726 break; 4727 4728 case IPPROTO_ROUTING: 4729 return ((ip6_rthdr_t *)ptr); 4730 4731 case IPPROTO_FRAGMENT: 4732 fraghdr = (ip6_frag_t *)ptr; 4733 hdrlen = sizeof (ip6_frag_t); 4734 nexthdr = fraghdr->ip6f_nxt; 4735 break; 4736 4737 default: 4738 return (NULL); 4739 } 4740 ptr += hdrlen; 4741 } 4742 return (NULL); 4743 } 4744 4745 /* 4746 * Called for source-routed packets originating on this node. 4747 * Manipulates the original routing header by moving every entry up 4748 * one slot, placing the first entry in the v6 header's v6_dst field, 4749 * and placing the ultimate destination in the routing header's last 4750 * slot. 4751 * 4752 * Returns the checksum diference between the ultimate destination 4753 * (last hop in the routing header when the packet is sent) and 4754 * the first hop (ip6_dst when the packet is sent) 4755 */ 4756 /* ARGSUSED2 */ 4757 uint32_t 4758 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4759 { 4760 uint_t numaddr; 4761 uint_t i; 4762 in6_addr_t *addrptr; 4763 in6_addr_t tmp; 4764 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4765 uint32_t cksm; 4766 uint32_t addrsum = 0; 4767 uint16_t *ptr; 4768 4769 /* 4770 * Perform any processing needed for source routing. 4771 * We know that all extension headers will be in the same mblk 4772 * as the IPv6 header. 4773 */ 4774 4775 /* 4776 * If no segments left in header, or the header length field is zero, 4777 * don't move hop addresses around; 4778 * Checksum difference is zero. 
4779 */ 4780 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4781 return (0); 4782 4783 ptr = (uint16_t *)&ip6h->ip6_dst; 4784 cksm = 0; 4785 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4786 cksm += ptr[i]; 4787 } 4788 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4789 4790 /* 4791 * Here's where the fun begins - we have to 4792 * move all addresses up one spot, take the 4793 * first hop and make it our first ip6_dst, 4794 * and place the ultimate destination in the 4795 * newly-opened last slot. 4796 */ 4797 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4798 numaddr = rthdr->ip6r0_len / 2; 4799 tmp = *addrptr; 4800 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4801 *addrptr = addrptr[1]; 4802 } 4803 *addrptr = ip6h->ip6_dst; 4804 ip6h->ip6_dst = tmp; 4805 4806 /* 4807 * From the checksummed ultimate destination subtract the checksummed 4808 * current ip6_dst (the first hop address). Return that number. 4809 * (In the v4 case, the second part of this is done in each routine 4810 * that calls ip_massage_options(). We do it all in this one place 4811 * for v6). 
4812 */ 4813 ptr = (uint16_t *)&ip6h->ip6_dst; 4814 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4815 addrsum += ptr[i]; 4816 } 4817 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4818 if ((int)cksm < 0) 4819 cksm--; 4820 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4821 4822 return (cksm); 4823 } 4824 4825 void 4826 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4827 { 4828 kstat_t *ksp; 4829 4830 ip6_stat_t template = { 4831 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4832 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4833 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4834 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4835 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4836 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4837 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4838 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4839 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4840 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4841 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4842 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4843 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4844 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4845 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4846 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4847 }; 4848 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4849 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4850 KSTAT_FLAG_VIRTUAL, stackid); 4851 4852 if (ksp == NULL) 4853 return (NULL); 4854 4855 bcopy(&template, ip6_statisticsp, sizeof (template)); 4856 ksp->ks_data = (void *)ip6_statisticsp; 4857 ksp->ks_private = (void *)(uintptr_t)stackid; 4858 4859 kstat_install(ksp); 4860 return (ksp); 4861 } 4862 4863 void 4864 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4865 { 4866 if (ksp != NULL) { 4867 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4868 kstat_delete_netstack(ksp, stackid); 4869 } 4870 } 4871 4872 /* 4873 * The following two functions set and get 
the value for the 4874 * IPV6_SRC_PREFERENCES socket option. 4875 */ 4876 int 4877 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4878 { 4879 /* 4880 * We only support preferences that are covered by 4881 * IPV6_PREFER_SRC_MASK. 4882 */ 4883 if (prefs & ~IPV6_PREFER_SRC_MASK) 4884 return (EINVAL); 4885 4886 /* 4887 * Look for conflicting preferences or default preferences. If 4888 * both bits of a related pair are clear, the application wants the 4889 * system's default value for that pair. Both bits in a pair can't 4890 * be set. 4891 */ 4892 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4893 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4894 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4895 IPV6_PREFER_SRC_MIPMASK) { 4896 return (EINVAL); 4897 } 4898 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4899 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4900 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4901 IPV6_PREFER_SRC_TMPMASK) { 4902 return (EINVAL); 4903 } 4904 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4905 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4906 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4907 IPV6_PREFER_SRC_CGAMASK) { 4908 return (EINVAL); 4909 } 4910 4911 ixa->ixa_src_preferences = prefs; 4912 return (0); 4913 } 4914 4915 size_t 4916 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4917 { 4918 *val = ixa->ixa_src_preferences; 4919 return (sizeof (ixa->ixa_src_preferences)); 4920 } 4921 4922 /* 4923 * Get the size of the IP options (including the IP headers size) 4924 * without including the AH header's size. If till_ah is B_FALSE, 4925 * and if AH header is present, dest options beyond AH header will 4926 * also be included in the returned size. 
4927 */ 4928 int 4929 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4930 { 4931 ip6_t *ip6h; 4932 uint8_t nexthdr; 4933 uint8_t *whereptr; 4934 ip6_hbh_t *hbhhdr; 4935 ip6_dest_t *dsthdr; 4936 ip6_rthdr_t *rthdr; 4937 int ehdrlen; 4938 int size; 4939 ah_t *ah; 4940 4941 ip6h = (ip6_t *)mp->b_rptr; 4942 size = IPV6_HDR_LEN; 4943 nexthdr = ip6h->ip6_nxt; 4944 whereptr = (uint8_t *)&ip6h[1]; 4945 for (;;) { 4946 /* Assume IP has already stripped it */ 4947 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4948 switch (nexthdr) { 4949 case IPPROTO_HOPOPTS: 4950 hbhhdr = (ip6_hbh_t *)whereptr; 4951 nexthdr = hbhhdr->ip6h_nxt; 4952 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4953 break; 4954 case IPPROTO_DSTOPTS: 4955 dsthdr = (ip6_dest_t *)whereptr; 4956 nexthdr = dsthdr->ip6d_nxt; 4957 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4958 break; 4959 case IPPROTO_ROUTING: 4960 rthdr = (ip6_rthdr_t *)whereptr; 4961 nexthdr = rthdr->ip6r_nxt; 4962 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4963 break; 4964 default : 4965 if (till_ah) { 4966 ASSERT(nexthdr == IPPROTO_AH); 4967 return (size); 4968 } 4969 /* 4970 * If we don't have a AH header to traverse, 4971 * return now. This happens normally for 4972 * outbound datagrams where we have not inserted 4973 * the AH header. 4974 */ 4975 if (nexthdr != IPPROTO_AH) { 4976 return (size); 4977 } 4978 4979 /* 4980 * We don't include the AH header's size 4981 * to be symmetrical with other cases where 4982 * we either don't have a AH header (outbound) 4983 * or peek into the AH header yet (inbound and 4984 * not pulled up yet). 4985 */ 4986 ah = (ah_t *)whereptr; 4987 nexthdr = ah->ah_nexthdr; 4988 ehdrlen = (ah->ah_length << 2) + 8; 4989 4990 if (nexthdr == IPPROTO_DSTOPTS) { 4991 if (whereptr + ehdrlen >= mp->b_wptr) { 4992 /* 4993 * The destination options header 4994 * is not part of the first mblk. 
4995 */ 4996 whereptr = mp->b_cont->b_rptr; 4997 } else { 4998 whereptr += ehdrlen; 4999 } 5000 5001 dsthdr = (ip6_dest_t *)whereptr; 5002 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 5003 size += ehdrlen; 5004 } 5005 return (size); 5006 } 5007 whereptr += ehdrlen; 5008 size += ehdrlen; 5009 } 5010 } 5011 5012 /* 5013 * Utility routine that checks if `v6srcp' is a valid address on underlying 5014 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 5015 * associated with `v6srcp' on success. NOTE: if this is not called from 5016 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 5017 * group during or after this lookup. 5018 */ 5019 boolean_t 5020 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 5021 { 5022 ipif_t *ipif; 5023 5024 5025 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 5026 if (ipif != NULL) { 5027 if (ipifp != NULL) 5028 *ipifp = ipif; 5029 else 5030 ipif_refrele(ipif); 5031 return (B_TRUE); 5032 } 5033 5034 if (ip_debug > 2) { 5035 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 5036 "src %s\n", AF_INET6, v6srcp); 5037 } 5038 return (B_FALSE); 5039 } 5040