1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 1990 Mentat Inc. 24 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. 25 * Copyright 2021 Joyent, Inc. 
26 */ 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/dlpi.h> 31 #include <sys/stropts.h> 32 #include <sys/sysmacros.h> 33 #include <sys/strsun.h> 34 #include <sys/strlog.h> 35 #include <sys/strsubr.h> 36 #define _SUN_TPI_VERSION 2 37 #include <sys/tihdr.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/cmn_err.h> 41 #include <sys/debug.h> 42 #include <sys/sdt.h> 43 #include <sys/kobj.h> 44 #include <sys/zone.h> 45 #include <sys/neti.h> 46 #include <sys/hook.h> 47 48 #include <sys/kmem.h> 49 #include <sys/systm.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <sys/vtrace.h> 53 #include <sys/isa_defs.h> 54 #include <sys/atomic.h> 55 #include <sys/policy.h> 56 #include <sys/mac.h> 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_dl.h> 61 #include <sys/sockio.h> 62 #include <netinet/in.h> 63 #include <netinet/ip6.h> 64 #include <netinet/icmp6.h> 65 #include <netinet/sctp.h> 66 67 #include <inet/common.h> 68 #include <inet/mi.h> 69 #include <inet/optcom.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/arp.h> 73 74 #include <inet/ip.h> 75 #include <inet/ip_impl.h> 76 #include <inet/ip6.h> 77 #include <inet/ip6_asp.h> 78 #include <inet/tcp.h> 79 #include <inet/tcp_impl.h> 80 #include <inet/udp_impl.h> 81 #include <inet/ipp_common.h> 82 83 #include <inet/ip_multi.h> 84 #include <inet/ip_if.h> 85 #include <inet/ip_ire.h> 86 #include <inet/ip_rts.h> 87 #include <inet/ip_ndp.h> 88 #include <net/pfkeyv2.h> 89 #include <inet/sadb.h> 90 #include <inet/ipsec_impl.h> 91 #include <inet/iptun/iptun_impl.h> 92 #include <inet/sctp_ip.h> 93 #include <sys/pattr.h> 94 #include <inet/ipclassifier.h> 95 #include <inet/ipsecah.h> 96 #include <inet/rawip_impl.h> 97 #include <inet/rts_impl.h> 98 #include <sys/squeue_impl.h> 99 #include <sys/squeue.h> 100 101 #include <sys/tsol/label.h> 102 #include <sys/tsol/tnet.h> 103 104 /* Temporary; for CR 6451644 
work-around */
#include <sys/ethernet.h>

/*
 * Naming conventions:
 *	These rules should be judiciously applied
 *	if there is a need to identify something as IPv6 versus IPv4
 * IPv6 functions will end with _v6 in the ip module.
 * IPv6 functions will end with _ipv6 in the transport modules.
 * IPv6 macros:
 *	Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 *	Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 *	And then there are ..V4_PART_OF_V6.
 *	The intent is that macros in the ip module end with _V6.
 * IPv6 global variables will start with ipv6_
 * IPv6 structures will start with ipv6
 * IPv6 defined constants should start with IPV6_
 *	(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 */

/*
 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 * from IANA. This mechanism will remain in effect until an official
 * number is obtained.
129 */ 130 uchar_t ip6opt_ls; 131 132 const in6_addr_t ipv6_all_ones = 133 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 134 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 135 136 #ifdef _BIG_ENDIAN 137 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 138 #else /* _BIG_ENDIAN */ 139 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 140 #endif /* _BIG_ENDIAN */ 141 142 #ifdef _BIG_ENDIAN 143 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 144 #else /* _BIG_ENDIAN */ 145 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 146 #endif /* _BIG_ENDIAN */ 147 148 #ifdef _BIG_ENDIAN 149 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 150 #else /* _BIG_ENDIAN */ 151 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 152 #endif /* _BIG_ENDIAN */ 153 154 #ifdef _BIG_ENDIAN 155 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 156 #else /* _BIG_ENDIAN */ 157 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 158 #endif /* _BIG_ENDIAN */ 159 160 #ifdef _BIG_ENDIAN 161 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 162 #else /* _BIG_ENDIAN */ 163 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 164 #endif /* _BIG_ENDIAN */ 165 166 #ifdef _BIG_ENDIAN 167 const in6_addr_t ipv6_solicited_node_mcast = 168 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 169 #else /* _BIG_ENDIAN */ 170 const in6_addr_t ipv6_solicited_node_mcast = 171 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 172 #endif /* _BIG_ENDIAN */ 173 174 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *); 175 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *); 176 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *, 177 ip_recv_attr_t *); 178 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *, 179 ip_recv_attr_t *); 180 static void 
icmp_send_redirect_v6(mblk_t *, in6_addr_t *, 181 in6_addr_t *, ip_recv_attr_t *); 182 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *, 183 ip_recv_attr_t *); 184 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 185 186 /* 187 * icmp_inbound_v6 deals with ICMP messages that are handled by IP. 188 * If the ICMP message is consumed by IP, i.e., it should not be delivered 189 * to any IPPROTO_ICMP raw sockets, then it returns NULL. 190 * Likewise, if the ICMP error is misformed (too short, etc), then it 191 * returns NULL. The caller uses this to determine whether or not to send 192 * to raw sockets. 193 * 194 * All error messages are passed to the matching transport stream. 195 * 196 * See comment for icmp_inbound_v4() on how IPsec is handled. 197 */ 198 mblk_t * 199 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira) 200 { 201 icmp6_t *icmp6; 202 ip6_t *ip6h; /* Outer header */ 203 int ip_hdr_length; /* Outer header length */ 204 boolean_t interested; 205 ill_t *ill = ira->ira_ill; 206 ip_stack_t *ipst = ill->ill_ipst; 207 mblk_t *mp_ret = NULL; 208 209 ip6h = (ip6_t *)mp->b_rptr; 210 211 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 212 213 /* Check for Martian packets */ 214 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 215 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 216 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill); 217 freemsg(mp); 218 return (NULL); 219 } 220 221 /* Make sure ira_l2src is set for ndp_input */ 222 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 223 ip_setl2src(mp, ira, ira->ira_rill); 224 225 ip_hdr_length = ira->ira_ip_hdr_length; 226 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 227 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 228 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 229 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 230 freemsg(mp); 231 return (NULL); 232 } 233 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 234 if (ip6h == NULL) { 235 
BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 236 freemsg(mp); 237 return (NULL); 238 } 239 } 240 241 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 242 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6); 243 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 244 icmp6->icmp6_code)); 245 246 /* 247 * We will set "interested" to "true" if we should pass a copy to 248 * the transport i.e., if it is an error message. 249 */ 250 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 251 252 switch (icmp6->icmp6_type) { 253 case ICMP6_DST_UNREACH: 254 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 255 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 256 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 257 break; 258 259 case ICMP6_TIME_EXCEEDED: 260 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 261 break; 262 263 case ICMP6_PARAM_PROB: 264 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 265 break; 266 267 case ICMP6_PACKET_TOO_BIG: 268 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs); 269 break; 270 271 case ICMP6_ECHO_REQUEST: 272 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 273 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 274 !ipst->ips_ipv6_resp_echo_mcast) 275 break; 276 277 /* 278 * We must have exclusive use of the mblk to convert it to 279 * a response. 280 * If not, we copy it. 
281 */ 282 if (mp->b_datap->db_ref > 1) { 283 mblk_t *mp1; 284 285 mp1 = copymsg(mp); 286 if (mp1 == NULL) { 287 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 288 ip_drop_input("ipIfStatsInDiscards - copymsg", 289 mp, ill); 290 freemsg(mp); 291 return (NULL); 292 } 293 freemsg(mp); 294 mp = mp1; 295 ip6h = (ip6_t *)mp->b_rptr; 296 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 297 } 298 299 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 300 icmp_send_reply_v6(mp, ip6h, icmp6, ira); 301 return (NULL); 302 303 case ICMP6_ECHO_REPLY: 304 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 305 break; 306 307 case ND_ROUTER_SOLICIT: 308 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 309 break; 310 311 case ND_ROUTER_ADVERT: 312 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 313 break; 314 315 case ND_NEIGHBOR_SOLICIT: 316 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 317 ndp_input(mp, ira); 318 return (NULL); 319 320 case ND_NEIGHBOR_ADVERT: 321 BUMP_MIB(ill->ill_icmp6_mib, 322 ipv6IfIcmpInNeighborAdvertisements); 323 ndp_input(mp, ira); 324 return (NULL); 325 326 case ND_REDIRECT: 327 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 328 329 if (ipst->ips_ipv6_ignore_redirect) 330 break; 331 332 /* We now allow a RAW socket to receive this. */ 333 interested = B_TRUE; 334 break; 335 336 /* 337 * The next three icmp messages will be handled by MLD. 338 * Pass all valid MLD packets up to any process(es) 339 * listening on a raw ICMP socket. 340 */ 341 case MLD_LISTENER_QUERY: 342 case MLD_LISTENER_REPORT: 343 case MLD_LISTENER_REDUCTION: 344 mp = mld_input(mp, ira); 345 return (mp); 346 default: 347 break; 348 } 349 /* 350 * See if there is an ICMP client to avoid an extra copymsg/freemsg 351 * if there isn't one. 352 */ 353 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) { 354 /* If there is an ICMP client and we want one too, copy it. 
*/ 355 356 if (!interested) { 357 /* Caller will deliver to RAW sockets */ 358 return (mp); 359 } 360 mp_ret = copymsg(mp); 361 if (mp_ret == NULL) { 362 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 363 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 364 } 365 } else if (!interested) { 366 /* Neither we nor raw sockets are interested. Drop packet now */ 367 freemsg(mp); 368 return (NULL); 369 } 370 371 /* 372 * ICMP error or redirect packet. Make sure we have enough of 373 * the header and that db_ref == 1 since we might end up modifying 374 * the packet. 375 */ 376 if (mp->b_cont != NULL) { 377 if (ip_pullup(mp, -1, ira) == NULL) { 378 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 379 ip_drop_input("ipIfStatsInDiscards - ip_pullup", 380 mp, ill); 381 freemsg(mp); 382 return (mp_ret); 383 } 384 } 385 386 if (mp->b_datap->db_ref > 1) { 387 mblk_t *mp1; 388 389 mp1 = copymsg(mp); 390 if (mp1 == NULL) { 391 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 392 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 393 freemsg(mp); 394 return (mp_ret); 395 } 396 freemsg(mp); 397 mp = mp1; 398 } 399 400 /* 401 * In case mp has changed, verify the message before any further 402 * processes. 403 */ 404 ip6h = (ip6_t *)mp->b_rptr; 405 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 406 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 407 freemsg(mp); 408 return (mp_ret); 409 } 410 411 switch (icmp6->icmp6_type) { 412 case ND_REDIRECT: 413 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira); 414 break; 415 case ICMP6_PACKET_TOO_BIG: 416 /* Update DCE and adjust MTU is icmp header if needed */ 417 icmp_inbound_too_big_v6(icmp6, ira); 418 /* FALLTHROUGH */ 419 default: 420 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 421 break; 422 } 423 424 return (mp_ret); 425 } 426 427 /* 428 * Send an ICMP echo reply. 429 * The caller has already updated the payload part of the packet. 
430 * We handle the ICMP checksum, IP source address selection and feed 431 * the packet into ip_output_simple. 432 */ 433 static void 434 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 435 ip_recv_attr_t *ira) 436 { 437 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 438 ill_t *ill = ira->ira_ill; 439 ip_stack_t *ipst = ill->ill_ipst; 440 ip_xmit_attr_t ixas; 441 in6_addr_t origsrc; 442 443 /* 444 * Remove any extension headers (do not reverse a source route) 445 * and clear the flow id (keep traffic class for now). 446 */ 447 if (ip_hdr_length != IPV6_HDR_LEN) { 448 int i; 449 450 for (i = 0; i < IPV6_HDR_LEN; i++) { 451 mp->b_rptr[ip_hdr_length - i - 1] = 452 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 453 } 454 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 455 ip6h = (ip6_t *)mp->b_rptr; 456 ip6h->ip6_nxt = IPPROTO_ICMPV6; 457 i = ntohs(ip6h->ip6_plen); 458 i -= (ip_hdr_length - IPV6_HDR_LEN); 459 ip6h->ip6_plen = htons(i); 460 ip_hdr_length = IPV6_HDR_LEN; 461 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 462 } 463 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 464 465 /* Reverse the source and destination addresses. */ 466 origsrc = ip6h->ip6_src; 467 ip6h->ip6_src = ip6h->ip6_dst; 468 ip6h->ip6_dst = origsrc; 469 470 /* set the hop limit */ 471 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 472 473 /* 474 * Prepare for checksum by putting icmp length in the icmp 475 * checksum field. 
The checksum is calculated in ip_output 476 */ 477 icmp6->icmp6_cksum = ip6h->ip6_plen; 478 479 bzero(&ixas, sizeof (ixas)); 480 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 481 ixas.ixa_zoneid = ira->ira_zoneid; 482 ixas.ixa_cred = kcred; 483 ixas.ixa_cpid = NOPID; 484 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 485 ixas.ixa_ifindex = 0; 486 ixas.ixa_ipst = ipst; 487 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 488 489 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 490 /* 491 * This packet should go out the same way as it 492 * came in i.e in clear, independent of the IPsec 493 * policy for transmitting packets. 494 */ 495 ixas.ixa_flags |= IXAF_NO_IPSEC; 496 } else { 497 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 498 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 499 /* Note: mp already consumed and ip_drop_packet done */ 500 return; 501 } 502 } 503 504 /* Was the destination (now source) link-local? Send out same group */ 505 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 506 ixas.ixa_flags |= IXAF_SCOPEID_SET; 507 if (IS_UNDER_IPMP(ill)) 508 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 509 else 510 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 511 } 512 513 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 514 /* 515 * Not one or our addresses (IRE_LOCALs), thus we let 516 * ip_output_simple pick the source. 517 */ 518 ip6h->ip6_src = ipv6_all_zeros; 519 ixas.ixa_flags |= IXAF_SET_SOURCE; 520 } 521 522 /* Should we send using dce_pmtu? */ 523 if (ipst->ips_ipv6_icmp_return_pmtu) 524 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 525 526 (void) ip_output_simple(mp, &ixas); 527 ixa_cleanup(&ixas); 528 529 } 530 531 /* 532 * Verify the ICMP messages for either for ICMP error or redirect packet. 533 * The caller should have fully pulled up the message. If it's a redirect 534 * packet, only basic checks on IP header will be done; otherwise, verify 535 * the packet by looking at the included ULP header. 
536 * 537 * Called before icmp_inbound_error_fanout_v6 is called. 538 */ 539 static boolean_t 540 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 541 { 542 ill_t *ill = ira->ira_ill; 543 uint16_t hdr_length; 544 uint8_t *nexthdrp; 545 uint8_t nexthdr; 546 ip_stack_t *ipst = ill->ill_ipst; 547 conn_t *connp; 548 ip6_t *ip6h; /* Inner header */ 549 550 ip6h = (ip6_t *)&icmp6[1]; 551 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 552 goto truncated; 553 554 if (icmp6->icmp6_type == ND_REDIRECT) { 555 hdr_length = sizeof (nd_redirect_t); 556 } else { 557 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 558 goto discard_pkt; 559 hdr_length = IPV6_HDR_LEN; 560 } 561 562 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 563 goto truncated; 564 565 /* 566 * Stop here for ICMP_REDIRECT. 567 */ 568 if (icmp6->icmp6_type == ND_REDIRECT) 569 return (B_TRUE); 570 571 /* 572 * ICMP errors only. 573 */ 574 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 575 goto discard_pkt; 576 nexthdr = *nexthdrp; 577 578 /* Try to pass the ICMP message to clients who need it */ 579 switch (nexthdr) { 580 case IPPROTO_UDP: 581 /* 582 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 583 * transport header. 584 */ 585 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 586 mp->b_wptr) 587 goto truncated; 588 break; 589 case IPPROTO_TCP: { 590 tcpha_t *tcpha; 591 592 /* 593 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 594 * transport header. 595 */ 596 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 597 mp->b_wptr) 598 goto truncated; 599 600 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 601 /* 602 * With IPMP we need to match across group, which we do 603 * since we have the upper ill from ira_ill. 
604 */ 605 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 606 ill->ill_phyint->phyint_ifindex, ipst); 607 if (connp == NULL) 608 goto discard_pkt; 609 610 if ((connp->conn_verifyicmp != NULL) && 611 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 612 CONN_DEC_REF(connp); 613 goto discard_pkt; 614 } 615 CONN_DEC_REF(connp); 616 break; 617 } 618 case IPPROTO_SCTP: 619 /* 620 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 621 * transport header. 622 */ 623 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 624 mp->b_wptr) 625 goto truncated; 626 break; 627 case IPPROTO_ESP: 628 case IPPROTO_AH: 629 break; 630 case IPPROTO_ENCAP: 631 case IPPROTO_IPV6: { 632 /* Look for self-encapsulated packets that caused an error */ 633 ip6_t *in_ip6h; 634 635 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 636 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 637 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 638 goto truncated; 639 break; 640 } 641 default: 642 break; 643 } 644 645 return (B_TRUE); 646 647 discard_pkt: 648 /* Bogus ICMP error. */ 649 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 650 return (B_FALSE); 651 652 truncated: 653 /* We pulled up everthing already. Must be truncated */ 654 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 655 return (B_FALSE); 656 } 657 658 /* 659 * Process received IPv6 ICMP Packet too big. 660 * The caller is responsible for validating the packet before passing it in 661 * and also to fanout the ICMP error to any matching transport conns. Assumes 662 * the message has been fully pulled up. 663 * 664 * Before getting here, the caller has called icmp_inbound_verify_v6() 665 * that should have verified with ULP to prevent undoing the changes we're 666 * going to make to DCE. For example, TCP might have verified that the packet 667 * which generated error is in the send window. 
668 * 669 * In some cases modified this MTU in the ICMP header packet; the caller 670 * should pass to the matching ULP after this returns. 671 */ 672 static void 673 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira) 674 { 675 uint32_t mtu; 676 dce_t *dce; 677 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */ 678 ip_stack_t *ipst = ill->ill_ipst; 679 int old_max_frag; 680 in6_addr_t final_dst; 681 ip6_t *ip6h; /* Inner IP header */ 682 683 /* Caller has already pulled up everything. */ 684 ip6h = (ip6_t *)&icmp6[1]; 685 final_dst = ip_get_dst_v6(ip6h, NULL, NULL); 686 687 mtu = ntohl(icmp6->icmp6_mtu); 688 if (mtu < IPV6_MIN_MTU) { 689 /* 690 * RFC 8021 suggests to ignore messages where mtu is 691 * less than the IPv6 minimum. 692 */ 693 ip1dbg(("Received mtu less than IPv6 " 694 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 695 DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu); 696 return; 697 } 698 699 /* 700 * For link local destinations matching simply on address is not 701 * sufficient. Same link local addresses for different ILL's is 702 * possible. 
703 */ 704 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) { 705 dce = dce_lookup_and_add_v6(&final_dst, 706 ill->ill_phyint->phyint_ifindex, ipst); 707 } else { 708 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst); 709 } 710 if (dce == NULL) { 711 /* Couldn't add a unique one - ENOMEM */ 712 if (ip_debug > 2) { 713 /* ip1dbg */ 714 pr_addr_dbg("icmp_inbound_too_big_v6:" 715 "no dce for dst %s\n", AF_INET6, 716 &final_dst); 717 } 718 return; 719 } 720 721 mutex_enter(&dce->dce_lock); 722 if (dce->dce_flags & DCEF_PMTU) 723 old_max_frag = dce->dce_pmtu; 724 else if (IN6_IS_ADDR_MULTICAST(&final_dst)) 725 old_max_frag = ill->ill_mc_mtu; 726 else 727 old_max_frag = ill->ill_mtu; 728 729 ip1dbg(("Received mtu from router: %d\n", mtu)); 730 DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu); 731 dce->dce_pmtu = MIN(old_max_frag, mtu); 732 icmp6->icmp6_mtu = htonl(dce->dce_pmtu); 733 734 /* We now have a PMTU for sure */ 735 dce->dce_flags |= DCEF_PMTU; 736 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 737 738 mutex_exit(&dce->dce_lock); 739 /* 740 * After dropping the lock the new value is visible to everyone. 741 * Then we bump the generation number so any cached values reinspect 742 * the dce_t. 743 */ 744 dce_increment_generation(dce); 745 dce_refrele(dce); 746 } 747 748 /* 749 * Fanout received ICMPv6 error packets to the transports. 750 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 751 * 752 * The caller must have called icmp_inbound_verify_v6. 
753 */ 754 void 755 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 756 { 757 uint16_t *up; /* Pointer to ports in ULP header */ 758 uint32_t ports; /* reversed ports for fanout */ 759 ip6_t rip6h; /* With reversed addresses */ 760 ip6_t *ip6h; /* Inner IP header */ 761 uint16_t hdr_length; /* Inner IP header length */ 762 uint8_t *nexthdrp; 763 uint8_t nexthdr; 764 tcpha_t *tcpha; 765 conn_t *connp; 766 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */ 767 ip_stack_t *ipst = ill->ill_ipst; 768 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 769 770 /* Caller has already pulled up everything. */ 771 ip6h = (ip6_t *)&icmp6[1]; 772 ASSERT(mp->b_cont == NULL); 773 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 774 775 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 776 goto drop_pkt; 777 nexthdr = *nexthdrp; 778 ira->ira_protocol = nexthdr; 779 780 /* 781 * We need a separate IP header with the source and destination 782 * addresses reversed to do fanout/classification because the ip6h in 783 * the ICMPv6 error is in the form we sent it out. 784 */ 785 rip6h.ip6_src = ip6h->ip6_dst; 786 rip6h.ip6_dst = ip6h->ip6_src; 787 rip6h.ip6_nxt = nexthdr; 788 789 /* Try to pass the ICMP message to clients who need it */ 790 switch (nexthdr) { 791 case IPPROTO_UDP: { 792 /* Attempt to find a client stream based on port. */ 793 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 794 795 /* Note that we send error to all matches. */ 796 ira->ira_flags |= IRAF_ICMP_ERROR; 797 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira); 798 ira->ira_flags &= ~IRAF_ICMP_ERROR; 799 return; 800 } 801 case IPPROTO_TCP: { 802 /* 803 * Attempt to find a client stream based on port. 804 * Note that we do a reverse lookup since the header is 805 * in the form we sent it out. 
806 */ 807 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 808 /* 809 * With IPMP we need to match across group, which we do 810 * since we have the upper ill from ira_ill. 811 */ 812 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 813 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 814 if (connp == NULL) { 815 goto drop_pkt; 816 } 817 818 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 819 (ira->ira_flags & IRAF_IPSEC_SECURE)) { 820 mp = ipsec_check_inbound_policy(mp, connp, 821 NULL, ip6h, ira); 822 if (mp == NULL) { 823 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 824 /* Note that mp is NULL */ 825 ip_drop_input("ipIfStatsInDiscards", mp, ill); 826 CONN_DEC_REF(connp); 827 return; 828 } 829 } 830 831 ira->ira_flags |= IRAF_ICMP_ERROR; 832 if (IPCL_IS_TCP(connp)) { 833 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 834 connp->conn_recvicmp, connp, ira, SQ_FILL, 835 SQTAG_TCP6_INPUT_ICMP_ERR); 836 } else { 837 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 838 ill_t *rill = ira->ira_rill; 839 840 ira->ira_ill = ira->ira_rill = NULL; 841 (connp->conn_recv)(connp, mp, NULL, ira); 842 CONN_DEC_REF(connp); 843 ira->ira_ill = ill; 844 ira->ira_rill = rill; 845 } 846 ira->ira_flags &= ~IRAF_ICMP_ERROR; 847 return; 848 849 } 850 case IPPROTO_SCTP: 851 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 852 /* Find a SCTP client stream for this packet. 
*/ 853 ((uint16_t *)&ports)[0] = up[1]; 854 ((uint16_t *)&ports)[1] = up[0]; 855 856 ira->ira_flags |= IRAF_ICMP_ERROR; 857 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira); 858 ira->ira_flags &= ~IRAF_ICMP_ERROR; 859 return; 860 861 case IPPROTO_ESP: 862 case IPPROTO_AH: 863 if (!ipsec_loaded(ipss)) { 864 ip_proto_not_sup(mp, ira); 865 return; 866 } 867 868 if (nexthdr == IPPROTO_ESP) 869 mp = ipsecesp_icmp_error(mp, ira); 870 else 871 mp = ipsecah_icmp_error(mp, ira); 872 if (mp == NULL) 873 return; 874 875 /* Just in case ipsec didn't preserve the NULL b_cont */ 876 if (mp->b_cont != NULL) { 877 if (!pullupmsg(mp, -1)) 878 goto drop_pkt; 879 } 880 881 /* 882 * If succesful, the mp has been modified to not include 883 * the ESP/AH header so we can fanout to the ULP's icmp 884 * error handler. 885 */ 886 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN) 887 goto drop_pkt; 888 889 ip6h = (ip6_t *)mp->b_rptr; 890 /* Don't call hdr_length_v6() unless you have to. */ 891 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 892 hdr_length = ip_hdr_length_v6(mp, ip6h); 893 else 894 hdr_length = IPV6_HDR_LEN; 895 896 /* Verify the modified message before any further processes. */ 897 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 898 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 899 freemsg(mp); 900 return; 901 } 902 903 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 904 return; 905 906 case IPPROTO_IPV6: { 907 /* Look for self-encapsulated packets that caused an error */ 908 ip6_t *in_ip6h; 909 910 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 911 912 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) && 913 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) { 914 /* 915 * Self-encapsulated case. As in the ipv4 case, 916 * we need to strip the 2nd IP header. Since mp 917 * is already pulled-up, we can simply bcopy 918 * the 3rd header + data over the 2nd header. 919 */ 920 uint16_t unused_len; 921 922 /* 923 * Make sure we don't do recursion more than once. 
924 */ 925 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h, 926 &unused_len, &nexthdrp) || 927 *nexthdrp == IPPROTO_IPV6) { 928 goto drop_pkt; 929 } 930 931 /* 932 * Copy the 3rd header + remaining data on top 933 * of the 2nd header. 934 */ 935 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h); 936 937 /* 938 * Subtract length of the 2nd header. 939 */ 940 mp->b_wptr -= hdr_length; 941 942 ip6h = (ip6_t *)mp->b_rptr; 943 /* Don't call hdr_length_v6() unless you have to. */ 944 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 945 hdr_length = ip_hdr_length_v6(mp, ip6h); 946 else 947 hdr_length = IPV6_HDR_LEN; 948 949 /* 950 * Verify the modified message before any further 951 * processes. 952 */ 953 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 954 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 955 freemsg(mp); 956 return; 957 } 958 959 /* 960 * Now recurse, and see what I _really_ should be 961 * doing here. 962 */ 963 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 964 return; 965 } 966 } 967 /* FALLTHROUGH */ 968 case IPPROTO_ENCAP: 969 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src, 970 &rip6h.ip6_dst, ipst)) != NULL) { 971 ira->ira_flags |= IRAF_ICMP_ERROR; 972 connp->conn_recvicmp(connp, mp, NULL, ira); 973 CONN_DEC_REF(connp); 974 ira->ira_flags &= ~IRAF_ICMP_ERROR; 975 return; 976 } 977 /* 978 * No IP tunnel is interested, fallthrough and see 979 * if a raw socket will want it. 980 */ 981 /* FALLTHROUGH */ 982 default: 983 ira->ira_flags |= IRAF_ICMP_ERROR; 984 ASSERT(ira->ira_protocol == nexthdr); 985 ip_fanout_proto_v6(mp, &rip6h, ira); 986 ira->ira_flags &= ~IRAF_ICMP_ERROR; 987 return; 988 } 989 /* NOTREACHED */ 990 drop_pkt: 991 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 992 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 993 freemsg(mp); 994 } 995 996 /* 997 * Process received IPv6 ICMP Redirect messages. 998 * Assumes the caller has verified that the headers are in the pulled up mblk. 999 * Consumes mp. 
1000 */ 1001 /* ARGSUSED */ 1002 static void 1003 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd, 1004 ip_recv_attr_t *ira) 1005 { 1006 ire_t *ire, *nire; 1007 ire_t *prev_ire = NULL; 1008 ire_t *redir_ire; 1009 in6_addr_t *src, *dst, *gateway; 1010 nd_opt_hdr_t *opt; 1011 nce_t *nce; 1012 int ncec_flags = 0; 1013 int err = 0; 1014 boolean_t redirect_to_router = B_FALSE; 1015 int len; 1016 int optlen; 1017 ill_t *ill = ira->ira_rill; 1018 ill_t *rill = ira->ira_rill; 1019 ip_stack_t *ipst = ill->ill_ipst; 1020 1021 /* 1022 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 1023 * and make it be the IPMP upper so avoid being confused by a packet 1024 * addressed to a unicast address on a different ill. 1025 */ 1026 if (IS_UNDER_IPMP(rill)) { 1027 rill = ipmp_ill_hold_ipmp_ill(rill); 1028 if (rill == NULL) { 1029 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1030 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill", 1031 mp, ill); 1032 freemsg(mp); 1033 return; 1034 } 1035 ASSERT(rill != ira->ira_rill); 1036 } 1037 1038 len = mp->b_wptr - (uchar_t *)rd; 1039 src = &ip6h->ip6_src; 1040 dst = &rd->nd_rd_dst; 1041 gateway = &rd->nd_rd_target; 1042 1043 /* Verify if it is a valid redirect */ 1044 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1045 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1046 (rd->nd_rd_code != 0) || 1047 (len < sizeof (nd_redirect_t)) || 1048 (IN6_IS_ADDR_V4MAPPED(dst)) || 1049 (IN6_IS_ADDR_MULTICAST(dst))) { 1050 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1051 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill); 1052 goto fail_redirect; 1053 } 1054 1055 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1056 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1057 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1058 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway", 1059 mp, ill); 1060 goto fail_redirect; 1061 } 1062 1063 optlen = len - sizeof (nd_redirect_t); 1064 if (optlen != 0) { 1065 if 
(!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) { 1066 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1067 ip_drop_input("ipv6IfIcmpInBadRedirects - options", 1068 mp, ill); 1069 goto fail_redirect; 1070 } 1071 } 1072 1073 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1074 redirect_to_router = B_TRUE; 1075 ncec_flags |= NCE_F_ISROUTER; 1076 } else { 1077 gateway = dst; /* Add nce for dst */ 1078 } 1079 1080 1081 /* 1082 * Verify that the IP source address of the redirect is 1083 * the same as the current first-hop router for the specified 1084 * ICMP destination address. 1085 * Also, Make sure we had a route for the dest in question and 1086 * that route was pointing to the old gateway (the source of the 1087 * redirect packet.) 1088 * We do longest match and then compare ire_gateway_addr_v6 below. 1089 */ 1090 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill, 1091 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL); 1092 1093 /* 1094 * Check that 1095 * the redirect was not from ourselves 1096 * old gateway is still directly reachable 1097 */ 1098 if (prev_ire == NULL || 1099 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) || 1100 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 1101 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) { 1102 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1103 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill); 1104 goto fail_redirect; 1105 } 1106 1107 ASSERT(prev_ire->ire_ill != NULL); 1108 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD) 1109 ncec_flags |= NCE_F_NONUD; 1110 1111 opt = (nd_opt_hdr_t *)&rd[1]; 1112 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1113 if (opt != NULL) { 1114 err = nce_lookup_then_add_v6(rill, 1115 (uchar_t *)&opt[1], /* Link layer address */ 1116 rill->ill_phys_addr_length, 1117 gateway, ncec_flags, ND_STALE, &nce); 1118 switch (err) { 1119 case 0: 1120 nce_refrele(nce); 1121 break; 1122 case EEXIST: 1123 /* 1124 * Check to see if link layer address has 
changed and 1125 * process the ncec_state accordingly. 1126 */ 1127 nce_process(nce->nce_common, 1128 (uchar_t *)&opt[1], 0, B_FALSE); 1129 nce_refrele(nce); 1130 break; 1131 default: 1132 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1133 err)); 1134 goto fail_redirect; 1135 } 1136 } 1137 if (redirect_to_router) { 1138 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1139 1140 /* 1141 * Create a Route Association. This will allow us to remember 1142 * a router told us to use the particular gateway. 1143 */ 1144 ire = ire_create_v6( 1145 dst, 1146 &ipv6_all_ones, /* mask */ 1147 gateway, /* gateway addr */ 1148 IRE_HOST, 1149 prev_ire->ire_ill, 1150 ALL_ZONES, 1151 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1152 NULL, 1153 ipst); 1154 } else { 1155 ipif_t *ipif; 1156 in6_addr_t gw; 1157 1158 /* 1159 * Just create an on link entry, i.e. interface route. 1160 * The gateway field is our link-local on the ill. 1161 */ 1162 mutex_enter(&rill->ill_lock); 1163 for (ipif = rill->ill_ipif; ipif != NULL; 1164 ipif = ipif->ipif_next) { 1165 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1166 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr)) 1167 break; 1168 } 1169 if (ipif == NULL) { 1170 /* We have no link-local address! 
*/ 1171 mutex_exit(&rill->ill_lock); 1172 goto fail_redirect; 1173 } 1174 gw = ipif->ipif_v6lcl_addr; 1175 mutex_exit(&rill->ill_lock); 1176 1177 ire = ire_create_v6( 1178 dst, /* gateway == dst */ 1179 &ipv6_all_ones, /* mask */ 1180 &gw, /* gateway addr */ 1181 rill->ill_net_type, /* IF_[NO]RESOLVER */ 1182 prev_ire->ire_ill, 1183 ALL_ZONES, 1184 (RTF_DYNAMIC | RTF_HOST), 1185 NULL, 1186 ipst); 1187 } 1188 1189 if (ire == NULL) 1190 goto fail_redirect; 1191 1192 nire = ire_add(ire); 1193 /* Check if it was a duplicate entry */ 1194 if (nire != NULL && nire != ire) { 1195 ASSERT(nire->ire_identical_ref > 1); 1196 ire_delete(nire); 1197 ire_refrele(nire); 1198 nire = NULL; 1199 } 1200 ire = nire; 1201 if (ire != NULL) { 1202 ire_refrele(ire); /* Held in ire_add */ 1203 1204 /* tell routing sockets that we received a redirect */ 1205 ip_rts_change_v6(RTM_REDIRECT, 1206 &rd->nd_rd_dst, 1207 &rd->nd_rd_target, 1208 &ipv6_all_ones, 0, src, 1209 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1210 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1211 1212 /* 1213 * Delete any existing IRE_HOST type ires for this destination. 1214 * This together with the added IRE has the effect of 1215 * modifying an existing redirect. 1216 */ 1217 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1218 prev_ire->ire_ill, ALL_ZONES, NULL, 1219 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst, 1220 NULL); 1221 1222 if (redir_ire != NULL) { 1223 if (redir_ire->ire_flags & RTF_DYNAMIC) 1224 ire_delete(redir_ire); 1225 ire_refrele(redir_ire); 1226 } 1227 } 1228 1229 ire_refrele(prev_ire); 1230 prev_ire = NULL; 1231 1232 fail_redirect: 1233 if (prev_ire != NULL) 1234 ire_refrele(prev_ire); 1235 freemsg(mp); 1236 if (rill != ira->ira_rill) 1237 ill_refrele(rill); 1238 } 1239 1240 /* 1241 * Build and ship an IPv6 ICMP message using the packet data in mp, 1242 * and the ICMP header pointed to by "stuff". (May be called as 1243 * writer.) 
1244 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1245 * verify that an icmp error packet can be sent. 1246 * 1247 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1248 * source address (see above function). 1249 */ 1250 static void 1251 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1252 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1253 { 1254 ip6_t *ip6h; 1255 in6_addr_t v6dst; 1256 size_t len_needed; 1257 size_t msg_len; 1258 mblk_t *mp1; 1259 icmp6_t *icmp6; 1260 in6_addr_t v6src; 1261 ill_t *ill = ira->ira_ill; 1262 ip_stack_t *ipst = ill->ill_ipst; 1263 ip_xmit_attr_t ixas; 1264 1265 ip6h = (ip6_t *)mp->b_rptr; 1266 1267 bzero(&ixas, sizeof (ixas)); 1268 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1269 ixas.ixa_zoneid = ira->ira_zoneid; 1270 ixas.ixa_ifindex = 0; 1271 ixas.ixa_ipst = ipst; 1272 ixas.ixa_cred = kcred; 1273 ixas.ixa_cpid = NOPID; 1274 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1275 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1276 1277 /* 1278 * If the source of the original packet was link-local, then 1279 * make sure we send on the same ill (group) as we received it on. 1280 */ 1281 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1282 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1283 if (IS_UNDER_IPMP(ill)) 1284 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1285 else 1286 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1287 } 1288 1289 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1290 /* 1291 * Apply IPsec based on how IPsec was applied to 1292 * the packet that had the error. 1293 * 1294 * If it was an outbound packet that caused the ICMP 1295 * error, then the caller will have setup the IRA 1296 * appropriately. 1297 */ 1298 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1299 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1300 /* Note: mp already consumed and ip_drop_packet done */ 1301 return; 1302 } 1303 } else { 1304 /* 1305 * This is in clear. 
The icmp message we are building 1306 * here should go out in clear, independent of our policy. 1307 */ 1308 ixas.ixa_flags |= IXAF_NO_IPSEC; 1309 } 1310 1311 /* 1312 * If the caller specified the source we use that. 1313 * Otherwise, if the packet was for one of our unicast addresses, make 1314 * sure we respond with that as the source. Otherwise 1315 * have ip_output_simple pick the source address. 1316 */ 1317 if (v6src_ptr != NULL) { 1318 v6src = *v6src_ptr; 1319 } else { 1320 ire_t *ire; 1321 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1322 1323 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1324 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1325 match_flags |= MATCH_IRE_ILL; 1326 1327 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1328 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1329 match_flags, 0, ipst, NULL); 1330 if (ire != NULL) { 1331 v6src = ip6h->ip6_dst; 1332 ire_refrele(ire); 1333 } else { 1334 v6src = ipv6_all_zeros; 1335 ixas.ixa_flags |= IXAF_SET_SOURCE; 1336 } 1337 } 1338 v6dst = ip6h->ip6_src; 1339 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1340 msg_len = msgdsize(mp); 1341 if (msg_len > len_needed) { 1342 if (!adjmsg(mp, len_needed - msg_len)) { 1343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1344 freemsg(mp); 1345 return; 1346 } 1347 msg_len = len_needed; 1348 } 1349 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1350 if (mp1 == NULL) { 1351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1352 freemsg(mp); 1353 return; 1354 } 1355 mp1->b_cont = mp; 1356 mp = mp1; 1357 1358 /* 1359 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1360 * node generates be accepted in peace by all on-host destinations. 1361 * If we do NOT assume that all on-host destinations trust 1362 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1363 * (Look for IXAF_TRUSTED_ICMP). 
1364 */ 1365 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1366 1367 ip6h = (ip6_t *)mp->b_rptr; 1368 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1369 1370 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1371 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1372 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1373 ip6h->ip6_dst = v6dst; 1374 ip6h->ip6_src = v6src; 1375 msg_len += IPV6_HDR_LEN + len; 1376 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1377 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1378 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1379 } 1380 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1381 icmp6 = (icmp6_t *)&ip6h[1]; 1382 bcopy(stuff, (char *)icmp6, len); 1383 /* 1384 * Prepare for checksum by putting icmp length in the icmp 1385 * checksum field. The checksum is calculated in ip_output_wire_v6. 1386 */ 1387 icmp6->icmp6_cksum = ip6h->ip6_plen; 1388 if (icmp6->icmp6_type == ND_REDIRECT) { 1389 ip6h->ip6_hops = IPV6_MAX_HOPS; 1390 } 1391 1392 (void) ip_output_simple(mp, &ixas); 1393 ixa_cleanup(&ixas); 1394 } 1395 1396 /* 1397 * Update the output mib when ICMPv6 packets are sent. 
1398 */ 1399 void 1400 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1401 { 1402 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1403 1404 switch (icmp6->icmp6_type) { 1405 case ICMP6_DST_UNREACH: 1406 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1407 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1408 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1409 break; 1410 1411 case ICMP6_TIME_EXCEEDED: 1412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1413 break; 1414 1415 case ICMP6_PARAM_PROB: 1416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1417 break; 1418 1419 case ICMP6_PACKET_TOO_BIG: 1420 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1421 break; 1422 1423 case ICMP6_ECHO_REQUEST: 1424 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1425 break; 1426 1427 case ICMP6_ECHO_REPLY: 1428 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1429 break; 1430 1431 case ND_ROUTER_SOLICIT: 1432 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1433 break; 1434 1435 case ND_ROUTER_ADVERT: 1436 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1437 break; 1438 1439 case ND_NEIGHBOR_SOLICIT: 1440 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1441 break; 1442 1443 case ND_NEIGHBOR_ADVERT: 1444 BUMP_MIB(ill->ill_icmp6_mib, 1445 ipv6IfIcmpOutNeighborAdvertisements); 1446 break; 1447 1448 case ND_REDIRECT: 1449 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1450 break; 1451 1452 case MLD_LISTENER_QUERY: 1453 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1454 break; 1455 1456 case MLD_LISTENER_REPORT: 1457 case MLD_V2_LISTENER_REPORT: 1458 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1459 break; 1460 1461 case MLD_LISTENER_REDUCTION: 1462 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1463 break; 1464 } 1465 } 1466 1467 /* 1468 * Check if it is ok to send an ICMPv6 error packet in 1469 * response to the IP packet in mp. 
1470 * Free the message and return null if no 1471 * ICMP error packet should be sent. 1472 */ 1473 static mblk_t * 1474 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1475 { 1476 ill_t *ill = ira->ira_ill; 1477 ip_stack_t *ipst = ill->ill_ipst; 1478 boolean_t llbcast; 1479 ip6_t *ip6h; 1480 1481 if (!mp) 1482 return (NULL); 1483 1484 /* We view multicast and broadcast as the same.. */ 1485 llbcast = (ira->ira_flags & 1486 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1487 ip6h = (ip6_t *)mp->b_rptr; 1488 1489 /* Check if source address uniquely identifies the host */ 1490 1491 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1492 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1493 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1494 freemsg(mp); 1495 return (NULL); 1496 } 1497 1498 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1499 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1500 icmp6_t *icmp6; 1501 1502 if (mp->b_wptr - mp->b_rptr < len_needed) { 1503 if (!pullupmsg(mp, len_needed)) { 1504 BUMP_MIB(ill->ill_icmp6_mib, 1505 ipv6IfIcmpInErrors); 1506 freemsg(mp); 1507 return (NULL); 1508 } 1509 ip6h = (ip6_t *)mp->b_rptr; 1510 } 1511 icmp6 = (icmp6_t *)&ip6h[1]; 1512 /* Explicitly do not generate errors in response to redirects */ 1513 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1514 icmp6->icmp6_type == ND_REDIRECT) { 1515 freemsg(mp); 1516 return (NULL); 1517 } 1518 } 1519 /* 1520 * Check that the destination is not multicast and that the packet 1521 * was not sent on link layer broadcast or multicast. (Exception 1522 * is Packet too big message as per the draft - when mcast_ok is set.) 1523 */ 1524 if (!mcast_ok && 1525 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1526 freemsg(mp); 1527 return (NULL); 1528 } 1529 /* 1530 * If this is a labeled system, then check to see if we're allowed to 1531 * send a response to this particular sender. If not, then just drop. 
1532 */ 1533 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1534 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1535 freemsg(mp); 1536 return (NULL); 1537 } 1538 1539 if (icmp_err_rate_limit(ipst)) { 1540 /* 1541 * Only send ICMP error packets every so often. 1542 * This should be done on a per port/source basis, 1543 * but for now this will suffice. 1544 */ 1545 freemsg(mp); 1546 return (NULL); 1547 } 1548 return (mp); 1549 } 1550 1551 /* 1552 * Called when a packet was sent out the same link that it arrived on. 1553 * Check if it is ok to send a redirect and then send it. 1554 */ 1555 void 1556 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1557 ip_recv_attr_t *ira) 1558 { 1559 ill_t *ill = ira->ira_ill; 1560 ip_stack_t *ipst = ill->ill_ipst; 1561 in6_addr_t *v6targ; 1562 ire_t *src_ire_v6 = NULL; 1563 mblk_t *mp1; 1564 ire_t *nhop_ire = NULL; 1565 1566 /* 1567 * Don't send a redirect when forwarding a source 1568 * routed packet. 1569 */ 1570 if (ip_source_routed_v6(ip6h, mp, ipst)) 1571 return; 1572 1573 if (ire->ire_type & IRE_ONLINK) { 1574 /* Target is directly connected */ 1575 v6targ = &ip6h->ip6_dst; 1576 } else { 1577 /* Determine the most specific IRE used to send the packets */ 1578 nhop_ire = ire_nexthop(ire); 1579 if (nhop_ire == NULL) 1580 return; 1581 1582 /* 1583 * We won't send redirects to a router 1584 * that doesn't have a link local 1585 * address, but will forward. 
1586 */ 1587 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1588 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1589 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1590 ire_refrele(nhop_ire); 1591 return; 1592 } 1593 v6targ = &nhop_ire->ire_addr_v6; 1594 } 1595 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1596 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1597 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1598 1599 if (src_ire_v6 == NULL) { 1600 if (nhop_ire != NULL) 1601 ire_refrele(nhop_ire); 1602 return; 1603 } 1604 1605 /* 1606 * The source is directly connected. 1607 */ 1608 mp1 = copymsg(mp); 1609 if (mp1 != NULL) 1610 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1611 1612 if (nhop_ire != NULL) 1613 ire_refrele(nhop_ire); 1614 ire_refrele(src_ire_v6); 1615 } 1616 1617 /* 1618 * Generate an ICMPv6 redirect message. 1619 * Include target link layer address option if it exits. 1620 * Always include redirect header. 1621 */ 1622 static void 1623 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1624 ip_recv_attr_t *ira) 1625 { 1626 nd_redirect_t *rd; 1627 nd_opt_rd_hdr_t *rdh; 1628 uchar_t *buf; 1629 ncec_t *ncec = NULL; 1630 nd_opt_hdr_t *opt; 1631 int len; 1632 int ll_opt_len = 0; 1633 int max_redir_hdr_data_len; 1634 int pkt_len; 1635 in6_addr_t *srcp; 1636 ill_t *ill; 1637 boolean_t need_refrele; 1638 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1639 1640 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1641 if (mp == NULL) 1642 return; 1643 1644 if (IS_UNDER_IPMP(ira->ira_ill)) { 1645 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1646 if (ill == NULL) { 1647 ill = ira->ira_ill; 1648 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1649 ip_drop_output("no IPMP ill for sending redirect", 1650 mp, ill); 1651 freemsg(mp); 1652 return; 1653 } 1654 need_refrele = B_TRUE; 1655 } else { 1656 ill = ira->ira_ill; 1657 need_refrele = B_FALSE; 1658 } 1659 1660 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1661 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1662 ncec->ncec_lladdr != NULL) { 1663 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1664 ill->ill_phys_addr_length + 7)/8 * 8; 1665 } 1666 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1667 ASSERT(len % 4 == 0); 1668 buf = kmem_alloc(len, KM_NOSLEEP); 1669 if (buf == NULL) { 1670 if (ncec != NULL) 1671 ncec_refrele(ncec); 1672 if (need_refrele) 1673 ill_refrele(ill); 1674 freemsg(mp); 1675 return; 1676 } 1677 1678 rd = (nd_redirect_t *)buf; 1679 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1680 rd->nd_rd_code = 0; 1681 rd->nd_rd_reserved = 0; 1682 rd->nd_rd_target = *targetp; 1683 rd->nd_rd_dst = *dest; 1684 1685 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1686 if (ncec != NULL && ll_opt_len != 0) { 1687 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1688 opt->nd_opt_len = ll_opt_len/8; 1689 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1690 ill->ill_phys_addr_length); 1691 } 1692 if (ncec != NULL) 1693 ncec_refrele(ncec); 1694 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1695 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1696 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1697 max_redir_hdr_data_len = 1698 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1699 pkt_len = msgdsize(mp); 1700 /* Make sure mp is 8 byte aligned */ 1701 if (pkt_len > max_redir_hdr_data_len) { 1702 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1703 sizeof (nd_opt_rd_hdr_t))/8; 1704 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1705 } else { 1706 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1707 (void) adjmsg(mp, -(pkt_len % 8)); 1708 } 1709 rdh->nd_opt_rh_reserved1 = 0; 1710 rdh->nd_opt_rh_reserved2 = 0; 1711 /* ipif_v6lcl_addr contains the link-local source address */ 1712 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1713 1714 /* Redirects sent by router, and router is global zone */ 1715 ASSERT(ira->ira_zoneid == ALL_ZONES); 1716 
ira->ira_zoneid = GLOBAL_ZONEID; 1717 icmp_pkt_v6(mp, buf, len, srcp, ira); 1718 kmem_free(buf, len); 1719 if (need_refrele) 1720 ill_refrele(ill); 1721 } 1722 1723 1724 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1725 void 1726 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1727 ip_recv_attr_t *ira) 1728 { 1729 icmp6_t icmp6; 1730 1731 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1732 if (mp == NULL) 1733 return; 1734 1735 bzero(&icmp6, sizeof (icmp6_t)); 1736 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1737 icmp6.icmp6_code = code; 1738 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1739 } 1740 1741 /* 1742 * Generate an ICMP unreachable message. 1743 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1744 * constructed by the caller. 1745 */ 1746 void 1747 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1748 ip_recv_attr_t *ira) 1749 { 1750 icmp6_t icmp6; 1751 1752 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1753 if (mp == NULL) 1754 return; 1755 1756 bzero(&icmp6, sizeof (icmp6_t)); 1757 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1758 icmp6.icmp6_code = code; 1759 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1760 } 1761 1762 /* 1763 * Generate an ICMP pkt too big message. 1764 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1765 * constructed by the caller. 1766 */ 1767 void 1768 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1769 ip_recv_attr_t *ira) 1770 { 1771 icmp6_t icmp6; 1772 1773 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1774 if (mp == NULL) 1775 return; 1776 1777 bzero(&icmp6, sizeof (icmp6_t)); 1778 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1779 icmp6.icmp6_code = 0; 1780 icmp6.icmp6_mtu = htonl(mtu); 1781 1782 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1783 } 1784 1785 /* 1786 * Generate an ICMP parameter problem message. (May be called as writer.) 
1787 * 'offset' is the offset from the beginning of the packet in error. 1788 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1789 * constructed by the caller. 1790 */ 1791 static void 1792 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1793 boolean_t mcast_ok, ip_recv_attr_t *ira) 1794 { 1795 icmp6_t icmp6; 1796 1797 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1798 if (mp == NULL) 1799 return; 1800 1801 bzero((char *)&icmp6, sizeof (icmp6_t)); 1802 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1803 icmp6.icmp6_code = code; 1804 icmp6.icmp6_pptr = htonl(offset); 1805 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1806 } 1807 1808 void 1809 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1810 ip_recv_attr_t *ira) 1811 { 1812 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1813 uint16_t hdr_length; 1814 uint8_t *nexthdrp; 1815 uint32_t offset; 1816 ill_t *ill = ira->ira_ill; 1817 1818 /* Determine the offset of the bad nexthdr value */ 1819 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1820 /* Malformed packet */ 1821 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1822 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1823 freemsg(mp); 1824 return; 1825 } 1826 1827 offset = nexthdrp - mp->b_rptr; 1828 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1829 mcast_ok, ira); 1830 } 1831 1832 /* 1833 * Verify whether or not the IP address is a valid local address. 1834 * Could be a unicast, including one for a down interface. 1835 * If allow_mcbc then a multicast or broadcast address is also 1836 * acceptable. 1837 * 1838 * In the case of a multicast address, however, the 1839 * upper protocol is expected to reset the src address 1840 * to zero when we return IPVL_MCAST so that 1841 * no packets are emitted with multicast address as 1842 * source address. 
1843 * The addresses valid for bind are: 1844 * (1) - in6addr_any 1845 * (2) - IP address of an UP interface 1846 * (3) - IP address of a DOWN interface 1847 * (4) - a multicast address. In this case 1848 * the conn will only receive packets destined to 1849 * the specified multicast address. Note: the 1850 * application still has to issue an 1851 * IPV6_JOIN_GROUP socket option. 1852 * 1853 * In all the above cases, the bound address must be valid in the current zone. 1854 * When the address is loopback or multicast, there might be many matching IREs 1855 * so bind has to look up based on the zone. 1856 */ 1857 ip_laddr_t 1858 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1859 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1860 { 1861 ire_t *src_ire; 1862 uint_t match_flags; 1863 ill_t *ill = NULL; 1864 1865 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1866 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1867 1868 match_flags = MATCH_IRE_ZONEONLY; 1869 if (scopeid != 0) { 1870 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1871 if (ill == NULL) 1872 return (IPVL_BAD); 1873 match_flags |= MATCH_IRE_ILL; 1874 } 1875 1876 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1877 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1878 if (ill != NULL) 1879 ill_refrele(ill); 1880 1881 /* 1882 * If an address other than in6addr_any is requested, 1883 * we verify that it is a valid address for bind 1884 * Note: Following code is in if-else-if form for 1885 * readability compared to a condition check. 1886 */ 1887 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1888 /* 1889 * (2) Bind to address of local UP interface 1890 */ 1891 ire_refrele(src_ire); 1892 return (IPVL_UNICAST_UP); 1893 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1894 /* (4) bind to multicast address. 
*/ 1895 if (src_ire != NULL) 1896 ire_refrele(src_ire); 1897 1898 /* 1899 * Note: caller should take IPV6_MULTICAST_IF 1900 * into account when selecting a real source address. 1901 */ 1902 if (allow_mcbc) 1903 return (IPVL_MCAST); 1904 else 1905 return (IPVL_BAD); 1906 } else { 1907 ipif_t *ipif; 1908 1909 /* 1910 * (3) Bind to address of local DOWN interface? 1911 * (ipif_lookup_addr() looks up all interfaces 1912 * but we do not get here for UP interfaces 1913 * - case (2) above) 1914 */ 1915 if (src_ire != NULL) 1916 ire_refrele(src_ire); 1917 1918 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1919 if (ipif == NULL) 1920 return (IPVL_BAD); 1921 1922 /* Not a useful source? */ 1923 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1924 ipif_refrele(ipif); 1925 return (IPVL_BAD); 1926 } 1927 ipif_refrele(ipif); 1928 return (IPVL_UNICAST_DOWN); 1929 } 1930 } 1931 1932 /* 1933 * Verify that both the source and destination addresses are valid. If 1934 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1935 * i.e. have no route to it. Protocols like TCP want to verify destination 1936 * reachability, while tunnels do not. 1937 * 1938 * Determine the route, the interface, and (optionally) the source address 1939 * to use to reach a given destination. 1940 * Note that we allow connect to broadcast and multicast addresses when 1941 * IPDF_ALLOW_MCBC is set. 1942 * first_hop and dst_addr are normally the same, but if source routing 1943 * they will differ; in that case the first_hop is what we'll use for the 1944 * routing lookup but the dce and label checks will be done on dst_addr, 1945 * 1946 * If uinfo is set, then we fill in the best available information 1947 * we have for the destination. This is based on (in priority order) any 1948 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1949 * ill_mtu/ill_mc_mtu. 1950 * 1951 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1952 * always do the label check on dst_addr. 1953 * 1954 * Assumes that the caller has set ixa_scopeid for link-local communication. 1955 */ 1956 int 1957 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1958 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1959 uint32_t flags, uint_t mac_mode) 1960 { 1961 ire_t *ire; 1962 int error = 0; 1963 in6_addr_t setsrc; /* RTF_SETSRC */ 1964 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1965 ip_stack_t *ipst = ixa->ixa_ipst; 1966 dce_t *dce; 1967 uint_t pmtu; 1968 uint_t ifindex; 1969 uint_t generation; 1970 nce_t *nce; 1971 ill_t *ill = NULL; 1972 boolean_t multirt = B_FALSE; 1973 1974 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1975 1976 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1977 1978 /* 1979 * We never send to zero; the ULPs map it to the loopback address. 1980 * We can't allow it since we use zero to mean unitialized in some 1981 * places. 1982 */ 1983 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1984 1985 if (is_system_labeled()) { 1986 ts_label_t *tsl = NULL; 1987 1988 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 1989 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 1990 if (error != 0) 1991 return (error); 1992 if (tsl != NULL) { 1993 /* Update the label */ 1994 ip_xmit_attr_replace_tsl(ixa, tsl); 1995 } 1996 } 1997 1998 setsrc = ipv6_all_zeros; 1999 /* 2000 * Select a route; For IPMP interfaces, we would only select 2001 * a "hidden" route (i.e., going through a specific under_ill) 2002 * if ixa_ifindex has been specified. 2003 */ 2004 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation, 2005 &setsrc, &error, &multirt); 2006 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2007 if (error != 0) 2008 goto bad_addr; 2009 2010 /* 2011 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2012 * If IPDF_VERIFY_DST is set, the destination must be reachable. 
2013 * Otherwise the destination needn't be reachable. 2014 * 2015 * If we match on a reject or black hole, then we've got a 2016 * local failure. May as well fail out the connect() attempt, 2017 * since it's never going to succeed. 2018 */ 2019 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2020 /* 2021 * If we're verifying destination reachability, we always want 2022 * to complain here. 2023 * 2024 * If we're not verifying destination reachability but the 2025 * destination has a route, we still want to fail on the 2026 * temporary address and broadcast address tests. 2027 * 2028 * In both cases do we let the code continue so some reasonable 2029 * information is returned to the caller. That enables the 2030 * caller to use (and even cache) the IRE. conn_ip_ouput will 2031 * use the generation mismatch path to check for the unreachable 2032 * case thereby avoiding any specific check in the main path. 2033 */ 2034 ASSERT(generation == IRE_GENERATION_VERIFY); 2035 if (flags & IPDF_VERIFY_DST) { 2036 /* 2037 * Set errno but continue to set up ixa_ire to be 2038 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2039 * That allows callers to use ip_output to get an 2040 * ICMP error back. 2041 */ 2042 if (!(ire->ire_type & IRE_HOST)) 2043 error = ENETUNREACH; 2044 else 2045 error = EHOSTUNREACH; 2046 } 2047 } 2048 2049 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2050 !(flags & IPDF_ALLOW_MCBC)) { 2051 ire_refrele(ire); 2052 ire = ire_reject(ipst, B_FALSE); 2053 generation = IRE_GENERATION_VERIFY; 2054 error = ENETUNREACH; 2055 } 2056 2057 /* Cache things */ 2058 if (ixa->ixa_ire != NULL) 2059 ire_refrele_notr(ixa->ixa_ire); 2060 #ifdef DEBUG 2061 ire_refhold_notr(ire); 2062 ire_refrele(ire); 2063 #endif 2064 ixa->ixa_ire = ire; 2065 ixa->ixa_ire_generation = generation; 2066 2067 /* 2068 * Ensure that ixa_dce is always set any time that ixa_ire is set, 2069 * since some callers will send a packet to conn_ip_output() even if 2070 * there's an error. 
2071 */ 2072 ifindex = 0; 2073 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2074 /* If we are creating a DCE we'd better have an ifindex */ 2075 if (ill != NULL) 2076 ifindex = ill->ill_phyint->phyint_ifindex; 2077 else 2078 flags &= ~IPDF_UNIQUE_DCE; 2079 } 2080 2081 if (flags & IPDF_UNIQUE_DCE) { 2082 /* Fallback to the default dce if allocation fails */ 2083 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2084 if (dce != NULL) { 2085 generation = dce->dce_generation; 2086 } else { 2087 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2088 &generation); 2089 } 2090 } else { 2091 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2092 } 2093 ASSERT(dce != NULL); 2094 if (ixa->ixa_dce != NULL) 2095 dce_refrele_notr(ixa->ixa_dce); 2096 #ifdef DEBUG 2097 dce_refhold_notr(dce); 2098 dce_refrele(dce); 2099 #endif 2100 ixa->ixa_dce = dce; 2101 ixa->ixa_dce_generation = generation; 2102 2103 2104 /* 2105 * For multicast with multirt we have a flag passed back from 2106 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2107 * possible multicast address. 2108 * We also need a flag for multicast since we can't check 2109 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2110 */ 2111 if (multirt) { 2112 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2113 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2114 } else { 2115 ixa->ixa_postfragfn = ire->ire_postfragfn; 2116 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2117 } 2118 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2119 /* Get an nce to cache. */ 2120 nce = ire_to_nce(ire, 0, firsthop); 2121 if (nce == NULL) { 2122 /* Allocation failure? */ 2123 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2124 } else { 2125 if (ixa->ixa_nce != NULL) 2126 nce_refrele(ixa->ixa_nce); 2127 ixa->ixa_nce = nce; 2128 } 2129 } 2130 2131 /* 2132 * If the source address is a loopback address, the 2133 * destination had best be local or multicast. 
2134 * If we are sending to an IRE_LOCAL using a loopback source then 2135 * it had better be the same zoneid. 2136 */ 2137 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2138 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2139 ire = NULL; /* Stored in ixa_ire */ 2140 error = EADDRNOTAVAIL; 2141 goto bad_addr; 2142 } 2143 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2144 ire = NULL; /* Stored in ixa_ire */ 2145 error = EADDRNOTAVAIL; 2146 goto bad_addr; 2147 } 2148 } 2149 2150 /* 2151 * Does the caller want us to pick a source address? 2152 */ 2153 if (flags & IPDF_SELECT_SRC) { 2154 in6_addr_t src_addr; 2155 2156 /* 2157 * We use use ire_nexthop_ill to avoid the under ipmp 2158 * interface for source address selection. Note that for ipmp 2159 * probe packets, ixa_ifindex would have been specified, and 2160 * the ip_select_route() invocation would have picked an ire 2161 * will ire_ill pointing at an under interface. 2162 */ 2163 ill = ire_nexthop_ill(ire); 2164 2165 /* If unreachable we have no ill but need some source */ 2166 if (ill == NULL) { 2167 src_addr = ipv6_loopback; 2168 /* Make sure we look for a better source address */ 2169 generation = SRC_GENERATION_VERIFY; 2170 } else { 2171 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2172 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2173 &src_addr, &generation, NULL); 2174 if (error != 0) { 2175 ire = NULL; /* Stored in ixa_ire */ 2176 goto bad_addr; 2177 } 2178 } 2179 2180 /* 2181 * We allow the source address to to down. 2182 * However, we check that we don't use the loopback address 2183 * as a source when sending out on the wire. 
2184 */ 2185 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2186 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2187 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2188 ire = NULL; /* Stored in ixa_ire */ 2189 error = EADDRNOTAVAIL; 2190 goto bad_addr; 2191 } 2192 2193 *src_addrp = src_addr; 2194 ixa->ixa_src_generation = generation; 2195 } 2196 2197 /* 2198 * Make sure we don't leave an unreachable ixa_nce in place 2199 * since ip_select_route is used when we unplumb i.e., remove 2200 * references on ixa_ire, ixa_nce, and ixa_dce. 2201 */ 2202 nce = ixa->ixa_nce; 2203 if (nce != NULL && nce->nce_is_condemned) { 2204 nce_refrele(nce); 2205 ixa->ixa_nce = NULL; 2206 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2207 } 2208 2209 /* 2210 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2211 * multicast. But pmtu discovery is only enabled for connected 2212 * sockets in general. 2213 */ 2214 2215 /* 2216 * Set initial value for fragmentation limit. Either conn_ip_output 2217 * or ULP might updates it when there are routing changes. 2218 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2219 */ 2220 pmtu = ip_get_pmtu(ixa); 2221 ixa->ixa_fragsize = pmtu; 2222 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2223 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2224 ixa->ixa_pmtu = pmtu; 2225 2226 /* 2227 * Extract information useful for some transports. 2228 * First we look for DCE metrics. Then we take what we have in 2229 * the metrics in the route, where the offlink is used if we have 2230 * one. 
2231 */ 2232 if (uinfo != NULL) { 2233 bzero(uinfo, sizeof (*uinfo)); 2234 2235 if (dce->dce_flags & DCEF_UINFO) 2236 *uinfo = dce->dce_uinfo; 2237 2238 rts_merge_metrics(uinfo, &ire->ire_metrics); 2239 2240 /* Allow ire_metrics to decrease the path MTU from above */ 2241 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2242 uinfo->iulp_mtu = pmtu; 2243 2244 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2245 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2246 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2247 } 2248 2249 if (ill != NULL) 2250 ill_refrele(ill); 2251 2252 return (error); 2253 2254 bad_addr: 2255 if (ire != NULL) 2256 ire_refrele(ire); 2257 2258 if (ill != NULL) 2259 ill_refrele(ill); 2260 2261 /* 2262 * Make sure we don't leave an unreachable ixa_nce in place 2263 * since ip_select_route is used when we unplumb i.e., remove 2264 * references on ixa_ire, ixa_nce, and ixa_dce. 2265 */ 2266 nce = ixa->ixa_nce; 2267 if (nce != NULL && nce->nce_is_condemned) { 2268 nce_refrele(nce); 2269 ixa->ixa_nce = NULL; 2270 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2271 } 2272 2273 return (error); 2274 } 2275 2276 /* 2277 * Handle protocols with which IP is less intimate. There 2278 * can be more than one stream bound to a particular 2279 * protocol. When this is the case, normally each one gets a copy 2280 * of any incoming packets. 2281 * 2282 * Zones notes: 2283 * Packets will be distributed to conns in all zones. This is really only 2284 * useful for ICMPv6 as only applications in the global zone can create raw 2285 * sockets for other protocols. 
2286 */ 2287 void 2288 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2289 { 2290 mblk_t *mp1; 2291 in6_addr_t laddr = ip6h->ip6_dst; 2292 conn_t *connp, *first_connp, *next_connp; 2293 connf_t *connfp; 2294 ill_t *ill = ira->ira_ill; 2295 ip_stack_t *ipst = ill->ill_ipst; 2296 2297 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2298 mutex_enter(&connfp->connf_lock); 2299 connp = connfp->connf_head; 2300 for (connp = connfp->connf_head; connp != NULL; 2301 connp = connp->conn_next) { 2302 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2303 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2304 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2305 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2306 break; 2307 } 2308 2309 if (connp == NULL) { 2310 /* 2311 * No one bound to this port. Is 2312 * there a client that wants all 2313 * unclaimed datagrams? 2314 */ 2315 mutex_exit(&connfp->connf_lock); 2316 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2317 ICMP6_PARAMPROB_NEXTHEADER, ira); 2318 return; 2319 } 2320 2321 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2322 2323 CONN_INC_REF(connp); 2324 first_connp = connp; 2325 2326 /* 2327 * XXX: Fix the multiple protocol listeners case. We should not 2328 * be walking the conn->conn_next list here. 
2329 */ 2330 connp = connp->conn_next; 2331 for (;;) { 2332 while (connp != NULL) { 2333 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2334 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2335 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2336 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2337 ira, connp))) 2338 break; 2339 connp = connp->conn_next; 2340 } 2341 2342 if (connp == NULL) { 2343 /* No more interested clients */ 2344 connp = first_connp; 2345 break; 2346 } 2347 if (((mp1 = dupmsg(mp)) == NULL) && 2348 ((mp1 = copymsg(mp)) == NULL)) { 2349 /* Memory allocation failed */ 2350 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2351 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2352 connp = first_connp; 2353 break; 2354 } 2355 2356 CONN_INC_REF(connp); 2357 mutex_exit(&connfp->connf_lock); 2358 2359 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2360 ira); 2361 2362 mutex_enter(&connfp->connf_lock); 2363 /* Follow the next pointer before releasing the conn. */ 2364 next_connp = connp->conn_next; 2365 CONN_DEC_REF(connp); 2366 connp = next_connp; 2367 } 2368 2369 /* Last one. Send it upstream. */ 2370 mutex_exit(&connfp->connf_lock); 2371 2372 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2373 2374 CONN_DEC_REF(connp); 2375 } 2376 2377 /* 2378 * Called when it is conceptually a ULP that would sent the packet 2379 * e.g., port unreachable and nexthdr unknown. Check that the packet 2380 * would have passed the IPsec global policy before sending the error. 2381 * 2382 * Send an ICMP error after patching up the packet appropriately. 2383 * Uses ip_drop_input and bumps the appropriate MIB. 2384 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2385 */ 2386 void 2387 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2388 ip_recv_attr_t *ira) 2389 { 2390 ip6_t *ip6h; 2391 boolean_t secure; 2392 ill_t *ill = ira->ira_ill; 2393 ip_stack_t *ipst = ill->ill_ipst; 2394 netstack_t *ns = ipst->ips_netstack; 2395 ipsec_stack_t *ipss = ns->netstack_ipsec; 2396 2397 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2398 2399 /* 2400 * We are generating an icmp error for some inbound packet. 2401 * Called from all ip_fanout_(udp, tcp, proto) functions. 2402 * Before we generate an error, check with global policy 2403 * to see whether this is allowed to enter the system. As 2404 * there is no "conn", we are checking with global policy. 2405 */ 2406 ip6h = (ip6_t *)mp->b_rptr; 2407 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2408 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2409 if (mp == NULL) 2410 return; 2411 } 2412 2413 /* We never send errors for protocols that we do implement */ 2414 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2415 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2416 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2417 freemsg(mp); 2418 return; 2419 } 2420 2421 switch (icmp_type) { 2422 case ICMP6_DST_UNREACH: 2423 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2424 2425 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2426 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2427 2428 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2429 break; 2430 case ICMP6_PARAM_PROB: 2431 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2432 2433 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2434 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2435 2436 /* Let the system determine the offset for this one */ 2437 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2438 break; 2439 default: 2440 #ifdef DEBUG 2441 panic("ip_fanout_send_icmp_v6: wrong type"); 2442 /*NOTREACHED*/ 2443 #else 2444 freemsg(mp); 2445 break; 2446 #endif 2447 } 2448 } 2449 2450 /* 2451 * 
Fanout for UDP packets that are multicast or ICMP errors. 2452 * (Unicast fanout is handled in ip_input_v6.) 2453 * 2454 * If SO_REUSEADDR is set all multicast packets 2455 * will be delivered to all conns bound to the same port. 2456 * 2457 * Fanout for UDP packets. 2458 * The caller puts <fport, lport> in the ports parameter. 2459 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2460 * 2461 * If SO_REUSEADDR is set all multicast and broadcast packets 2462 * will be delivered to all conns bound to the same port. 2463 * 2464 * Zones notes: 2465 * Earlier in ip_input on a system with multiple shared-IP zones we 2466 * duplicate the multicast and broadcast packets and send them up 2467 * with each explicit zoneid that exists on that ill. 2468 * This means that here we can match the zoneid with SO_ALLZONES being special. 2469 */ 2470 void 2471 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2472 ip_recv_attr_t *ira) 2473 { 2474 in6_addr_t laddr; 2475 conn_t *connp; 2476 connf_t *connfp; 2477 in6_addr_t faddr; 2478 ill_t *ill = ira->ira_ill; 2479 ip_stack_t *ipst = ill->ill_ipst; 2480 2481 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2482 2483 laddr = ip6h->ip6_dst; 2484 faddr = ip6h->ip6_src; 2485 2486 /* Attempt to find a client stream based on destination port. 
*/ 2487 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2488 mutex_enter(&connfp->connf_lock); 2489 connp = connfp->connf_head; 2490 while (connp != NULL) { 2491 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2492 conn_wantpacket_v6(connp, ira, ip6h) && 2493 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2494 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2495 break; 2496 connp = connp->conn_next; 2497 } 2498 2499 if (connp == NULL) 2500 goto notfound; 2501 2502 CONN_INC_REF(connp); 2503 2504 if (connp->conn_reuseaddr) { 2505 conn_t *first_connp = connp; 2506 conn_t *next_connp; 2507 mblk_t *mp1; 2508 2509 connp = connp->conn_next; 2510 for (;;) { 2511 while (connp != NULL) { 2512 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2513 fport, faddr) && 2514 conn_wantpacket_v6(connp, ira, ip6h) && 2515 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2516 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2517 ira, connp))) 2518 break; 2519 connp = connp->conn_next; 2520 } 2521 if (connp == NULL) { 2522 /* No more interested clients */ 2523 connp = first_connp; 2524 break; 2525 } 2526 if (((mp1 = dupmsg(mp)) == NULL) && 2527 ((mp1 = copymsg(mp)) == NULL)) { 2528 /* Memory allocation failed */ 2529 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2530 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2531 connp = first_connp; 2532 break; 2533 } 2534 2535 CONN_INC_REF(connp); 2536 mutex_exit(&connfp->connf_lock); 2537 2538 IP6_STAT(ipst, ip6_udp_fanmb); 2539 ip_fanout_udp_conn(connp, mp1, NULL, 2540 (ip6_t *)mp1->b_rptr, ira); 2541 2542 mutex_enter(&connfp->connf_lock); 2543 /* Follow the next pointer before releasing the conn. */ 2544 next_connp = connp->conn_next; 2545 IP6_STAT(ipst, ip6_udp_fanmb); 2546 CONN_DEC_REF(connp); 2547 connp = next_connp; 2548 } 2549 } 2550 2551 /* Last one. Send it upstream. 
*/ 2552 mutex_exit(&connfp->connf_lock); 2553 2554 IP6_STAT(ipst, ip6_udp_fanmb); 2555 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2556 CONN_DEC_REF(connp); 2557 return; 2558 2559 notfound: 2560 mutex_exit(&connfp->connf_lock); 2561 /* 2562 * No one bound to this port. Is 2563 * there a client that wants all 2564 * unclaimed datagrams? 2565 */ 2566 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2567 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2568 ip_fanout_proto_v6(mp, ip6h, ira); 2569 } else { 2570 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2571 ICMP6_DST_UNREACH_NOPORT, ira); 2572 } 2573 } 2574 2575 /* 2576 * int ip_find_hdr_v6() 2577 * 2578 * This routine is used by the upper layer protocols, iptun, and IPsec: 2579 * - Set extension header pointers to appropriate locations 2580 * - Determine IPv6 header length and return it 2581 * - Return a pointer to the last nexthdr value 2582 * 2583 * The caller must initialize ipp_fields. 2584 * The upper layer protocols normally set label_separate which makes the 2585 * routine put the TX label in ipp_label_v6. If this is not set then 2586 * the hop-by-hop options including the label are placed in ipp_hopopts. 2587 * 2588 * NOTE: If multiple extension headers of the same type are present, 2589 * ip_find_hdr_v6() will set the respective extension header pointers 2590 * to the first one that it encounters in the IPv6 header. It also 2591 * skips fragment headers. This routine deals with malformed packets 2592 * of various sorts in which case the returned length is up to the 2593 * malformed part. 
2594 */ 2595 int 2596 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2597 uint8_t *nexthdrp) 2598 { 2599 uint_t length, ehdrlen; 2600 uint8_t nexthdr; 2601 uint8_t *whereptr, *endptr; 2602 ip6_dest_t *tmpdstopts; 2603 ip6_rthdr_t *tmprthdr; 2604 ip6_hbh_t *tmphopopts; 2605 ip6_frag_t *tmpfraghdr; 2606 2607 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2608 ipp->ipp_hoplimit = ip6h->ip6_hops; 2609 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2610 ipp->ipp_addr = ip6h->ip6_dst; 2611 2612 length = IPV6_HDR_LEN; 2613 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2614 endptr = mp->b_wptr; 2615 2616 nexthdr = ip6h->ip6_nxt; 2617 while (whereptr < endptr) { 2618 /* Is there enough left for len + nexthdr? */ 2619 if (whereptr + MIN_EHDR_LEN > endptr) 2620 goto done; 2621 2622 switch (nexthdr) { 2623 case IPPROTO_HOPOPTS: { 2624 /* We check for any CIPSO */ 2625 uchar_t *secopt; 2626 boolean_t hbh_needed; 2627 uchar_t *after_secopt; 2628 2629 tmphopopts = (ip6_hbh_t *)whereptr; 2630 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2631 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2632 goto done; 2633 nexthdr = tmphopopts->ip6h_nxt; 2634 2635 if (!label_separate) { 2636 secopt = NULL; 2637 after_secopt = whereptr; 2638 } else { 2639 /* 2640 * We have dropped packets with bad options in 2641 * ip6_input. No need to check return value 2642 * here. 
2643 */ 2644 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2645 &secopt, &after_secopt, &hbh_needed); 2646 } 2647 if (secopt != NULL && after_secopt - whereptr > 0) { 2648 ipp->ipp_fields |= IPPF_LABEL_V6; 2649 ipp->ipp_label_v6 = secopt; 2650 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2651 } else { 2652 ipp->ipp_label_len_v6 = 0; 2653 after_secopt = whereptr; 2654 hbh_needed = B_TRUE; 2655 } 2656 /* return only 1st hbh */ 2657 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2658 ipp->ipp_fields |= IPPF_HOPOPTS; 2659 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2660 ipp->ipp_hopoptslen = ehdrlen - 2661 ipp->ipp_label_len_v6; 2662 } 2663 break; 2664 } 2665 case IPPROTO_DSTOPTS: 2666 tmpdstopts = (ip6_dest_t *)whereptr; 2667 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2668 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2669 goto done; 2670 nexthdr = tmpdstopts->ip6d_nxt; 2671 /* 2672 * ipp_dstopts is set to the destination header after a 2673 * routing header. 2674 * Assume it is a post-rthdr destination header 2675 * and adjust when we find an rthdr. 2676 */ 2677 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2678 ipp->ipp_fields |= IPPF_DSTOPTS; 2679 ipp->ipp_dstopts = tmpdstopts; 2680 ipp->ipp_dstoptslen = ehdrlen; 2681 } 2682 break; 2683 case IPPROTO_ROUTING: 2684 tmprthdr = (ip6_rthdr_t *)whereptr; 2685 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2686 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2687 goto done; 2688 nexthdr = tmprthdr->ip6r_nxt; 2689 /* return only 1st rthdr */ 2690 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2691 ipp->ipp_fields |= IPPF_RTHDR; 2692 ipp->ipp_rthdr = tmprthdr; 2693 ipp->ipp_rthdrlen = ehdrlen; 2694 } 2695 /* 2696 * Make any destination header we've seen be a 2697 * pre-rthdr destination header. 
2698 */ 2699 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2700 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2701 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2702 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2703 ipp->ipp_dstopts = NULL; 2704 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2705 ipp->ipp_dstoptslen = 0; 2706 } 2707 break; 2708 case IPPROTO_FRAGMENT: 2709 tmpfraghdr = (ip6_frag_t *)whereptr; 2710 ehdrlen = sizeof (ip6_frag_t); 2711 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2712 goto done; 2713 nexthdr = tmpfraghdr->ip6f_nxt; 2714 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2715 ipp->ipp_fields |= IPPF_FRAGHDR; 2716 ipp->ipp_fraghdr = tmpfraghdr; 2717 ipp->ipp_fraghdrlen = ehdrlen; 2718 } 2719 break; 2720 case IPPROTO_NONE: 2721 default: 2722 goto done; 2723 } 2724 length += ehdrlen; 2725 whereptr += ehdrlen; 2726 } 2727 done: 2728 if (nexthdrp != NULL) 2729 *nexthdrp = nexthdr; 2730 return (length); 2731 } 2732 2733 /* 2734 * Return the length of the IPv6 related headers (including extension headers) 2735 * If the packet is malformed, this returns the simple IPv6 header length. 2736 */ 2737 uint16_t 2738 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2739 { 2740 uint16_t hdr_len; 2741 2742 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, NULL)) 2743 hdr_len = sizeof (*ip6h); 2744 return (hdr_len); 2745 } 2746 2747 /* 2748 * Parse and process any hop-by-hop or destination options. 2749 * 2750 * Assumes that q is an ill read queue so that ICMP errors for link-local 2751 * destinations are sent out the correct interface. 2752 * 2753 * Returns -1 if there was an error and mp has been consumed. 2754 * Returns 0 if no special action is needed. 2755 * Returns 1 if the packet contained a router alert option for this node 2756 * which is verified to be "interesting/known" for our implementation. 
2757 * 2758 * XXX Note: In future as more hbh or dest options are defined, 2759 * it may be better to have different routines for hbh and dest 2760 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2761 * may have same value in different namespaces. Or is it same namespace ?? 2762 * Current code checks for each opt_type (other than pads) if it is in 2763 * the expected nexthdr (hbh or dest) 2764 */ 2765 int 2766 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2767 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2768 { 2769 uint8_t opt_type; 2770 uint_t optused = 0; 2771 int ret = 0; 2772 const char *errtype; 2773 ill_t *ill = ira->ira_ill; 2774 ip_stack_t *ipst = ill->ill_ipst; 2775 2776 while (optlen != 0) { 2777 opt_type = *optptr; 2778 if (opt_type == IP6OPT_PAD1) { 2779 optused = 1; 2780 } else { 2781 if (optlen < 2) 2782 goto bad_opt; 2783 errtype = "malformed"; 2784 if (opt_type == ip6opt_ls) { 2785 optused = 2 + optptr[1]; 2786 if (optused > optlen) 2787 goto bad_opt; 2788 } else switch (opt_type) { 2789 case IP6OPT_PADN: 2790 /* 2791 * Note:We don't verify that (N-2) pad octets 2792 * are zero as required by spec. Adhere to 2793 * "be liberal in what you accept..." part of 2794 * implementation philosophy (RFC791,RFC1122) 2795 */ 2796 optused = 2 + optptr[1]; 2797 if (optused > optlen) 2798 goto bad_opt; 2799 break; 2800 2801 case IP6OPT_JUMBO: 2802 if (hdr_type != IPPROTO_HOPOPTS) 2803 goto opt_error; 2804 goto opt_error; /* XXX Not implemented! 
*/ 2805 2806 case IP6OPT_ROUTER_ALERT: { 2807 struct ip6_opt_router *or; 2808 2809 if (hdr_type != IPPROTO_HOPOPTS) 2810 goto opt_error; 2811 optused = 2 + optptr[1]; 2812 if (optused > optlen) 2813 goto bad_opt; 2814 or = (struct ip6_opt_router *)optptr; 2815 /* Check total length and alignment */ 2816 if (optused != sizeof (*or) || 2817 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2818 goto opt_error; 2819 /* Check value */ 2820 switch (*((uint16_t *)or->ip6or_value)) { 2821 case IP6_ALERT_MLD: 2822 case IP6_ALERT_RSVP: 2823 ret = 1; 2824 } 2825 break; 2826 } 2827 case IP6OPT_HOME_ADDRESS: { 2828 /* 2829 * Minimal support for the home address option 2830 * (which is required by all IPv6 nodes). 2831 * Implement by just swapping the home address 2832 * and source address. 2833 * XXX Note: this has IPsec implications since 2834 * AH needs to take this into account. 2835 * Also, when IPsec is used we need to ensure 2836 * that this is only processed once 2837 * in the received packet (to avoid swapping 2838 * back and forth). 2839 * NOTE:This option processing is considered 2840 * to be unsafe and prone to a denial of 2841 * service attack. 2842 * The current processing is not safe even with 2843 * IPsec secured IP packets. Since the home 2844 * address option processing requirement still 2845 * is in the IETF draft and in the process of 2846 * being redefined for its usage, it has been 2847 * decided to turn off the option by default. 2848 * If this section of code needs to be executed, 2849 * ndd variable ip6_ignore_home_address_opt 2850 * should be set to 0 at the user's own risk. 2851 */ 2852 struct ip6_opt_home_address *oh; 2853 in6_addr_t tmp; 2854 2855 if (ipst->ips_ipv6_ignore_home_address_opt) 2856 goto opt_error; 2857 2858 if (hdr_type != IPPROTO_DSTOPTS) 2859 goto opt_error; 2860 optused = 2 + optptr[1]; 2861 if (optused > optlen) 2862 goto bad_opt; 2863 2864 /* 2865 * We did this dest. opt the first time 2866 * around (i.e. before AH processing). 
2867 * If we've done AH... stop now. 2868 */ 2869 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2870 ira->ira_ipsec_ah_sa != NULL) 2871 break; 2872 2873 oh = (struct ip6_opt_home_address *)optptr; 2874 /* Check total length and alignment */ 2875 if (optused < sizeof (*oh) || 2876 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2877 goto opt_error; 2878 /* Swap ip6_src and the home address */ 2879 tmp = ip6h->ip6_src; 2880 /* XXX Note: only 8 byte alignment option */ 2881 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2882 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2883 break; 2884 } 2885 2886 case IP6OPT_TUNNEL_LIMIT: 2887 if (hdr_type != IPPROTO_DSTOPTS) { 2888 goto opt_error; 2889 } 2890 optused = 2 + optptr[1]; 2891 if (optused > optlen) { 2892 goto bad_opt; 2893 } 2894 if (optused != 3) { 2895 goto opt_error; 2896 } 2897 break; 2898 2899 default: 2900 errtype = "unknown"; 2901 /* FALLTHROUGH */ 2902 opt_error: 2903 /* Determine which zone should send error */ 2904 switch (IP6OPT_TYPE(opt_type)) { 2905 case IP6OPT_TYPE_SKIP: 2906 optused = 2 + optptr[1]; 2907 if (optused > optlen) 2908 goto bad_opt; 2909 ip1dbg(("ip_process_options_v6: %s " 2910 "opt 0x%x skipped\n", 2911 errtype, opt_type)); 2912 break; 2913 case IP6OPT_TYPE_DISCARD: 2914 ip1dbg(("ip_process_options_v6: %s " 2915 "opt 0x%x; packet dropped\n", 2916 errtype, opt_type)); 2917 BUMP_MIB(ill->ill_ip_mib, 2918 ipIfStatsInHdrErrors); 2919 ip_drop_input("ipIfStatsInHdrErrors", 2920 mp, ill); 2921 freemsg(mp); 2922 return (-1); 2923 case IP6OPT_TYPE_ICMP: 2924 BUMP_MIB(ill->ill_ip_mib, 2925 ipIfStatsInHdrErrors); 2926 ip_drop_input("ipIfStatsInHdrErrors", 2927 mp, ill); 2928 icmp_param_problem_v6(mp, 2929 ICMP6_PARAMPROB_OPTION, 2930 (uint32_t)(optptr - 2931 (uint8_t *)ip6h), 2932 B_FALSE, ira); 2933 return (-1); 2934 case IP6OPT_TYPE_FORCEICMP: 2935 BUMP_MIB(ill->ill_ip_mib, 2936 ipIfStatsInHdrErrors); 2937 ip_drop_input("ipIfStatsInHdrErrors", 2938 mp, ill); 2939 icmp_param_problem_v6(mp, 2940 ICMP6_PARAMPROB_OPTION, 
2941 (uint32_t)(optptr - 2942 (uint8_t *)ip6h), 2943 B_TRUE, ira); 2944 return (-1); 2945 default: 2946 ASSERT(0); 2947 } 2948 } 2949 } 2950 optlen -= optused; 2951 optptr += optused; 2952 } 2953 return (ret); 2954 2955 bad_opt: 2956 /* Determine which zone should send error */ 2957 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2958 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 2959 (uint32_t)(optptr - (uint8_t *)ip6h), 2960 B_FALSE, ira); 2961 return (-1); 2962 } 2963 2964 /* 2965 * Process a routing header that is not yet empty. 2966 * Because of RFC 5095, we now reject all route headers. 2967 */ 2968 void 2969 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 2970 ip_recv_attr_t *ira) 2971 { 2972 ill_t *ill = ira->ira_ill; 2973 ip_stack_t *ipst = ill->ill_ipst; 2974 2975 ASSERT(rth->ip6r_segleft != 0); 2976 2977 if (!ipst->ips_ipv6_forward_src_routed) { 2978 /* XXX Check for source routed out same interface? */ 2979 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 2980 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 2981 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 2982 freemsg(mp); 2983 return; 2984 } 2985 2986 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 2987 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 2988 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 2989 B_FALSE, ira); 2990 } 2991 2992 /* 2993 * Read side put procedure for IPv6 module. 2994 */ 2995 int 2996 ip_rput_v6(queue_t *q, mblk_t *mp) 2997 { 2998 ill_t *ill; 2999 3000 ill = (ill_t *)q->q_ptr; 3001 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { 3002 union DL_primitives *dl; 3003 3004 dl = (union DL_primitives *)mp->b_rptr; 3005 /* 3006 * Things are opening or closing - only accept DLPI 3007 * ack messages. If the stream is closing and ip_wsrv 3008 * has completed, ip_close is out of the qwait, but has 3009 * not yet completed qprocsoff. 
Don't proceed any further 3010 * because the ill has been cleaned up and things hanging 3011 * off the ill have been freed. 3012 */ 3013 if ((mp->b_datap->db_type != M_PCPROTO) || 3014 (dl->dl_primitive == DL_UNITDATA_IND)) { 3015 inet_freemsg(mp); 3016 return (0); 3017 } 3018 } 3019 if (DB_TYPE(mp) == M_DATA) { 3020 struct mac_header_info_s mhi; 3021 3022 ip_mdata_to_mhi(ill, mp, &mhi); 3023 ip_input_v6(ill, NULL, mp, &mhi); 3024 } else { 3025 ip_rput_notdata(ill, mp); 3026 } 3027 return (0); 3028 } 3029 3030 /* 3031 * Walk through the IPv6 packet in mp and see if there's an AH header 3032 * in it. See if the AH header needs to get done before other headers in 3033 * the packet. (Worker function for ipsec_early_ah_v6().) 3034 */ 3035 #define IPSEC_HDR_DONT_PROCESS 0 3036 #define IPSEC_HDR_PROCESS 1 3037 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 3038 static int 3039 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 3040 { 3041 uint_t length; 3042 uint_t ehdrlen; 3043 uint8_t *whereptr; 3044 uint8_t *endptr; 3045 uint8_t *nexthdrp; 3046 ip6_dest_t *desthdr; 3047 ip6_rthdr_t *rthdr; 3048 ip6_t *ip6h; 3049 3050 /* 3051 * For now just pullup everything. In general, the less pullups, 3052 * the better, but there's so much squirrelling through anyway, 3053 * it's just easier this way. 3054 */ 3055 if (!pullupmsg(mp, -1)) { 3056 return (IPSEC_MEMORY_ERROR); 3057 } 3058 3059 ip6h = (ip6_t *)mp->b_rptr; 3060 length = IPV6_HDR_LEN; 3061 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3062 endptr = mp->b_wptr; 3063 3064 /* 3065 * We can't just use the argument nexthdr in the place 3066 * of nexthdrp becaue we don't dereference nexthdrp 3067 * till we confirm whether it is a valid address. 3068 */ 3069 nexthdrp = &ip6h->ip6_nxt; 3070 while (whereptr < endptr) { 3071 /* Is there enough left for len + nexthdr? 
*/ 3072 if (whereptr + MIN_EHDR_LEN > endptr) 3073 return (IPSEC_MEMORY_ERROR); 3074 3075 switch (*nexthdrp) { 3076 case IPPROTO_HOPOPTS: 3077 case IPPROTO_DSTOPTS: 3078 /* Assumes the headers are identical for hbh and dst */ 3079 desthdr = (ip6_dest_t *)whereptr; 3080 ehdrlen = 8 * (desthdr->ip6d_len + 1); 3081 if ((uchar_t *)desthdr + ehdrlen > endptr) 3082 return (IPSEC_MEMORY_ERROR); 3083 /* 3084 * Return DONT_PROCESS because the destination 3085 * options header may be for each hop in a 3086 * routing-header, and we only want AH if we're 3087 * finished with routing headers. 3088 */ 3089 if (*nexthdrp == IPPROTO_DSTOPTS) 3090 return (IPSEC_HDR_DONT_PROCESS); 3091 nexthdrp = &desthdr->ip6d_nxt; 3092 break; 3093 case IPPROTO_ROUTING: 3094 rthdr = (ip6_rthdr_t *)whereptr; 3095 3096 /* 3097 * If there's more hops left on the routing header, 3098 * return now with DON'T PROCESS. 3099 */ 3100 if (rthdr->ip6r_segleft > 0) 3101 return (IPSEC_HDR_DONT_PROCESS); 3102 3103 ehdrlen = 8 * (rthdr->ip6r_len + 1); 3104 if ((uchar_t *)rthdr + ehdrlen > endptr) 3105 return (IPSEC_MEMORY_ERROR); 3106 nexthdrp = &rthdr->ip6r_nxt; 3107 break; 3108 case IPPROTO_FRAGMENT: 3109 /* Wait for reassembly */ 3110 return (IPSEC_HDR_DONT_PROCESS); 3111 case IPPROTO_AH: 3112 *nexthdr = IPPROTO_AH; 3113 return (IPSEC_HDR_PROCESS); 3114 case IPPROTO_NONE: 3115 /* No next header means we're finished */ 3116 default: 3117 return (IPSEC_HDR_DONT_PROCESS); 3118 } 3119 length += ehdrlen; 3120 whereptr += ehdrlen; 3121 } 3122 /* 3123 * Malformed/truncated packet. 3124 */ 3125 return (IPSEC_MEMORY_ERROR); 3126 } 3127 3128 /* 3129 * Path for AH if options are present. 3130 * Returns NULL if the mblk was consumed. 3131 * 3132 * Sometimes AH needs to be done before other IPv6 headers for security 3133 * reasons. This function (and its ipsec_needs_processing_v6() above) 3134 * indicates if that is so, and fans out to the appropriate IPsec protocol 3135 * for the datagram passed in. 
3136 */ 3137 mblk_t * 3138 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira) 3139 { 3140 uint8_t nexthdr; 3141 ah_t *ah; 3142 ill_t *ill = ira->ira_ill; 3143 ip_stack_t *ipst = ill->ill_ipst; 3144 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3145 3146 switch (ipsec_needs_processing_v6(mp, &nexthdr)) { 3147 case IPSEC_MEMORY_ERROR: 3148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3149 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3150 freemsg(mp); 3151 return (NULL); 3152 case IPSEC_HDR_DONT_PROCESS: 3153 return (mp); 3154 } 3155 3156 /* Default means send it to AH! */ 3157 ASSERT(nexthdr == IPPROTO_AH); 3158 3159 if (!ipsec_loaded(ipss)) { 3160 ip_proto_not_sup(mp, ira); 3161 return (NULL); 3162 } 3163 3164 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 3165 if (mp == NULL) 3166 return (NULL); 3167 ASSERT(ah != NULL); 3168 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3169 ASSERT(ira->ira_ipsec_ah_sa != NULL); 3170 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 3171 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira); 3172 3173 if (mp == NULL) { 3174 /* 3175 * Either it failed or is pending. In the former case 3176 * ipIfStatsInDiscards was increased. 3177 */ 3178 return (NULL); 3179 } 3180 3181 /* we're done with IPsec processing, send it up */ 3182 ip_input_post_ipsec(mp, ira); 3183 return (NULL); 3184 } 3185 3186 /* 3187 * Reassemble fragment. 3188 * When it returns a completed message the first mblk will only contain 3189 * the headers prior to the fragment header, with the nexthdr value updated 3190 * to be the header after the fragment header. 
3191 */ 3192 mblk_t * 3193 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h, 3194 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira) 3195 { 3196 uint32_t ident = ntohl(fraghdr->ip6f_ident); 3197 uint16_t offset; 3198 boolean_t more_frags; 3199 uint8_t nexthdr = fraghdr->ip6f_nxt; 3200 in6_addr_t *v6dst_ptr; 3201 in6_addr_t *v6src_ptr; 3202 uint_t end; 3203 uint_t hdr_length; 3204 size_t count; 3205 ipf_t *ipf; 3206 ipf_t **ipfp; 3207 ipfb_t *ipfb; 3208 mblk_t *mp1; 3209 uint8_t ecn_info = 0; 3210 size_t msg_len; 3211 mblk_t *tail_mp; 3212 mblk_t *t_mp; 3213 boolean_t pruned = B_FALSE; 3214 uint32_t sum_val; 3215 uint16_t sum_flags; 3216 ill_t *ill = ira->ira_ill; 3217 ip_stack_t *ipst = ill->ill_ipst; 3218 uint_t prev_nexthdr_offset; 3219 uint8_t prev_nexthdr; 3220 uint8_t *ptr; 3221 uint32_t packet_size; 3222 3223 /* 3224 * We utilize hardware computed checksum info only for UDP since 3225 * IP fragmentation is a normal occurence for the protocol. In 3226 * addition, checksum offload support for IP fragments carrying 3227 * UDP payload is commonly implemented across network adapters. 3228 */ 3229 ASSERT(ira->ira_rill != NULL); 3230 if (nexthdr == IPPROTO_UDP && dohwcksum && 3231 ILL_HCKSUM_CAPABLE(ira->ira_rill) && 3232 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 3233 mblk_t *mp1 = mp->b_cont; 3234 int32_t len; 3235 3236 /* Record checksum information from the packet */ 3237 sum_val = (uint32_t)DB_CKSUM16(mp); 3238 sum_flags = DB_CKSUMFLAGS(mp); 3239 3240 /* fragmented payload offset from beginning of mblk */ 3241 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 3242 3243 if ((sum_flags & HCK_PARTIALCKSUM) && 3244 (mp1 == NULL || mp1->b_cont == NULL) && 3245 offset >= DB_CKSUMSTART(mp) && 3246 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) { 3247 uint32_t adj; 3248 /* 3249 * Partial checksum has been calculated by hardware 3250 * and attached to the packet; in addition, any 3251 * prepended extraneous data is even byte aligned. 
3252 * If any such data exists, we adjust the checksum; 3253 * this would also handle any postpended data. 3254 */ 3255 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 3256 mp, mp1, len, adj); 3257 3258 /* One's complement subtract extraneous checksum */ 3259 if (adj >= sum_val) 3260 sum_val = ~(adj - sum_val) & 0xFFFF; 3261 else 3262 sum_val -= adj; 3263 } 3264 } else { 3265 sum_val = 0; 3266 sum_flags = 0; 3267 } 3268 3269 /* Clear hardware checksumming flag */ 3270 DB_CKSUMFLAGS(mp) = 0; 3271 3272 /* 3273 * Determine the offset (from the begining of the IP header) 3274 * of the nexthdr value which has IPPROTO_FRAGMENT. We use 3275 * this when removing the fragment header from the packet. 3276 * This packet consists of the IPv6 header, a potential 3277 * hop-by-hop options header, a potential pre-routing-header 3278 * destination options header, and a potential routing header. 3279 */ 3280 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 3281 prev_nexthdr = ip6h->ip6_nxt; 3282 ptr = (uint8_t *)&ip6h[1]; 3283 3284 if (prev_nexthdr == IPPROTO_HOPOPTS) { 3285 ip6_hbh_t *hbh_hdr; 3286 uint_t hdr_len; 3287 3288 hbh_hdr = (ip6_hbh_t *)ptr; 3289 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 3290 prev_nexthdr = hbh_hdr->ip6h_nxt; 3291 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 3292 - (uint8_t *)ip6h; 3293 ptr += hdr_len; 3294 } 3295 if (prev_nexthdr == IPPROTO_DSTOPTS) { 3296 ip6_dest_t *dest_hdr; 3297 uint_t hdr_len; 3298 3299 dest_hdr = (ip6_dest_t *)ptr; 3300 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 3301 prev_nexthdr = dest_hdr->ip6d_nxt; 3302 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 3303 - (uint8_t *)ip6h; 3304 ptr += hdr_len; 3305 } 3306 if (prev_nexthdr == IPPROTO_ROUTING) { 3307 ip6_rthdr_t *rthdr; 3308 uint_t hdr_len; 3309 3310 rthdr = (ip6_rthdr_t *)ptr; 3311 prev_nexthdr = rthdr->ip6r_nxt; 3312 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 3313 - (uint8_t *)ip6h; 3314 hdr_len = 8 * (rthdr->ip6r_len + 1); 3315 ptr += 
hdr_len; 3316 } 3317 if (prev_nexthdr != IPPROTO_FRAGMENT) { 3318 /* Can't handle other headers before the fragment header */ 3319 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3320 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3321 freemsg(mp); 3322 return (NULL); 3323 } 3324 3325 /* 3326 * Note: Fragment offset in header is in 8-octet units. 3327 * Clearing least significant 3 bits not only extracts 3328 * it but also gets it in units of octets. 3329 */ 3330 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 3331 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 3332 3333 /* 3334 * Is the more frags flag on and the payload length not a multiple 3335 * of eight? 3336 */ 3337 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 3338 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3339 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3340 (uint32_t)((char *)&ip6h->ip6_plen - 3341 (char *)ip6h), B_FALSE, ira); 3342 return (NULL); 3343 } 3344 3345 v6src_ptr = &ip6h->ip6_src; 3346 v6dst_ptr = &ip6h->ip6_dst; 3347 end = remlen; 3348 3349 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 3350 end += offset; 3351 3352 /* 3353 * Would fragment cause reassembled packet to have a payload length 3354 * greater than IP_MAXPACKET - the max payload size? 3355 */ 3356 if (end > IP_MAXPACKET) { 3357 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3358 ip_drop_input("Reassembled packet too large", mp, ill); 3359 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3360 (uint32_t)((char *)&fraghdr->ip6f_offlg - 3361 (char *)ip6h), B_FALSE, ira); 3362 return (NULL); 3363 } 3364 3365 /* 3366 * This packet just has one fragment. Reassembly not 3367 * needed. 3368 */ 3369 if (!more_frags && offset == 0) { 3370 goto reass_done; 3371 } 3372 3373 /* 3374 * Drop the fragmented as early as possible, if 3375 * we don't have resource(s) to re-assemble. 3376 */ 3377 if (ipst->ips_ip_reass_queue_bytes == 0) { 3378 freemsg(mp); 3379 return (NULL); 3380 } 3381 3382 /* Record the ECN field info. 
*/ 3383 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 3384 /* 3385 * If this is not the first fragment, dump the unfragmentable 3386 * portion of the packet. 3387 */ 3388 if (offset) 3389 mp->b_rptr = (uchar_t *)&fraghdr[1]; 3390 3391 /* 3392 * Fragmentation reassembly. Each ILL has a hash table for 3393 * queueing packets undergoing reassembly for all IPIFs 3394 * associated with the ILL. The hash is based on the packet 3395 * IP ident field. The ILL frag hash table was allocated 3396 * as a timer block at the time the ILL was created. Whenever 3397 * there is anything on the reassembly queue, the timer will 3398 * be running. 3399 */ 3400 /* Handle vnic loopback of fragments */ 3401 if (mp->b_datap->db_ref > 2) 3402 msg_len = 0; 3403 else 3404 msg_len = MBLKSIZE(mp); 3405 3406 tail_mp = mp; 3407 while (tail_mp->b_cont != NULL) { 3408 tail_mp = tail_mp->b_cont; 3409 if (tail_mp->b_datap->db_ref <= 2) 3410 msg_len += MBLKSIZE(tail_mp); 3411 } 3412 /* 3413 * If the reassembly list for this ILL will get too big 3414 * prune it. 3415 */ 3416 3417 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 3418 ipst->ips_ip_reass_queue_bytes) { 3419 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len, 3420 uint_t, ill->ill_frag_count, 3421 uint_t, ipst->ips_ip_reass_queue_bytes); 3422 ill_frag_prune(ill, 3423 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 3424 (ipst->ips_ip_reass_queue_bytes - msg_len)); 3425 pruned = B_TRUE; 3426 } 3427 3428 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 3429 mutex_enter(&ipfb->ipfb_lock); 3430 3431 ipfp = &ipfb->ipfb_ipf; 3432 /* Try to find an existing fragment queue for this packet. */ 3433 for (;;) { 3434 ipf = ipfp[0]; 3435 if (ipf) { 3436 /* 3437 * It has to match on ident, source address, and 3438 * dest address. 
3439 */ 3440 if (ipf->ipf_ident == ident && 3441 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 3442 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 3443 3444 /* 3445 * If we have received too many 3446 * duplicate fragments for this packet 3447 * free it. 3448 */ 3449 if (ipf->ipf_num_dups > ip_max_frag_dups) { 3450 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3451 freemsg(mp); 3452 mutex_exit(&ipfb->ipfb_lock); 3453 return (NULL); 3454 } 3455 3456 break; 3457 } 3458 ipfp = &ipf->ipf_hash_next; 3459 continue; 3460 } 3461 3462 3463 /* 3464 * If we pruned the list, do we want to store this new 3465 * fragment?. We apply an optimization here based on the 3466 * fact that most fragments will be received in order. 3467 * So if the offset of this incoming fragment is zero, 3468 * it is the first fragment of a new packet. We will 3469 * keep it. Otherwise drop the fragment, as we have 3470 * probably pruned the packet already (since the 3471 * packet cannot be found). 3472 */ 3473 3474 if (pruned && offset != 0) { 3475 mutex_exit(&ipfb->ipfb_lock); 3476 freemsg(mp); 3477 return (NULL); 3478 } 3479 3480 /* New guy. Allocate a frag message. */ 3481 mp1 = allocb(sizeof (*ipf), BPRI_MED); 3482 if (!mp1) { 3483 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3484 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3485 freemsg(mp); 3486 partial_reass_done: 3487 mutex_exit(&ipfb->ipfb_lock); 3488 return (NULL); 3489 } 3490 3491 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 3492 /* 3493 * Too many fragmented packets in this hash bucket. 3494 * Free the oldest. 3495 */ 3496 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 3497 } 3498 3499 mp1->b_cont = mp; 3500 3501 /* Initialize the fragment header. */ 3502 ipf = (ipf_t *)mp1->b_rptr; 3503 ipf->ipf_mp = mp1; 3504 ipf->ipf_ptphn = ipfp; 3505 ipfp[0] = ipf; 3506 ipf->ipf_hash_next = NULL; 3507 ipf->ipf_ident = ident; 3508 ipf->ipf_v6src = *v6src_ptr; 3509 ipf->ipf_v6dst = *v6dst_ptr; 3510 /* Record reassembly start time. 
*/ 3511 ipf->ipf_timestamp = gethrestime_sec(); 3512 /* Record ipf generation and account for frag header */ 3513 ipf->ipf_gen = ill->ill_ipf_gen++; 3514 ipf->ipf_count = MBLKSIZE(mp1); 3515 ipf->ipf_protocol = nexthdr; 3516 ipf->ipf_nf_hdr_len = 0; 3517 ipf->ipf_prev_nexthdr_offset = 0; 3518 ipf->ipf_last_frag_seen = B_FALSE; 3519 ipf->ipf_ecn = ecn_info; 3520 ipf->ipf_num_dups = 0; 3521 ipfb->ipfb_frag_pkts++; 3522 ipf->ipf_checksum = 0; 3523 ipf->ipf_checksum_flags = 0; 3524 3525 /* Store checksum value in fragment header */ 3526 if (sum_flags != 0) { 3527 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3528 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3529 ipf->ipf_checksum = sum_val; 3530 ipf->ipf_checksum_flags = sum_flags; 3531 } 3532 3533 /* 3534 * We handle reassembly two ways. In the easy case, 3535 * where all the fragments show up in order, we do 3536 * minimal bookkeeping, and just clip new pieces on 3537 * the end. If we ever see a hole, then we go off 3538 * to ip_reassemble which has to mark the pieces and 3539 * keep track of the number of holes, etc. Obviously, 3540 * the point of having both mechanisms is so we can 3541 * handle the easy case as efficiently as possible. 3542 */ 3543 if (offset == 0) { 3544 /* Easy case, in-order reassembly so far. */ 3545 /* Update the byte count */ 3546 ipf->ipf_count += msg_len; 3547 ipf->ipf_tail_mp = tail_mp; 3548 /* 3549 * Keep track of next expected offset in 3550 * ipf_end. 3551 */ 3552 ipf->ipf_end = end; 3553 ipf->ipf_nf_hdr_len = hdr_length; 3554 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset; 3555 } else { 3556 /* Hard case, hole at the beginning. */ 3557 ipf->ipf_tail_mp = NULL; 3558 /* 3559 * ipf_end == 0 means that we have given up 3560 * on easy reassembly. 3561 */ 3562 ipf->ipf_end = 0; 3563 3564 /* Forget checksum offload from now on */ 3565 ipf->ipf_checksum_flags = 0; 3566 3567 /* 3568 * ipf_hole_cnt is set by ip_reassemble. 3569 * ipf_count is updated by ip_reassemble. 
3570 * No need to check for return value here 3571 * as we don't expect reassembly to complete or 3572 * fail for the first fragment itself. 3573 */ 3574 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 3575 msg_len); 3576 } 3577 /* Update per ipfb and ill byte counts */ 3578 ipfb->ipfb_count += ipf->ipf_count; 3579 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3580 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 3581 /* If the frag timer wasn't already going, start it. */ 3582 mutex_enter(&ill->ill_lock); 3583 ill_frag_timer_start(ill); 3584 mutex_exit(&ill->ill_lock); 3585 goto partial_reass_done; 3586 } 3587 3588 /* 3589 * If the packet's flag has changed (it could be coming up 3590 * from an interface different than the previous, therefore 3591 * possibly different checksum capability), then forget about 3592 * any stored checksum states. Otherwise add the value to 3593 * the existing one stored in the fragment header. 3594 */ 3595 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 3596 sum_val += ipf->ipf_checksum; 3597 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3598 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3599 ipf->ipf_checksum = sum_val; 3600 } else if (ipf->ipf_checksum_flags != 0) { 3601 /* Forget checksum offload from now on */ 3602 ipf->ipf_checksum_flags = 0; 3603 } 3604 3605 /* 3606 * We have a new piece of a datagram which is already being 3607 * reassembled. Update the ECN info if all IP fragments 3608 * are ECN capable. If there is one which is not, clear 3609 * all the info. If there is at least one which has CE 3610 * code point, IP needs to report that up to transport. 
3611 */ 3612 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 3613 if (ecn_info == IPH_ECN_CE) 3614 ipf->ipf_ecn = IPH_ECN_CE; 3615 } else { 3616 ipf->ipf_ecn = IPH_ECN_NECT; 3617 } 3618 3619 if (offset && ipf->ipf_end == offset) { 3620 /* The new fragment fits at the end */ 3621 ipf->ipf_tail_mp->b_cont = mp; 3622 /* Update the byte count */ 3623 ipf->ipf_count += msg_len; 3624 /* Update per ipfb and ill byte counts */ 3625 ipfb->ipfb_count += msg_len; 3626 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3627 atomic_add_32(&ill->ill_frag_count, msg_len); 3628 if (more_frags) { 3629 /* More to come. */ 3630 ipf->ipf_end = end; 3631 ipf->ipf_tail_mp = tail_mp; 3632 goto partial_reass_done; 3633 } 3634 } else { 3635 /* 3636 * Go do the hard cases. 3637 * Call ip_reassemble(). 3638 */ 3639 int ret; 3640 3641 if (offset == 0) { 3642 if (ipf->ipf_prev_nexthdr_offset == 0) { 3643 ipf->ipf_nf_hdr_len = hdr_length; 3644 ipf->ipf_prev_nexthdr_offset = 3645 prev_nexthdr_offset; 3646 } 3647 } 3648 /* Save current byte count */ 3649 count = ipf->ipf_count; 3650 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 3651 3652 /* Count of bytes added and subtracted (freeb()ed) */ 3653 count = ipf->ipf_count - count; 3654 if (count) { 3655 /* Update per ipfb and ill byte counts */ 3656 ipfb->ipfb_count += count; 3657 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3658 atomic_add_32(&ill->ill_frag_count, count); 3659 } 3660 if (ret == IP_REASS_PARTIAL) { 3661 goto partial_reass_done; 3662 } else if (ret == IP_REASS_FAILED) { 3663 /* Reassembly failed. Free up all resources */ 3664 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3665 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 3666 IP_REASS_SET_START(t_mp, 0); 3667 IP_REASS_SET_END(t_mp, 0); 3668 } 3669 freemsg(mp); 3670 goto partial_reass_done; 3671 } 3672 3673 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 3674 } 3675 /* 3676 * We have completed reassembly. 
Unhook the frag header from 3677 * the reassembly list. 3678 * 3679 * Grab the unfragmentable header length next header value out 3680 * of the first fragment 3681 */ 3682 ASSERT(ipf->ipf_nf_hdr_len != 0); 3683 hdr_length = ipf->ipf_nf_hdr_len; 3684 3685 /* 3686 * Before we free the frag header, record the ECN info 3687 * to report back to the transport. 3688 */ 3689 ecn_info = ipf->ipf_ecn; 3690 3691 /* 3692 * Store the nextheader field in the header preceding the fragment 3693 * header 3694 */ 3695 nexthdr = ipf->ipf_protocol; 3696 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 3697 ipfp = ipf->ipf_ptphn; 3698 3699 /* We need to supply these to caller */ 3700 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 3701 sum_val = ipf->ipf_checksum; 3702 else 3703 sum_val = 0; 3704 3705 mp1 = ipf->ipf_mp; 3706 count = ipf->ipf_count; 3707 ipf = ipf->ipf_hash_next; 3708 if (ipf) 3709 ipf->ipf_ptphn = ipfp; 3710 ipfp[0] = ipf; 3711 atomic_add_32(&ill->ill_frag_count, -count); 3712 ASSERT(ipfb->ipfb_count >= count); 3713 ipfb->ipfb_count -= count; 3714 ipfb->ipfb_frag_pkts--; 3715 mutex_exit(&ipfb->ipfb_lock); 3716 /* Ditch the frag header. */ 3717 mp = mp1->b_cont; 3718 freeb(mp1); 3719 3720 /* 3721 * Make sure the packet is good by doing some sanity 3722 * check. If bad we can silentely drop the packet. 3723 */ 3724 reass_done: 3725 if (hdr_length < sizeof (ip6_frag_t)) { 3726 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3727 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3728 ip1dbg(("ip_input_fragment_v6: bad packet\n")); 3729 freemsg(mp); 3730 return (NULL); 3731 } 3732 3733 /* 3734 * Remove the fragment header from the initial header by 3735 * splitting the mblk into the non-fragmentable header and 3736 * everthing after the fragment extension header. 
This has the 3737 * side effect of putting all the headers that need destination 3738 * processing into the b_cont block-- on return this fact is 3739 * used in order to avoid having to look at the extensions 3740 * already processed. 3741 * 3742 * Note that this code assumes that the unfragmentable portion 3743 * of the header is in the first mblk and increments 3744 * the read pointer past it. If this assumption is broken 3745 * this code fails badly. 3746 */ 3747 if (mp->b_rptr + hdr_length != mp->b_wptr) { 3748 mblk_t *nmp; 3749 3750 if (!(nmp = dupb(mp))) { 3751 ip1dbg(("ip_input_fragment_v6: dupb failed\n")); 3752 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3753 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3754 freemsg(mp); 3755 return (NULL); 3756 } 3757 nmp->b_cont = mp->b_cont; 3758 mp->b_cont = nmp; 3759 nmp->b_rptr += hdr_length; 3760 } 3761 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 3762 3763 ip6h = (ip6_t *)mp->b_rptr; 3764 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr; 3765 3766 /* Restore original IP length in header. */ 3767 packet_size = msgdsize(mp); 3768 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN)); 3769 /* Record the ECN info. */ 3770 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 3771 ip6h->ip6_vcf |= htonl(ecn_info << 20); 3772 3773 /* Update the receive attributes */ 3774 ira->ira_pktlen = packet_size; 3775 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t); 3776 ira->ira_protocol = nexthdr; 3777 3778 /* Reassembly is successful; set checksum information in packet */ 3779 DB_CKSUM16(mp) = (uint16_t)sum_val; 3780 DB_CKSUMFLAGS(mp) = sum_flags; 3781 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length; 3782 3783 return (mp); 3784 } 3785 3786 /* 3787 * Given an mblk and a ptr, find the destination address in an IPv6 routing 3788 * header. 
3789 */ 3790 static in6_addr_t 3791 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3792 { 3793 ip6_rthdr0_t *rt0; 3794 int segleft, numaddr; 3795 in6_addr_t *ap, rv = oldrv; 3796 3797 rt0 = (ip6_rthdr0_t *)whereptr; 3798 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3799 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3800 uint8_t *, whereptr); 3801 return (rv); 3802 } 3803 segleft = rt0->ip6r0_segleft; 3804 numaddr = rt0->ip6r0_len / 2; 3805 3806 if ((rt0->ip6r0_len & 0x1) || 3807 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3808 (segleft > rt0->ip6r0_len / 2)) { 3809 /* 3810 * Corrupt packet. Either the routing header length is odd 3811 * (can't happen) or mismatched compared to the packet, or the 3812 * number of addresses is. Return what we can. This will 3813 * only be a problem on forwarded packets that get squeezed 3814 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3815 */ 3816 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3817 whereptr); 3818 return (rv); 3819 } 3820 3821 if (segleft != 0) { 3822 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3823 rv = ap[numaddr - 1]; 3824 } 3825 3826 return (rv); 3827 } 3828 3829 /* 3830 * Walk through the options to see if there is a routing header. 3831 * If present get the destination which is the last address of 3832 * the option. 3833 * mp needs to be provided in cases when the extension headers might span 3834 * b_cont; mp is never modified by this function. 3835 */ 3836 in6_addr_t 3837 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3838 { 3839 const mblk_t *current_mp = mp; 3840 uint8_t nexthdr; 3841 uint8_t *whereptr; 3842 int ehdrlen; 3843 in6_addr_t rv; 3844 3845 whereptr = (uint8_t *)ip6h; 3846 ehdrlen = sizeof (ip6_t); 3847 3848 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3849 ASSERT(mp == NULL || 3850 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3851 3852 rv = ip6h->ip6_dst; 3853 nexthdr = ip6h->ip6_nxt; 3854 if (is_fragment != NULL) 3855 *is_fragment = B_FALSE; 3856 3857 /* 3858 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3859 * no extension headers will be split across mblks. 3860 */ 3861 3862 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3863 nexthdr == IPPROTO_ROUTING) { 3864 if (nexthdr == IPPROTO_ROUTING) 3865 rv = pluck_out_dst(current_mp, whereptr, rv); 3866 3867 /* 3868 * All IPv6 extension headers have the next-header in byte 3869 * 0, and the (length - 8) in 8-byte-words. 3870 */ 3871 while (current_mp != NULL && 3872 whereptr + ehdrlen >= current_mp->b_wptr) { 3873 ehdrlen -= (current_mp->b_wptr - whereptr); 3874 current_mp = current_mp->b_cont; 3875 if (current_mp == NULL) { 3876 /* Bad packet. Return what we can. */ 3877 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3878 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3879 goto done; 3880 } 3881 whereptr = current_mp->b_rptr; 3882 } 3883 whereptr += ehdrlen; 3884 3885 nexthdr = *whereptr; 3886 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3887 ehdrlen = (*(whereptr + 1) + 1) * 8; 3888 } 3889 3890 done: 3891 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3892 *is_fragment = B_TRUE; 3893 return (rv); 3894 } 3895 3896 /* 3897 * ip_source_routed_v6: 3898 * This function is called by redirect code (called from ip_input_v6) to 3899 * know whether this packet is source routed through this node i.e 3900 * whether this node (router) is part of the journey. This 3901 * function is called under two cases : 3902 * 3903 * case 1 : Routing header was processed by this node and 3904 * ip_process_rthdr replaced ip6_dst with the next hop 3905 * and we are forwarding the packet to the next hop. 
3906 * 3907 * case 2 : Routing header was not processed by this node and we 3908 * are just forwarding the packet. 3909 * 3910 * For case (1) we don't want to send redirects. For case(2) we 3911 * want to send redirects. 3912 */ 3913 static boolean_t 3914 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 3915 { 3916 uint8_t nexthdr; 3917 in6_addr_t *addrptr; 3918 ip6_rthdr0_t *rthdr; 3919 uint8_t numaddr; 3920 ip6_hbh_t *hbhhdr; 3921 uint_t ehdrlen; 3922 uint8_t *byteptr; 3923 3924 ip2dbg(("ip_source_routed_v6\n")); 3925 nexthdr = ip6h->ip6_nxt; 3926 ehdrlen = IPV6_HDR_LEN; 3927 3928 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 3929 while (nexthdr == IPPROTO_HOPOPTS || 3930 nexthdr == IPPROTO_DSTOPTS) { 3931 byteptr = (uint8_t *)ip6h + ehdrlen; 3932 /* 3933 * Check if we have already processed 3934 * packets or we are just a forwarding 3935 * router which only pulled up msgs up 3936 * to IPV6HDR and one HBH ext header 3937 */ 3938 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 3939 ip2dbg(("ip_source_routed_v6: Extension" 3940 " headers not processed\n")); 3941 return (B_FALSE); 3942 } 3943 hbhhdr = (ip6_hbh_t *)byteptr; 3944 nexthdr = hbhhdr->ip6h_nxt; 3945 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 3946 } 3947 switch (nexthdr) { 3948 case IPPROTO_ROUTING: 3949 byteptr = (uint8_t *)ip6h + ehdrlen; 3950 /* 3951 * If for some reason, we haven't pulled up 3952 * the routing hdr data mblk, then we must 3953 * not have processed it at all. So for sure 3954 * we are not part of the source routed journey. 3955 */ 3956 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 3957 ip2dbg(("ip_source_routed_v6: Routing" 3958 " header not processed\n")); 3959 return (B_FALSE); 3960 } 3961 rthdr = (ip6_rthdr0_t *)byteptr; 3962 /* 3963 * Either we are an intermediate router or the 3964 * last hop before destination and we have 3965 * already processed the routing header. 
3966 * If segment_left is greater than or equal to zero, 3967 * then we must be the (numaddr - segleft) entry 3968 * of the routing header. Although ip6r0_segleft 3969 * is a unit8_t variable, we still check for zero 3970 * or greater value, if in case the data type 3971 * is changed someday in future. 3972 */ 3973 if (rthdr->ip6r0_segleft > 0 || 3974 rthdr->ip6r0_segleft == 0) { 3975 numaddr = rthdr->ip6r0_len / 2; 3976 addrptr = (in6_addr_t *)((char *)rthdr + 3977 sizeof (*rthdr)); 3978 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 3979 if (addrptr != NULL) { 3980 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 3981 return (B_TRUE); 3982 ip1dbg(("ip_source_routed_v6: Not local\n")); 3983 } 3984 } 3985 /* FALLTHROUGH */ 3986 default: 3987 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 3988 return (B_FALSE); 3989 } 3990 } 3991 3992 /* 3993 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 3994 * We have not optimized this in terms of number of mblks 3995 * allocated. For instance, for each fragment sent we always allocate a 3996 * mblk to hold the IPv6 header and fragment header. 3997 * 3998 * Assumes that all the extension headers are contained in the first mblk 3999 * and that the fragment header has has already been added by calling 4000 * ip_fraghdr_add_v6. 
4001 */ 4002 int 4003 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4004 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4005 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4006 { 4007 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4008 ip6_t *fip6h; 4009 mblk_t *hmp; 4010 mblk_t *hmp0; 4011 mblk_t *dmp; 4012 ip6_frag_t *fraghdr; 4013 size_t unfragmentable_len; 4014 size_t mlen; 4015 size_t max_chunk; 4016 uint16_t off_flags; 4017 uint16_t offset = 0; 4018 ill_t *ill = nce->nce_ill; 4019 uint8_t nexthdr; 4020 uint8_t *ptr; 4021 ip_stack_t *ipst = ill->ill_ipst; 4022 uint_t priority = mp->b_band; 4023 int error = 0; 4024 4025 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4026 if (max_frag == 0) { 4027 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4028 ip_drop_output("FragFails: zero max_frag", mp, ill); 4029 freemsg(mp); 4030 return (EINVAL); 4031 } 4032 4033 /* 4034 * Caller should have added fraghdr_t to pkt_len, and also 4035 * updated ip6_plen. 4036 */ 4037 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4038 ASSERT(msgdsize(mp) == pkt_len); 4039 4040 /* 4041 * Determine the length of the unfragmentable portion of this 4042 * datagram. This consists of the IPv6 header, a potential 4043 * hop-by-hop options header, a potential pre-routing-header 4044 * destination options header, and a potential routing header. 
4045 */ 4046 nexthdr = ip6h->ip6_nxt; 4047 ptr = (uint8_t *)&ip6h[1]; 4048 4049 if (nexthdr == IPPROTO_HOPOPTS) { 4050 ip6_hbh_t *hbh_hdr; 4051 uint_t hdr_len; 4052 4053 hbh_hdr = (ip6_hbh_t *)ptr; 4054 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4055 nexthdr = hbh_hdr->ip6h_nxt; 4056 ptr += hdr_len; 4057 } 4058 if (nexthdr == IPPROTO_DSTOPTS) { 4059 ip6_dest_t *dest_hdr; 4060 uint_t hdr_len; 4061 4062 dest_hdr = (ip6_dest_t *)ptr; 4063 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4064 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4065 nexthdr = dest_hdr->ip6d_nxt; 4066 ptr += hdr_len; 4067 } 4068 } 4069 if (nexthdr == IPPROTO_ROUTING) { 4070 ip6_rthdr_t *rthdr; 4071 uint_t hdr_len; 4072 4073 rthdr = (ip6_rthdr_t *)ptr; 4074 nexthdr = rthdr->ip6r_nxt; 4075 hdr_len = 8 * (rthdr->ip6r_len + 1); 4076 ptr += hdr_len; 4077 } 4078 if (nexthdr != IPPROTO_FRAGMENT) { 4079 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4080 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4081 freemsg(mp); 4082 return (EINVAL); 4083 } 4084 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4085 unfragmentable_len += sizeof (ip6_frag_t); 4086 4087 max_chunk = (max_frag - unfragmentable_len) & ~7; 4088 4089 /* 4090 * Allocate an mblk with enough room for the link-layer 4091 * header and the unfragmentable part of the datagram, which includes 4092 * the fragment header. This (or a copy) will be used as the 4093 * first mblk for each fragment we send. 4094 */ 4095 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4096 if (hmp == NULL) { 4097 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4098 ip_drop_output("FragFails: no hmp", mp, ill); 4099 freemsg(mp); 4100 return (ENOBUFS); 4101 } 4102 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4103 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4104 4105 fip6h = (ip6_t *)hmp->b_rptr; 4106 bcopy(ip6h, fip6h, unfragmentable_len); 4107 4108 /* 4109 * pkt_len is set to the total length of the fragmentable data in this 4110 * datagram. 
For each fragment sent, we will decrement pkt_len 4111 * by the amount of fragmentable data sent in that fragment 4112 * until len reaches zero. 4113 */ 4114 pkt_len -= unfragmentable_len; 4115 4116 /* 4117 * Move read ptr past unfragmentable portion, we don't want this part 4118 * of the data in our fragments. 4119 */ 4120 mp->b_rptr += unfragmentable_len; 4121 if (mp->b_rptr == mp->b_wptr) { 4122 mblk_t *mp1 = mp->b_cont; 4123 freeb(mp); 4124 mp = mp1; 4125 } 4126 4127 while (pkt_len != 0) { 4128 mlen = MIN(pkt_len, max_chunk); 4129 pkt_len -= mlen; 4130 if (pkt_len != 0) { 4131 /* Not last */ 4132 hmp0 = copyb(hmp); 4133 if (hmp0 == NULL) { 4134 BUMP_MIB(ill->ill_ip_mib, 4135 ipIfStatsOutFragFails); 4136 ip_drop_output("FragFails: copyb failed", 4137 mp, ill); 4138 freeb(hmp); 4139 freemsg(mp); 4140 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4141 return (ENOBUFS); 4142 } 4143 off_flags = IP6F_MORE_FRAG; 4144 } else { 4145 /* Last fragment */ 4146 hmp0 = hmp; 4147 hmp = NULL; 4148 off_flags = 0; 4149 } 4150 fip6h = (ip6_t *)(hmp0->b_rptr); 4151 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4152 sizeof (ip6_frag_t)); 4153 4154 fip6h->ip6_plen = htons((uint16_t)(mlen + 4155 unfragmentable_len - IPV6_HDR_LEN)); 4156 /* 4157 * Note: Optimization alert. 4158 * In IPv6 (and IPv4) protocol header, Fragment Offset 4159 * ("offset") is 13 bits wide and in 8-octet units. 4160 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4161 * it occupies the most significant 13 bits. 4162 * (least significant 13 bits in IPv4). 4163 * We do not do any shifts here. Not shifting is same effect 4164 * as taking offset value in octet units, dividing by 8 and 4165 * then shifting 3 bits left to line it up in place in proper 4166 * place protocol header. 
4167 */ 4168 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4169 4170 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4171 /* mp has already been freed by ip_carve_mp() */ 4172 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4173 ip_drop_output("FragFails: could not carve mp", 4174 hmp0, ill); 4175 if (hmp != NULL) 4176 freeb(hmp); 4177 freeb(hmp0); 4178 ip1dbg(("ip_carve_mp: failed\n")); 4179 return (ENOBUFS); 4180 } 4181 hmp0->b_cont = dmp; 4182 /* Get the priority marking, if any */ 4183 hmp0->b_band = priority; 4184 4185 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4186 4187 error = postfragfn(hmp0, nce, ixaflags, 4188 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4189 ixa_cookie); 4190 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4191 /* No point in sending the other fragments */ 4192 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4193 ip_drop_output("FragFails: postfragfn failed", 4194 hmp, ill); 4195 freeb(hmp); 4196 freemsg(mp); 4197 return (error); 4198 } 4199 /* No need to redo state machine in loop */ 4200 ixaflags &= ~IXAF_REACH_CONF; 4201 4202 offset += mlen; 4203 } 4204 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4205 return (error); 4206 } 4207 4208 /* 4209 * Add a fragment header to an IPv6 packet. 4210 * Assumes that all the extension headers are contained in the first mblk. 4211 * 4212 * The fragment header is inserted after an hop-by-hop options header 4213 * and after [an optional destinations header followed by] a routing header. 4214 */ 4215 mblk_t * 4216 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4217 { 4218 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4219 ip6_t *fip6h; 4220 mblk_t *hmp; 4221 ip6_frag_t *fraghdr; 4222 size_t unfragmentable_len; 4223 uint8_t nexthdr; 4224 uint_t prev_nexthdr_offset; 4225 uint8_t *ptr; 4226 uint_t priority = mp->b_band; 4227 ip_stack_t *ipst = ixa->ixa_ipst; 4228 4229 /* 4230 * Determine the length of the unfragmentable portion of this 4231 * datagram. 
This consists of the IPv6 header, a potential 4232 * hop-by-hop options header, a potential pre-routing-header 4233 * destination options header, and a potential routing header. 4234 */ 4235 nexthdr = ip6h->ip6_nxt; 4236 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4237 ptr = (uint8_t *)&ip6h[1]; 4238 4239 if (nexthdr == IPPROTO_HOPOPTS) { 4240 ip6_hbh_t *hbh_hdr; 4241 uint_t hdr_len; 4242 4243 hbh_hdr = (ip6_hbh_t *)ptr; 4244 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4245 nexthdr = hbh_hdr->ip6h_nxt; 4246 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4247 - (uint8_t *)ip6h; 4248 ptr += hdr_len; 4249 } 4250 if (nexthdr == IPPROTO_DSTOPTS) { 4251 ip6_dest_t *dest_hdr; 4252 uint_t hdr_len; 4253 4254 dest_hdr = (ip6_dest_t *)ptr; 4255 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4256 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4257 nexthdr = dest_hdr->ip6d_nxt; 4258 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4259 - (uint8_t *)ip6h; 4260 ptr += hdr_len; 4261 } 4262 } 4263 if (nexthdr == IPPROTO_ROUTING) { 4264 ip6_rthdr_t *rthdr; 4265 uint_t hdr_len; 4266 4267 rthdr = (ip6_rthdr_t *)ptr; 4268 nexthdr = rthdr->ip6r_nxt; 4269 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4270 - (uint8_t *)ip6h; 4271 hdr_len = 8 * (rthdr->ip6r_len + 1); 4272 ptr += hdr_len; 4273 } 4274 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4275 4276 /* 4277 * Allocate an mblk with enough room for the link-layer 4278 * header, the unfragmentable part of the datagram, and the 4279 * fragment header. 
4280 */ 4281 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4282 ipst->ips_ip_wroff_extra, mp); 4283 if (hmp == NULL) { 4284 ill_t *ill = ixa->ixa_nce->nce_ill; 4285 4286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4287 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4288 freemsg(mp); 4289 return (NULL); 4290 } 4291 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4292 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4293 4294 fip6h = (ip6_t *)hmp->b_rptr; 4295 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4296 4297 bcopy(ip6h, fip6h, unfragmentable_len); 4298 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4299 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4300 4301 fraghdr->ip6f_nxt = nexthdr; 4302 fraghdr->ip6f_reserved = 0; 4303 fraghdr->ip6f_offlg = 0; 4304 fraghdr->ip6f_ident = htonl(ident); 4305 4306 /* Get the priority marking, if any */ 4307 hmp->b_band = priority; 4308 4309 /* 4310 * Move read ptr past unfragmentable portion, we don't want this part 4311 * of the data in our fragments. 4312 */ 4313 mp->b_rptr += unfragmentable_len; 4314 hmp->b_cont = mp; 4315 return (hmp); 4316 } 4317 4318 /* 4319 * Determine if the ill and multicast aspects of that packets 4320 * "matches" the conn. 4321 */ 4322 boolean_t 4323 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4324 { 4325 ill_t *ill = ira->ira_rill; 4326 zoneid_t zoneid = ira->ira_zoneid; 4327 uint_t in_ifindex; 4328 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4329 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4330 4331 /* 4332 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4333 * scopeid. This is used to limit 4334 * unicast and multicast reception to conn_incoming_ifindex. 4335 * conn_wantpacket_v6 is called both for unicast and 4336 * multicast packets. 
4337 */ 4338 in_ifindex = connp->conn_incoming_ifindex; 4339 4340 /* mpathd can bind to the under IPMP interface, which we allow */ 4341 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4342 if (!IS_UNDER_IPMP(ill)) 4343 return (B_FALSE); 4344 4345 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4346 return (B_FALSE); 4347 } 4348 4349 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4350 return (B_FALSE); 4351 4352 if (!(ira->ira_flags & IRAF_MULTICAST)) 4353 return (B_TRUE); 4354 4355 if (connp->conn_multi_router) 4356 return (B_TRUE); 4357 4358 if (ira->ira_protocol == IPPROTO_RSVP) 4359 return (B_TRUE); 4360 4361 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4362 ira->ira_ill)); 4363 } 4364 4365 /* 4366 * pr_addr_dbg function provides the needed buffer space to call 4367 * inet_ntop() function's 3rd argument. This function should be 4368 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4369 * stack buffer space in it's own stack frame. This function uses 4370 * a buffer from it's own stack and prints the information. 4371 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4372 * 4373 * Note: This function can call inet_ntop() once. 4374 */ 4375 void 4376 pr_addr_dbg(char *fmt1, int af, const void *addr) 4377 { 4378 char buf[INET6_ADDRSTRLEN]; 4379 4380 if (fmt1 == NULL) { 4381 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4382 return; 4383 } 4384 4385 /* 4386 * This does not compare debug level and just prints 4387 * out. Thus it is the responsibility of the caller 4388 * to check the appropriate debug-level before calling 4389 * this function. 4390 */ 4391 if (ip_debug > 0) { 4392 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4393 } 4394 4395 4396 } 4397 4398 4399 /* 4400 * Return the length in bytes of the IPv6 headers (base header 4401 * extension headers) that will be needed based on the 4402 * ip_pkt_t structure passed by the caller. 
4403 * 4404 * The returned length does not include the length of the upper level 4405 * protocol (ULP) header. 4406 */ 4407 int 4408 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4409 { 4410 int len; 4411 4412 len = IPV6_HDR_LEN; 4413 4414 /* 4415 * If there's a security label here, then we ignore any hop-by-hop 4416 * options the user may try to set. 4417 */ 4418 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4419 uint_t hopoptslen; 4420 /* 4421 * Note that ipp_label_len_v6 is just the option - not 4422 * the hopopts extension header. It also needs to be padded 4423 * to a multiple of 8 bytes. 4424 */ 4425 ASSERT(ipp->ipp_label_len_v6 != 0); 4426 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4427 hopoptslen = (hopoptslen + 7)/8 * 8; 4428 len += hopoptslen; 4429 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4430 ASSERT(ipp->ipp_hopoptslen != 0); 4431 len += ipp->ipp_hopoptslen; 4432 } 4433 4434 /* 4435 * En-route destination options 4436 * Only do them if there's a routing header as well 4437 */ 4438 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4439 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4440 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4441 len += ipp->ipp_rthdrdstoptslen; 4442 } 4443 if (ipp->ipp_fields & IPPF_RTHDR) { 4444 ASSERT(ipp->ipp_rthdrlen != 0); 4445 len += ipp->ipp_rthdrlen; 4446 } 4447 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4448 ASSERT(ipp->ipp_dstoptslen != 0); 4449 len += ipp->ipp_dstoptslen; 4450 } 4451 return (len); 4452 } 4453 4454 /* 4455 * All-purpose routine to build a header chain of an IPv6 header 4456 * followed by any required extension headers and a proto header. 4457 * 4458 * The caller has to set the source and destination address as well as 4459 * ip6_plen. The caller has to massage any routing header and compensate 4460 * for the ULP pseudo-header checksum due to the source route. 4461 * 4462 * The extension headers will all be fully filled in. 
4463 */ 4464 void 4465 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4466 uint8_t protocol, uint32_t flowinfo) 4467 { 4468 uint8_t *nxthdr_ptr; 4469 uint8_t *cp; 4470 ip6_t *ip6h = (ip6_t *)buf; 4471 4472 /* Initialize IPv6 header */ 4473 ip6h->ip6_vcf = 4474 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4475 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4476 4477 if (ipp->ipp_fields & IPPF_TCLASS) { 4478 /* Overrides the class part of flowinfo */ 4479 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4480 ipp->ipp_tclass); 4481 } 4482 4483 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4484 ip6h->ip6_hops = ipp->ipp_hoplimit; 4485 else 4486 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4487 4488 if ((ipp->ipp_fields & IPPF_ADDR) && 4489 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4490 ip6h->ip6_src = ipp->ipp_addr; 4491 4492 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4493 cp = (uint8_t *)&ip6h[1]; 4494 /* 4495 * Here's where we have to start stringing together 4496 * any extension headers in the right order: 4497 * Hop-by-hop, destination, routing, and final destination opts. 4498 */ 4499 /* 4500 * If there's a security label here, then we ignore any hop-by-hop 4501 * options the user may try to set. 4502 */ 4503 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4504 /* 4505 * Hop-by-hop options with the label. 4506 * Note that ipp_label_v6 is just the option - not 4507 * the hopopts extension header. It also needs to be padded 4508 * to a multiple of 8 bytes. 
4509 */ 4510 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4511 uint_t hopoptslen; 4512 uint_t padlen; 4513 4514 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4515 hopoptslen = (padlen + 7)/8 * 8; 4516 padlen = hopoptslen - padlen; 4517 4518 *nxthdr_ptr = IPPROTO_HOPOPTS; 4519 nxthdr_ptr = &hbh->ip6h_nxt; 4520 hbh->ip6h_len = hopoptslen/8 - 1; 4521 cp += sizeof (ip6_hbh_t); 4522 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4523 cp += ipp->ipp_label_len_v6; 4524 4525 ASSERT(padlen <= 7); 4526 switch (padlen) { 4527 case 0: 4528 break; 4529 case 1: 4530 cp[0] = IP6OPT_PAD1; 4531 break; 4532 default: 4533 cp[0] = IP6OPT_PADN; 4534 cp[1] = padlen - 2; 4535 bzero(&cp[2], padlen - 2); 4536 break; 4537 } 4538 cp += padlen; 4539 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4540 /* Hop-by-hop options */ 4541 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4542 4543 *nxthdr_ptr = IPPROTO_HOPOPTS; 4544 nxthdr_ptr = &hbh->ip6h_nxt; 4545 4546 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4547 cp += ipp->ipp_hopoptslen; 4548 } 4549 /* 4550 * En-route destination options 4551 * Only do them if there's a routing header as well 4552 */ 4553 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4554 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4555 ip6_dest_t *dst = (ip6_dest_t *)cp; 4556 4557 *nxthdr_ptr = IPPROTO_DSTOPTS; 4558 nxthdr_ptr = &dst->ip6d_nxt; 4559 4560 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4561 cp += ipp->ipp_rthdrdstoptslen; 4562 } 4563 /* 4564 * Routing header next 4565 */ 4566 if (ipp->ipp_fields & IPPF_RTHDR) { 4567 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4568 4569 *nxthdr_ptr = IPPROTO_ROUTING; 4570 nxthdr_ptr = &rt->ip6r_nxt; 4571 4572 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4573 cp += ipp->ipp_rthdrlen; 4574 } 4575 /* 4576 * Do ultimate destination options 4577 */ 4578 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4579 ip6_dest_t *dest = (ip6_dest_t *)cp; 4580 4581 *nxthdr_ptr = IPPROTO_DSTOPTS; 4582 nxthdr_ptr = &dest->ip6d_nxt; 4583 4584 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4585 cp += ipp->ipp_dstoptslen; 4586 } 4587 /* 4588 * Now set the last header pointer to the proto passed in 4589 */ 4590 *nxthdr_ptr = protocol; 4591 ASSERT((int)(cp - buf) == buf_len); 4592 } 4593 4594 /* 4595 * Return a pointer to the routing header extension header 4596 * in the IPv6 header(s) chain passed in. 4597 * If none found, return NULL 4598 * Assumes that all extension headers are in same mblk as the v6 header 4599 */ 4600 ip6_rthdr_t * 4601 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4602 { 4603 ip6_dest_t *desthdr; 4604 ip6_frag_t *fraghdr; 4605 uint_t hdrlen; 4606 uint8_t nexthdr; 4607 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4608 4609 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4610 return ((ip6_rthdr_t *)ptr); 4611 4612 /* 4613 * The routing header will precede all extension headers 4614 * other than the hop-by-hop and destination options 4615 * extension headers, so if we see anything other than those, 4616 * we're done and didn't find it. 4617 * We could see a destination options header alone but no 4618 * routing header, in which case we'll return NULL as soon as 4619 * we see anything after that. 4620 * Hop-by-hop and destination option headers are identical, 4621 * so we can use either one we want as a template. 4622 */ 4623 nexthdr = ip6h->ip6_nxt; 4624 while (ptr < endptr) { 4625 /* Is there enough left for len + nexthdr? 
*/ 4626 if (ptr + MIN_EHDR_LEN > endptr) 4627 return (NULL); 4628 4629 switch (nexthdr) { 4630 case IPPROTO_HOPOPTS: 4631 case IPPROTO_DSTOPTS: 4632 /* Assumes the headers are identical for hbh and dst */ 4633 desthdr = (ip6_dest_t *)ptr; 4634 hdrlen = 8 * (desthdr->ip6d_len + 1); 4635 nexthdr = desthdr->ip6d_nxt; 4636 break; 4637 4638 case IPPROTO_ROUTING: 4639 return ((ip6_rthdr_t *)ptr); 4640 4641 case IPPROTO_FRAGMENT: 4642 fraghdr = (ip6_frag_t *)ptr; 4643 hdrlen = sizeof (ip6_frag_t); 4644 nexthdr = fraghdr->ip6f_nxt; 4645 break; 4646 4647 default: 4648 return (NULL); 4649 } 4650 ptr += hdrlen; 4651 } 4652 return (NULL); 4653 } 4654 4655 /* 4656 * Called for source-routed packets originating on this node. 4657 * Manipulates the original routing header by moving every entry up 4658 * one slot, placing the first entry in the v6 header's v6_dst field, 4659 * and placing the ultimate destination in the routing header's last 4660 * slot. 4661 * 4662 * Returns the checksum diference between the ultimate destination 4663 * (last hop in the routing header when the packet is sent) and 4664 * the first hop (ip6_dst when the packet is sent) 4665 */ 4666 /* ARGSUSED2 */ 4667 uint32_t 4668 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4669 { 4670 uint_t numaddr; 4671 uint_t i; 4672 in6_addr_t *addrptr; 4673 in6_addr_t tmp; 4674 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4675 uint32_t cksm; 4676 uint32_t addrsum = 0; 4677 uint16_t *ptr; 4678 4679 /* 4680 * Perform any processing needed for source routing. 4681 * We know that all extension headers will be in the same mblk 4682 * as the IPv6 header. 4683 */ 4684 4685 /* 4686 * If no segments left in header, or the header length field is zero, 4687 * don't move hop addresses around; 4688 * Checksum difference is zero. 
4689 */ 4690 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4691 return (0); 4692 4693 ptr = (uint16_t *)&ip6h->ip6_dst; 4694 cksm = 0; 4695 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4696 cksm += ptr[i]; 4697 } 4698 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4699 4700 /* 4701 * Here's where the fun begins - we have to 4702 * move all addresses up one spot, take the 4703 * first hop and make it our first ip6_dst, 4704 * and place the ultimate destination in the 4705 * newly-opened last slot. 4706 */ 4707 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4708 numaddr = rthdr->ip6r0_len / 2; 4709 tmp = *addrptr; 4710 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4711 *addrptr = addrptr[1]; 4712 } 4713 *addrptr = ip6h->ip6_dst; 4714 ip6h->ip6_dst = tmp; 4715 4716 /* 4717 * From the checksummed ultimate destination subtract the checksummed 4718 * current ip6_dst (the first hop address). Return that number. 4719 * (In the v4 case, the second part of this is done in each routine 4720 * that calls ip_massage_options(). We do it all in this one place 4721 * for v6). 
4722 */ 4723 ptr = (uint16_t *)&ip6h->ip6_dst; 4724 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4725 addrsum += ptr[i]; 4726 } 4727 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4728 if ((int)cksm < 0) 4729 cksm--; 4730 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4731 4732 return (cksm); 4733 } 4734 4735 void 4736 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4737 { 4738 kstat_t *ksp; 4739 4740 ip6_stat_t template = { 4741 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4742 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4743 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4744 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4745 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4746 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4747 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4748 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4749 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4750 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4751 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4752 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4753 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4754 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4755 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4756 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4757 }; 4758 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4759 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4760 KSTAT_FLAG_VIRTUAL, stackid); 4761 4762 if (ksp == NULL) 4763 return (NULL); 4764 4765 bcopy(&template, ip6_statisticsp, sizeof (template)); 4766 ksp->ks_data = (void *)ip6_statisticsp; 4767 ksp->ks_private = (void *)(uintptr_t)stackid; 4768 4769 kstat_install(ksp); 4770 return (ksp); 4771 } 4772 4773 void 4774 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4775 { 4776 if (ksp != NULL) { 4777 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4778 kstat_delete_netstack(ksp, stackid); 4779 } 4780 } 4781 4782 /* 4783 * The following two functions set and get 
the value for the 4784 * IPV6_SRC_PREFERENCES socket option. 4785 */ 4786 int 4787 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4788 { 4789 /* 4790 * We only support preferences that are covered by 4791 * IPV6_PREFER_SRC_MASK. 4792 */ 4793 if (prefs & ~IPV6_PREFER_SRC_MASK) 4794 return (EINVAL); 4795 4796 /* 4797 * Look for conflicting preferences or default preferences. If 4798 * both bits of a related pair are clear, the application wants the 4799 * system's default value for that pair. Both bits in a pair can't 4800 * be set. 4801 */ 4802 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4803 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4804 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4805 IPV6_PREFER_SRC_MIPMASK) { 4806 return (EINVAL); 4807 } 4808 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4809 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4810 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4811 IPV6_PREFER_SRC_TMPMASK) { 4812 return (EINVAL); 4813 } 4814 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4815 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4816 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4817 IPV6_PREFER_SRC_CGAMASK) { 4818 return (EINVAL); 4819 } 4820 4821 ixa->ixa_src_preferences = prefs; 4822 return (0); 4823 } 4824 4825 size_t 4826 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4827 { 4828 *val = ixa->ixa_src_preferences; 4829 return (sizeof (ixa->ixa_src_preferences)); 4830 } 4831 4832 /* 4833 * Get the size of the IP options (including the IP headers size) 4834 * without including the AH header's size. If till_ah is B_FALSE, 4835 * and if AH header is present, dest options beyond AH header will 4836 * also be included in the returned size. 
4837 */ 4838 int 4839 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4840 { 4841 ip6_t *ip6h; 4842 uint8_t nexthdr; 4843 uint8_t *whereptr; 4844 ip6_hbh_t *hbhhdr; 4845 ip6_dest_t *dsthdr; 4846 ip6_rthdr_t *rthdr; 4847 int ehdrlen; 4848 int size; 4849 ah_t *ah; 4850 4851 ip6h = (ip6_t *)mp->b_rptr; 4852 size = IPV6_HDR_LEN; 4853 nexthdr = ip6h->ip6_nxt; 4854 whereptr = (uint8_t *)&ip6h[1]; 4855 for (;;) { 4856 /* Assume IP has already stripped it */ 4857 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4858 switch (nexthdr) { 4859 case IPPROTO_HOPOPTS: 4860 hbhhdr = (ip6_hbh_t *)whereptr; 4861 nexthdr = hbhhdr->ip6h_nxt; 4862 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4863 break; 4864 case IPPROTO_DSTOPTS: 4865 dsthdr = (ip6_dest_t *)whereptr; 4866 nexthdr = dsthdr->ip6d_nxt; 4867 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4868 break; 4869 case IPPROTO_ROUTING: 4870 rthdr = (ip6_rthdr_t *)whereptr; 4871 nexthdr = rthdr->ip6r_nxt; 4872 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4873 break; 4874 default : 4875 if (till_ah) { 4876 ASSERT(nexthdr == IPPROTO_AH); 4877 return (size); 4878 } 4879 /* 4880 * If we don't have a AH header to traverse, 4881 * return now. This happens normally for 4882 * outbound datagrams where we have not inserted 4883 * the AH header. 4884 */ 4885 if (nexthdr != IPPROTO_AH) { 4886 return (size); 4887 } 4888 4889 /* 4890 * We don't include the AH header's size 4891 * to be symmetrical with other cases where 4892 * we either don't have a AH header (outbound) 4893 * or peek into the AH header yet (inbound and 4894 * not pulled up yet). 4895 */ 4896 ah = (ah_t *)whereptr; 4897 nexthdr = ah->ah_nexthdr; 4898 ehdrlen = (ah->ah_length << 2) + 8; 4899 4900 if (nexthdr == IPPROTO_DSTOPTS) { 4901 if (whereptr + ehdrlen >= mp->b_wptr) { 4902 /* 4903 * The destination options header 4904 * is not part of the first mblk. 
4905 */ 4906 whereptr = mp->b_cont->b_rptr; 4907 } else { 4908 whereptr += ehdrlen; 4909 } 4910 4911 dsthdr = (ip6_dest_t *)whereptr; 4912 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4913 size += ehdrlen; 4914 } 4915 return (size); 4916 } 4917 whereptr += ehdrlen; 4918 size += ehdrlen; 4919 } 4920 } 4921 4922 /* 4923 * Utility routine that checks if `v6srcp' is a valid address on underlying 4924 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 4925 * associated with `v6srcp' on success. NOTE: if this is not called from 4926 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 4927 * group during or after this lookup. 4928 */ 4929 boolean_t 4930 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 4931 { 4932 ipif_t *ipif; 4933 4934 4935 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 4936 if (ipif != NULL) { 4937 if (ipifp != NULL) 4938 *ipifp = ipif; 4939 else 4940 ipif_refrele(ipif); 4941 return (B_TRUE); 4942 } 4943 4944 if (ip_debug > 2) { 4945 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 4946 "src %s\n", AF_INET6, v6srcp); 4947 } 4948 return (B_FALSE); 4949 } 4950