1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 1990 Mentat Inc. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/dlpi.h> 29 #include <sys/stropts.h> 30 #include <sys/sysmacros.h> 31 #include <sys/strsun.h> 32 #include <sys/strlog.h> 33 #include <sys/strsubr.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/sdt.h> 41 #include <sys/kobj.h> 42 #include <sys/zone.h> 43 #include <sys/neti.h> 44 #include <sys/hook.h> 45 46 #include <sys/kmem.h> 47 #include <sys/systm.h> 48 #include <sys/param.h> 49 #include <sys/socket.h> 50 #include <sys/vtrace.h> 51 #include <sys/isa_defs.h> 52 #include <sys/atomic.h> 53 #include <sys/policy.h> 54 #include <sys/mac.h> 55 #include <net/if.h> 56 #include <net/if_types.h> 57 #include <net/route.h> 58 #include <net/if_dl.h> 59 #include <sys/sockio.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/sctp.h> 64 65 #include 
<inet/common.h> 66 #include <inet/mi.h> 67 #include <inet/optcom.h> 68 #include <inet/mib2.h> 69 #include <inet/nd.h> 70 #include <inet/arp.h> 71 72 #include <inet/ip.h> 73 #include <inet/ip_impl.h> 74 #include <inet/ip6.h> 75 #include <inet/ip6_asp.h> 76 #include <inet/tcp.h> 77 #include <inet/tcp_impl.h> 78 #include <inet/udp_impl.h> 79 #include <inet/ipp_common.h> 80 81 #include <inet/ip_multi.h> 82 #include <inet/ip_if.h> 83 #include <inet/ip_ire.h> 84 #include <inet/ip_rts.h> 85 #include <inet/ip_ndp.h> 86 #include <net/pfkeyv2.h> 87 #include <inet/sadb.h> 88 #include <inet/ipsec_impl.h> 89 #include <inet/iptun/iptun_impl.h> 90 #include <inet/sctp_ip.h> 91 #include <sys/pattr.h> 92 #include <inet/ipclassifier.h> 93 #include <inet/ipsecah.h> 94 #include <inet/rawip_impl.h> 95 #include <inet/rts_impl.h> 96 #include <sys/squeue_impl.h> 97 #include <sys/squeue.h> 98 99 #include <sys/tsol/label.h> 100 #include <sys/tsol/tnet.h> 101 102 /* Temporary; for CR 6451644 work-around */ 103 #include <sys/ethernet.h> 104 105 /* 106 * Naming conventions: 107 * These rules should be judiciously applied 108 * if there is a need to identify something as IPv6 versus IPv4 109 * IPv6 funcions will end with _v6 in the ip module. 110 * IPv6 funcions will end with _ipv6 in the transport modules. 111 * IPv6 macros: 112 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 113 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 114 * And then there are ..V4_PART_OF_V6. 115 * The intent is that macros in the ip module end with _V6. 116 * IPv6 global variables will start with ipv6_ 117 * IPv6 structures will start with ipv6 118 * IPv6 defined constants should start with IPV6_ 119 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 120 */ 121 122 /* 123 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 124 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 125 * from IANA. 
This mechanism will remain in effect until an official
 * number is obtained.
 */
uchar_t ip6opt_ls;

/*
 * Well-known IPv6 addresses, initialized as four 32-bit words.
 * The little-endian initializers are the byte-swapped forms of the
 * big-endian ones, so both variants describe the same address bytes.
 */
const in6_addr_t ipv6_all_ones =
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };

/* ff00:: */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
#endif	/* _BIG_ENDIAN */

/* ::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::1 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::2 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::16 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
#endif	/* _BIG_ENDIAN */

/* ff02::1:ff00:0 */
#ifdef _BIG_ENDIAN
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
#else	/* _BIG_ENDIAN */
const in6_addr_t ipv6_solicited_node_mcast =
	{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
#endif	/* _BIG_ENDIAN */

static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
    ip_recv_attr_t *);
static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
    ip_recv_attr_t *);
static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
    in6_addr_t *, ip_recv_attr_t *);
static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
    ip_recv_attr_t *);
static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);

/*
 * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
 * If the ICMP message is consumed by IP, i.e., it should not be delivered
 * to any IPPROTO_ICMP raw sockets, then it returns NULL.
 * Likewise, if the ICMP error is malformed (too short, etc), then it
 * returns NULL. The caller uses this to determine whether or not to send
 * to raw sockets.
 *
 * All error messages are passed to the matching transport stream.
 *
 * The incoming mp is never left with the caller: on every path it is
 * freed, handed to another routine, or returned. A non-NULL return value
 * is either mp itself or a copy of it made with copymsg().
 *
 * See comment for icmp_inbound_v4() on how IPsec is handled.
 */
mblk_t *
icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h;		/* Outer header */
	int		ip_hdr_length;	/* Outer header length */
	boolean_t	interested;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	mblk_t		*mp_ret = NULL;	/* Copy returned for raw sockets */

	ip6h = (ip6_t *)mp->b_rptr;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);

	/* Check for Martian packets */
	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
		freemsg(mp);
		return (NULL);
	}

	/* Make sure ira_l2src is set for ndp_input */
	if (!(ira->ira_flags & IRAF_L2SRC_SET))
		ip_setl2src(mp, ira, ira->ira_rill);

	/*
	 * The first mblk must hold the IPv6 header(s) plus the minimal
	 * ICMPv6 header. If the whole packet is shorter than that it is
	 * truncated; otherwise pull the missing bytes up.
	 */
	ip_hdr_length = ira->ira_ip_hdr_length;
	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
			freemsg(mp);
			return (NULL);
		}
		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
		if (ip6h == NULL) {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return (NULL);
		}
	}

	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
	    icmp6->icmp6_code));

	/*
	 * We will set "interested" to "true" if we should pass a copy to
	 * the transport i.e., if it is an error message.
	 */
	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);

	/*
	 * First pass: per-type statistics, plus complete handling of the
	 * types IP consumes itself (echo request, neighbor solicit/advert,
	 * MLD). Types that "break" continue to the raw-socket/transport
	 * handling below the switch.
	 */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
		break;

	case ICMP6_TIME_EXCEEDED:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
		break;

	case ICMP6_PARAM_PROB:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
		break;

	case ICMP6_PACKET_TOO_BIG:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
		break;

	case ICMP6_ECHO_REQUEST:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
		/* Answer multicast echo requests only when configured to. */
		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
		    !ipst->ips_ipv6_resp_echo_mcast)
			break;

		/*
		 * We must have exclusive use of the mblk to convert it to
		 * a response.
		 * If not, we copy it.
		 */
		if (mp->b_datap->db_ref > 1) {
			mblk_t	*mp1;

			mp1 = copymsg(mp);
			if (mp1 == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - copymsg",
				    mp, ill);
				freemsg(mp);
				return (NULL);
			}
			freemsg(mp);
			mp = mp1;
			/* Header pointers must follow the new mblk. */
			ip6h = (ip6_t *)mp->b_rptr;
			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
		}

		/* Turn the request into a reply in place and send it. */
		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
		return (NULL);

	case ICMP6_ECHO_REPLY:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
		break;

	case ND_ROUTER_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
		break;

	case ND_ROUTER_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
		break;

	case ND_NEIGHBOR_SOLICIT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
		ndp_input(mp, ira);
		return (NULL);

	case ND_NEIGHBOR_ADVERT:
		BUMP_MIB(ill->ill_icmp6_mib,
		    ipv6IfIcmpInNeighborAdvertisements);
		ndp_input(mp, ira);
		return (NULL);

	case ND_REDIRECT:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);

		/*
		 * When redirects are ignored "interested" keeps the value
		 * computed above, so the redirect is not force-processed.
		 */
		if (ipst->ips_ipv6_ignore_redirect)
			break;

		/* We now allow a RAW socket to receive this. */
		interested = B_TRUE;
		break;

	/*
	 * The next three icmp messages will be handled by MLD.
	 * Pass all valid MLD packets up to any process(es)
	 * listening on a raw ICMP socket.
	 */
	case MLD_LISTENER_QUERY:
	case MLD_LISTENER_REPORT:
	case MLD_LISTENER_REDUCTION:
		/* Whatever mld_input() hands back goes to the caller. */
		mp = mld_input(mp, ira);
		return (mp);
	default:
		break;
	}
	/*
	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
	 * if there isn't one.
	 */
	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
		/* If there is an ICMP client and we want one too, copy it. */

		if (!interested) {
			/* Caller will deliver to RAW sockets */
			return (mp);
		}
		/*
		 * A failed copy is only counted; processing of mp continues
		 * and NULL is what the caller eventually gets back.
		 */
		mp_ret = copymsg(mp);
		if (mp_ret == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
		}
	} else if (!interested) {
		/* Neither we nor raw sockets are interested. Drop packet now */
		freemsg(mp);
		return (NULL);
	}

	/*
	 * ICMP error or redirect packet. Make sure we have enough of
	 * the header and that db_ref == 1 since we might end up modifying
	 * the packet.
	 */
	if (mp->b_cont != NULL) {
		if (ip_pullup(mp, -1, ira) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
			    mp, ill);
			freemsg(mp);
			return (mp_ret);
		}
	}

	if (mp->b_datap->db_ref > 1) {
		mblk_t	*mp1;

		mp1 = copymsg(mp);
		if (mp1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
			freemsg(mp);
			return (mp_ret);
		}
		freemsg(mp);
		mp = mp1;
	}

	/*
	 * In case mp has changed, recompute the header pointers and verify
	 * the message before any further processing.
	 */
	ip6h = (ip6_t *)mp->b_rptr;
	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
		freemsg(mp);
		return (mp_ret);
	}

	/* Second pass: act on the verified redirect or error. */
	switch (icmp6->icmp6_type) {
	case ND_REDIRECT:
		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
		break;
	case ICMP6_PACKET_TOO_BIG:
		/* Update DCE and adjust MTU in icmp header if needed */
		icmp_inbound_too_big_v6(icmp6, ira);
		/* FALLTHRU */
	default:
		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
		break;
	}

	return (mp_ret);
}

/*
 * Send an ICMP echo reply.
 * The caller has already updated the payload part of the packet.
428 * We handle the ICMP checksum, IP source address selection and feed 429 * the packet into ip_output_simple. 430 */ 431 static void 432 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 433 ip_recv_attr_t *ira) 434 { 435 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 436 ill_t *ill = ira->ira_ill; 437 ip_stack_t *ipst = ill->ill_ipst; 438 ip_xmit_attr_t ixas; 439 in6_addr_t origsrc; 440 441 /* 442 * Remove any extension headers (do not reverse a source route) 443 * and clear the flow id (keep traffic class for now). 444 */ 445 if (ip_hdr_length != IPV6_HDR_LEN) { 446 int i; 447 448 for (i = 0; i < IPV6_HDR_LEN; i++) { 449 mp->b_rptr[ip_hdr_length - i - 1] = 450 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 451 } 452 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 453 ip6h = (ip6_t *)mp->b_rptr; 454 ip6h->ip6_nxt = IPPROTO_ICMPV6; 455 i = ntohs(ip6h->ip6_plen); 456 i -= (ip_hdr_length - IPV6_HDR_LEN); 457 ip6h->ip6_plen = htons(i); 458 ip_hdr_length = IPV6_HDR_LEN; 459 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 460 } 461 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 462 463 /* Reverse the source and destination addresses. */ 464 origsrc = ip6h->ip6_src; 465 ip6h->ip6_src = ip6h->ip6_dst; 466 ip6h->ip6_dst = origsrc; 467 468 /* set the hop limit */ 469 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 470 471 /* 472 * Prepare for checksum by putting icmp length in the icmp 473 * checksum field. 
The checksum is calculated in ip_output 474 */ 475 icmp6->icmp6_cksum = ip6h->ip6_plen; 476 477 bzero(&ixas, sizeof (ixas)); 478 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 479 ixas.ixa_zoneid = ira->ira_zoneid; 480 ixas.ixa_cred = kcred; 481 ixas.ixa_cpid = NOPID; 482 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 483 ixas.ixa_ifindex = 0; 484 ixas.ixa_ipst = ipst; 485 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 486 487 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 488 /* 489 * This packet should go out the same way as it 490 * came in i.e in clear, independent of the IPsec 491 * policy for transmitting packets. 492 */ 493 ixas.ixa_flags |= IXAF_NO_IPSEC; 494 } else { 495 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 497 /* Note: mp already consumed and ip_drop_packet done */ 498 return; 499 } 500 } 501 502 /* Was the destination (now source) link-local? Send out same group */ 503 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 504 ixas.ixa_flags |= IXAF_SCOPEID_SET; 505 if (IS_UNDER_IPMP(ill)) 506 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 507 else 508 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 509 } 510 511 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 512 /* 513 * Not one or our addresses (IRE_LOCALs), thus we let 514 * ip_output_simple pick the source. 515 */ 516 ip6h->ip6_src = ipv6_all_zeros; 517 ixas.ixa_flags |= IXAF_SET_SOURCE; 518 } 519 520 /* Should we send using dce_pmtu? */ 521 if (ipst->ips_ipv6_icmp_return_pmtu) 522 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 523 524 (void) ip_output_simple(mp, &ixas); 525 ixa_cleanup(&ixas); 526 527 } 528 529 /* 530 * Verify the ICMP messages for either for ICMP error or redirect packet. 531 * The caller should have fully pulled up the message. If it's a redirect 532 * packet, only basic checks on IP header will be done; otherwise, verify 533 * the packet by looking at the included ULP header. 
534 * 535 * Called before icmp_inbound_error_fanout_v6 is called. 536 */ 537 static boolean_t 538 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 539 { 540 ill_t *ill = ira->ira_ill; 541 uint16_t hdr_length; 542 uint8_t *nexthdrp; 543 uint8_t nexthdr; 544 ip_stack_t *ipst = ill->ill_ipst; 545 conn_t *connp; 546 ip6_t *ip6h; /* Inner header */ 547 548 ip6h = (ip6_t *)&icmp6[1]; 549 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 550 goto truncated; 551 552 if (icmp6->icmp6_type == ND_REDIRECT) { 553 hdr_length = sizeof (nd_redirect_t); 554 } else { 555 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 556 goto discard_pkt; 557 hdr_length = IPV6_HDR_LEN; 558 } 559 560 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 561 goto truncated; 562 563 /* 564 * Stop here for ICMP_REDIRECT. 565 */ 566 if (icmp6->icmp6_type == ND_REDIRECT) 567 return (B_TRUE); 568 569 /* 570 * ICMP errors only. 571 */ 572 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 573 goto discard_pkt; 574 nexthdr = *nexthdrp; 575 576 /* Try to pass the ICMP message to clients who need it */ 577 switch (nexthdr) { 578 case IPPROTO_UDP: 579 /* 580 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 581 * transport header. 582 */ 583 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 584 mp->b_wptr) 585 goto truncated; 586 break; 587 case IPPROTO_TCP: { 588 tcpha_t *tcpha; 589 590 /* 591 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 592 * transport header. 593 */ 594 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 595 mp->b_wptr) 596 goto truncated; 597 598 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 599 /* 600 * With IPMP we need to match across group, which we do 601 * since we have the upper ill from ira_ill. 
602 */ 603 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 604 ill->ill_phyint->phyint_ifindex, ipst); 605 if (connp == NULL) 606 goto discard_pkt; 607 608 if ((connp->conn_verifyicmp != NULL) && 609 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 610 CONN_DEC_REF(connp); 611 goto discard_pkt; 612 } 613 CONN_DEC_REF(connp); 614 break; 615 } 616 case IPPROTO_SCTP: 617 /* 618 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 619 * transport header. 620 */ 621 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 622 mp->b_wptr) 623 goto truncated; 624 break; 625 case IPPROTO_ESP: 626 case IPPROTO_AH: 627 break; 628 case IPPROTO_ENCAP: 629 case IPPROTO_IPV6: { 630 /* Look for self-encapsulated packets that caused an error */ 631 ip6_t *in_ip6h; 632 633 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 634 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 635 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 636 goto truncated; 637 break; 638 } 639 default: 640 break; 641 } 642 643 return (B_TRUE); 644 645 discard_pkt: 646 /* Bogus ICMP error. */ 647 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 648 return (B_FALSE); 649 650 truncated: 651 /* We pulled up everthing already. Must be truncated */ 652 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 653 return (B_FALSE); 654 } 655 656 /* 657 * Process received IPv6 ICMP Packet too big. 658 * The caller is responsible for validating the packet before passing it in 659 * and also to fanout the ICMP error to any matching transport conns. Assumes 660 * the message has been fully pulled up. 661 * 662 * Before getting here, the caller has called icmp_inbound_verify_v6() 663 * that should have verified with ULP to prevent undoing the changes we're 664 * going to make to DCE. For example, TCP might have verified that the packet 665 * which generated error is in the send window. 
*
 * This function rewrites the MTU field in the ICMPv6 header with the
 * resulting path MTU (less the fragment header size when a fragment header
 * will be needed); the caller should pass the message to the matching ULP
 * after this returns.
 */
static void
icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
{
	uint32_t	mtu;
	dce_t		*dce;
	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
	ip_stack_t	*ipst = ill->ill_ipst;
	int		old_max_frag;
	in6_addr_t	final_dst;
	ip6_t		*ip6h;	/* Inner IP header */

	/* Caller has already pulled up everything. */
	ip6h = (ip6_t *)&icmp6[1];
	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);

	/*
	 * For link local destinations matching simply on address is not
	 * sufficient. Same link local addresses for different ILL's is
	 * possible.
	 */
	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
		dce = dce_lookup_and_add_v6(&final_dst,
		    ill->ill_phyint->phyint_ifindex, ipst);
	} else {
		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
	}
	if (dce == NULL) {
		/* Couldn't add a unique one - ENOMEM */
		if (ip_debug > 2) {
			/* ip1dbg */
			pr_addr_dbg("icmp_inbound_too_big_v6:"
			    "no dce for dst %s\n", AF_INET6,
			    &final_dst);
		}
		return;
	}

	mtu = ntohl(icmp6->icmp6_mtu);

	/* dce_lock covers the flag and PMTU updates up to mutex_exit(). */
	mutex_enter(&dce->dce_lock);
	/* Baseline: the PMTU already recorded, else the interface MTU. */
	if (dce->dce_flags & DCEF_PMTU)
		old_max_frag = dce->dce_pmtu;
	else
		old_max_frag = ill->ill_mtu;

	if (mtu < IPV6_MIN_MTU) {
		ip1dbg(("Received mtu less than IPv6 "
		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
		mtu = IPV6_MIN_MTU;
		/*
		 * If an mtu less than IPv6 min mtu is received,
		 * we must include a fragment header in
		 * subsequent packets.
		 */
		dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
	} else {
		dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
	}
	ip1dbg(("Received mtu from router: %d\n", mtu));
	/* The recorded PMTU never grows beyond the baseline here. */
	dce->dce_pmtu = MIN(old_max_frag, mtu);

	/* Prepare to send the new max frag size for the ULP. */
	if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
		/*
		 * If we need a fragment header in every packet
		 * (above case or multirouting), make sure the
		 * ULP takes it into account when computing the
		 * payload size.
		 */
		icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
	} else {
		icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
	}
	/* We now have a PMTU for sure */
	dce->dce_flags |= DCEF_PMTU;
	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
	mutex_exit(&dce->dce_lock);
	/*
	 * After dropping the lock the new value is visible to everyone.
	 * Then we bump the generation number so any cached values reinspect
	 * the dce_t.
	 */
	dce_increment_generation(dce);
	dce_refrele(dce);
}

/*
 * Fanout received ICMPv6 error packets to the transports.
 * Assumes the whole message has been pulled up into a single mblk
 * (mp->b_cont == NULL is asserted below).
 *
 * mp is the message, icmp6 points at its ICMPv6 header (the packet that
 * triggered the error follows it) and ira describes how it was received.
 * On every path mp is either handed to the selected fanout/receive routine
 * or freed on the drop path. ira->ira_protocol is set to the ULP protocol
 * of the inner packet, and IRAF_ICMP_ERROR is set in ira->ira_flags only
 * for the duration of each delivery call.
 *
 * The caller must have called icmp_inbound_verify_v6.
 */
void
icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
{
	uint16_t	*up;		/* Pointer to ports in ULP header */
	uint32_t	ports;		/* reversed ports for fanout */
	ip6_t		rip6h;		/* With reversed addresses */
	ip6_t		*ip6h;		/* Inner IP header */
	uint16_t	hdr_length;	/* Inner IP header length */
	uint8_t		*nexthdrp;
	uint8_t		nexthdr;
	tcpha_t		*tcpha;
	conn_t		*connp;
	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
	ip_stack_t	*ipst = ill->ill_ipst;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;

	/* Caller has already pulled up everything. */
	ip6h = (ip6_t *)&icmp6[1];
	ASSERT(mp->b_cont == NULL);
	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);

	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
		goto drop_pkt;
	nexthdr = *nexthdrp;
	ira->ira_protocol = nexthdr;

	/*
	 * We need a separate IP header with the source and destination
	 * addresses reversed to do fanout/classification because the ip6h in
	 * the ICMPv6 error is in the form we sent it out.
	 * Only the three fields set here are initialized in rip6h.
	 */
	rip6h.ip6_src = ip6h->ip6_dst;
	rip6h.ip6_dst = ip6h->ip6_src;
	rip6h.ip6_nxt = nexthdr;

	/* Try to pass the ICMP message to clients who need it */
	switch (nexthdr) {
	case IPPROTO_UDP: {
		/* Attempt to find a client stream based on port. */
		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);

		/* Note that we send error to all matches. */
		ira->ira_flags |= IRAF_ICMP_ERROR;
		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
		ira->ira_flags &= ~IRAF_ICMP_ERROR;
		return;
	}
	case IPPROTO_TCP: {
		/*
		 * Attempt to find a client stream based on port.
		 * Note that we do a reverse lookup since the header is
		 * in the form we sent it out.
		 */
		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
		/*
		 * With IPMP we need to match across group, which we do
		 * since we have the upper ill from ira_ill.
		 */
		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
		if (connp == NULL) {
			goto drop_pkt;
		}

		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
			mp = ipsec_check_inbound_policy(mp, connp,
			    NULL, ip6h, ira);
			if (mp == NULL) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				/* Note that mp is NULL */
				ip_drop_input("ipIfStatsInDiscards", mp, ill);
				CONN_DEC_REF(connp);
				return;
			}
		}

		ira->ira_flags |= IRAF_ICMP_ERROR;
		if (IPCL_IS_TCP(connp)) {
			/*
			 * NOTE(review): no CONN_DEC_REF on this path;
			 * presumably the squeue entry takes over the conn
			 * reference - confirm.
			 */
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    connp->conn_recvicmp, connp, ira, SQ_FILL,
			    SQTAG_TCP6_INPUT_ICMP_ERR);
		} else {
			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
			ill_t *rill = ira->ira_rill;

			/*
			 * The ill pointers in ira are cleared around the
			 * receive call and restored afterwards.
			 */
			ira->ira_ill = ira->ira_rill = NULL;
			(connp->conn_recv)(connp, mp, NULL, ira);
			CONN_DEC_REF(connp);
			ira->ira_ill = ill;
			ira->ira_rill = rill;
		}
		ira->ira_flags &= ~IRAF_ICMP_ERROR;
		return;

	}
	case IPPROTO_SCTP:
		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
		/* Find a SCTP client stream for this packet. */
		/* The two 16-bit ports are swapped into "ports". */
		((uint16_t *)&ports)[0] = up[1];
		((uint16_t *)&ports)[1] = up[0];

		ira->ira_flags |= IRAF_ICMP_ERROR;
		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
		ira->ira_flags &= ~IRAF_ICMP_ERROR;
		return;

	case IPPROTO_ESP:
	case IPPROTO_AH:
		if (!ipsec_loaded(ipss)) {
			ip_proto_not_sup(mp, ira);
			return;
		}

		if (nexthdr == IPPROTO_ESP)
			mp = ipsecesp_icmp_error(mp, ira);
		else
			mp = ipsecah_icmp_error(mp, ira);
		if (mp == NULL)
			return;

		/* Just in case ipsec didn't preserve the NULL b_cont */
		if (mp->b_cont != NULL) {
			if (!pullupmsg(mp, -1))
				goto drop_pkt;
		}

		/*
		 * If successful, the mp has been modified to not include
		 * the ESP/AH header so we can fanout to the ULP's icmp
		 * error handler.
		 */
		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
			goto drop_pkt;

		/* From here on ip6h is the outer header of the new mp. */
		ip6h = (ip6_t *)mp->b_rptr;
		/* Don't call hdr_length_v6() unless you have to. */
		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
			hdr_length = ip_hdr_length_v6(mp, ip6h);
		else
			hdr_length = IPV6_HDR_LEN;

		/* Verify the modified message before any further processing. */
		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
			freemsg(mp);
			return;
		}

		/* Recurse to fan out on the now-exposed ULP header. */
		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
		return;

	case IPPROTO_IPV6: {
		/* Look for self-encapsulated packets that caused an error */
		ip6_t *in_ip6h;

		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);

		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
			/*
			 * Self-encapsulated case. As in the ipv4 case,
			 * we need to strip the 2nd IP header. Since mp
			 * is already pulled-up, we can simply bcopy
			 * the 3rd header + data over the 2nd header.
			 */
			uint16_t unused_len;

			/*
			 * Make sure we don't do recursion more than once.
			 */
			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
			    &unused_len, &nexthdrp) ||
			    *nexthdrp == IPPROTO_IPV6) {
				goto drop_pkt;
			}

			/*
			 * Copy the 3rd header + remaining data on top
			 * of the 2nd header.
			 * NOTE(review): source and destination overlap;
			 * this relies on bcopy() coping with that - confirm.
			 */
			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);

			/*
			 * Subtract length of the 2nd header.
			 */
			mp->b_wptr -= hdr_length;

			/* From here on ip6h is the outer header of mp. */
			ip6h = (ip6_t *)mp->b_rptr;
			/* Don't call hdr_length_v6() unless you have to. */
			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
				hdr_length = ip_hdr_length_v6(mp, ip6h);
			else
				hdr_length = IPV6_HDR_LEN;

			/*
			 * Verify the modified message before any further
			 * processing.
			 */
			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
				freemsg(mp);
				return;
			}

			/*
			 * Now recurse, and see what I _really_ should be
			 * doing here.
			 */
			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
			return;
		}
		/* Not self-encapsulated: treat like IPPROTO_ENCAP. */
		/* FALLTHRU */
	}
	case IPPROTO_ENCAP:
		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
		    &rip6h.ip6_dst, ipst)) != NULL) {
			ira->ira_flags |= IRAF_ICMP_ERROR;
			connp->conn_recvicmp(connp, mp, NULL, ira);
			CONN_DEC_REF(connp);
			ira->ira_flags &= ~IRAF_ICMP_ERROR;
			return;
		}
		/*
		 * No IP tunnel is interested, fallthrough and see
		 * if a raw socket will want it.
		 */
		/* FALLTHRU */
	default:
		ira->ira_flags |= IRAF_ICMP_ERROR;
		ASSERT(ira->ira_protocol == nexthdr);
		ip_fanout_proto_v6(mp, &rip6h, ira);
		ira->ira_flags &= ~IRAF_ICMP_ERROR;
		return;
	}
	/* NOTREACHED */
drop_pkt:
	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
	freemsg(mp);
}

/*
 * Process received IPv6 ICMP Redirect messages.
 * Assumes the caller has verified that the headers are in the pulled up mblk.
 * Consumes mp.
1008 */ 1009 /* ARGSUSED */ 1010 static void 1011 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd, 1012 ip_recv_attr_t *ira) 1013 { 1014 ire_t *ire, *nire; 1015 ire_t *prev_ire = NULL; 1016 ire_t *redir_ire; 1017 in6_addr_t *src, *dst, *gateway; 1018 nd_opt_hdr_t *opt; 1019 nce_t *nce; 1020 int ncec_flags = 0; 1021 int err = 0; 1022 boolean_t redirect_to_router = B_FALSE; 1023 int len; 1024 int optlen; 1025 ill_t *ill = ira->ira_rill; 1026 ill_t *rill = ira->ira_rill; 1027 ip_stack_t *ipst = ill->ill_ipst; 1028 1029 /* 1030 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 1031 * and make it be the IPMP upper so avoid being confused by a packet 1032 * addressed to a unicast address on a different ill. 1033 */ 1034 if (IS_UNDER_IPMP(rill)) { 1035 rill = ipmp_ill_hold_ipmp_ill(rill); 1036 if (rill == NULL) { 1037 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1038 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill", 1039 mp, ill); 1040 freemsg(mp); 1041 return; 1042 } 1043 ASSERT(rill != ira->ira_rill); 1044 } 1045 1046 len = mp->b_wptr - (uchar_t *)rd; 1047 src = &ip6h->ip6_src; 1048 dst = &rd->nd_rd_dst; 1049 gateway = &rd->nd_rd_target; 1050 1051 /* Verify if it is a valid redirect */ 1052 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1053 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1054 (rd->nd_rd_code != 0) || 1055 (len < sizeof (nd_redirect_t)) || 1056 (IN6_IS_ADDR_V4MAPPED(dst)) || 1057 (IN6_IS_ADDR_MULTICAST(dst))) { 1058 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1059 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill); 1060 goto fail_redirect; 1061 } 1062 1063 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1064 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1065 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1066 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway", 1067 mp, ill); 1068 goto fail_redirect; 1069 } 1070 1071 optlen = len - sizeof (nd_redirect_t); 1072 if (optlen != 0) { 1073 if 
(!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) { 1074 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1075 ip_drop_input("ipv6IfIcmpInBadRedirects - options", 1076 mp, ill); 1077 goto fail_redirect; 1078 } 1079 } 1080 1081 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1082 redirect_to_router = B_TRUE; 1083 ncec_flags |= NCE_F_ISROUTER; 1084 } else { 1085 gateway = dst; /* Add nce for dst */ 1086 } 1087 1088 1089 /* 1090 * Verify that the IP source address of the redirect is 1091 * the same as the current first-hop router for the specified 1092 * ICMP destination address. 1093 * Also, Make sure we had a route for the dest in question and 1094 * that route was pointing to the old gateway (the source of the 1095 * redirect packet.) 1096 * We do longest match and then compare ire_gateway_addr_v6 below. 1097 */ 1098 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill, 1099 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL); 1100 1101 /* 1102 * Check that 1103 * the redirect was not from ourselves 1104 * old gateway is still directly reachable 1105 */ 1106 if (prev_ire == NULL || 1107 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) || 1108 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 1109 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) { 1110 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1111 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill); 1112 goto fail_redirect; 1113 } 1114 1115 ASSERT(prev_ire->ire_ill != NULL); 1116 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD) 1117 ncec_flags |= NCE_F_NONUD; 1118 1119 opt = (nd_opt_hdr_t *)&rd[1]; 1120 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1121 if (opt != NULL) { 1122 err = nce_lookup_then_add_v6(rill, 1123 (uchar_t *)&opt[1], /* Link layer address */ 1124 rill->ill_phys_addr_length, 1125 gateway, ncec_flags, ND_STALE, &nce); 1126 switch (err) { 1127 case 0: 1128 nce_refrele(nce); 1129 break; 1130 case EEXIST: 1131 /* 1132 * Check to see if link layer address has 
changed and 1133 * process the ncec_state accordingly. 1134 */ 1135 nce_process(nce->nce_common, 1136 (uchar_t *)&opt[1], 0, B_FALSE); 1137 nce_refrele(nce); 1138 break; 1139 default: 1140 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1141 err)); 1142 goto fail_redirect; 1143 } 1144 } 1145 if (redirect_to_router) { 1146 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1147 1148 /* 1149 * Create a Route Association. This will allow us to remember 1150 * a router told us to use the particular gateway. 1151 */ 1152 ire = ire_create_v6( 1153 dst, 1154 &ipv6_all_ones, /* mask */ 1155 gateway, /* gateway addr */ 1156 IRE_HOST, 1157 prev_ire->ire_ill, 1158 ALL_ZONES, 1159 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1160 NULL, 1161 ipst); 1162 } else { 1163 ipif_t *ipif; 1164 in6_addr_t gw; 1165 1166 /* 1167 * Just create an on link entry, i.e. interface route. 1168 * The gateway field is our link-local on the ill. 1169 */ 1170 mutex_enter(&rill->ill_lock); 1171 for (ipif = rill->ill_ipif; ipif != NULL; 1172 ipif = ipif->ipif_next) { 1173 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1174 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr)) 1175 break; 1176 } 1177 if (ipif == NULL) { 1178 /* We have no link-local address! 
*/ 1179 mutex_exit(&rill->ill_lock); 1180 goto fail_redirect; 1181 } 1182 gw = ipif->ipif_v6lcl_addr; 1183 mutex_exit(&rill->ill_lock); 1184 1185 ire = ire_create_v6( 1186 dst, /* gateway == dst */ 1187 &ipv6_all_ones, /* mask */ 1188 &gw, /* gateway addr */ 1189 rill->ill_net_type, /* IF_[NO]RESOLVER */ 1190 prev_ire->ire_ill, 1191 ALL_ZONES, 1192 (RTF_DYNAMIC | RTF_HOST), 1193 NULL, 1194 ipst); 1195 } 1196 1197 if (ire == NULL) 1198 goto fail_redirect; 1199 1200 nire = ire_add(ire); 1201 /* Check if it was a duplicate entry */ 1202 if (nire != NULL && nire != ire) { 1203 ASSERT(nire->ire_identical_ref > 1); 1204 ire_delete(nire); 1205 ire_refrele(nire); 1206 nire = NULL; 1207 } 1208 ire = nire; 1209 if (ire != NULL) { 1210 ire_refrele(ire); /* Held in ire_add */ 1211 1212 /* tell routing sockets that we received a redirect */ 1213 ip_rts_change_v6(RTM_REDIRECT, 1214 &rd->nd_rd_dst, 1215 &rd->nd_rd_target, 1216 &ipv6_all_ones, 0, src, 1217 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1218 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1219 1220 /* 1221 * Delete any existing IRE_HOST type ires for this destination. 1222 * This together with the added IRE has the effect of 1223 * modifying an existing redirect. 1224 */ 1225 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1226 prev_ire->ire_ill, ALL_ZONES, NULL, 1227 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst, 1228 NULL); 1229 1230 if (redir_ire != NULL) { 1231 if (redir_ire->ire_flags & RTF_DYNAMIC) 1232 ire_delete(redir_ire); 1233 ire_refrele(redir_ire); 1234 } 1235 } 1236 1237 ire_refrele(prev_ire); 1238 prev_ire = NULL; 1239 1240 fail_redirect: 1241 if (prev_ire != NULL) 1242 ire_refrele(prev_ire); 1243 freemsg(mp); 1244 if (rill != ira->ira_rill) 1245 ill_refrele(rill); 1246 } 1247 1248 /* 1249 * Build and ship an IPv6 ICMP message using the packet data in mp, 1250 * and the ICMP header pointed to by "stuff". (May be called as 1251 * writer.) 
1252 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1253 * verify that an icmp error packet can be sent. 1254 * 1255 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1256 * source address (see above function). 1257 */ 1258 static void 1259 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1260 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1261 { 1262 ip6_t *ip6h; 1263 in6_addr_t v6dst; 1264 size_t len_needed; 1265 size_t msg_len; 1266 mblk_t *mp1; 1267 icmp6_t *icmp6; 1268 in6_addr_t v6src; 1269 ill_t *ill = ira->ira_ill; 1270 ip_stack_t *ipst = ill->ill_ipst; 1271 ip_xmit_attr_t ixas; 1272 1273 ip6h = (ip6_t *)mp->b_rptr; 1274 1275 bzero(&ixas, sizeof (ixas)); 1276 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1277 ixas.ixa_zoneid = ira->ira_zoneid; 1278 ixas.ixa_ifindex = 0; 1279 ixas.ixa_ipst = ipst; 1280 ixas.ixa_cred = kcred; 1281 ixas.ixa_cpid = NOPID; 1282 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1283 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1284 1285 /* 1286 * If the source of the original packet was link-local, then 1287 * make sure we send on the same ill (group) as we received it on. 1288 */ 1289 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1290 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1291 if (IS_UNDER_IPMP(ill)) 1292 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1293 else 1294 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1295 } 1296 1297 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1298 /* 1299 * Apply IPsec based on how IPsec was applied to 1300 * the packet that had the error. 1301 * 1302 * If it was an outbound packet that caused the ICMP 1303 * error, then the caller will have setup the IRA 1304 * appropriately. 1305 */ 1306 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1307 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1308 /* Note: mp already consumed and ip_drop_packet done */ 1309 return; 1310 } 1311 } else { 1312 /* 1313 * This is in clear. 
The icmp message we are building 1314 * here should go out in clear, independent of our policy. 1315 */ 1316 ixas.ixa_flags |= IXAF_NO_IPSEC; 1317 } 1318 1319 /* 1320 * If the caller specified the source we use that. 1321 * Otherwise, if the packet was for one of our unicast addresses, make 1322 * sure we respond with that as the source. Otherwise 1323 * have ip_output_simple pick the source address. 1324 */ 1325 if (v6src_ptr != NULL) { 1326 v6src = *v6src_ptr; 1327 } else { 1328 ire_t *ire; 1329 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1330 1331 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1332 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1333 match_flags |= MATCH_IRE_ILL; 1334 1335 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1336 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1337 match_flags, 0, ipst, NULL); 1338 if (ire != NULL) { 1339 v6src = ip6h->ip6_dst; 1340 ire_refrele(ire); 1341 } else { 1342 v6src = ipv6_all_zeros; 1343 ixas.ixa_flags |= IXAF_SET_SOURCE; 1344 } 1345 } 1346 v6dst = ip6h->ip6_src; 1347 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1348 msg_len = msgdsize(mp); 1349 if (msg_len > len_needed) { 1350 if (!adjmsg(mp, len_needed - msg_len)) { 1351 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1352 freemsg(mp); 1353 return; 1354 } 1355 msg_len = len_needed; 1356 } 1357 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1358 if (mp1 == NULL) { 1359 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1360 freemsg(mp); 1361 return; 1362 } 1363 mp1->b_cont = mp; 1364 mp = mp1; 1365 1366 /* 1367 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1368 * node generates be accepted in peace by all on-host destinations. 1369 * If we do NOT assume that all on-host destinations trust 1370 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1371 * (Look for IXAF_TRUSTED_ICMP). 
1372 */ 1373 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1374 1375 ip6h = (ip6_t *)mp->b_rptr; 1376 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1377 1378 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1379 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1380 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1381 ip6h->ip6_dst = v6dst; 1382 ip6h->ip6_src = v6src; 1383 msg_len += IPV6_HDR_LEN + len; 1384 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1385 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1386 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1387 } 1388 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1389 icmp6 = (icmp6_t *)&ip6h[1]; 1390 bcopy(stuff, (char *)icmp6, len); 1391 /* 1392 * Prepare for checksum by putting icmp length in the icmp 1393 * checksum field. The checksum is calculated in ip_output_wire_v6. 1394 */ 1395 icmp6->icmp6_cksum = ip6h->ip6_plen; 1396 if (icmp6->icmp6_type == ND_REDIRECT) { 1397 ip6h->ip6_hops = IPV6_MAX_HOPS; 1398 } 1399 1400 (void) ip_output_simple(mp, &ixas); 1401 ixa_cleanup(&ixas); 1402 } 1403 1404 /* 1405 * Update the output mib when ICMPv6 packets are sent. 
1406 */ 1407 void 1408 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1409 { 1410 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1411 1412 switch (icmp6->icmp6_type) { 1413 case ICMP6_DST_UNREACH: 1414 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1415 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1417 break; 1418 1419 case ICMP6_TIME_EXCEEDED: 1420 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1421 break; 1422 1423 case ICMP6_PARAM_PROB: 1424 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1425 break; 1426 1427 case ICMP6_PACKET_TOO_BIG: 1428 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1429 break; 1430 1431 case ICMP6_ECHO_REQUEST: 1432 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1433 break; 1434 1435 case ICMP6_ECHO_REPLY: 1436 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1437 break; 1438 1439 case ND_ROUTER_SOLICIT: 1440 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1441 break; 1442 1443 case ND_ROUTER_ADVERT: 1444 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1445 break; 1446 1447 case ND_NEIGHBOR_SOLICIT: 1448 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1449 break; 1450 1451 case ND_NEIGHBOR_ADVERT: 1452 BUMP_MIB(ill->ill_icmp6_mib, 1453 ipv6IfIcmpOutNeighborAdvertisements); 1454 break; 1455 1456 case ND_REDIRECT: 1457 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1458 break; 1459 1460 case MLD_LISTENER_QUERY: 1461 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1462 break; 1463 1464 case MLD_LISTENER_REPORT: 1465 case MLD_V2_LISTENER_REPORT: 1466 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1467 break; 1468 1469 case MLD_LISTENER_REDUCTION: 1470 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1471 break; 1472 } 1473 } 1474 1475 /* 1476 * Check if it is ok to send an ICMPv6 error packet in 1477 * response to the IP packet in mp. 
1478 * Free the message and return null if no 1479 * ICMP error packet should be sent. 1480 */ 1481 static mblk_t * 1482 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1483 { 1484 ill_t *ill = ira->ira_ill; 1485 ip_stack_t *ipst = ill->ill_ipst; 1486 boolean_t llbcast; 1487 ip6_t *ip6h; 1488 1489 if (!mp) 1490 return (NULL); 1491 1492 /* We view multicast and broadcast as the same.. */ 1493 llbcast = (ira->ira_flags & 1494 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1495 ip6h = (ip6_t *)mp->b_rptr; 1496 1497 /* Check if source address uniquely identifies the host */ 1498 1499 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1500 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1501 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1502 freemsg(mp); 1503 return (NULL); 1504 } 1505 1506 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1507 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1508 icmp6_t *icmp6; 1509 1510 if (mp->b_wptr - mp->b_rptr < len_needed) { 1511 if (!pullupmsg(mp, len_needed)) { 1512 BUMP_MIB(ill->ill_icmp6_mib, 1513 ipv6IfIcmpInErrors); 1514 freemsg(mp); 1515 return (NULL); 1516 } 1517 ip6h = (ip6_t *)mp->b_rptr; 1518 } 1519 icmp6 = (icmp6_t *)&ip6h[1]; 1520 /* Explicitly do not generate errors in response to redirects */ 1521 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1522 icmp6->icmp6_type == ND_REDIRECT) { 1523 freemsg(mp); 1524 return (NULL); 1525 } 1526 } 1527 /* 1528 * Check that the destination is not multicast and that the packet 1529 * was not sent on link layer broadcast or multicast. (Exception 1530 * is Packet too big message as per the draft - when mcast_ok is set.) 1531 */ 1532 if (!mcast_ok && 1533 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1534 freemsg(mp); 1535 return (NULL); 1536 } 1537 /* 1538 * If this is a labeled system, then check to see if we're allowed to 1539 * send a response to this particular sender. If not, then just drop. 
1540 */ 1541 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1542 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1543 freemsg(mp); 1544 return (NULL); 1545 } 1546 1547 if (icmp_err_rate_limit(ipst)) { 1548 /* 1549 * Only send ICMP error packets every so often. 1550 * This should be done on a per port/source basis, 1551 * but for now this will suffice. 1552 */ 1553 freemsg(mp); 1554 return (NULL); 1555 } 1556 return (mp); 1557 } 1558 1559 /* 1560 * Called when a packet was sent out the same link that it arrived on. 1561 * Check if it is ok to send a redirect and then send it. 1562 */ 1563 void 1564 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1565 ip_recv_attr_t *ira) 1566 { 1567 ill_t *ill = ira->ira_ill; 1568 ip_stack_t *ipst = ill->ill_ipst; 1569 in6_addr_t *v6targ; 1570 ire_t *src_ire_v6 = NULL; 1571 mblk_t *mp1; 1572 ire_t *nhop_ire = NULL; 1573 1574 /* 1575 * Don't send a redirect when forwarding a source 1576 * routed packet. 1577 */ 1578 if (ip_source_routed_v6(ip6h, mp, ipst)) 1579 return; 1580 1581 if (ire->ire_type & IRE_ONLINK) { 1582 /* Target is directly connected */ 1583 v6targ = &ip6h->ip6_dst; 1584 } else { 1585 /* Determine the most specific IRE used to send the packets */ 1586 nhop_ire = ire_nexthop(ire); 1587 if (nhop_ire == NULL) 1588 return; 1589 1590 /* 1591 * We won't send redirects to a router 1592 * that doesn't have a link local 1593 * address, but will forward. 
1594 */ 1595 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1596 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1597 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1598 ire_refrele(nhop_ire); 1599 return; 1600 } 1601 v6targ = &nhop_ire->ire_addr_v6; 1602 } 1603 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1604 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1605 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1606 1607 if (src_ire_v6 == NULL) { 1608 if (nhop_ire != NULL) 1609 ire_refrele(nhop_ire); 1610 return; 1611 } 1612 1613 /* 1614 * The source is directly connected. 1615 */ 1616 mp1 = copymsg(mp); 1617 if (mp1 != NULL) 1618 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1619 1620 if (nhop_ire != NULL) 1621 ire_refrele(nhop_ire); 1622 ire_refrele(src_ire_v6); 1623 } 1624 1625 /* 1626 * Generate an ICMPv6 redirect message. 1627 * Include target link layer address option if it exits. 1628 * Always include redirect header. 1629 */ 1630 static void 1631 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1632 ip_recv_attr_t *ira) 1633 { 1634 nd_redirect_t *rd; 1635 nd_opt_rd_hdr_t *rdh; 1636 uchar_t *buf; 1637 ncec_t *ncec = NULL; 1638 nd_opt_hdr_t *opt; 1639 int len; 1640 int ll_opt_len = 0; 1641 int max_redir_hdr_data_len; 1642 int pkt_len; 1643 in6_addr_t *srcp; 1644 ill_t *ill; 1645 boolean_t need_refrele; 1646 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1647 1648 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1649 if (mp == NULL) 1650 return; 1651 1652 if (IS_UNDER_IPMP(ira->ira_ill)) { 1653 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1654 if (ill == NULL) { 1655 ill = ira->ira_ill; 1656 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1657 ip_drop_output("no IPMP ill for sending redirect", 1658 mp, ill); 1659 freemsg(mp); 1660 return; 1661 } 1662 need_refrele = B_TRUE; 1663 } else { 1664 ill = ira->ira_ill; 1665 need_refrele = B_FALSE; 1666 } 1667 1668 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1669 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1670 ncec->ncec_lladdr != NULL) { 1671 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1672 ill->ill_phys_addr_length + 7)/8 * 8; 1673 } 1674 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1675 ASSERT(len % 4 == 0); 1676 buf = kmem_alloc(len, KM_NOSLEEP); 1677 if (buf == NULL) { 1678 if (ncec != NULL) 1679 ncec_refrele(ncec); 1680 if (need_refrele) 1681 ill_refrele(ill); 1682 freemsg(mp); 1683 return; 1684 } 1685 1686 rd = (nd_redirect_t *)buf; 1687 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1688 rd->nd_rd_code = 0; 1689 rd->nd_rd_reserved = 0; 1690 rd->nd_rd_target = *targetp; 1691 rd->nd_rd_dst = *dest; 1692 1693 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1694 if (ncec != NULL && ll_opt_len != 0) { 1695 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1696 opt->nd_opt_len = ll_opt_len/8; 1697 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1698 ill->ill_phys_addr_length); 1699 } 1700 if (ncec != NULL) 1701 ncec_refrele(ncec); 1702 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1703 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1704 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1705 max_redir_hdr_data_len = 1706 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1707 pkt_len = msgdsize(mp); 1708 /* Make sure mp is 8 byte aligned */ 1709 if (pkt_len > max_redir_hdr_data_len) { 1710 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1711 sizeof (nd_opt_rd_hdr_t))/8; 1712 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1713 } else { 1714 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1715 (void) adjmsg(mp, -(pkt_len % 8)); 1716 } 1717 rdh->nd_opt_rh_reserved1 = 0; 1718 rdh->nd_opt_rh_reserved2 = 0; 1719 /* ipif_v6lcl_addr contains the link-local source address */ 1720 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1721 1722 /* Redirects sent by router, and router is global zone */ 1723 ASSERT(ira->ira_zoneid == ALL_ZONES); 1724 
ira->ira_zoneid = GLOBAL_ZONEID; 1725 icmp_pkt_v6(mp, buf, len, srcp, ira); 1726 kmem_free(buf, len); 1727 if (need_refrele) 1728 ill_refrele(ill); 1729 } 1730 1731 1732 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1733 void 1734 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1735 ip_recv_attr_t *ira) 1736 { 1737 icmp6_t icmp6; 1738 1739 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1740 if (mp == NULL) 1741 return; 1742 1743 bzero(&icmp6, sizeof (icmp6_t)); 1744 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1745 icmp6.icmp6_code = code; 1746 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1747 } 1748 1749 /* 1750 * Generate an ICMP unreachable message. 1751 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1752 * constructed by the caller. 1753 */ 1754 void 1755 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1756 ip_recv_attr_t *ira) 1757 { 1758 icmp6_t icmp6; 1759 1760 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1761 if (mp == NULL) 1762 return; 1763 1764 bzero(&icmp6, sizeof (icmp6_t)); 1765 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1766 icmp6.icmp6_code = code; 1767 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1768 } 1769 1770 /* 1771 * Generate an ICMP pkt too big message. 1772 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1773 * constructed by the caller. 1774 */ 1775 void 1776 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1777 ip_recv_attr_t *ira) 1778 { 1779 icmp6_t icmp6; 1780 1781 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1782 if (mp == NULL) 1783 return; 1784 1785 bzero(&icmp6, sizeof (icmp6_t)); 1786 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1787 icmp6.icmp6_code = 0; 1788 icmp6.icmp6_mtu = htonl(mtu); 1789 1790 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1791 } 1792 1793 /* 1794 * Generate an ICMP parameter problem message. (May be called as writer.) 
1795 * 'offset' is the offset from the beginning of the packet in error. 1796 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1797 * constructed by the caller. 1798 */ 1799 static void 1800 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1801 boolean_t mcast_ok, ip_recv_attr_t *ira) 1802 { 1803 icmp6_t icmp6; 1804 1805 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1806 if (mp == NULL) 1807 return; 1808 1809 bzero((char *)&icmp6, sizeof (icmp6_t)); 1810 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1811 icmp6.icmp6_code = code; 1812 icmp6.icmp6_pptr = htonl(offset); 1813 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1814 } 1815 1816 void 1817 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1818 ip_recv_attr_t *ira) 1819 { 1820 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1821 uint16_t hdr_length; 1822 uint8_t *nexthdrp; 1823 uint32_t offset; 1824 ill_t *ill = ira->ira_ill; 1825 1826 /* Determine the offset of the bad nexthdr value */ 1827 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1828 /* Malformed packet */ 1829 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1830 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1831 freemsg(mp); 1832 return; 1833 } 1834 1835 offset = nexthdrp - mp->b_rptr; 1836 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1837 mcast_ok, ira); 1838 } 1839 1840 /* 1841 * Verify whether or not the IP address is a valid local address. 1842 * Could be a unicast, including one for a down interface. 1843 * If allow_mcbc then a multicast or broadcast address is also 1844 * acceptable. 1845 * 1846 * In the case of a multicast address, however, the 1847 * upper protocol is expected to reset the src address 1848 * to zero when we return IPVL_MCAST so that 1849 * no packets are emitted with multicast address as 1850 * source address. 
1851 * The addresses valid for bind are: 1852 * (1) - in6addr_any 1853 * (2) - IP address of an UP interface 1854 * (3) - IP address of a DOWN interface 1855 * (4) - a multicast address. In this case 1856 * the conn will only receive packets destined to 1857 * the specified multicast address. Note: the 1858 * application still has to issue an 1859 * IPV6_JOIN_GROUP socket option. 1860 * 1861 * In all the above cases, the bound address must be valid in the current zone. 1862 * When the address is loopback or multicast, there might be many matching IREs 1863 * so bind has to look up based on the zone. 1864 */ 1865 ip_laddr_t 1866 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1867 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1868 { 1869 ire_t *src_ire; 1870 uint_t match_flags; 1871 ill_t *ill = NULL; 1872 1873 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1874 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1875 1876 match_flags = MATCH_IRE_ZONEONLY; 1877 if (scopeid != 0) { 1878 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1879 if (ill == NULL) 1880 return (IPVL_BAD); 1881 match_flags |= MATCH_IRE_ILL; 1882 } 1883 1884 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1885 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1886 if (ill != NULL) 1887 ill_refrele(ill); 1888 1889 /* 1890 * If an address other than in6addr_any is requested, 1891 * we verify that it is a valid address for bind 1892 * Note: Following code is in if-else-if form for 1893 * readability compared to a condition check. 1894 */ 1895 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1896 /* 1897 * (2) Bind to address of local UP interface 1898 */ 1899 ire_refrele(src_ire); 1900 return (IPVL_UNICAST_UP); 1901 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1902 /* (4) bind to multicast address. 
*/ 1903 if (src_ire != NULL) 1904 ire_refrele(src_ire); 1905 1906 /* 1907 * Note: caller should take IPV6_MULTICAST_IF 1908 * into account when selecting a real source address. 1909 */ 1910 if (allow_mcbc) 1911 return (IPVL_MCAST); 1912 else 1913 return (IPVL_BAD); 1914 } else { 1915 ipif_t *ipif; 1916 1917 /* 1918 * (3) Bind to address of local DOWN interface? 1919 * (ipif_lookup_addr() looks up all interfaces 1920 * but we do not get here for UP interfaces 1921 * - case (2) above) 1922 */ 1923 if (src_ire != NULL) 1924 ire_refrele(src_ire); 1925 1926 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1927 if (ipif == NULL) 1928 return (IPVL_BAD); 1929 1930 /* Not a useful source? */ 1931 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1932 ipif_refrele(ipif); 1933 return (IPVL_BAD); 1934 } 1935 ipif_refrele(ipif); 1936 return (IPVL_UNICAST_DOWN); 1937 } 1938 } 1939 1940 /* 1941 * Verify that both the source and destination addresses are valid. If 1942 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1943 * i.e. have no route to it. Protocols like TCP want to verify destination 1944 * reachability, while tunnels do not. 1945 * 1946 * Determine the route, the interface, and (optionally) the source address 1947 * to use to reach a given destination. 1948 * Note that we allow connect to broadcast and multicast addresses when 1949 * IPDF_ALLOW_MCBC is set. 1950 * first_hop and dst_addr are normally the same, but if source routing 1951 * they will differ; in that case the first_hop is what we'll use for the 1952 * routing lookup but the dce and label checks will be done on dst_addr, 1953 * 1954 * If uinfo is set, then we fill in the best available information 1955 * we have for the destination. This is based on (in priority order) any 1956 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1957 * ill_mtu. 1958 * 1959 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1960 * always do the label check on dst_addr. 1961 * 1962 * Assumes that the caller has set ixa_scopeid for link-local communication. 1963 */ 1964 int 1965 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1966 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1967 uint32_t flags, uint_t mac_mode) 1968 { 1969 ire_t *ire; 1970 int error = 0; 1971 in6_addr_t setsrc; /* RTF_SETSRC */ 1972 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1973 ip_stack_t *ipst = ixa->ixa_ipst; 1974 dce_t *dce; 1975 uint_t pmtu; 1976 uint_t ifindex; 1977 uint_t generation; 1978 nce_t *nce; 1979 ill_t *ill = NULL; 1980 boolean_t multirt = B_FALSE; 1981 1982 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1983 1984 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1985 1986 /* 1987 * We never send to zero; the ULPs map it to the loopback address. 1988 * We can't allow it since we use zero to mean unitialized in some 1989 * places. 1990 */ 1991 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1992 1993 if (is_system_labeled()) { 1994 ts_label_t *tsl = NULL; 1995 1996 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 1997 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 1998 if (error != 0) 1999 return (error); 2000 if (tsl != NULL) { 2001 /* Update the label */ 2002 ip_xmit_attr_replace_tsl(ixa, tsl); 2003 } 2004 } 2005 2006 setsrc = ipv6_all_zeros; 2007 /* 2008 * Select a route; For IPMP interfaces, we would only select 2009 * a "hidden" route (i.e., going through a specific under_ill) 2010 * if ixa_ifindex has been specified. 2011 */ 2012 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation, 2013 &setsrc, &error, &multirt); 2014 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2015 if (error != 0) 2016 goto bad_addr; 2017 2018 /* 2019 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2020 * If IPDF_VERIFY_DST is set, the destination must be reachable. 
2021 * Otherwise the destination needn't be reachable. 2022 * 2023 * If we match on a reject or black hole, then we've got a 2024 * local failure. May as well fail out the connect() attempt, 2025 * since it's never going to succeed. 2026 */ 2027 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2028 /* 2029 * If we're verifying destination reachability, we always want 2030 * to complain here. 2031 * 2032 * If we're not verifying destination reachability but the 2033 * destination has a route, we still want to fail on the 2034 * temporary address and broadcast address tests. 2035 * 2036 * In both cases do we let the code continue so some reasonable 2037 * information is returned to the caller. That enables the 2038 * caller to use (and even cache) the IRE. conn_ip_ouput will 2039 * use the generation mismatch path to check for the unreachable 2040 * case thereby avoiding any specific check in the main path. 2041 */ 2042 ASSERT(generation == IRE_GENERATION_VERIFY); 2043 if (flags & IPDF_VERIFY_DST) { 2044 /* 2045 * Set errno but continue to set up ixa_ire to be 2046 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2047 * That allows callers to use ip_output to get an 2048 * ICMP error back. 2049 */ 2050 if (!(ire->ire_type & IRE_HOST)) 2051 error = ENETUNREACH; 2052 else 2053 error = EHOSTUNREACH; 2054 } 2055 } 2056 2057 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2058 !(flags & IPDF_ALLOW_MCBC)) { 2059 ire_refrele(ire); 2060 ire = ire_reject(ipst, B_FALSE); 2061 generation = IRE_GENERATION_VERIFY; 2062 error = ENETUNREACH; 2063 } 2064 2065 /* Cache things */ 2066 if (ixa->ixa_ire != NULL) 2067 ire_refrele_notr(ixa->ixa_ire); 2068 #ifdef DEBUG 2069 ire_refhold_notr(ire); 2070 ire_refrele(ire); 2071 #endif 2072 ixa->ixa_ire = ire; 2073 ixa->ixa_ire_generation = generation; 2074 2075 /* 2076 * Ensure that ixa_dce is always set any time that ixa_ire is set, 2077 * since some callers will send a packet to conn_ip_output() even if 2078 * there's an error. 
2079 */ 2080 ifindex = 0; 2081 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2082 /* If we are creating a DCE we'd better have an ifindex */ 2083 if (ill != NULL) 2084 ifindex = ill->ill_phyint->phyint_ifindex; 2085 else 2086 flags &= ~IPDF_UNIQUE_DCE; 2087 } 2088 2089 if (flags & IPDF_UNIQUE_DCE) { 2090 /* Fallback to the default dce if allocation fails */ 2091 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2092 if (dce != NULL) { 2093 generation = dce->dce_generation; 2094 } else { 2095 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2096 &generation); 2097 } 2098 } else { 2099 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2100 } 2101 ASSERT(dce != NULL); 2102 if (ixa->ixa_dce != NULL) 2103 dce_refrele_notr(ixa->ixa_dce); 2104 #ifdef DEBUG 2105 dce_refhold_notr(dce); 2106 dce_refrele(dce); 2107 #endif 2108 ixa->ixa_dce = dce; 2109 ixa->ixa_dce_generation = generation; 2110 2111 2112 /* 2113 * For multicast with multirt we have a flag passed back from 2114 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2115 * possible multicast address. 2116 * We also need a flag for multicast since we can't check 2117 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2118 */ 2119 if (multirt) { 2120 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2121 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2122 } else { 2123 ixa->ixa_postfragfn = ire->ire_postfragfn; 2124 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2125 } 2126 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2127 /* Get an nce to cache. */ 2128 nce = ire_to_nce(ire, NULL, firsthop); 2129 if (nce == NULL) { 2130 /* Allocation failure? */ 2131 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2132 } else { 2133 if (ixa->ixa_nce != NULL) 2134 nce_refrele(ixa->ixa_nce); 2135 ixa->ixa_nce = nce; 2136 } 2137 } 2138 2139 /* 2140 * If the source address is a loopback address, the 2141 * destination had best be local or multicast. 
2142 * If we are sending to an IRE_LOCAL using a loopback source then 2143 * it had better be the same zoneid. 2144 */ 2145 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2146 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2147 ire = NULL; /* Stored in ixa_ire */ 2148 error = EADDRNOTAVAIL; 2149 goto bad_addr; 2150 } 2151 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2152 ire = NULL; /* Stored in ixa_ire */ 2153 error = EADDRNOTAVAIL; 2154 goto bad_addr; 2155 } 2156 } 2157 2158 /* 2159 * Does the caller want us to pick a source address? 2160 */ 2161 if (flags & IPDF_SELECT_SRC) { 2162 in6_addr_t src_addr; 2163 2164 /* 2165 * We use use ire_nexthop_ill to avoid the under ipmp 2166 * interface for source address selection. Note that for ipmp 2167 * probe packets, ixa_ifindex would have been specified, and 2168 * the ip_select_route() invocation would have picked an ire 2169 * will ire_ill pointing at an under interface. 2170 */ 2171 ill = ire_nexthop_ill(ire); 2172 2173 /* If unreachable we have no ill but need some source */ 2174 if (ill == NULL) { 2175 src_addr = ipv6_loopback; 2176 /* Make sure we look for a better source address */ 2177 generation = SRC_GENERATION_VERIFY; 2178 } else { 2179 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2180 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2181 &src_addr, &generation, NULL); 2182 if (error != 0) { 2183 ire = NULL; /* Stored in ixa_ire */ 2184 goto bad_addr; 2185 } 2186 } 2187 2188 /* 2189 * We allow the source address to to down. 2190 * However, we check that we don't use the loopback address 2191 * as a source when sending out on the wire. 
2192 */ 2193 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2194 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2195 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2196 ire = NULL; /* Stored in ixa_ire */ 2197 error = EADDRNOTAVAIL; 2198 goto bad_addr; 2199 } 2200 2201 *src_addrp = src_addr; 2202 ixa->ixa_src_generation = generation; 2203 } 2204 2205 /* 2206 * Make sure we don't leave an unreachable ixa_nce in place 2207 * since ip_select_route is used when we unplumb i.e., remove 2208 * references on ixa_ire, ixa_nce, and ixa_dce. 2209 */ 2210 nce = ixa->ixa_nce; 2211 if (nce != NULL && nce->nce_is_condemned) { 2212 nce_refrele(nce); 2213 ixa->ixa_nce = NULL; 2214 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2215 } 2216 2217 /* 2218 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2219 * multicast. But pmtu discovery is only enabled for connected 2220 * sockets in general. 2221 */ 2222 2223 /* 2224 * Set initial value for fragmentation limit. Either conn_ip_output 2225 * or ULP might updates it when there are routing changes. 2226 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2227 */ 2228 pmtu = ip_get_pmtu(ixa); 2229 ixa->ixa_fragsize = pmtu; 2230 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2231 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2232 ixa->ixa_pmtu = pmtu; 2233 2234 /* 2235 * Extract information useful for some transports. 2236 * First we look for DCE metrics. Then we take what we have in 2237 * the metrics in the route, where the offlink is used if we have 2238 * one. 
2239 */ 2240 if (uinfo != NULL) { 2241 bzero(uinfo, sizeof (*uinfo)); 2242 2243 if (dce->dce_flags & DCEF_UINFO) 2244 *uinfo = dce->dce_uinfo; 2245 2246 rts_merge_metrics(uinfo, &ire->ire_metrics); 2247 2248 /* Allow ire_metrics to decrease the path MTU from above */ 2249 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2250 uinfo->iulp_mtu = pmtu; 2251 2252 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2253 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2254 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2255 } 2256 2257 if (ill != NULL) 2258 ill_refrele(ill); 2259 2260 return (error); 2261 2262 bad_addr: 2263 if (ire != NULL) 2264 ire_refrele(ire); 2265 2266 if (ill != NULL) 2267 ill_refrele(ill); 2268 2269 /* 2270 * Make sure we don't leave an unreachable ixa_nce in place 2271 * since ip_select_route is used when we unplumb i.e., remove 2272 * references on ixa_ire, ixa_nce, and ixa_dce. 2273 */ 2274 nce = ixa->ixa_nce; 2275 if (nce != NULL && nce->nce_is_condemned) { 2276 nce_refrele(nce); 2277 ixa->ixa_nce = NULL; 2278 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2279 } 2280 2281 return (error); 2282 } 2283 2284 /* 2285 * Handle protocols with which IP is less intimate. There 2286 * can be more than one stream bound to a particular 2287 * protocol. When this is the case, normally each one gets a copy 2288 * of any incoming packets. 2289 * 2290 * Zones notes: 2291 * Packets will be distributed to conns in all zones. This is really only 2292 * useful for ICMPv6 as only applications in the global zone can create raw 2293 * sockets for other protocols. 
2294 */ 2295 void 2296 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2297 { 2298 mblk_t *mp1; 2299 in6_addr_t laddr = ip6h->ip6_dst; 2300 conn_t *connp, *first_connp, *next_connp; 2301 connf_t *connfp; 2302 ill_t *ill = ira->ira_ill; 2303 ip_stack_t *ipst = ill->ill_ipst; 2304 2305 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2306 mutex_enter(&connfp->connf_lock); 2307 connp = connfp->connf_head; 2308 for (connp = connfp->connf_head; connp != NULL; 2309 connp = connp->conn_next) { 2310 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2311 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2312 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2313 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2314 break; 2315 } 2316 2317 if (connp == NULL) { 2318 /* 2319 * No one bound to this port. Is 2320 * there a client that wants all 2321 * unclaimed datagrams? 2322 */ 2323 mutex_exit(&connfp->connf_lock); 2324 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2325 ICMP6_PARAMPROB_NEXTHEADER, ira); 2326 return; 2327 } 2328 2329 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2330 2331 CONN_INC_REF(connp); 2332 first_connp = connp; 2333 2334 /* 2335 * XXX: Fix the multiple protocol listeners case. We should not 2336 * be walking the conn->conn_next list here. 
2337 */ 2338 connp = connp->conn_next; 2339 for (;;) { 2340 while (connp != NULL) { 2341 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2342 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2343 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2344 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2345 ira, connp))) 2346 break; 2347 connp = connp->conn_next; 2348 } 2349 2350 if (connp == NULL) { 2351 /* No more interested clients */ 2352 connp = first_connp; 2353 break; 2354 } 2355 if (((mp1 = dupmsg(mp)) == NULL) && 2356 ((mp1 = copymsg(mp)) == NULL)) { 2357 /* Memory allocation failed */ 2358 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2359 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2360 connp = first_connp; 2361 break; 2362 } 2363 2364 CONN_INC_REF(connp); 2365 mutex_exit(&connfp->connf_lock); 2366 2367 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2368 ira); 2369 2370 mutex_enter(&connfp->connf_lock); 2371 /* Follow the next pointer before releasing the conn. */ 2372 next_connp = connp->conn_next; 2373 CONN_DEC_REF(connp); 2374 connp = next_connp; 2375 } 2376 2377 /* Last one. Send it upstream. */ 2378 mutex_exit(&connfp->connf_lock); 2379 2380 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2381 2382 CONN_DEC_REF(connp); 2383 } 2384 2385 /* 2386 * Called when it is conceptually a ULP that would sent the packet 2387 * e.g., port unreachable and nexthdr unknown. Check that the packet 2388 * would have passed the IPsec global policy before sending the error. 2389 * 2390 * Send an ICMP error after patching up the packet appropriately. 2391 * Uses ip_drop_input and bumps the appropriate MIB. 2392 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2393 */ 2394 void 2395 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2396 ip_recv_attr_t *ira) 2397 { 2398 ip6_t *ip6h; 2399 boolean_t secure; 2400 ill_t *ill = ira->ira_ill; 2401 ip_stack_t *ipst = ill->ill_ipst; 2402 netstack_t *ns = ipst->ips_netstack; 2403 ipsec_stack_t *ipss = ns->netstack_ipsec; 2404 2405 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2406 2407 /* 2408 * We are generating an icmp error for some inbound packet. 2409 * Called from all ip_fanout_(udp, tcp, proto) functions. 2410 * Before we generate an error, check with global policy 2411 * to see whether this is allowed to enter the system. As 2412 * there is no "conn", we are checking with global policy. 2413 */ 2414 ip6h = (ip6_t *)mp->b_rptr; 2415 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2416 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2417 if (mp == NULL) 2418 return; 2419 } 2420 2421 /* We never send errors for protocols that we do implement */ 2422 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2423 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2424 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2425 freemsg(mp); 2426 return; 2427 } 2428 2429 switch (icmp_type) { 2430 case ICMP6_DST_UNREACH: 2431 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2432 2433 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2434 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2435 2436 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2437 break; 2438 case ICMP6_PARAM_PROB: 2439 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2440 2441 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2442 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2443 2444 /* Let the system determine the offset for this one */ 2445 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2446 break; 2447 default: 2448 #ifdef DEBUG 2449 panic("ip_fanout_send_icmp_v6: wrong type"); 2450 /*NOTREACHED*/ 2451 #else 2452 freemsg(mp); 2453 break; 2454 #endif 2455 } 2456 } 2457 2458 /* 2459 * 
Fanout for UDP packets that are multicast or ICMP errors. 2460 * (Unicast fanout is handled in ip_input_v6.) 2461 * 2462 * If SO_REUSEADDR is set all multicast packets 2463 * will be delivered to all conns bound to the same port. 2464 * 2465 * Fanout for UDP packets. 2466 * The caller puts <fport, lport> in the ports parameter. 2467 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2468 * 2469 * If SO_REUSEADDR is set all multicast and broadcast packets 2470 * will be delivered to all conns bound to the same port. 2471 * 2472 * Zones notes: 2473 * Earlier in ip_input on a system with multiple shared-IP zones we 2474 * duplicate the multicast and broadcast packets and send them up 2475 * with each explicit zoneid that exists on that ill. 2476 * This means that here we can match the zoneid with SO_ALLZONES being special. 2477 */ 2478 void 2479 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2480 ip_recv_attr_t *ira) 2481 { 2482 in6_addr_t laddr; 2483 conn_t *connp; 2484 connf_t *connfp; 2485 in6_addr_t faddr; 2486 ill_t *ill = ira->ira_ill; 2487 ip_stack_t *ipst = ill->ill_ipst; 2488 2489 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2490 2491 laddr = ip6h->ip6_dst; 2492 faddr = ip6h->ip6_src; 2493 2494 /* Attempt to find a client stream based on destination port. 
*/ 2495 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2496 mutex_enter(&connfp->connf_lock); 2497 connp = connfp->connf_head; 2498 while (connp != NULL) { 2499 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2500 conn_wantpacket_v6(connp, ira, ip6h) && 2501 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2502 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2503 break; 2504 connp = connp->conn_next; 2505 } 2506 2507 if (connp == NULL) 2508 goto notfound; 2509 2510 CONN_INC_REF(connp); 2511 2512 if (connp->conn_reuseaddr) { 2513 conn_t *first_connp = connp; 2514 conn_t *next_connp; 2515 mblk_t *mp1; 2516 2517 connp = connp->conn_next; 2518 for (;;) { 2519 while (connp != NULL) { 2520 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2521 fport, faddr) && 2522 conn_wantpacket_v6(connp, ira, ip6h) && 2523 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2524 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2525 ira, connp))) 2526 break; 2527 connp = connp->conn_next; 2528 } 2529 if (connp == NULL) { 2530 /* No more interested clients */ 2531 connp = first_connp; 2532 break; 2533 } 2534 if (((mp1 = dupmsg(mp)) == NULL) && 2535 ((mp1 = copymsg(mp)) == NULL)) { 2536 /* Memory allocation failed */ 2537 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2538 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2539 connp = first_connp; 2540 break; 2541 } 2542 2543 CONN_INC_REF(connp); 2544 mutex_exit(&connfp->connf_lock); 2545 2546 IP6_STAT(ipst, ip6_udp_fanmb); 2547 ip_fanout_udp_conn(connp, mp1, NULL, 2548 (ip6_t *)mp1->b_rptr, ira); 2549 2550 mutex_enter(&connfp->connf_lock); 2551 /* Follow the next pointer before releasing the conn. */ 2552 next_connp = connp->conn_next; 2553 IP6_STAT(ipst, ip6_udp_fanmb); 2554 CONN_DEC_REF(connp); 2555 connp = next_connp; 2556 } 2557 } 2558 2559 /* Last one. Send it upstream. 
*/ 2560 mutex_exit(&connfp->connf_lock); 2561 2562 IP6_STAT(ipst, ip6_udp_fanmb); 2563 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2564 CONN_DEC_REF(connp); 2565 return; 2566 2567 notfound: 2568 mutex_exit(&connfp->connf_lock); 2569 /* 2570 * No one bound to this port. Is 2571 * there a client that wants all 2572 * unclaimed datagrams? 2573 */ 2574 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2575 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2576 ip_fanout_proto_v6(mp, ip6h, ira); 2577 } else { 2578 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2579 ICMP6_DST_UNREACH_NOPORT, ira); 2580 } 2581 } 2582 2583 /* 2584 * int ip_find_hdr_v6() 2585 * 2586 * This routine is used by the upper layer protocols, iptun, and IPsec: 2587 * - Set extension header pointers to appropriate locations 2588 * - Determine IPv6 header length and return it 2589 * - Return a pointer to the last nexthdr value 2590 * 2591 * The caller must initialize ipp_fields. 2592 * The upper layer protocols normally set label_separate which makes the 2593 * routine put the TX label in ipp_label_v6. If this is not set then 2594 * the hop-by-hop options including the label are placed in ipp_hopopts. 2595 * 2596 * NOTE: If multiple extension headers of the same type are present, 2597 * ip_find_hdr_v6() will set the respective extension header pointers 2598 * to the first one that it encounters in the IPv6 header. It also 2599 * skips fragment headers. This routine deals with malformed packets 2600 * of various sorts in which case the returned length is up to the 2601 * malformed part. 
2602 */ 2603 int 2604 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2605 uint8_t *nexthdrp) 2606 { 2607 uint_t length, ehdrlen; 2608 uint8_t nexthdr; 2609 uint8_t *whereptr, *endptr; 2610 ip6_dest_t *tmpdstopts; 2611 ip6_rthdr_t *tmprthdr; 2612 ip6_hbh_t *tmphopopts; 2613 ip6_frag_t *tmpfraghdr; 2614 2615 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2616 ipp->ipp_hoplimit = ip6h->ip6_hops; 2617 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2618 ipp->ipp_addr = ip6h->ip6_dst; 2619 2620 length = IPV6_HDR_LEN; 2621 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2622 endptr = mp->b_wptr; 2623 2624 nexthdr = ip6h->ip6_nxt; 2625 while (whereptr < endptr) { 2626 /* Is there enough left for len + nexthdr? */ 2627 if (whereptr + MIN_EHDR_LEN > endptr) 2628 goto done; 2629 2630 switch (nexthdr) { 2631 case IPPROTO_HOPOPTS: { 2632 /* We check for any CIPSO */ 2633 uchar_t *secopt; 2634 boolean_t hbh_needed; 2635 uchar_t *after_secopt; 2636 2637 tmphopopts = (ip6_hbh_t *)whereptr; 2638 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2639 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2640 goto done; 2641 nexthdr = tmphopopts->ip6h_nxt; 2642 2643 if (!label_separate) { 2644 secopt = NULL; 2645 after_secopt = whereptr; 2646 } else { 2647 /* 2648 * We have dropped packets with bad options in 2649 * ip6_input. No need to check return value 2650 * here. 
2651 */ 2652 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2653 &secopt, &after_secopt, &hbh_needed); 2654 } 2655 if (secopt != NULL && after_secopt - whereptr > 0) { 2656 ipp->ipp_fields |= IPPF_LABEL_V6; 2657 ipp->ipp_label_v6 = secopt; 2658 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2659 } else { 2660 ipp->ipp_label_len_v6 = 0; 2661 after_secopt = whereptr; 2662 hbh_needed = B_TRUE; 2663 } 2664 /* return only 1st hbh */ 2665 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2666 ipp->ipp_fields |= IPPF_HOPOPTS; 2667 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2668 ipp->ipp_hopoptslen = ehdrlen - 2669 ipp->ipp_label_len_v6; 2670 } 2671 break; 2672 } 2673 case IPPROTO_DSTOPTS: 2674 tmpdstopts = (ip6_dest_t *)whereptr; 2675 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2676 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2677 goto done; 2678 nexthdr = tmpdstopts->ip6d_nxt; 2679 /* 2680 * ipp_dstopts is set to the destination header after a 2681 * routing header. 2682 * Assume it is a post-rthdr destination header 2683 * and adjust when we find an rthdr. 2684 */ 2685 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2686 ipp->ipp_fields |= IPPF_DSTOPTS; 2687 ipp->ipp_dstopts = tmpdstopts; 2688 ipp->ipp_dstoptslen = ehdrlen; 2689 } 2690 break; 2691 case IPPROTO_ROUTING: 2692 tmprthdr = (ip6_rthdr_t *)whereptr; 2693 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2694 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2695 goto done; 2696 nexthdr = tmprthdr->ip6r_nxt; 2697 /* return only 1st rthdr */ 2698 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2699 ipp->ipp_fields |= IPPF_RTHDR; 2700 ipp->ipp_rthdr = tmprthdr; 2701 ipp->ipp_rthdrlen = ehdrlen; 2702 } 2703 /* 2704 * Make any destination header we've seen be a 2705 * pre-rthdr destination header. 
2706 */ 2707 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2708 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2709 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2710 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2711 ipp->ipp_dstopts = NULL; 2712 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2713 ipp->ipp_dstoptslen = 0; 2714 } 2715 break; 2716 case IPPROTO_FRAGMENT: 2717 tmpfraghdr = (ip6_frag_t *)whereptr; 2718 ehdrlen = sizeof (ip6_frag_t); 2719 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2720 goto done; 2721 nexthdr = tmpfraghdr->ip6f_nxt; 2722 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2723 ipp->ipp_fields |= IPPF_FRAGHDR; 2724 ipp->ipp_fraghdr = tmpfraghdr; 2725 ipp->ipp_fraghdrlen = ehdrlen; 2726 } 2727 break; 2728 case IPPROTO_NONE: 2729 default: 2730 goto done; 2731 } 2732 length += ehdrlen; 2733 whereptr += ehdrlen; 2734 } 2735 done: 2736 if (nexthdrp != NULL) 2737 *nexthdrp = nexthdr; 2738 return (length); 2739 } 2740 2741 /* 2742 * Try to determine where and what are the IPv6 header length and 2743 * pointer to nexthdr value for the upper layer protocol (or an 2744 * unknown next hdr). 2745 * 2746 * Parameters returns a pointer to the nexthdr value; 2747 * Must handle malformed packets of various sorts. 2748 * Function returns failure for malformed cases. 2749 */ 2750 boolean_t 2751 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 2752 uint8_t **nexthdrpp) 2753 { 2754 uint16_t length; 2755 uint_t ehdrlen; 2756 uint8_t *nexthdrp; 2757 uint8_t *whereptr; 2758 uint8_t *endptr; 2759 ip6_dest_t *desthdr; 2760 ip6_rthdr_t *rthdr; 2761 ip6_frag_t *fraghdr; 2762 2763 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 2764 length = IPV6_HDR_LEN; 2765 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2766 endptr = mp->b_wptr; 2767 2768 nexthdrp = &ip6h->ip6_nxt; 2769 while (whereptr < endptr) { 2770 /* Is there enough left for len + nexthdr? 
*/ 2771 if (whereptr + MIN_EHDR_LEN > endptr) 2772 break; 2773 2774 switch (*nexthdrp) { 2775 case IPPROTO_HOPOPTS: 2776 case IPPROTO_DSTOPTS: 2777 /* Assumes the headers are identical for hbh and dst */ 2778 desthdr = (ip6_dest_t *)whereptr; 2779 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2780 if ((uchar_t *)desthdr + ehdrlen > endptr) 2781 return (B_FALSE); 2782 nexthdrp = &desthdr->ip6d_nxt; 2783 break; 2784 case IPPROTO_ROUTING: 2785 rthdr = (ip6_rthdr_t *)whereptr; 2786 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2787 if ((uchar_t *)rthdr + ehdrlen > endptr) 2788 return (B_FALSE); 2789 nexthdrp = &rthdr->ip6r_nxt; 2790 break; 2791 case IPPROTO_FRAGMENT: 2792 fraghdr = (ip6_frag_t *)whereptr; 2793 ehdrlen = sizeof (ip6_frag_t); 2794 if ((uchar_t *)&fraghdr[1] > endptr) 2795 return (B_FALSE); 2796 nexthdrp = &fraghdr->ip6f_nxt; 2797 break; 2798 case IPPROTO_NONE: 2799 /* No next header means we're finished */ 2800 default: 2801 *hdr_length_ptr = length; 2802 *nexthdrpp = nexthdrp; 2803 return (B_TRUE); 2804 } 2805 length += ehdrlen; 2806 whereptr += ehdrlen; 2807 *hdr_length_ptr = length; 2808 *nexthdrpp = nexthdrp; 2809 } 2810 switch (*nexthdrp) { 2811 case IPPROTO_HOPOPTS: 2812 case IPPROTO_DSTOPTS: 2813 case IPPROTO_ROUTING: 2814 case IPPROTO_FRAGMENT: 2815 /* 2816 * If any know extension headers are still to be processed, 2817 * the packet's malformed (or at least all the IP header(s) are 2818 * not in the same mblk - and that should never happen. 2819 */ 2820 return (B_FALSE); 2821 2822 default: 2823 /* 2824 * If we get here, we know that all of the IP headers were in 2825 * the same mblk, even if the ULP header is in the next mblk. 2826 */ 2827 *hdr_length_ptr = length; 2828 *nexthdrpp = nexthdrp; 2829 return (B_TRUE); 2830 } 2831 } 2832 2833 /* 2834 * Return the length of the IPv6 related headers (including extension headers) 2835 * Returns a length even if the packet is malformed. 
2836 */ 2837 int 2838 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2839 { 2840 uint16_t hdr_len; 2841 uint8_t *nexthdrp; 2842 2843 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 2844 return (hdr_len); 2845 } 2846 2847 /* 2848 * Parse and process any hop-by-hop or destination options. 2849 * 2850 * Assumes that q is an ill read queue so that ICMP errors for link-local 2851 * destinations are sent out the correct interface. 2852 * 2853 * Returns -1 if there was an error and mp has been consumed. 2854 * Returns 0 if no special action is needed. 2855 * Returns 1 if the packet contained a router alert option for this node 2856 * which is verified to be "interesting/known" for our implementation. 2857 * 2858 * XXX Note: In future as more hbh or dest options are defined, 2859 * it may be better to have different routines for hbh and dest 2860 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2861 * may have same value in different namespaces. Or is it same namespace ?? 2862 * Current code checks for each opt_type (other than pads) if it is in 2863 * the expected nexthdr (hbh or dest) 2864 */ 2865 int 2866 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2867 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2868 { 2869 uint8_t opt_type; 2870 uint_t optused; 2871 int ret = 0; 2872 const char *errtype; 2873 ill_t *ill = ira->ira_ill; 2874 ip_stack_t *ipst = ill->ill_ipst; 2875 2876 while (optlen != 0) { 2877 opt_type = *optptr; 2878 if (opt_type == IP6OPT_PAD1) { 2879 optused = 1; 2880 } else { 2881 if (optlen < 2) 2882 goto bad_opt; 2883 errtype = "malformed"; 2884 if (opt_type == ip6opt_ls) { 2885 optused = 2 + optptr[1]; 2886 if (optused > optlen) 2887 goto bad_opt; 2888 } else switch (opt_type) { 2889 case IP6OPT_PADN: 2890 /* 2891 * Note:We don't verify that (N-2) pad octets 2892 * are zero as required by spec. Adhere to 2893 * "be liberal in what you accept..." 
part of 2894 * implementation philosophy (RFC791,RFC1122) 2895 */ 2896 optused = 2 + optptr[1]; 2897 if (optused > optlen) 2898 goto bad_opt; 2899 break; 2900 2901 case IP6OPT_JUMBO: 2902 if (hdr_type != IPPROTO_HOPOPTS) 2903 goto opt_error; 2904 goto opt_error; /* XXX Not implemented! */ 2905 2906 case IP6OPT_ROUTER_ALERT: { 2907 struct ip6_opt_router *or; 2908 2909 if (hdr_type != IPPROTO_HOPOPTS) 2910 goto opt_error; 2911 optused = 2 + optptr[1]; 2912 if (optused > optlen) 2913 goto bad_opt; 2914 or = (struct ip6_opt_router *)optptr; 2915 /* Check total length and alignment */ 2916 if (optused != sizeof (*or) || 2917 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2918 goto opt_error; 2919 /* Check value */ 2920 switch (*((uint16_t *)or->ip6or_value)) { 2921 case IP6_ALERT_MLD: 2922 case IP6_ALERT_RSVP: 2923 ret = 1; 2924 } 2925 break; 2926 } 2927 case IP6OPT_HOME_ADDRESS: { 2928 /* 2929 * Minimal support for the home address option 2930 * (which is required by all IPv6 nodes). 2931 * Implement by just swapping the home address 2932 * and source address. 2933 * XXX Note: this has IPsec implications since 2934 * AH needs to take this into account. 2935 * Also, when IPsec is used we need to ensure 2936 * that this is only processed once 2937 * in the received packet (to avoid swapping 2938 * back and forth). 2939 * NOTE:This option processing is considered 2940 * to be unsafe and prone to a denial of 2941 * service attack. 2942 * The current processing is not safe even with 2943 * IPsec secured IP packets. Since the home 2944 * address option processing requirement still 2945 * is in the IETF draft and in the process of 2946 * being redefined for its usage, it has been 2947 * decided to turn off the option by default. 2948 * If this section of code needs to be executed, 2949 * ndd variable ip6_ignore_home_address_opt 2950 * should be set to 0 at the user's own risk. 
2951 */ 2952 struct ip6_opt_home_address *oh; 2953 in6_addr_t tmp; 2954 2955 if (ipst->ips_ipv6_ignore_home_address_opt) 2956 goto opt_error; 2957 2958 if (hdr_type != IPPROTO_DSTOPTS) 2959 goto opt_error; 2960 optused = 2 + optptr[1]; 2961 if (optused > optlen) 2962 goto bad_opt; 2963 2964 /* 2965 * We did this dest. opt the first time 2966 * around (i.e. before AH processing). 2967 * If we've done AH... stop now. 2968 */ 2969 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2970 ira->ira_ipsec_ah_sa != NULL) 2971 break; 2972 2973 oh = (struct ip6_opt_home_address *)optptr; 2974 /* Check total length and alignment */ 2975 if (optused < sizeof (*oh) || 2976 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2977 goto opt_error; 2978 /* Swap ip6_src and the home address */ 2979 tmp = ip6h->ip6_src; 2980 /* XXX Note: only 8 byte alignment option */ 2981 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2982 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2983 break; 2984 } 2985 2986 case IP6OPT_TUNNEL_LIMIT: 2987 if (hdr_type != IPPROTO_DSTOPTS) { 2988 goto opt_error; 2989 } 2990 optused = 2 + optptr[1]; 2991 if (optused > optlen) { 2992 goto bad_opt; 2993 } 2994 if (optused != 3) { 2995 goto opt_error; 2996 } 2997 break; 2998 2999 default: 3000 errtype = "unknown"; 3001 /* FALLTHROUGH */ 3002 opt_error: 3003 /* Determine which zone should send error */ 3004 switch (IP6OPT_TYPE(opt_type)) { 3005 case IP6OPT_TYPE_SKIP: 3006 optused = 2 + optptr[1]; 3007 if (optused > optlen) 3008 goto bad_opt; 3009 ip1dbg(("ip_process_options_v6: %s " 3010 "opt 0x%x skipped\n", 3011 errtype, opt_type)); 3012 break; 3013 case IP6OPT_TYPE_DISCARD: 3014 ip1dbg(("ip_process_options_v6: %s " 3015 "opt 0x%x; packet dropped\n", 3016 errtype, opt_type)); 3017 BUMP_MIB(ill->ill_ip_mib, 3018 ipIfStatsInHdrErrors); 3019 ip_drop_input("ipIfStatsInHdrErrors", 3020 mp, ill); 3021 freemsg(mp); 3022 return (-1); 3023 case IP6OPT_TYPE_ICMP: 3024 BUMP_MIB(ill->ill_ip_mib, 3025 ipIfStatsInHdrErrors); 3026 
ip_drop_input("ipIfStatsInHdrErrors", 3027 mp, ill); 3028 icmp_param_problem_v6(mp, 3029 ICMP6_PARAMPROB_OPTION, 3030 (uint32_t)(optptr - 3031 (uint8_t *)ip6h), 3032 B_FALSE, ira); 3033 return (-1); 3034 case IP6OPT_TYPE_FORCEICMP: 3035 BUMP_MIB(ill->ill_ip_mib, 3036 ipIfStatsInHdrErrors); 3037 ip_drop_input("ipIfStatsInHdrErrors", 3038 mp, ill); 3039 icmp_param_problem_v6(mp, 3040 ICMP6_PARAMPROB_OPTION, 3041 (uint32_t)(optptr - 3042 (uint8_t *)ip6h), 3043 B_TRUE, ira); 3044 return (-1); 3045 default: 3046 ASSERT(0); 3047 } 3048 } 3049 } 3050 optlen -= optused; 3051 optptr += optused; 3052 } 3053 return (ret); 3054 3055 bad_opt: 3056 /* Determine which zone should send error */ 3057 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3058 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 3059 (uint32_t)(optptr - (uint8_t *)ip6h), 3060 B_FALSE, ira); 3061 return (-1); 3062 } 3063 3064 /* 3065 * Process a routing header that is not yet empty. 3066 * Because of RFC 5095, we now reject all route headers. 3067 */ 3068 void 3069 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 3070 ip_recv_attr_t *ira) 3071 { 3072 ill_t *ill = ira->ira_ill; 3073 ip_stack_t *ipst = ill->ill_ipst; 3074 3075 ASSERT(rth->ip6r_segleft != 0); 3076 3077 if (!ipst->ips_ipv6_forward_src_routed) { 3078 /* XXX Check for source routed out same interface? */ 3079 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 3080 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 3081 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 3082 freemsg(mp); 3083 return; 3084 } 3085 3086 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3087 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3088 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 3089 B_FALSE, ira); 3090 } 3091 3092 /* 3093 * Read side put procedure for IPv6 module. 
3094 */ 3095 void 3096 ip_rput_v6(queue_t *q, mblk_t *mp) 3097 { 3098 ill_t *ill; 3099 3100 ill = (ill_t *)q->q_ptr; 3101 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { 3102 union DL_primitives *dl; 3103 3104 dl = (union DL_primitives *)mp->b_rptr; 3105 /* 3106 * Things are opening or closing - only accept DLPI 3107 * ack messages. If the stream is closing and ip_wsrv 3108 * has completed, ip_close is out of the qwait, but has 3109 * not yet completed qprocsoff. Don't proceed any further 3110 * because the ill has been cleaned up and things hanging 3111 * off the ill have been freed. 3112 */ 3113 if ((mp->b_datap->db_type != M_PCPROTO) || 3114 (dl->dl_primitive == DL_UNITDATA_IND)) { 3115 inet_freemsg(mp); 3116 return; 3117 } 3118 } 3119 if (DB_TYPE(mp) == M_DATA) { 3120 struct mac_header_info_s mhi; 3121 3122 ip_mdata_to_mhi(ill, mp, &mhi); 3123 ip_input_v6(ill, NULL, mp, &mhi); 3124 } else { 3125 ip_rput_notdata(ill, mp); 3126 } 3127 } 3128 3129 /* 3130 * Walk through the IPv6 packet in mp and see if there's an AH header 3131 * in it. See if the AH header needs to get done before other headers in 3132 * the packet. (Worker function for ipsec_early_ah_v6().) 3133 */ 3134 #define IPSEC_HDR_DONT_PROCESS 0 3135 #define IPSEC_HDR_PROCESS 1 3136 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 3137 static int 3138 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 3139 { 3140 uint_t length; 3141 uint_t ehdrlen; 3142 uint8_t *whereptr; 3143 uint8_t *endptr; 3144 uint8_t *nexthdrp; 3145 ip6_dest_t *desthdr; 3146 ip6_rthdr_t *rthdr; 3147 ip6_t *ip6h; 3148 3149 /* 3150 * For now just pullup everything. In general, the less pullups, 3151 * the better, but there's so much squirrelling through anyway, 3152 * it's just easier this way. 
3153 */ 3154 if (!pullupmsg(mp, -1)) { 3155 return (IPSEC_MEMORY_ERROR); 3156 } 3157 3158 ip6h = (ip6_t *)mp->b_rptr; 3159 length = IPV6_HDR_LEN; 3160 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3161 endptr = mp->b_wptr; 3162 3163 /* 3164 * We can't just use the argument nexthdr in the place 3165 * of nexthdrp becaue we don't dereference nexthdrp 3166 * till we confirm whether it is a valid address. 3167 */ 3168 nexthdrp = &ip6h->ip6_nxt; 3169 while (whereptr < endptr) { 3170 /* Is there enough left for len + nexthdr? */ 3171 if (whereptr + MIN_EHDR_LEN > endptr) 3172 return (IPSEC_MEMORY_ERROR); 3173 3174 switch (*nexthdrp) { 3175 case IPPROTO_HOPOPTS: 3176 case IPPROTO_DSTOPTS: 3177 /* Assumes the headers are identical for hbh and dst */ 3178 desthdr = (ip6_dest_t *)whereptr; 3179 ehdrlen = 8 * (desthdr->ip6d_len + 1); 3180 if ((uchar_t *)desthdr + ehdrlen > endptr) 3181 return (IPSEC_MEMORY_ERROR); 3182 /* 3183 * Return DONT_PROCESS because the destination 3184 * options header may be for each hop in a 3185 * routing-header, and we only want AH if we're 3186 * finished with routing headers. 3187 */ 3188 if (*nexthdrp == IPPROTO_DSTOPTS) 3189 return (IPSEC_HDR_DONT_PROCESS); 3190 nexthdrp = &desthdr->ip6d_nxt; 3191 break; 3192 case IPPROTO_ROUTING: 3193 rthdr = (ip6_rthdr_t *)whereptr; 3194 3195 /* 3196 * If there's more hops left on the routing header, 3197 * return now with DON'T PROCESS. 
3198 */ 3199 if (rthdr->ip6r_segleft > 0) 3200 return (IPSEC_HDR_DONT_PROCESS); 3201 3202 ehdrlen = 8 * (rthdr->ip6r_len + 1); 3203 if ((uchar_t *)rthdr + ehdrlen > endptr) 3204 return (IPSEC_MEMORY_ERROR); 3205 nexthdrp = &rthdr->ip6r_nxt; 3206 break; 3207 case IPPROTO_FRAGMENT: 3208 /* Wait for reassembly */ 3209 return (IPSEC_HDR_DONT_PROCESS); 3210 case IPPROTO_AH: 3211 *nexthdr = IPPROTO_AH; 3212 return (IPSEC_HDR_PROCESS); 3213 case IPPROTO_NONE: 3214 /* No next header means we're finished */ 3215 default: 3216 return (IPSEC_HDR_DONT_PROCESS); 3217 } 3218 length += ehdrlen; 3219 whereptr += ehdrlen; 3220 } 3221 /* 3222 * Malformed/truncated packet. 3223 */ 3224 return (IPSEC_MEMORY_ERROR); 3225 } 3226 3227 /* 3228 * Path for AH if options are present. 3229 * Returns NULL if the mblk was consumed. 3230 * 3231 * Sometimes AH needs to be done before other IPv6 headers for security 3232 * reasons. This function (and its ipsec_needs_processing_v6() above) 3233 * indicates if that is so, and fans out to the appropriate IPsec protocol 3234 * for the datagram passed in. 3235 */ 3236 mblk_t * 3237 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira) 3238 { 3239 uint8_t nexthdr; 3240 ah_t *ah; 3241 ill_t *ill = ira->ira_ill; 3242 ip_stack_t *ipst = ill->ill_ipst; 3243 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3244 3245 switch (ipsec_needs_processing_v6(mp, &nexthdr)) { 3246 case IPSEC_MEMORY_ERROR: 3247 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3248 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3249 freemsg(mp); 3250 return (NULL); 3251 case IPSEC_HDR_DONT_PROCESS: 3252 return (mp); 3253 } 3254 3255 /* Default means send it to AH! 
*/ 3256 ASSERT(nexthdr == IPPROTO_AH); 3257 3258 if (!ipsec_loaded(ipss)) { 3259 ip_proto_not_sup(mp, ira); 3260 return (NULL); 3261 } 3262 3263 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 3264 if (mp == NULL) 3265 return (NULL); 3266 ASSERT(ah != NULL); 3267 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3268 ASSERT(ira->ira_ipsec_ah_sa != NULL); 3269 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 3270 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira); 3271 3272 if (mp == NULL) { 3273 /* 3274 * Either it failed or is pending. In the former case 3275 * ipIfStatsInDiscards was increased. 3276 */ 3277 return (NULL); 3278 } 3279 3280 /* we're done with IPsec processing, send it up */ 3281 ip_input_post_ipsec(mp, ira); 3282 return (NULL); 3283 } 3284 3285 /* 3286 * Reassemble fragment. 3287 * When it returns a completed message the first mblk will only contain 3288 * the headers prior to the fragment header, with the nexthdr value updated 3289 * to be the header after the fragment header. 3290 */ 3291 mblk_t * 3292 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h, 3293 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira) 3294 { 3295 uint32_t ident = ntohl(fraghdr->ip6f_ident); 3296 uint16_t offset; 3297 boolean_t more_frags; 3298 uint8_t nexthdr = fraghdr->ip6f_nxt; 3299 in6_addr_t *v6dst_ptr; 3300 in6_addr_t *v6src_ptr; 3301 uint_t end; 3302 uint_t hdr_length; 3303 size_t count; 3304 ipf_t *ipf; 3305 ipf_t **ipfp; 3306 ipfb_t *ipfb; 3307 mblk_t *mp1; 3308 uint8_t ecn_info = 0; 3309 size_t msg_len; 3310 mblk_t *tail_mp; 3311 mblk_t *t_mp; 3312 boolean_t pruned = B_FALSE; 3313 uint32_t sum_val; 3314 uint16_t sum_flags; 3315 ill_t *ill = ira->ira_ill; 3316 ip_stack_t *ipst = ill->ill_ipst; 3317 uint_t prev_nexthdr_offset; 3318 uint8_t prev_nexthdr; 3319 uint8_t *ptr; 3320 uint32_t packet_size; 3321 3322 /* 3323 * We utilize hardware computed checksum info only for UDP since 3324 * IP fragmentation is a normal occurence for the protocol. 
In 3325 * addition, checksum offload support for IP fragments carrying 3326 * UDP payload is commonly implemented across network adapters. 3327 */ 3328 ASSERT(ira->ira_rill != NULL); 3329 if (nexthdr == IPPROTO_UDP && dohwcksum && 3330 ILL_HCKSUM_CAPABLE(ira->ira_rill) && 3331 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 3332 mblk_t *mp1 = mp->b_cont; 3333 int32_t len; 3334 3335 /* Record checksum information from the packet */ 3336 sum_val = (uint32_t)DB_CKSUM16(mp); 3337 sum_flags = DB_CKSUMFLAGS(mp); 3338 3339 /* fragmented payload offset from beginning of mblk */ 3340 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 3341 3342 if ((sum_flags & HCK_PARTIALCKSUM) && 3343 (mp1 == NULL || mp1->b_cont == NULL) && 3344 offset >= DB_CKSUMSTART(mp) && 3345 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) { 3346 uint32_t adj; 3347 /* 3348 * Partial checksum has been calculated by hardware 3349 * and attached to the packet; in addition, any 3350 * prepended extraneous data is even byte aligned. 3351 * If any such data exists, we adjust the checksum; 3352 * this would also handle any postpended data. 3353 */ 3354 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 3355 mp, mp1, len, adj); 3356 3357 /* One's complement subtract extraneous checksum */ 3358 if (adj >= sum_val) 3359 sum_val = ~(adj - sum_val) & 0xFFFF; 3360 else 3361 sum_val -= adj; 3362 } 3363 } else { 3364 sum_val = 0; 3365 sum_flags = 0; 3366 } 3367 3368 /* Clear hardware checksumming flag */ 3369 DB_CKSUMFLAGS(mp) = 0; 3370 3371 /* 3372 * Determine the offset (from the begining of the IP header) 3373 * of the nexthdr value which has IPPROTO_FRAGMENT. We use 3374 * this when removing the fragment header from the packet. 3375 * This packet consists of the IPv6 header, a potential 3376 * hop-by-hop options header, a potential pre-routing-header 3377 * destination options header, and a potential routing header. 
3378 */ 3379 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 3380 prev_nexthdr = ip6h->ip6_nxt; 3381 ptr = (uint8_t *)&ip6h[1]; 3382 3383 if (prev_nexthdr == IPPROTO_HOPOPTS) { 3384 ip6_hbh_t *hbh_hdr; 3385 uint_t hdr_len; 3386 3387 hbh_hdr = (ip6_hbh_t *)ptr; 3388 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 3389 prev_nexthdr = hbh_hdr->ip6h_nxt; 3390 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 3391 - (uint8_t *)ip6h; 3392 ptr += hdr_len; 3393 } 3394 if (prev_nexthdr == IPPROTO_DSTOPTS) { 3395 ip6_dest_t *dest_hdr; 3396 uint_t hdr_len; 3397 3398 dest_hdr = (ip6_dest_t *)ptr; 3399 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 3400 prev_nexthdr = dest_hdr->ip6d_nxt; 3401 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 3402 - (uint8_t *)ip6h; 3403 ptr += hdr_len; 3404 } 3405 if (prev_nexthdr == IPPROTO_ROUTING) { 3406 ip6_rthdr_t *rthdr; 3407 uint_t hdr_len; 3408 3409 rthdr = (ip6_rthdr_t *)ptr; 3410 prev_nexthdr = rthdr->ip6r_nxt; 3411 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 3412 - (uint8_t *)ip6h; 3413 hdr_len = 8 * (rthdr->ip6r_len + 1); 3414 ptr += hdr_len; 3415 } 3416 if (prev_nexthdr != IPPROTO_FRAGMENT) { 3417 /* Can't handle other headers before the fragment header */ 3418 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3419 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3420 freemsg(mp); 3421 return (NULL); 3422 } 3423 3424 /* 3425 * Note: Fragment offset in header is in 8-octet units. 3426 * Clearing least significant 3 bits not only extracts 3427 * it but also gets it in units of octets. 3428 */ 3429 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 3430 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 3431 3432 /* 3433 * Is the more frags flag on and the payload length not a multiple 3434 * of eight? 
3435 */ 3436 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 3437 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3438 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3439 (uint32_t)((char *)&ip6h->ip6_plen - 3440 (char *)ip6h), B_FALSE, ira); 3441 return (NULL); 3442 } 3443 3444 v6src_ptr = &ip6h->ip6_src; 3445 v6dst_ptr = &ip6h->ip6_dst; 3446 end = remlen; 3447 3448 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 3449 end += offset; 3450 3451 /* 3452 * Would fragment cause reassembled packet to have a payload length 3453 * greater than IP_MAXPACKET - the max payload size? 3454 */ 3455 if (end > IP_MAXPACKET) { 3456 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3457 ip_drop_input("Reassembled packet too large", mp, ill); 3458 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3459 (uint32_t)((char *)&fraghdr->ip6f_offlg - 3460 (char *)ip6h), B_FALSE, ira); 3461 return (NULL); 3462 } 3463 3464 /* 3465 * This packet just has one fragment. Reassembly not 3466 * needed. 3467 */ 3468 if (!more_frags && offset == 0) { 3469 goto reass_done; 3470 } 3471 3472 /* 3473 * Drop the fragmented as early as possible, if 3474 * we don't have resource(s) to re-assemble. 3475 */ 3476 if (ipst->ips_ip_reass_queue_bytes == 0) { 3477 freemsg(mp); 3478 return (NULL); 3479 } 3480 3481 /* Record the ECN field info. */ 3482 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 3483 /* 3484 * If this is not the first fragment, dump the unfragmentable 3485 * portion of the packet. 3486 */ 3487 if (offset) 3488 mp->b_rptr = (uchar_t *)&fraghdr[1]; 3489 3490 /* 3491 * Fragmentation reassembly. Each ILL has a hash table for 3492 * queueing packets undergoing reassembly for all IPIFs 3493 * associated with the ILL. The hash is based on the packet 3494 * IP ident field. The ILL frag hash table was allocated 3495 * as a timer block at the time the ILL was created. Whenever 3496 * there is anything on the reassembly queue, the timer will 3497 * be running. 
3498 */ 3499 /* Handle vnic loopback of fragments */ 3500 if (mp->b_datap->db_ref > 2) 3501 msg_len = 0; 3502 else 3503 msg_len = MBLKSIZE(mp); 3504 3505 tail_mp = mp; 3506 while (tail_mp->b_cont != NULL) { 3507 tail_mp = tail_mp->b_cont; 3508 if (tail_mp->b_datap->db_ref <= 2) 3509 msg_len += MBLKSIZE(tail_mp); 3510 } 3511 /* 3512 * If the reassembly list for this ILL will get too big 3513 * prune it. 3514 */ 3515 3516 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 3517 ipst->ips_ip_reass_queue_bytes) { 3518 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len, 3519 uint_t, ill->ill_frag_count, 3520 uint_t, ipst->ips_ip_reass_queue_bytes); 3521 ill_frag_prune(ill, 3522 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 3523 (ipst->ips_ip_reass_queue_bytes - msg_len)); 3524 pruned = B_TRUE; 3525 } 3526 3527 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 3528 mutex_enter(&ipfb->ipfb_lock); 3529 3530 ipfp = &ipfb->ipfb_ipf; 3531 /* Try to find an existing fragment queue for this packet. */ 3532 for (;;) { 3533 ipf = ipfp[0]; 3534 if (ipf) { 3535 /* 3536 * It has to match on ident, source address, and 3537 * dest address. 3538 */ 3539 if (ipf->ipf_ident == ident && 3540 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 3541 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 3542 3543 /* 3544 * If we have received too many 3545 * duplicate fragments for this packet 3546 * free it. 3547 */ 3548 if (ipf->ipf_num_dups > ip_max_frag_dups) { 3549 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3550 freemsg(mp); 3551 mutex_exit(&ipfb->ipfb_lock); 3552 return (NULL); 3553 } 3554 3555 break; 3556 } 3557 ipfp = &ipf->ipf_hash_next; 3558 continue; 3559 } 3560 3561 3562 /* 3563 * If we pruned the list, do we want to store this new 3564 * fragment?. We apply an optimization here based on the 3565 * fact that most fragments will be received in order. 3566 * So if the offset of this incoming fragment is zero, 3567 * it is the first fragment of a new packet. 
We will 3568 * keep it. Otherwise drop the fragment, as we have 3569 * probably pruned the packet already (since the 3570 * packet cannot be found). 3571 */ 3572 3573 if (pruned && offset != 0) { 3574 mutex_exit(&ipfb->ipfb_lock); 3575 freemsg(mp); 3576 return (NULL); 3577 } 3578 3579 /* New guy. Allocate a frag message. */ 3580 mp1 = allocb(sizeof (*ipf), BPRI_MED); 3581 if (!mp1) { 3582 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3583 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3584 freemsg(mp); 3585 partial_reass_done: 3586 mutex_exit(&ipfb->ipfb_lock); 3587 return (NULL); 3588 } 3589 3590 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 3591 /* 3592 * Too many fragmented packets in this hash bucket. 3593 * Free the oldest. 3594 */ 3595 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 3596 } 3597 3598 mp1->b_cont = mp; 3599 3600 /* Initialize the fragment header. */ 3601 ipf = (ipf_t *)mp1->b_rptr; 3602 ipf->ipf_mp = mp1; 3603 ipf->ipf_ptphn = ipfp; 3604 ipfp[0] = ipf; 3605 ipf->ipf_hash_next = NULL; 3606 ipf->ipf_ident = ident; 3607 ipf->ipf_v6src = *v6src_ptr; 3608 ipf->ipf_v6dst = *v6dst_ptr; 3609 /* Record reassembly start time. */ 3610 ipf->ipf_timestamp = gethrestime_sec(); 3611 /* Record ipf generation and account for frag header */ 3612 ipf->ipf_gen = ill->ill_ipf_gen++; 3613 ipf->ipf_count = MBLKSIZE(mp1); 3614 ipf->ipf_protocol = nexthdr; 3615 ipf->ipf_nf_hdr_len = 0; 3616 ipf->ipf_prev_nexthdr_offset = 0; 3617 ipf->ipf_last_frag_seen = B_FALSE; 3618 ipf->ipf_ecn = ecn_info; 3619 ipf->ipf_num_dups = 0; 3620 ipfb->ipfb_frag_pkts++; 3621 ipf->ipf_checksum = 0; 3622 ipf->ipf_checksum_flags = 0; 3623 3624 /* Store checksum value in fragment header */ 3625 if (sum_flags != 0) { 3626 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3627 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3628 ipf->ipf_checksum = sum_val; 3629 ipf->ipf_checksum_flags = sum_flags; 3630 } 3631 3632 /* 3633 * We handle reassembly two ways. 
In the easy case, 3634 * where all the fragments show up in order, we do 3635 * minimal bookkeeping, and just clip new pieces on 3636 * the end. If we ever see a hole, then we go off 3637 * to ip_reassemble which has to mark the pieces and 3638 * keep track of the number of holes, etc. Obviously, 3639 * the point of having both mechanisms is so we can 3640 * handle the easy case as efficiently as possible. 3641 */ 3642 if (offset == 0) { 3643 /* Easy case, in-order reassembly so far. */ 3644 /* Update the byte count */ 3645 ipf->ipf_count += msg_len; 3646 ipf->ipf_tail_mp = tail_mp; 3647 /* 3648 * Keep track of next expected offset in 3649 * ipf_end. 3650 */ 3651 ipf->ipf_end = end; 3652 ipf->ipf_nf_hdr_len = hdr_length; 3653 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset; 3654 } else { 3655 /* Hard case, hole at the beginning. */ 3656 ipf->ipf_tail_mp = NULL; 3657 /* 3658 * ipf_end == 0 means that we have given up 3659 * on easy reassembly. 3660 */ 3661 ipf->ipf_end = 0; 3662 3663 /* Forget checksum offload from now on */ 3664 ipf->ipf_checksum_flags = 0; 3665 3666 /* 3667 * ipf_hole_cnt is set by ip_reassemble. 3668 * ipf_count is updated by ip_reassemble. 3669 * No need to check for return value here 3670 * as we don't expect reassembly to complete or 3671 * fail for the first fragment itself. 3672 */ 3673 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 3674 msg_len); 3675 } 3676 /* Update per ipfb and ill byte counts */ 3677 ipfb->ipfb_count += ipf->ipf_count; 3678 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3679 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 3680 /* If the frag timer wasn't already going, start it. 
*/ 3681 mutex_enter(&ill->ill_lock); 3682 ill_frag_timer_start(ill); 3683 mutex_exit(&ill->ill_lock); 3684 goto partial_reass_done; 3685 } 3686 3687 /* 3688 * If the packet's flag has changed (it could be coming up 3689 * from an interface different than the previous, therefore 3690 * possibly different checksum capability), then forget about 3691 * any stored checksum states. Otherwise add the value to 3692 * the existing one stored in the fragment header. 3693 */ 3694 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 3695 sum_val += ipf->ipf_checksum; 3696 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3697 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3698 ipf->ipf_checksum = sum_val; 3699 } else if (ipf->ipf_checksum_flags != 0) { 3700 /* Forget checksum offload from now on */ 3701 ipf->ipf_checksum_flags = 0; 3702 } 3703 3704 /* 3705 * We have a new piece of a datagram which is already being 3706 * reassembled. Update the ECN info if all IP fragments 3707 * are ECN capable. If there is one which is not, clear 3708 * all the info. If there is at least one which has CE 3709 * code point, IP needs to report that up to transport. 3710 */ 3711 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 3712 if (ecn_info == IPH_ECN_CE) 3713 ipf->ipf_ecn = IPH_ECN_CE; 3714 } else { 3715 ipf->ipf_ecn = IPH_ECN_NECT; 3716 } 3717 3718 if (offset && ipf->ipf_end == offset) { 3719 /* The new fragment fits at the end */ 3720 ipf->ipf_tail_mp->b_cont = mp; 3721 /* Update the byte count */ 3722 ipf->ipf_count += msg_len; 3723 /* Update per ipfb and ill byte counts */ 3724 ipfb->ipfb_count += msg_len; 3725 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3726 atomic_add_32(&ill->ill_frag_count, msg_len); 3727 if (more_frags) { 3728 /* More to come. */ 3729 ipf->ipf_end = end; 3730 ipf->ipf_tail_mp = tail_mp; 3731 goto partial_reass_done; 3732 } 3733 } else { 3734 /* 3735 * Go do the hard cases. 3736 * Call ip_reassemble(). 
3737 */ 3738 int ret; 3739 3740 if (offset == 0) { 3741 if (ipf->ipf_prev_nexthdr_offset == 0) { 3742 ipf->ipf_nf_hdr_len = hdr_length; 3743 ipf->ipf_prev_nexthdr_offset = 3744 prev_nexthdr_offset; 3745 } 3746 } 3747 /* Save current byte count */ 3748 count = ipf->ipf_count; 3749 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 3750 3751 /* Count of bytes added and subtracted (freeb()ed) */ 3752 count = ipf->ipf_count - count; 3753 if (count) { 3754 /* Update per ipfb and ill byte counts */ 3755 ipfb->ipfb_count += count; 3756 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3757 atomic_add_32(&ill->ill_frag_count, count); 3758 } 3759 if (ret == IP_REASS_PARTIAL) { 3760 goto partial_reass_done; 3761 } else if (ret == IP_REASS_FAILED) { 3762 /* Reassembly failed. Free up all resources */ 3763 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3764 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 3765 IP_REASS_SET_START(t_mp, 0); 3766 IP_REASS_SET_END(t_mp, 0); 3767 } 3768 freemsg(mp); 3769 goto partial_reass_done; 3770 } 3771 3772 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 3773 } 3774 /* 3775 * We have completed reassembly. Unhook the frag header from 3776 * the reassembly list. 3777 * 3778 * Grab the unfragmentable header length next header value out 3779 * of the first fragment 3780 */ 3781 ASSERT(ipf->ipf_nf_hdr_len != 0); 3782 hdr_length = ipf->ipf_nf_hdr_len; 3783 3784 /* 3785 * Before we free the frag header, record the ECN info 3786 * to report back to the transport. 
3787 */ 3788 ecn_info = ipf->ipf_ecn; 3789 3790 /* 3791 * Store the nextheader field in the header preceding the fragment 3792 * header 3793 */ 3794 nexthdr = ipf->ipf_protocol; 3795 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 3796 ipfp = ipf->ipf_ptphn; 3797 3798 /* We need to supply these to caller */ 3799 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 3800 sum_val = ipf->ipf_checksum; 3801 else 3802 sum_val = 0; 3803 3804 mp1 = ipf->ipf_mp; 3805 count = ipf->ipf_count; 3806 ipf = ipf->ipf_hash_next; 3807 if (ipf) 3808 ipf->ipf_ptphn = ipfp; 3809 ipfp[0] = ipf; 3810 atomic_add_32(&ill->ill_frag_count, -count); 3811 ASSERT(ipfb->ipfb_count >= count); 3812 ipfb->ipfb_count -= count; 3813 ipfb->ipfb_frag_pkts--; 3814 mutex_exit(&ipfb->ipfb_lock); 3815 /* Ditch the frag header. */ 3816 mp = mp1->b_cont; 3817 freeb(mp1); 3818 3819 /* 3820 * Make sure the packet is good by doing some sanity 3821 * check. If bad we can silentely drop the packet. 3822 */ 3823 reass_done: 3824 if (hdr_length < sizeof (ip6_frag_t)) { 3825 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3826 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3827 ip1dbg(("ip_input_fragment_v6: bad packet\n")); 3828 freemsg(mp); 3829 return (NULL); 3830 } 3831 3832 /* 3833 * Remove the fragment header from the initial header by 3834 * splitting the mblk into the non-fragmentable header and 3835 * everthing after the fragment extension header. This has the 3836 * side effect of putting all the headers that need destination 3837 * processing into the b_cont block-- on return this fact is 3838 * used in order to avoid having to look at the extensions 3839 * already processed. 3840 * 3841 * Note that this code assumes that the unfragmentable portion 3842 * of the header is in the first mblk and increments 3843 * the read pointer past it. If this assumption is broken 3844 * this code fails badly. 
3845 */ 3846 if (mp->b_rptr + hdr_length != mp->b_wptr) { 3847 mblk_t *nmp; 3848 3849 if (!(nmp = dupb(mp))) { 3850 ip1dbg(("ip_input_fragment_v6: dupb failed\n")); 3851 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3852 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3853 freemsg(mp); 3854 return (NULL); 3855 } 3856 nmp->b_cont = mp->b_cont; 3857 mp->b_cont = nmp; 3858 nmp->b_rptr += hdr_length; 3859 } 3860 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 3861 3862 ip6h = (ip6_t *)mp->b_rptr; 3863 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr; 3864 3865 /* Restore original IP length in header. */ 3866 packet_size = msgdsize(mp); 3867 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN)); 3868 /* Record the ECN info. */ 3869 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 3870 ip6h->ip6_vcf |= htonl(ecn_info << 20); 3871 3872 /* Update the receive attributes */ 3873 ira->ira_pktlen = packet_size; 3874 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t); 3875 ira->ira_protocol = nexthdr; 3876 3877 /* Reassembly is successful; set checksum information in packet */ 3878 DB_CKSUM16(mp) = (uint16_t)sum_val; 3879 DB_CKSUMFLAGS(mp) = sum_flags; 3880 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length; 3881 3882 return (mp); 3883 } 3884 3885 /* 3886 * Given an mblk and a ptr, find the destination address in an IPv6 routing 3887 * header. 
3888 */ 3889 static in6_addr_t 3890 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3891 { 3892 ip6_rthdr0_t *rt0; 3893 int segleft, numaddr; 3894 in6_addr_t *ap, rv = oldrv; 3895 3896 rt0 = (ip6_rthdr0_t *)whereptr; 3897 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3898 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3899 uint8_t *, whereptr); 3900 return (rv); 3901 } 3902 segleft = rt0->ip6r0_segleft; 3903 numaddr = rt0->ip6r0_len / 2; 3904 3905 if ((rt0->ip6r0_len & 0x1) || 3906 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3907 (segleft > rt0->ip6r0_len / 2)) { 3908 /* 3909 * Corrupt packet. Either the routing header length is odd 3910 * (can't happen) or mismatched compared to the packet, or the 3911 * number of addresses is. Return what we can. This will 3912 * only be a problem on forwarded packets that get squeezed 3913 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3914 */ 3915 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3916 whereptr); 3917 return (rv); 3918 } 3919 3920 if (segleft != 0) { 3921 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3922 rv = ap[numaddr - 1]; 3923 } 3924 3925 return (rv); 3926 } 3927 3928 /* 3929 * Walk through the options to see if there is a routing header. 3930 * If present get the destination which is the last address of 3931 * the option. 3932 * mp needs to be provided in cases when the extension headers might span 3933 * b_cont; mp is never modified by this function. 3934 */ 3935 in6_addr_t 3936 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3937 { 3938 const mblk_t *current_mp = mp; 3939 uint8_t nexthdr; 3940 uint8_t *whereptr; 3941 int ehdrlen; 3942 in6_addr_t rv; 3943 3944 whereptr = (uint8_t *)ip6h; 3945 ehdrlen = sizeof (ip6_t); 3946 3947 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3948 ASSERT(mp == NULL || 3949 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3950 3951 rv = ip6h->ip6_dst; 3952 nexthdr = ip6h->ip6_nxt; 3953 if (is_fragment != NULL) 3954 *is_fragment = B_FALSE; 3955 3956 /* 3957 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3958 * no extension headers will be split across mblks. 3959 */ 3960 3961 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3962 nexthdr == IPPROTO_ROUTING) { 3963 if (nexthdr == IPPROTO_ROUTING) 3964 rv = pluck_out_dst(current_mp, whereptr, rv); 3965 3966 /* 3967 * All IPv6 extension headers have the next-header in byte 3968 * 0, and the (length - 8) in 8-byte-words. 3969 */ 3970 while (current_mp != NULL && 3971 whereptr + ehdrlen >= current_mp->b_wptr) { 3972 ehdrlen -= (current_mp->b_wptr - whereptr); 3973 current_mp = current_mp->b_cont; 3974 if (current_mp == NULL) { 3975 /* Bad packet. Return what we can. */ 3976 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3977 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3978 goto done; 3979 } 3980 whereptr = current_mp->b_rptr; 3981 } 3982 whereptr += ehdrlen; 3983 3984 nexthdr = *whereptr; 3985 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3986 ehdrlen = (*(whereptr + 1) + 1) * 8; 3987 } 3988 3989 done: 3990 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3991 *is_fragment = B_TRUE; 3992 return (rv); 3993 } 3994 3995 /* 3996 * ip_source_routed_v6: 3997 * This function is called by redirect code (called from ip_input_v6) to 3998 * know whether this packet is source routed through this node i.e 3999 * whether this node (router) is part of the journey. This 4000 * function is called under two cases : 4001 * 4002 * case 1 : Routing header was processed by this node and 4003 * ip_process_rthdr replaced ip6_dst with the next hop 4004 * and we are forwarding the packet to the next hop. 
4005 * 4006 * case 2 : Routing header was not processed by this node and we 4007 * are just forwarding the packet. 4008 * 4009 * For case (1) we don't want to send redirects. For case(2) we 4010 * want to send redirects. 4011 */ 4012 static boolean_t 4013 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 4014 { 4015 uint8_t nexthdr; 4016 in6_addr_t *addrptr; 4017 ip6_rthdr0_t *rthdr; 4018 uint8_t numaddr; 4019 ip6_hbh_t *hbhhdr; 4020 uint_t ehdrlen; 4021 uint8_t *byteptr; 4022 4023 ip2dbg(("ip_source_routed_v6\n")); 4024 nexthdr = ip6h->ip6_nxt; 4025 ehdrlen = IPV6_HDR_LEN; 4026 4027 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 4028 while (nexthdr == IPPROTO_HOPOPTS || 4029 nexthdr == IPPROTO_DSTOPTS) { 4030 byteptr = (uint8_t *)ip6h + ehdrlen; 4031 /* 4032 * Check if we have already processed 4033 * packets or we are just a forwarding 4034 * router which only pulled up msgs up 4035 * to IPV6HDR and one HBH ext header 4036 */ 4037 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4038 ip2dbg(("ip_source_routed_v6: Extension" 4039 " headers not processed\n")); 4040 return (B_FALSE); 4041 } 4042 hbhhdr = (ip6_hbh_t *)byteptr; 4043 nexthdr = hbhhdr->ip6h_nxt; 4044 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 4045 } 4046 switch (nexthdr) { 4047 case IPPROTO_ROUTING: 4048 byteptr = (uint8_t *)ip6h + ehdrlen; 4049 /* 4050 * If for some reason, we haven't pulled up 4051 * the routing hdr data mblk, then we must 4052 * not have processed it at all. So for sure 4053 * we are not part of the source routed journey. 4054 */ 4055 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4056 ip2dbg(("ip_source_routed_v6: Routing" 4057 " header not processed\n")); 4058 return (B_FALSE); 4059 } 4060 rthdr = (ip6_rthdr0_t *)byteptr; 4061 /* 4062 * Either we are an intermediate router or the 4063 * last hop before destination and we have 4064 * already processed the routing header. 
4065 * If segment_left is greater than or equal to zero, 4066 * then we must be the (numaddr - segleft) entry 4067 * of the routing header. Although ip6r0_segleft 4068 * is a unit8_t variable, we still check for zero 4069 * or greater value, if in case the data type 4070 * is changed someday in future. 4071 */ 4072 if (rthdr->ip6r0_segleft > 0 || 4073 rthdr->ip6r0_segleft == 0) { 4074 numaddr = rthdr->ip6r0_len / 2; 4075 addrptr = (in6_addr_t *)((char *)rthdr + 4076 sizeof (*rthdr)); 4077 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 4078 if (addrptr != NULL) { 4079 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 4080 return (B_TRUE); 4081 ip1dbg(("ip_source_routed_v6: Not local\n")); 4082 } 4083 } 4084 /* FALLTHRU */ 4085 default: 4086 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 4087 return (B_FALSE); 4088 } 4089 } 4090 4091 /* 4092 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 4093 * We have not optimized this in terms of number of mblks 4094 * allocated. For instance, for each fragment sent we always allocate a 4095 * mblk to hold the IPv6 header and fragment header. 4096 * 4097 * Assumes that all the extension headers are contained in the first mblk 4098 * and that the fragment header has has already been added by calling 4099 * ip_fraghdr_add_v6. 
4100 */ 4101 int 4102 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4103 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4104 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4105 { 4106 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4107 ip6_t *fip6h; 4108 mblk_t *hmp; 4109 mblk_t *hmp0; 4110 mblk_t *dmp; 4111 ip6_frag_t *fraghdr; 4112 size_t unfragmentable_len; 4113 size_t mlen; 4114 size_t max_chunk; 4115 uint16_t off_flags; 4116 uint16_t offset = 0; 4117 ill_t *ill = nce->nce_ill; 4118 uint8_t nexthdr; 4119 uint8_t *ptr; 4120 ip_stack_t *ipst = ill->ill_ipst; 4121 uint_t priority = mp->b_band; 4122 int error = 0; 4123 4124 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4125 if (max_frag == 0) { 4126 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4127 ip_drop_output("FragFails: zero max_frag", mp, ill); 4128 freemsg(mp); 4129 return (EINVAL); 4130 } 4131 4132 /* 4133 * Caller should have added fraghdr_t to pkt_len, and also 4134 * updated ip6_plen. 4135 */ 4136 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4137 ASSERT(msgdsize(mp) == pkt_len); 4138 4139 /* 4140 * Determine the length of the unfragmentable portion of this 4141 * datagram. This consists of the IPv6 header, a potential 4142 * hop-by-hop options header, a potential pre-routing-header 4143 * destination options header, and a potential routing header. 
4144 */ 4145 nexthdr = ip6h->ip6_nxt; 4146 ptr = (uint8_t *)&ip6h[1]; 4147 4148 if (nexthdr == IPPROTO_HOPOPTS) { 4149 ip6_hbh_t *hbh_hdr; 4150 uint_t hdr_len; 4151 4152 hbh_hdr = (ip6_hbh_t *)ptr; 4153 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4154 nexthdr = hbh_hdr->ip6h_nxt; 4155 ptr += hdr_len; 4156 } 4157 if (nexthdr == IPPROTO_DSTOPTS) { 4158 ip6_dest_t *dest_hdr; 4159 uint_t hdr_len; 4160 4161 dest_hdr = (ip6_dest_t *)ptr; 4162 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4163 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4164 nexthdr = dest_hdr->ip6d_nxt; 4165 ptr += hdr_len; 4166 } 4167 } 4168 if (nexthdr == IPPROTO_ROUTING) { 4169 ip6_rthdr_t *rthdr; 4170 uint_t hdr_len; 4171 4172 rthdr = (ip6_rthdr_t *)ptr; 4173 nexthdr = rthdr->ip6r_nxt; 4174 hdr_len = 8 * (rthdr->ip6r_len + 1); 4175 ptr += hdr_len; 4176 } 4177 if (nexthdr != IPPROTO_FRAGMENT) { 4178 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4179 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4180 freemsg(mp); 4181 return (EINVAL); 4182 } 4183 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4184 unfragmentable_len += sizeof (ip6_frag_t); 4185 4186 max_chunk = (max_frag - unfragmentable_len) & ~7; 4187 4188 /* 4189 * Allocate an mblk with enough room for the link-layer 4190 * header and the unfragmentable part of the datagram, which includes 4191 * the fragment header. This (or a copy) will be used as the 4192 * first mblk for each fragment we send. 4193 */ 4194 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4195 if (hmp == NULL) { 4196 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4197 ip_drop_output("FragFails: no hmp", mp, ill); 4198 freemsg(mp); 4199 return (ENOBUFS); 4200 } 4201 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4202 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4203 4204 fip6h = (ip6_t *)hmp->b_rptr; 4205 bcopy(ip6h, fip6h, unfragmentable_len); 4206 4207 /* 4208 * pkt_len is set to the total length of the fragmentable data in this 4209 * datagram. 
For each fragment sent, we will decrement pkt_len 4210 * by the amount of fragmentable data sent in that fragment 4211 * until len reaches zero. 4212 */ 4213 pkt_len -= unfragmentable_len; 4214 4215 /* 4216 * Move read ptr past unfragmentable portion, we don't want this part 4217 * of the data in our fragments. 4218 */ 4219 mp->b_rptr += unfragmentable_len; 4220 if (mp->b_rptr == mp->b_wptr) { 4221 mblk_t *mp1 = mp->b_cont; 4222 freeb(mp); 4223 mp = mp1; 4224 } 4225 4226 while (pkt_len != 0) { 4227 mlen = MIN(pkt_len, max_chunk); 4228 pkt_len -= mlen; 4229 if (pkt_len != 0) { 4230 /* Not last */ 4231 hmp0 = copyb(hmp); 4232 if (hmp0 == NULL) { 4233 BUMP_MIB(ill->ill_ip_mib, 4234 ipIfStatsOutFragFails); 4235 ip_drop_output("FragFails: copyb failed", 4236 mp, ill); 4237 freeb(hmp); 4238 freemsg(mp); 4239 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4240 return (ENOBUFS); 4241 } 4242 off_flags = IP6F_MORE_FRAG; 4243 } else { 4244 /* Last fragment */ 4245 hmp0 = hmp; 4246 hmp = NULL; 4247 off_flags = 0; 4248 } 4249 fip6h = (ip6_t *)(hmp0->b_rptr); 4250 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4251 sizeof (ip6_frag_t)); 4252 4253 fip6h->ip6_plen = htons((uint16_t)(mlen + 4254 unfragmentable_len - IPV6_HDR_LEN)); 4255 /* 4256 * Note: Optimization alert. 4257 * In IPv6 (and IPv4) protocol header, Fragment Offset 4258 * ("offset") is 13 bits wide and in 8-octet units. 4259 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4260 * it occupies the most significant 13 bits. 4261 * (least significant 13 bits in IPv4). 4262 * We do not do any shifts here. Not shifting is same effect 4263 * as taking offset value in octet units, dividing by 8 and 4264 * then shifting 3 bits left to line it up in place in proper 4265 * place protocol header. 
4266 */ 4267 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4268 4269 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4270 /* mp has already been freed by ip_carve_mp() */ 4271 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4272 ip_drop_output("FragFails: could not carve mp", 4273 hmp0, ill); 4274 if (hmp != NULL) 4275 freeb(hmp); 4276 freeb(hmp0); 4277 ip1dbg(("ip_carve_mp: failed\n")); 4278 return (ENOBUFS); 4279 } 4280 hmp0->b_cont = dmp; 4281 /* Get the priority marking, if any */ 4282 hmp0->b_band = priority; 4283 4284 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4285 4286 error = postfragfn(hmp0, nce, ixaflags, 4287 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4288 ixa_cookie); 4289 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4290 /* No point in sending the other fragments */ 4291 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4292 ip_drop_output("FragFails: postfragfn failed", 4293 hmp, ill); 4294 freeb(hmp); 4295 freemsg(mp); 4296 return (error); 4297 } 4298 /* No need to redo state machine in loop */ 4299 ixaflags &= ~IXAF_REACH_CONF; 4300 4301 offset += mlen; 4302 } 4303 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4304 return (error); 4305 } 4306 4307 /* 4308 * Add a fragment header to an IPv6 packet. 4309 * Assumes that all the extension headers are contained in the first mblk. 4310 * 4311 * The fragment header is inserted after an hop-by-hop options header 4312 * and after [an optional destinations header followed by] a routing header. 4313 */ 4314 mblk_t * 4315 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4316 { 4317 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4318 ip6_t *fip6h; 4319 mblk_t *hmp; 4320 ip6_frag_t *fraghdr; 4321 size_t unfragmentable_len; 4322 uint8_t nexthdr; 4323 uint_t prev_nexthdr_offset; 4324 uint8_t *ptr; 4325 uint_t priority = mp->b_band; 4326 ip_stack_t *ipst = ixa->ixa_ipst; 4327 4328 /* 4329 * Determine the length of the unfragmentable portion of this 4330 * datagram. 
This consists of the IPv6 header, a potential 4331 * hop-by-hop options header, a potential pre-routing-header 4332 * destination options header, and a potential routing header. 4333 */ 4334 nexthdr = ip6h->ip6_nxt; 4335 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4336 ptr = (uint8_t *)&ip6h[1]; 4337 4338 if (nexthdr == IPPROTO_HOPOPTS) { 4339 ip6_hbh_t *hbh_hdr; 4340 uint_t hdr_len; 4341 4342 hbh_hdr = (ip6_hbh_t *)ptr; 4343 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4344 nexthdr = hbh_hdr->ip6h_nxt; 4345 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4346 - (uint8_t *)ip6h; 4347 ptr += hdr_len; 4348 } 4349 if (nexthdr == IPPROTO_DSTOPTS) { 4350 ip6_dest_t *dest_hdr; 4351 uint_t hdr_len; 4352 4353 dest_hdr = (ip6_dest_t *)ptr; 4354 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4355 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4356 nexthdr = dest_hdr->ip6d_nxt; 4357 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4358 - (uint8_t *)ip6h; 4359 ptr += hdr_len; 4360 } 4361 } 4362 if (nexthdr == IPPROTO_ROUTING) { 4363 ip6_rthdr_t *rthdr; 4364 uint_t hdr_len; 4365 4366 rthdr = (ip6_rthdr_t *)ptr; 4367 nexthdr = rthdr->ip6r_nxt; 4368 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4369 - (uint8_t *)ip6h; 4370 hdr_len = 8 * (rthdr->ip6r_len + 1); 4371 ptr += hdr_len; 4372 } 4373 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4374 4375 /* 4376 * Allocate an mblk with enough room for the link-layer 4377 * header, the unfragmentable part of the datagram, and the 4378 * fragment header. 
4379 */ 4380 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4381 ipst->ips_ip_wroff_extra, mp); 4382 if (hmp == NULL) { 4383 ill_t *ill = ixa->ixa_nce->nce_ill; 4384 4385 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4386 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4387 freemsg(mp); 4388 return (NULL); 4389 } 4390 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4391 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4392 4393 fip6h = (ip6_t *)hmp->b_rptr; 4394 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4395 4396 bcopy(ip6h, fip6h, unfragmentable_len); 4397 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4398 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4399 4400 fraghdr->ip6f_nxt = nexthdr; 4401 fraghdr->ip6f_reserved = 0; 4402 fraghdr->ip6f_offlg = 0; 4403 fraghdr->ip6f_ident = htonl(ident); 4404 4405 /* Get the priority marking, if any */ 4406 hmp->b_band = priority; 4407 4408 /* 4409 * Move read ptr past unfragmentable portion, we don't want this part 4410 * of the data in our fragments. 4411 */ 4412 mp->b_rptr += unfragmentable_len; 4413 hmp->b_cont = mp; 4414 return (hmp); 4415 } 4416 4417 /* 4418 * Determine if the ill and multicast aspects of that packets 4419 * "matches" the conn. 4420 */ 4421 boolean_t 4422 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4423 { 4424 ill_t *ill = ira->ira_rill; 4425 zoneid_t zoneid = ira->ira_zoneid; 4426 uint_t in_ifindex; 4427 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4428 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4429 4430 /* 4431 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4432 * scopeid. This is used to limit 4433 * unicast and multicast reception to conn_incoming_ifindex. 4434 * conn_wantpacket_v6 is called both for unicast and 4435 * multicast packets. 
4436 */ 4437 in_ifindex = connp->conn_incoming_ifindex; 4438 4439 /* mpathd can bind to the under IPMP interface, which we allow */ 4440 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4441 if (!IS_UNDER_IPMP(ill)) 4442 return (B_FALSE); 4443 4444 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4445 return (B_FALSE); 4446 } 4447 4448 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4449 return (B_FALSE); 4450 4451 if (!(ira->ira_flags & IRAF_MULTICAST)) 4452 return (B_TRUE); 4453 4454 if (connp->conn_multi_router) 4455 return (B_TRUE); 4456 4457 if (ira->ira_protocol == IPPROTO_RSVP) 4458 return (B_TRUE); 4459 4460 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4461 ira->ira_ill)); 4462 } 4463 4464 /* 4465 * pr_addr_dbg function provides the needed buffer space to call 4466 * inet_ntop() function's 3rd argument. This function should be 4467 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4468 * stack buffer space in it's own stack frame. This function uses 4469 * a buffer from it's own stack and prints the information. 4470 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4471 * 4472 * Note: This function can call inet_ntop() once. 4473 */ 4474 void 4475 pr_addr_dbg(char *fmt1, int af, const void *addr) 4476 { 4477 char buf[INET6_ADDRSTRLEN]; 4478 4479 if (fmt1 == NULL) { 4480 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4481 return; 4482 } 4483 4484 /* 4485 * This does not compare debug level and just prints 4486 * out. Thus it is the responsibility of the caller 4487 * to check the appropriate debug-level before calling 4488 * this function. 4489 */ 4490 if (ip_debug > 0) { 4491 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4492 } 4493 4494 4495 } 4496 4497 4498 /* 4499 * Return the length in bytes of the IPv6 headers (base header 4500 * extension headers) that will be needed based on the 4501 * ip_pkt_t structure passed by the caller. 
4502 * 4503 * The returned length does not include the length of the upper level 4504 * protocol (ULP) header. 4505 */ 4506 int 4507 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4508 { 4509 int len; 4510 4511 len = IPV6_HDR_LEN; 4512 4513 /* 4514 * If there's a security label here, then we ignore any hop-by-hop 4515 * options the user may try to set. 4516 */ 4517 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4518 uint_t hopoptslen; 4519 /* 4520 * Note that ipp_label_len_v6 is just the option - not 4521 * the hopopts extension header. It also needs to be padded 4522 * to a multiple of 8 bytes. 4523 */ 4524 ASSERT(ipp->ipp_label_len_v6 != 0); 4525 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4526 hopoptslen = (hopoptslen + 7)/8 * 8; 4527 len += hopoptslen; 4528 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4529 ASSERT(ipp->ipp_hopoptslen != 0); 4530 len += ipp->ipp_hopoptslen; 4531 } 4532 4533 /* 4534 * En-route destination options 4535 * Only do them if there's a routing header as well 4536 */ 4537 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4538 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4539 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4540 len += ipp->ipp_rthdrdstoptslen; 4541 } 4542 if (ipp->ipp_fields & IPPF_RTHDR) { 4543 ASSERT(ipp->ipp_rthdrlen != 0); 4544 len += ipp->ipp_rthdrlen; 4545 } 4546 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4547 ASSERT(ipp->ipp_dstoptslen != 0); 4548 len += ipp->ipp_dstoptslen; 4549 } 4550 return (len); 4551 } 4552 4553 /* 4554 * All-purpose routine to build a header chain of an IPv6 header 4555 * followed by any required extension headers and a proto header. 4556 * 4557 * The caller has to set the source and destination address as well as 4558 * ip6_plen. The caller has to massage any routing header and compensate 4559 * for the ULP pseudo-header checksum due to the source route. 4560 * 4561 * The extension headers will all be fully filled in. 
4562 */ 4563 void 4564 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4565 uint8_t protocol, uint32_t flowinfo) 4566 { 4567 uint8_t *nxthdr_ptr; 4568 uint8_t *cp; 4569 ip6_t *ip6h = (ip6_t *)buf; 4570 4571 /* Initialize IPv6 header */ 4572 ip6h->ip6_vcf = 4573 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4574 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4575 4576 if (ipp->ipp_fields & IPPF_TCLASS) { 4577 /* Overrides the class part of flowinfo */ 4578 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4579 ipp->ipp_tclass); 4580 } 4581 4582 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4583 ip6h->ip6_hops = ipp->ipp_hoplimit; 4584 else 4585 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4586 4587 if ((ipp->ipp_fields & IPPF_ADDR) && 4588 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4589 ip6h->ip6_src = ipp->ipp_addr; 4590 4591 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4592 cp = (uint8_t *)&ip6h[1]; 4593 /* 4594 * Here's where we have to start stringing together 4595 * any extension headers in the right order: 4596 * Hop-by-hop, destination, routing, and final destination opts. 4597 */ 4598 /* 4599 * If there's a security label here, then we ignore any hop-by-hop 4600 * options the user may try to set. 4601 */ 4602 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4603 /* 4604 * Hop-by-hop options with the label. 4605 * Note that ipp_label_v6 is just the option - not 4606 * the hopopts extension header. It also needs to be padded 4607 * to a multiple of 8 bytes. 
4608 */ 4609 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4610 uint_t hopoptslen; 4611 uint_t padlen; 4612 4613 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4614 hopoptslen = (padlen + 7)/8 * 8; 4615 padlen = hopoptslen - padlen; 4616 4617 *nxthdr_ptr = IPPROTO_HOPOPTS; 4618 nxthdr_ptr = &hbh->ip6h_nxt; 4619 hbh->ip6h_len = hopoptslen/8 - 1; 4620 cp += sizeof (ip6_hbh_t); 4621 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4622 cp += ipp->ipp_label_len_v6; 4623 4624 ASSERT(padlen <= 7); 4625 switch (padlen) { 4626 case 0: 4627 break; 4628 case 1: 4629 cp[0] = IP6OPT_PAD1; 4630 break; 4631 default: 4632 cp[0] = IP6OPT_PADN; 4633 cp[1] = padlen - 2; 4634 bzero(&cp[2], padlen - 2); 4635 break; 4636 } 4637 cp += padlen; 4638 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4639 /* Hop-by-hop options */ 4640 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4641 4642 *nxthdr_ptr = IPPROTO_HOPOPTS; 4643 nxthdr_ptr = &hbh->ip6h_nxt; 4644 4645 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4646 cp += ipp->ipp_hopoptslen; 4647 } 4648 /* 4649 * En-route destination options 4650 * Only do them if there's a routing header as well 4651 */ 4652 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4653 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4654 ip6_dest_t *dst = (ip6_dest_t *)cp; 4655 4656 *nxthdr_ptr = IPPROTO_DSTOPTS; 4657 nxthdr_ptr = &dst->ip6d_nxt; 4658 4659 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4660 cp += ipp->ipp_rthdrdstoptslen; 4661 } 4662 /* 4663 * Routing header next 4664 */ 4665 if (ipp->ipp_fields & IPPF_RTHDR) { 4666 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4667 4668 *nxthdr_ptr = IPPROTO_ROUTING; 4669 nxthdr_ptr = &rt->ip6r_nxt; 4670 4671 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4672 cp += ipp->ipp_rthdrlen; 4673 } 4674 /* 4675 * Do ultimate destination options 4676 */ 4677 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4678 ip6_dest_t *dest = (ip6_dest_t *)cp; 4679 4680 *nxthdr_ptr = IPPROTO_DSTOPTS; 4681 nxthdr_ptr = &dest->ip6d_nxt; 4682 4683 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4684 cp += ipp->ipp_dstoptslen; 4685 } 4686 /* 4687 * Now set the last header pointer to the proto passed in 4688 */ 4689 *nxthdr_ptr = protocol; 4690 ASSERT((int)(cp - buf) == buf_len); 4691 } 4692 4693 /* 4694 * Return a pointer to the routing header extension header 4695 * in the IPv6 header(s) chain passed in. 4696 * If none found, return NULL 4697 * Assumes that all extension headers are in same mblk as the v6 header 4698 */ 4699 ip6_rthdr_t * 4700 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4701 { 4702 ip6_dest_t *desthdr; 4703 ip6_frag_t *fraghdr; 4704 uint_t hdrlen; 4705 uint8_t nexthdr; 4706 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4707 4708 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4709 return ((ip6_rthdr_t *)ptr); 4710 4711 /* 4712 * The routing header will precede all extension headers 4713 * other than the hop-by-hop and destination options 4714 * extension headers, so if we see anything other than those, 4715 * we're done and didn't find it. 4716 * We could see a destination options header alone but no 4717 * routing header, in which case we'll return NULL as soon as 4718 * we see anything after that. 4719 * Hop-by-hop and destination option headers are identical, 4720 * so we can use either one we want as a template. 4721 */ 4722 nexthdr = ip6h->ip6_nxt; 4723 while (ptr < endptr) { 4724 /* Is there enough left for len + nexthdr? 
*/ 4725 if (ptr + MIN_EHDR_LEN > endptr) 4726 return (NULL); 4727 4728 switch (nexthdr) { 4729 case IPPROTO_HOPOPTS: 4730 case IPPROTO_DSTOPTS: 4731 /* Assumes the headers are identical for hbh and dst */ 4732 desthdr = (ip6_dest_t *)ptr; 4733 hdrlen = 8 * (desthdr->ip6d_len + 1); 4734 nexthdr = desthdr->ip6d_nxt; 4735 break; 4736 4737 case IPPROTO_ROUTING: 4738 return ((ip6_rthdr_t *)ptr); 4739 4740 case IPPROTO_FRAGMENT: 4741 fraghdr = (ip6_frag_t *)ptr; 4742 hdrlen = sizeof (ip6_frag_t); 4743 nexthdr = fraghdr->ip6f_nxt; 4744 break; 4745 4746 default: 4747 return (NULL); 4748 } 4749 ptr += hdrlen; 4750 } 4751 return (NULL); 4752 } 4753 4754 /* 4755 * Called for source-routed packets originating on this node. 4756 * Manipulates the original routing header by moving every entry up 4757 * one slot, placing the first entry in the v6 header's v6_dst field, 4758 * and placing the ultimate destination in the routing header's last 4759 * slot. 4760 * 4761 * Returns the checksum diference between the ultimate destination 4762 * (last hop in the routing header when the packet is sent) and 4763 * the first hop (ip6_dst when the packet is sent) 4764 */ 4765 /* ARGSUSED2 */ 4766 uint32_t 4767 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4768 { 4769 uint_t numaddr; 4770 uint_t i; 4771 in6_addr_t *addrptr; 4772 in6_addr_t tmp; 4773 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4774 uint32_t cksm; 4775 uint32_t addrsum = 0; 4776 uint16_t *ptr; 4777 4778 /* 4779 * Perform any processing needed for source routing. 4780 * We know that all extension headers will be in the same mblk 4781 * as the IPv6 header. 4782 */ 4783 4784 /* 4785 * If no segments left in header, or the header length field is zero, 4786 * don't move hop addresses around; 4787 * Checksum difference is zero. 
4788 */ 4789 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4790 return (0); 4791 4792 ptr = (uint16_t *)&ip6h->ip6_dst; 4793 cksm = 0; 4794 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4795 cksm += ptr[i]; 4796 } 4797 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4798 4799 /* 4800 * Here's where the fun begins - we have to 4801 * move all addresses up one spot, take the 4802 * first hop and make it our first ip6_dst, 4803 * and place the ultimate destination in the 4804 * newly-opened last slot. 4805 */ 4806 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4807 numaddr = rthdr->ip6r0_len / 2; 4808 tmp = *addrptr; 4809 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4810 *addrptr = addrptr[1]; 4811 } 4812 *addrptr = ip6h->ip6_dst; 4813 ip6h->ip6_dst = tmp; 4814 4815 /* 4816 * From the checksummed ultimate destination subtract the checksummed 4817 * current ip6_dst (the first hop address). Return that number. 4818 * (In the v4 case, the second part of this is done in each routine 4819 * that calls ip_massage_options(). We do it all in this one place 4820 * for v6). 
4821 */ 4822 ptr = (uint16_t *)&ip6h->ip6_dst; 4823 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4824 addrsum += ptr[i]; 4825 } 4826 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4827 if ((int)cksm < 0) 4828 cksm--; 4829 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4830 4831 return (cksm); 4832 } 4833 4834 void 4835 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4836 { 4837 kstat_t *ksp; 4838 4839 ip6_stat_t template = { 4840 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4841 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4842 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4843 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4844 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4845 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4846 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4847 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4848 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4849 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4850 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4851 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4852 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4853 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4854 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4855 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4856 }; 4857 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4858 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4859 KSTAT_FLAG_VIRTUAL, stackid); 4860 4861 if (ksp == NULL) 4862 return (NULL); 4863 4864 bcopy(&template, ip6_statisticsp, sizeof (template)); 4865 ksp->ks_data = (void *)ip6_statisticsp; 4866 ksp->ks_private = (void *)(uintptr_t)stackid; 4867 4868 kstat_install(ksp); 4869 return (ksp); 4870 } 4871 4872 void 4873 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4874 { 4875 if (ksp != NULL) { 4876 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4877 kstat_delete_netstack(ksp, stackid); 4878 } 4879 } 4880 4881 /* 4882 * The following two functions set and get 
the value for the 4883 * IPV6_SRC_PREFERENCES socket option. 4884 */ 4885 int 4886 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4887 { 4888 /* 4889 * We only support preferences that are covered by 4890 * IPV6_PREFER_SRC_MASK. 4891 */ 4892 if (prefs & ~IPV6_PREFER_SRC_MASK) 4893 return (EINVAL); 4894 4895 /* 4896 * Look for conflicting preferences or default preferences. If 4897 * both bits of a related pair are clear, the application wants the 4898 * system's default value for that pair. Both bits in a pair can't 4899 * be set. 4900 */ 4901 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4902 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4903 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4904 IPV6_PREFER_SRC_MIPMASK) { 4905 return (EINVAL); 4906 } 4907 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4908 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4909 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4910 IPV6_PREFER_SRC_TMPMASK) { 4911 return (EINVAL); 4912 } 4913 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4914 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4915 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4916 IPV6_PREFER_SRC_CGAMASK) { 4917 return (EINVAL); 4918 } 4919 4920 ixa->ixa_src_preferences = prefs; 4921 return (0); 4922 } 4923 4924 size_t 4925 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4926 { 4927 *val = ixa->ixa_src_preferences; 4928 return (sizeof (ixa->ixa_src_preferences)); 4929 } 4930 4931 /* 4932 * Get the size of the IP options (including the IP headers size) 4933 * without including the AH header's size. If till_ah is B_FALSE, 4934 * and if AH header is present, dest options beyond AH header will 4935 * also be included in the returned size. 
4936 */ 4937 int 4938 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4939 { 4940 ip6_t *ip6h; 4941 uint8_t nexthdr; 4942 uint8_t *whereptr; 4943 ip6_hbh_t *hbhhdr; 4944 ip6_dest_t *dsthdr; 4945 ip6_rthdr_t *rthdr; 4946 int ehdrlen; 4947 int size; 4948 ah_t *ah; 4949 4950 ip6h = (ip6_t *)mp->b_rptr; 4951 size = IPV6_HDR_LEN; 4952 nexthdr = ip6h->ip6_nxt; 4953 whereptr = (uint8_t *)&ip6h[1]; 4954 for (;;) { 4955 /* Assume IP has already stripped it */ 4956 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4957 switch (nexthdr) { 4958 case IPPROTO_HOPOPTS: 4959 hbhhdr = (ip6_hbh_t *)whereptr; 4960 nexthdr = hbhhdr->ip6h_nxt; 4961 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4962 break; 4963 case IPPROTO_DSTOPTS: 4964 dsthdr = (ip6_dest_t *)whereptr; 4965 nexthdr = dsthdr->ip6d_nxt; 4966 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4967 break; 4968 case IPPROTO_ROUTING: 4969 rthdr = (ip6_rthdr_t *)whereptr; 4970 nexthdr = rthdr->ip6r_nxt; 4971 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4972 break; 4973 default : 4974 if (till_ah) { 4975 ASSERT(nexthdr == IPPROTO_AH); 4976 return (size); 4977 } 4978 /* 4979 * If we don't have a AH header to traverse, 4980 * return now. This happens normally for 4981 * outbound datagrams where we have not inserted 4982 * the AH header. 4983 */ 4984 if (nexthdr != IPPROTO_AH) { 4985 return (size); 4986 } 4987 4988 /* 4989 * We don't include the AH header's size 4990 * to be symmetrical with other cases where 4991 * we either don't have a AH header (outbound) 4992 * or peek into the AH header yet (inbound and 4993 * not pulled up yet). 4994 */ 4995 ah = (ah_t *)whereptr; 4996 nexthdr = ah->ah_nexthdr; 4997 ehdrlen = (ah->ah_length << 2) + 8; 4998 4999 if (nexthdr == IPPROTO_DSTOPTS) { 5000 if (whereptr + ehdrlen >= mp->b_wptr) { 5001 /* 5002 * The destination options header 5003 * is not part of the first mblk. 
5004 */ 5005 whereptr = mp->b_cont->b_rptr; 5006 } else { 5007 whereptr += ehdrlen; 5008 } 5009 5010 dsthdr = (ip6_dest_t *)whereptr; 5011 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 5012 size += ehdrlen; 5013 } 5014 return (size); 5015 } 5016 whereptr += ehdrlen; 5017 size += ehdrlen; 5018 } 5019 } 5020 5021 /* 5022 * Utility routine that checks if `v6srcp' is a valid address on underlying 5023 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 5024 * associated with `v6srcp' on success. NOTE: if this is not called from 5025 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 5026 * group during or after this lookup. 5027 */ 5028 boolean_t 5029 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 5030 { 5031 ipif_t *ipif; 5032 5033 5034 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 5035 if (ipif != NULL) { 5036 if (ipifp != NULL) 5037 *ipifp = ipif; 5038 else 5039 ipif_refrele(ipif); 5040 return (B_TRUE); 5041 } 5042 5043 if (ip_debug > 2) { 5044 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 5045 "src %s\n", AF_INET6, v6srcp); 5046 } 5047 return (B_FALSE); 5048 } 5049