1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 1990 Mentat Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/sysmacros.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/sdt.h> 44 #include <sys/kobj.h> 45 #include <sys/zone.h> 46 #include <sys/neti.h> 47 #include <sys/hook.h> 48 49 #include <sys/kmem.h> 50 #include <sys/systm.h> 51 #include <sys/param.h> 52 #include <sys/socket.h> 53 #include <sys/vtrace.h> 54 #include <sys/isa_defs.h> 55 #include <sys/atomic.h> 56 #include <sys/policy.h> 57 #include <sys/mac.h> 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 #include <net/if_dl.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <netinet/ip6.h> 65 #include <netinet/icmp6.h> 66 #include <netinet/sctp.h> 67 68 #include <inet/common.h> 69 #include <inet/mi.h> 70 #include <inet/optcom.h> 71 #include <inet/mib2.h> 72 #include <inet/nd.h> 73 #include <inet/arp.h> 74 75 #include <inet/ip.h> 76 #include <inet/ip_impl.h> 77 #include <inet/ip6.h> 78 #include <inet/ip6_asp.h> 79 #include <inet/tcp.h> 80 #include <inet/tcp_impl.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipp_common.h> 83 84 #include <inet/ip_multi.h> 85 #include <inet/ip_if.h> 86 #include <inet/ip_ire.h> 87 #include <inet/ip_rts.h> 88 #include <inet/ip_ndp.h> 89 #include <net/pfkeyv2.h> 90 #include <inet/sadb.h> 91 #include <inet/ipsec_impl.h> 92 #include <inet/iptun/iptun_impl.h> 93 #include <inet/sctp_ip.h> 94 #include <sys/pattr.h> 95 #include <inet/ipclassifier.h> 96 #include <inet/ipsecah.h> 97 #include <inet/rawip_impl.h> 98 #include <inet/rts_impl.h> 99 #include <sys/squeue_impl.h> 100 #include <sys/squeue.h> 101 102 #include <sys/tsol/label.h> 103 #include <sys/tsol/tnet.h> 104 105 #include <rpc/pmap_prot.h> 106 
107 /* Temporary; for CR 6451644 work-around */ 108 #include <sys/ethernet.h> 109 110 /* 111 * Naming conventions: 112 * These rules should be judiciously applied 113 * if there is a need to identify something as IPv6 versus IPv4 114 * IPv6 functions will end with _v6 in the ip module. 115 * IPv6 functions will end with _ipv6 in the transport modules. 116 * IPv6 macros: 117 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 118 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 119 * And then there are ..V4_PART_OF_V6. 120 * The intent is that macros in the ip module end with _V6. 121 * IPv6 global variables will start with ipv6_ 122 * IPv6 structures will start with ipv6 123 * IPv6 defined constants should start with IPV6_ 124 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 125 */ 126 127 /* 128 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 129 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 130 * from IANA. This mechanism will remain in effect until an official 131 * number is obtained. 
132 */ 133 uchar_t ip6opt_ls; 134 135 const in6_addr_t ipv6_all_ones = 136 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }; 137 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 }; 138 139 #ifdef _BIG_ENDIAN 140 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 }; 141 #else /* _BIG_ENDIAN */ 142 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 }; 143 #endif /* _BIG_ENDIAN */ 144 145 #ifdef _BIG_ENDIAN 146 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U }; 147 #else /* _BIG_ENDIAN */ 148 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U }; 149 #endif /* _BIG_ENDIAN */ 150 151 #ifdef _BIG_ENDIAN 152 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U }; 153 #else /* _BIG_ENDIAN */ 154 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U }; 155 #endif /* _BIG_ENDIAN */ 156 157 #ifdef _BIG_ENDIAN 158 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U }; 159 #else /* _BIG_ENDIAN */ 160 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U }; 161 #endif /* _BIG_ENDIAN */ 162 163 #ifdef _BIG_ENDIAN 164 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U }; 165 #else /* _BIG_ENDIAN */ 166 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U }; 167 #endif /* _BIG_ENDIAN */ 168 169 #ifdef _BIG_ENDIAN 170 const in6_addr_t ipv6_solicited_node_mcast = 171 { 0xff020000U, 0, 0x00000001U, 0xff000000U }; 172 #else /* _BIG_ENDIAN */ 173 const in6_addr_t ipv6_solicited_node_mcast = 174 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU }; 175 #endif /* _BIG_ENDIAN */ 176 177 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *); 178 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *); 179 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *, 180 ip_recv_attr_t *); 181 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *, 182 ip_recv_attr_t *); 183 static void 
icmp_send_redirect_v6(mblk_t *, in6_addr_t *, 184 in6_addr_t *, ip_recv_attr_t *); 185 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *, 186 ip_recv_attr_t *); 187 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 188 189 /* 190 * icmp_inbound_v6 deals with ICMP messages that are handled by IP. 191 * If the ICMP message is consumed by IP, i.e., it should not be delivered 192 * to any IPPROTO_ICMP raw sockets, then it returns NULL. 193 * Likewise, if the ICMP error is misformed (too short, etc), then it 194 * returns NULL. The caller uses this to determine whether or not to send 195 * to raw sockets. 196 * 197 * All error messages are passed to the matching transport stream. 198 * 199 * See comment for icmp_inbound_v4() on how IPsec is handled. 200 */ 201 mblk_t * 202 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira) 203 { 204 icmp6_t *icmp6; 205 ip6_t *ip6h; /* Outer header */ 206 int ip_hdr_length; /* Outer header length */ 207 boolean_t interested; 208 ill_t *ill = ira->ira_ill; 209 ip_stack_t *ipst = ill->ill_ipst; 210 mblk_t *mp_ret = NULL; 211 212 ip6h = (ip6_t *)mp->b_rptr; 213 214 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 215 216 /* Make sure ira_l2src is set for ndp_input */ 217 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 218 ip_setl2src(mp, ira, ira->ira_rill); 219 220 ip_hdr_length = ira->ira_ip_hdr_length; 221 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 222 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 223 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 224 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 225 freemsg(mp); 226 return (NULL); 227 } 228 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 229 if (ip6h == NULL) { 230 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 231 freemsg(mp); 232 return (NULL); 233 } 234 } 235 236 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 237 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6); 238 
ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 239 icmp6->icmp6_code)); 240 241 /* 242 * We will set "interested" to "true" if we should pass a copy to 243 * the transport i.e., if it is an error message. 244 */ 245 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 246 247 switch (icmp6->icmp6_type) { 248 case ICMP6_DST_UNREACH: 249 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 250 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 251 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 252 break; 253 254 case ICMP6_TIME_EXCEEDED: 255 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 256 break; 257 258 case ICMP6_PARAM_PROB: 259 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 260 break; 261 262 case ICMP6_PACKET_TOO_BIG: 263 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs); 264 break; 265 266 case ICMP6_ECHO_REQUEST: 267 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 268 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 269 !ipst->ips_ipv6_resp_echo_mcast) 270 break; 271 272 /* 273 * We must have exclusive use of the mblk to convert it to 274 * a response. 275 * If not, we copy it. 
276 */ 277 if (mp->b_datap->db_ref > 1) { 278 mblk_t *mp1; 279 280 mp1 = copymsg(mp); 281 if (mp1 == NULL) { 282 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 283 ip_drop_input("ipIfStatsInDiscards - copymsg", 284 mp, ill); 285 freemsg(mp); 286 return (NULL); 287 } 288 freemsg(mp); 289 mp = mp1; 290 ip6h = (ip6_t *)mp->b_rptr; 291 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 292 } 293 294 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 295 icmp_send_reply_v6(mp, ip6h, icmp6, ira); 296 return (NULL); 297 298 case ICMP6_ECHO_REPLY: 299 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 300 break; 301 302 case ND_ROUTER_SOLICIT: 303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 304 break; 305 306 case ND_ROUTER_ADVERT: 307 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 308 break; 309 310 case ND_NEIGHBOR_SOLICIT: 311 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 312 ndp_input(mp, ira); 313 return (NULL); 314 315 case ND_NEIGHBOR_ADVERT: 316 BUMP_MIB(ill->ill_icmp6_mib, 317 ipv6IfIcmpInNeighborAdvertisements); 318 ndp_input(mp, ira); 319 return (NULL); 320 321 case ND_REDIRECT: 322 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 323 324 if (ipst->ips_ipv6_ignore_redirect) 325 break; 326 327 /* We now allow a RAW socket to receive this. */ 328 interested = B_TRUE; 329 break; 330 331 /* 332 * The next three icmp messages will be handled by MLD. 333 * Pass all valid MLD packets up to any process(es) 334 * listening on a raw ICMP socket. 335 */ 336 case MLD_LISTENER_QUERY: 337 case MLD_LISTENER_REPORT: 338 case MLD_LISTENER_REDUCTION: 339 mp = mld_input(mp, ira); 340 return (mp); 341 default: 342 break; 343 } 344 /* 345 * See if there is an ICMP client to avoid an extra copymsg/freemsg 346 * if there isn't one. 347 */ 348 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) { 349 /* If there is an ICMP client and we want one too, copy it. 
*/ 350 351 if (!interested) { 352 /* Caller will deliver to RAW sockets */ 353 return (mp); 354 } 355 mp_ret = copymsg(mp); 356 if (mp_ret == NULL) { 357 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 358 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 359 } 360 } else if (!interested) { 361 /* Neither we nor raw sockets are interested. Drop packet now */ 362 freemsg(mp); 363 return (NULL); 364 } 365 366 /* 367 * ICMP error or redirect packet. Make sure we have enough of 368 * the header and that db_ref == 1 since we might end up modifying 369 * the packet. 370 */ 371 if (mp->b_cont != NULL) { 372 if (ip_pullup(mp, -1, ira) == NULL) { 373 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 374 ip_drop_input("ipIfStatsInDiscards - ip_pullup", 375 mp, ill); 376 freemsg(mp); 377 return (mp_ret); 378 } 379 } 380 381 if (mp->b_datap->db_ref > 1) { 382 mblk_t *mp1; 383 384 mp1 = copymsg(mp); 385 if (mp1 == NULL) { 386 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 387 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 388 freemsg(mp); 389 return (mp_ret); 390 } 391 freemsg(mp); 392 mp = mp1; 393 } 394 395 /* 396 * In case mp has changed, verify the message before any further 397 * processes. 398 */ 399 ip6h = (ip6_t *)mp->b_rptr; 400 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 401 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 402 freemsg(mp); 403 return (mp_ret); 404 } 405 406 switch (icmp6->icmp6_type) { 407 case ND_REDIRECT: 408 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira); 409 break; 410 case ICMP6_PACKET_TOO_BIG: 411 /* Update DCE and adjust MTU is icmp header if needed */ 412 icmp_inbound_too_big_v6(icmp6, ira); 413 /* FALLTHRU */ 414 default: 415 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 416 break; 417 } 418 419 return (mp_ret); 420 } 421 422 /* 423 * Send an ICMP echo reply. 424 * The caller has already updated the payload part of the packet. 
425 * We handle the ICMP checksum, IP source address selection and feed 426 * the packet into ip_output_simple. 427 */ 428 static void 429 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 430 ip_recv_attr_t *ira) 431 { 432 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 433 ill_t *ill = ira->ira_ill; 434 ip_stack_t *ipst = ill->ill_ipst; 435 ip_xmit_attr_t ixas; 436 in6_addr_t origsrc; 437 438 /* 439 * Remove any extension headers (do not reverse a source route) 440 * and clear the flow id (keep traffic class for now). 441 */ 442 if (ip_hdr_length != IPV6_HDR_LEN) { 443 int i; 444 445 for (i = 0; i < IPV6_HDR_LEN; i++) { 446 mp->b_rptr[ip_hdr_length - i - 1] = 447 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 448 } 449 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 450 ip6h = (ip6_t *)mp->b_rptr; 451 ip6h->ip6_nxt = IPPROTO_ICMPV6; 452 i = ntohs(ip6h->ip6_plen); 453 i -= (ip_hdr_length - IPV6_HDR_LEN); 454 ip6h->ip6_plen = htons(i); 455 ip_hdr_length = IPV6_HDR_LEN; 456 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 457 } 458 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 459 460 /* Reverse the source and destination addresses. */ 461 origsrc = ip6h->ip6_src; 462 ip6h->ip6_src = ip6h->ip6_dst; 463 ip6h->ip6_dst = origsrc; 464 465 /* set the hop limit */ 466 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 467 468 /* 469 * Prepare for checksum by putting icmp length in the icmp 470 * checksum field. 
The checksum is calculated in ip_output 471 */ 472 icmp6->icmp6_cksum = ip6h->ip6_plen; 473 474 bzero(&ixas, sizeof (ixas)); 475 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 476 ixas.ixa_zoneid = ira->ira_zoneid; 477 ixas.ixa_cred = kcred; 478 ixas.ixa_cpid = NOPID; 479 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 480 ixas.ixa_ifindex = 0; 481 ixas.ixa_ipst = ipst; 482 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 483 484 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 485 /* 486 * This packet should go out the same way as it 487 * came in i.e in clear, independent of the IPsec 488 * policy for transmitting packets. 489 */ 490 ixas.ixa_flags |= IXAF_NO_IPSEC; 491 } else { 492 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 493 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 494 /* Note: mp already consumed and ip_drop_packet done */ 495 return; 496 } 497 } 498 499 /* Was the destination (now source) link-local? Send out same group */ 500 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 501 ixas.ixa_flags |= IXAF_SCOPEID_SET; 502 if (IS_UNDER_IPMP(ill)) 503 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 504 else 505 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 506 } 507 508 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 509 /* 510 * Not one or our addresses (IRE_LOCALs), thus we let 511 * ip_output_simple pick the source. 512 */ 513 ip6h->ip6_src = ipv6_all_zeros; 514 ixas.ixa_flags |= IXAF_SET_SOURCE; 515 } 516 517 /* Should we send using dce_pmtu? */ 518 if (ipst->ips_ipv6_icmp_return_pmtu) 519 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 520 521 (void) ip_output_simple(mp, &ixas); 522 ixa_cleanup(&ixas); 523 524 } 525 526 /* 527 * Verify the ICMP messages for either for ICMP error or redirect packet. 528 * The caller should have fully pulled up the message. If it's a redirect 529 * packet, only basic checks on IP header will be done; otherwise, verify 530 * the packet by looking at the included ULP header. 
531 * 532 * Called before icmp_inbound_error_fanout_v6 is called. 533 */ 534 static boolean_t 535 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 536 { 537 ill_t *ill = ira->ira_ill; 538 uint16_t hdr_length; 539 uint8_t *nexthdrp; 540 uint8_t nexthdr; 541 ip_stack_t *ipst = ill->ill_ipst; 542 conn_t *connp; 543 ip6_t *ip6h; /* Inner header */ 544 545 ip6h = (ip6_t *)&icmp6[1]; 546 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 547 goto truncated; 548 549 if (icmp6->icmp6_type == ND_REDIRECT) { 550 hdr_length = sizeof (nd_redirect_t); 551 } else { 552 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 553 goto discard_pkt; 554 hdr_length = IPV6_HDR_LEN; 555 } 556 557 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 558 goto truncated; 559 560 /* 561 * Stop here for ICMP_REDIRECT. 562 */ 563 if (icmp6->icmp6_type == ND_REDIRECT) 564 return (B_TRUE); 565 566 /* 567 * ICMP errors only. 568 */ 569 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 570 goto discard_pkt; 571 nexthdr = *nexthdrp; 572 573 /* Try to pass the ICMP message to clients who need it */ 574 switch (nexthdr) { 575 case IPPROTO_UDP: 576 /* 577 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 578 * transport header. 579 */ 580 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 581 mp->b_wptr) 582 goto truncated; 583 break; 584 case IPPROTO_TCP: { 585 tcpha_t *tcpha; 586 587 /* 588 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 589 * transport header. 590 */ 591 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 592 mp->b_wptr) 593 goto truncated; 594 595 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 596 /* 597 * With IPMP we need to match across group, which we do 598 * since we have the upper ill from ira_ill. 
599 */ 600 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 601 ill->ill_phyint->phyint_ifindex, ipst); 602 if (connp == NULL) 603 goto discard_pkt; 604 605 if ((connp->conn_verifyicmp != NULL) && 606 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 607 CONN_DEC_REF(connp); 608 goto discard_pkt; 609 } 610 CONN_DEC_REF(connp); 611 break; 612 } 613 case IPPROTO_SCTP: 614 /* 615 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 616 * transport header. 617 */ 618 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 619 mp->b_wptr) 620 goto truncated; 621 break; 622 case IPPROTO_ESP: 623 case IPPROTO_AH: 624 break; 625 case IPPROTO_ENCAP: 626 case IPPROTO_IPV6: { 627 /* Look for self-encapsulated packets that caused an error */ 628 ip6_t *in_ip6h; 629 630 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 631 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 632 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 633 goto truncated; 634 break; 635 } 636 default: 637 break; 638 } 639 640 return (B_TRUE); 641 642 discard_pkt: 643 /* Bogus ICMP error. */ 644 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 645 return (B_FALSE); 646 647 truncated: 648 /* We pulled up everthing already. Must be truncated */ 649 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 650 return (B_FALSE); 651 } 652 653 /* 654 * Process received IPv6 ICMP Packet too big. 655 * The caller is responsible for validating the packet before passing it in 656 * and also to fanout the ICMP error to any matching transport conns. Assumes 657 * the message has been fully pulled up. 658 * 659 * Before getting here, the caller has called icmp_inbound_verify_v6() 660 * that should have verified with ULP to prevent undoing the changes we're 661 * going to make to DCE. For example, TCP might have verified that the packet 662 * which generated error is in the send window. 
663 * 664 * In some cases modified this MTU in the ICMP header packet; the caller 665 * should pass to the matching ULP after this returns. 666 */ 667 static void 668 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira) 669 { 670 uint32_t mtu; 671 dce_t *dce; 672 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */ 673 ip_stack_t *ipst = ill->ill_ipst; 674 int old_max_frag; 675 in6_addr_t final_dst; 676 ip6_t *ip6h; /* Inner IP header */ 677 678 /* Caller has already pulled up everything. */ 679 ip6h = (ip6_t *)&icmp6[1]; 680 final_dst = ip_get_dst_v6(ip6h, NULL, NULL); 681 682 /* 683 * For link local destinations matching simply on address is not 684 * sufficient. Same link local addresses for different ILL's is 685 * possible. 686 */ 687 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) { 688 dce = dce_lookup_and_add_v6(&final_dst, 689 ill->ill_phyint->phyint_ifindex, ipst); 690 } else { 691 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst); 692 } 693 if (dce == NULL) { 694 /* Couldn't add a unique one - ENOMEM */ 695 if (ip_debug > 2) { 696 /* ip1dbg */ 697 pr_addr_dbg("icmp_inbound_too_big_v6:" 698 "no dce for dst %s\n", AF_INET6, 699 &final_dst); 700 } 701 return; 702 } 703 704 mtu = ntohl(icmp6->icmp6_mtu); 705 706 mutex_enter(&dce->dce_lock); 707 if (dce->dce_flags & DCEF_PMTU) 708 old_max_frag = dce->dce_pmtu; 709 else 710 old_max_frag = ill->ill_mtu; 711 712 if (mtu < IPV6_MIN_MTU) { 713 ip1dbg(("Received mtu less than IPv6 " 714 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 715 mtu = IPV6_MIN_MTU; 716 /* 717 * If an mtu less than IPv6 min mtu is received, 718 * we must include a fragment header in 719 * subsequent packets. 720 */ 721 dce->dce_flags |= DCEF_TOO_SMALL_PMTU; 722 } else { 723 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU; 724 } 725 ip1dbg(("Received mtu from router: %d\n", mtu)); 726 dce->dce_pmtu = MIN(old_max_frag, mtu); 727 728 /* Prepare to send the new max frag size for the ULP. 
*/ 729 if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) { 730 /* 731 * If we need a fragment header in every packet 732 * (above case or multirouting), make sure the 733 * ULP takes it into account when computing the 734 * payload size. 735 */ 736 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t)); 737 } else { 738 icmp6->icmp6_mtu = htonl(dce->dce_pmtu); 739 } 740 /* We now have a PMTU for sure */ 741 dce->dce_flags |= DCEF_PMTU; 742 dce->dce_last_change_time = TICK_TO_SEC(lbolt64); 743 mutex_exit(&dce->dce_lock); 744 /* 745 * After dropping the lock the new value is visible to everyone. 746 * Then we bump the generation number so any cached values reinspect 747 * the dce_t. 748 */ 749 dce_increment_generation(dce); 750 dce_refrele(dce); 751 } 752 753 /* 754 * Fanout received ICMPv6 error packets to the transports. 755 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 756 * 757 * The caller must have called icmp_inbound_verify_v6. 758 */ 759 void 760 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 761 { 762 uint16_t *up; /* Pointer to ports in ULP header */ 763 uint32_t ports; /* reversed ports for fanout */ 764 ip6_t rip6h; /* With reversed addresses */ 765 ip6_t *ip6h; /* Inner IP header */ 766 uint16_t hdr_length; /* Inner IP header length */ 767 uint8_t *nexthdrp; 768 uint8_t nexthdr; 769 tcpha_t *tcpha; 770 conn_t *connp; 771 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */ 772 ip_stack_t *ipst = ill->ill_ipst; 773 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 774 775 /* Caller has already pulled up everything. 
*/ 776 ip6h = (ip6_t *)&icmp6[1]; 777 ASSERT(mp->b_cont == NULL); 778 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 779 780 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 781 goto drop_pkt; 782 nexthdr = *nexthdrp; 783 ira->ira_protocol = nexthdr; 784 785 /* 786 * We need a separate IP header with the source and destination 787 * addresses reversed to do fanout/classification because the ip6h in 788 * the ICMPv6 error is in the form we sent it out. 789 */ 790 rip6h.ip6_src = ip6h->ip6_dst; 791 rip6h.ip6_dst = ip6h->ip6_src; 792 rip6h.ip6_nxt = nexthdr; 793 794 /* Try to pass the ICMP message to clients who need it */ 795 switch (nexthdr) { 796 case IPPROTO_UDP: { 797 /* Attempt to find a client stream based on port. */ 798 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 799 800 /* Note that we send error to all matches. */ 801 ira->ira_flags |= IRAF_ICMP_ERROR; 802 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira); 803 ira->ira_flags &= ~IRAF_ICMP_ERROR; 804 return; 805 } 806 case IPPROTO_TCP: { 807 /* 808 * Attempt to find a client stream based on port. 809 * Note that we do a reverse lookup since the header is 810 * in the form we sent it out. 811 */ 812 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 813 /* 814 * With IPMP we need to match across group, which we do 815 * since we have the upper ill from ira_ill. 
816 */ 817 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 818 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 819 if (connp == NULL) { 820 goto drop_pkt; 821 } 822 823 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 824 (ira->ira_flags & IRAF_IPSEC_SECURE)) { 825 mp = ipsec_check_inbound_policy(mp, connp, 826 NULL, ip6h, ira); 827 if (mp == NULL) { 828 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 829 /* Note that mp is NULL */ 830 ip_drop_input("ipIfStatsInDiscards", mp, ill); 831 CONN_DEC_REF(connp); 832 return; 833 } 834 } 835 836 ira->ira_flags |= IRAF_ICMP_ERROR; 837 if (IPCL_IS_TCP(connp)) { 838 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 839 connp->conn_recvicmp, connp, ira, SQ_FILL, 840 SQTAG_TCP6_INPUT_ICMP_ERR); 841 } else { 842 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 843 ill_t *rill = ira->ira_rill; 844 845 ira->ira_ill = ira->ira_rill = NULL; 846 (connp->conn_recv)(connp, mp, NULL, ira); 847 CONN_DEC_REF(connp); 848 ira->ira_ill = ill; 849 ira->ira_rill = rill; 850 } 851 ira->ira_flags &= ~IRAF_ICMP_ERROR; 852 return; 853 854 } 855 case IPPROTO_SCTP: 856 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 857 /* Find a SCTP client stream for this packet. */ 858 ((uint16_t *)&ports)[0] = up[1]; 859 ((uint16_t *)&ports)[1] = up[0]; 860 861 ira->ira_flags |= IRAF_ICMP_ERROR; 862 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira); 863 ira->ira_flags &= ~IRAF_ICMP_ERROR; 864 return; 865 866 case IPPROTO_ESP: 867 case IPPROTO_AH: 868 if (!ipsec_loaded(ipss)) { 869 ip_proto_not_sup(mp, ira); 870 return; 871 } 872 873 if (nexthdr == IPPROTO_ESP) 874 mp = ipsecesp_icmp_error(mp, ira); 875 else 876 mp = ipsecah_icmp_error(mp, ira); 877 if (mp == NULL) 878 return; 879 880 /* Just in case ipsec didn't preserve the NULL b_cont */ 881 if (mp->b_cont != NULL) { 882 if (!pullupmsg(mp, -1)) 883 goto drop_pkt; 884 } 885 886 /* 887 * If succesful, the mp has been modified to not include 888 * the ESP/AH header so we can fanout to the ULP's icmp 889 * error handler. 
890 */ 891 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN) 892 goto drop_pkt; 893 894 ip6h = (ip6_t *)mp->b_rptr; 895 /* Don't call hdr_length_v6() unless you have to. */ 896 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 897 hdr_length = ip_hdr_length_v6(mp, ip6h); 898 else 899 hdr_length = IPV6_HDR_LEN; 900 901 /* Verify the modified message before any further processes. */ 902 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 903 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 904 freemsg(mp); 905 return; 906 } 907 908 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 909 return; 910 911 case IPPROTO_IPV6: { 912 /* Look for self-encapsulated packets that caused an error */ 913 ip6_t *in_ip6h; 914 915 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 916 917 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) && 918 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) { 919 /* 920 * Self-encapsulated case. As in the ipv4 case, 921 * we need to strip the 2nd IP header. Since mp 922 * is already pulled-up, we can simply bcopy 923 * the 3rd header + data over the 2nd header. 924 */ 925 uint16_t unused_len; 926 927 /* 928 * Make sure we don't do recursion more than once. 929 */ 930 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h, 931 &unused_len, &nexthdrp) || 932 *nexthdrp == IPPROTO_IPV6) { 933 goto drop_pkt; 934 } 935 936 /* 937 * Copy the 3rd header + remaining data on top 938 * of the 2nd header. 939 */ 940 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h); 941 942 /* 943 * Subtract length of the 2nd header. 944 */ 945 mp->b_wptr -= hdr_length; 946 947 ip6h = (ip6_t *)mp->b_rptr; 948 /* Don't call hdr_length_v6() unless you have to. */ 949 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 950 hdr_length = ip_hdr_length_v6(mp, ip6h); 951 else 952 hdr_length = IPV6_HDR_LEN; 953 954 /* 955 * Verify the modified message before any further 956 * processes. 
957 */ 958 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 959 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 960 freemsg(mp); 961 return; 962 } 963 964 /* 965 * Now recurse, and see what I _really_ should be 966 * doing here. 967 */ 968 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 969 return; 970 } 971 /* FALLTHRU */ 972 } 973 case IPPROTO_ENCAP: 974 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src, 975 &rip6h.ip6_dst, ipst)) != NULL) { 976 ira->ira_flags |= IRAF_ICMP_ERROR; 977 connp->conn_recvicmp(connp, mp, NULL, ira); 978 CONN_DEC_REF(connp); 979 ira->ira_flags &= ~IRAF_ICMP_ERROR; 980 return; 981 } 982 /* 983 * No IP tunnel is interested, fallthrough and see 984 * if a raw socket will want it. 985 */ 986 /* FALLTHRU */ 987 default: 988 ira->ira_flags |= IRAF_ICMP_ERROR; 989 ASSERT(ira->ira_protocol == nexthdr); 990 ip_fanout_proto_v6(mp, &rip6h, ira); 991 ira->ira_flags &= ~IRAF_ICMP_ERROR; 992 return; 993 } 994 /* NOTREACHED */ 995 drop_pkt: 996 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 997 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 998 freemsg(mp); 999 } 1000 1001 /* 1002 * Process received IPv6 ICMP Redirect messages. 1003 * Assumes the caller has verified that the headers are in the pulled up mblk. 1004 * Consumes mp. 
1005 */ 1006 /* ARGSUSED */ 1007 static void 1008 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd, 1009 ip_recv_attr_t *ira) 1010 { 1011 ire_t *ire, *nire; 1012 ire_t *prev_ire = NULL; 1013 ire_t *redir_ire; 1014 in6_addr_t *src, *dst, *gateway; 1015 nd_opt_hdr_t *opt; 1016 nce_t *nce; 1017 int ncec_flags = 0; 1018 int err = 0; 1019 boolean_t redirect_to_router = B_FALSE; 1020 int len; 1021 int optlen; 1022 ill_t *ill = ira->ira_rill; 1023 ill_t *rill = ira->ira_rill; 1024 ip_stack_t *ipst = ill->ill_ipst; 1025 1026 /* 1027 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 1028 * and make it be the IPMP upper so avoid being confused by a packet 1029 * addressed to a unicast address on a different ill. 1030 */ 1031 if (IS_UNDER_IPMP(rill)) { 1032 rill = ipmp_ill_hold_ipmp_ill(rill); 1033 if (rill == NULL) { 1034 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1035 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill", 1036 mp, ill); 1037 freemsg(mp); 1038 return; 1039 } 1040 ASSERT(rill != ira->ira_rill); 1041 } 1042 1043 len = mp->b_wptr - (uchar_t *)rd; 1044 src = &ip6h->ip6_src; 1045 dst = &rd->nd_rd_dst; 1046 gateway = &rd->nd_rd_target; 1047 1048 /* Verify if it is a valid redirect */ 1049 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1050 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1051 (rd->nd_rd_code != 0) || 1052 (len < sizeof (nd_redirect_t)) || 1053 (IN6_IS_ADDR_V4MAPPED(dst)) || 1054 (IN6_IS_ADDR_MULTICAST(dst))) { 1055 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1056 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill); 1057 goto fail_redirect; 1058 } 1059 1060 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1061 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1062 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1063 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway", 1064 mp, ill); 1065 goto fail_redirect; 1066 } 1067 1068 optlen = len - sizeof (nd_redirect_t); 1069 if (optlen != 0) { 1070 if 
(!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) { 1071 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1072 ip_drop_input("ipv6IfIcmpInBadRedirects - options", 1073 mp, ill); 1074 goto fail_redirect; 1075 } 1076 } 1077 1078 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1079 redirect_to_router = B_TRUE; 1080 ncec_flags |= NCE_F_ISROUTER; 1081 } else { 1082 gateway = dst; /* Add nce for dst */ 1083 } 1084 1085 1086 /* 1087 * Verify that the IP source address of the redirect is 1088 * the same as the current first-hop router for the specified 1089 * ICMP destination address. 1090 * Also, Make sure we had a route for the dest in question and 1091 * that route was pointing to the old gateway (the source of the 1092 * redirect packet.) 1093 * Note: this merely says that there is some IRE which matches that 1094 * gateway; not that the longest match matches that gateway. 1095 */ 1096 prev_ire = ire_ftable_lookup_v6(dst, 0, src, 0, rill, 1097 ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL, 0, ipst, NULL); 1098 1099 /* 1100 * Check that 1101 * the redirect was not from ourselves 1102 * old gateway is still directly reachable 1103 */ 1104 if (prev_ire == NULL || 1105 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) || 1106 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 1107 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1108 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill); 1109 goto fail_redirect; 1110 } 1111 1112 ASSERT(prev_ire->ire_ill != NULL); 1113 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD) 1114 ncec_flags |= NCE_F_NONUD; 1115 1116 opt = (nd_opt_hdr_t *)&rd[1]; 1117 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1118 if (opt != NULL) { 1119 err = nce_lookup_then_add_v6(rill, 1120 (uchar_t *)&opt[1], /* Link layer address */ 1121 rill->ill_phys_addr_length, 1122 gateway, ncec_flags, ND_STALE, &nce); 1123 switch (err) { 1124 case 0: 1125 nce_refrele(nce); 1126 break; 1127 case EEXIST: 1128 /* 1129 * Check to see if link layer 
address has changed and 1130 * process the ncec_state accordingly. 1131 */ 1132 nce_process(nce->nce_common, 1133 (uchar_t *)&opt[1], 0, B_FALSE); 1134 nce_refrele(nce); 1135 break; 1136 default: 1137 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1138 err)); 1139 goto fail_redirect; 1140 } 1141 } 1142 if (redirect_to_router) { 1143 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1144 1145 /* 1146 * Create a Route Association. This will allow us to remember 1147 * a router told us to use the particular gateway. 1148 */ 1149 ire = ire_create_v6( 1150 dst, 1151 &ipv6_all_ones, /* mask */ 1152 gateway, /* gateway addr */ 1153 IRE_HOST, 1154 prev_ire->ire_ill, 1155 ALL_ZONES, 1156 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1157 NULL, 1158 ipst); 1159 } else { 1160 ipif_t *ipif; 1161 in6_addr_t gw; 1162 1163 /* 1164 * Just create an on link entry, i.e. interface route. 1165 * The gateway field is our link-local on the ill. 1166 */ 1167 mutex_enter(&rill->ill_lock); 1168 for (ipif = rill->ill_ipif; ipif != NULL; 1169 ipif = ipif->ipif_next) { 1170 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1171 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr)) 1172 break; 1173 } 1174 if (ipif == NULL) { 1175 /* We have no link-local address! 
*/ 1176 mutex_exit(&rill->ill_lock); 1177 goto fail_redirect; 1178 } 1179 gw = ipif->ipif_v6lcl_addr; 1180 mutex_exit(&rill->ill_lock); 1181 1182 ire = ire_create_v6( 1183 dst, /* gateway == dst */ 1184 &ipv6_all_ones, /* mask */ 1185 &gw, /* gateway addr */ 1186 rill->ill_net_type, /* IF_[NO]RESOLVER */ 1187 prev_ire->ire_ill, 1188 ALL_ZONES, 1189 (RTF_DYNAMIC | RTF_HOST), 1190 NULL, 1191 ipst); 1192 } 1193 1194 if (ire == NULL) 1195 goto fail_redirect; 1196 1197 nire = ire_add(ire); 1198 /* Check if it was a duplicate entry */ 1199 if (nire != NULL && nire != ire) { 1200 ASSERT(nire->ire_identical_ref > 1); 1201 ire_delete(nire); 1202 ire_refrele(nire); 1203 nire = NULL; 1204 } 1205 ire = nire; 1206 if (ire != NULL) { 1207 ire_refrele(ire); /* Held in ire_add */ 1208 1209 /* tell routing sockets that we received a redirect */ 1210 ip_rts_change_v6(RTM_REDIRECT, 1211 &rd->nd_rd_dst, 1212 &rd->nd_rd_target, 1213 &ipv6_all_ones, 0, src, 1214 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1215 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1216 1217 /* 1218 * Delete any existing IRE_HOST type ires for this destination. 1219 * This together with the added IRE has the effect of 1220 * modifying an existing redirect. 1221 */ 1222 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1223 prev_ire->ire_ill, ALL_ZONES, NULL, 1224 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst, 1225 NULL); 1226 1227 if (redir_ire != NULL) { 1228 if (redir_ire->ire_flags & RTF_DYNAMIC) 1229 ire_delete(redir_ire); 1230 ire_refrele(redir_ire); 1231 } 1232 } 1233 1234 ire_refrele(prev_ire); 1235 prev_ire = NULL; 1236 1237 fail_redirect: 1238 if (prev_ire != NULL) 1239 ire_refrele(prev_ire); 1240 freemsg(mp); 1241 if (rill != ira->ira_rill) 1242 ill_refrele(rill); 1243 } 1244 1245 /* 1246 * Build and ship an IPv6 ICMP message using the packet data in mp, 1247 * and the ICMP header pointed to by "stuff". (May be called as 1248 * writer.) 
1249 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1250 * verify that an icmp error packet can be sent. 1251 * 1252 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1253 * source address (see above function). 1254 */ 1255 static void 1256 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1257 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1258 { 1259 ip6_t *ip6h; 1260 in6_addr_t v6dst; 1261 size_t len_needed; 1262 size_t msg_len; 1263 mblk_t *mp1; 1264 icmp6_t *icmp6; 1265 in6_addr_t v6src; 1266 ill_t *ill = ira->ira_ill; 1267 ip_stack_t *ipst = ill->ill_ipst; 1268 ip_xmit_attr_t ixas; 1269 1270 ip6h = (ip6_t *)mp->b_rptr; 1271 1272 bzero(&ixas, sizeof (ixas)); 1273 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1274 ixas.ixa_zoneid = ira->ira_zoneid; 1275 ixas.ixa_ifindex = 0; 1276 ixas.ixa_ipst = ipst; 1277 ixas.ixa_cred = kcred; 1278 ixas.ixa_cpid = NOPID; 1279 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1280 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1281 1282 /* 1283 * If the source of the original packet was link-local, then 1284 * make sure we send on the same ill (group) as we received it on. 1285 */ 1286 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1287 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1288 if (IS_UNDER_IPMP(ill)) 1289 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1290 else 1291 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1292 } 1293 1294 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1295 /* 1296 * Apply IPsec based on how IPsec was applied to 1297 * the packet that had the error. 1298 * 1299 * If it was an outbound packet that caused the ICMP 1300 * error, then the caller will have setup the IRA 1301 * appropriately. 1302 */ 1303 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1304 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1305 /* Note: mp already consumed and ip_drop_packet done */ 1306 return; 1307 } 1308 } else { 1309 /* 1310 * This is in clear. 
The icmp message we are building 1311 * here should go out in clear, independent of our policy. 1312 */ 1313 ixas.ixa_flags |= IXAF_NO_IPSEC; 1314 } 1315 1316 /* 1317 * If the caller specified the source we use that. 1318 * Otherwise, if the packet was for one of our unicast addresses, make 1319 * sure we respond with that as the source. Otherwise 1320 * have ip_output_simple pick the source address. 1321 */ 1322 if (v6src_ptr != NULL) { 1323 v6src = *v6src_ptr; 1324 } else { 1325 ire_t *ire; 1326 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1327 1328 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1329 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1330 match_flags |= MATCH_IRE_ILL; 1331 1332 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1333 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1334 match_flags, 0, ipst, NULL); 1335 if (ire != NULL) { 1336 v6src = ip6h->ip6_dst; 1337 ire_refrele(ire); 1338 } else { 1339 v6src = ipv6_all_zeros; 1340 ixas.ixa_flags |= IXAF_SET_SOURCE; 1341 } 1342 } 1343 v6dst = ip6h->ip6_src; 1344 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1345 msg_len = msgdsize(mp); 1346 if (msg_len > len_needed) { 1347 if (!adjmsg(mp, len_needed - msg_len)) { 1348 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1349 freemsg(mp); 1350 return; 1351 } 1352 msg_len = len_needed; 1353 } 1354 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1355 if (mp1 == NULL) { 1356 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1357 freemsg(mp); 1358 return; 1359 } 1360 mp1->b_cont = mp; 1361 mp = mp1; 1362 1363 /* 1364 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1365 * node generates be accepted in peace by all on-host destinations. 1366 * If we do NOT assume that all on-host destinations trust 1367 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1368 * (Look for IXAF_TRUSTED_ICMP). 
1369 */ 1370 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1371 1372 ip6h = (ip6_t *)mp->b_rptr; 1373 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1374 1375 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1376 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1377 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1378 ip6h->ip6_dst = v6dst; 1379 ip6h->ip6_src = v6src; 1380 msg_len += IPV6_HDR_LEN + len; 1381 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1382 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1383 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1384 } 1385 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1386 icmp6 = (icmp6_t *)&ip6h[1]; 1387 bcopy(stuff, (char *)icmp6, len); 1388 /* 1389 * Prepare for checksum by putting icmp length in the icmp 1390 * checksum field. The checksum is calculated in ip_output_wire_v6. 1391 */ 1392 icmp6->icmp6_cksum = ip6h->ip6_plen; 1393 if (icmp6->icmp6_type == ND_REDIRECT) { 1394 ip6h->ip6_hops = IPV6_MAX_HOPS; 1395 } 1396 1397 (void) ip_output_simple(mp, &ixas); 1398 ixa_cleanup(&ixas); 1399 } 1400 1401 /* 1402 * Update the output mib when ICMPv6 packets are sent. 
1403 */ 1404 void 1405 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1406 { 1407 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1408 1409 switch (icmp6->icmp6_type) { 1410 case ICMP6_DST_UNREACH: 1411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1412 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1413 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1414 break; 1415 1416 case ICMP6_TIME_EXCEEDED: 1417 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1418 break; 1419 1420 case ICMP6_PARAM_PROB: 1421 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1422 break; 1423 1424 case ICMP6_PACKET_TOO_BIG: 1425 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1426 break; 1427 1428 case ICMP6_ECHO_REQUEST: 1429 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1430 break; 1431 1432 case ICMP6_ECHO_REPLY: 1433 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1434 break; 1435 1436 case ND_ROUTER_SOLICIT: 1437 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1438 break; 1439 1440 case ND_ROUTER_ADVERT: 1441 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1442 break; 1443 1444 case ND_NEIGHBOR_SOLICIT: 1445 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1446 break; 1447 1448 case ND_NEIGHBOR_ADVERT: 1449 BUMP_MIB(ill->ill_icmp6_mib, 1450 ipv6IfIcmpOutNeighborAdvertisements); 1451 break; 1452 1453 case ND_REDIRECT: 1454 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1455 break; 1456 1457 case MLD_LISTENER_QUERY: 1458 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1459 break; 1460 1461 case MLD_LISTENER_REPORT: 1462 case MLD_V2_LISTENER_REPORT: 1463 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1464 break; 1465 1466 case MLD_LISTENER_REDUCTION: 1467 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1468 break; 1469 } 1470 } 1471 1472 /* 1473 * Check if it is ok to send an ICMPv6 error packet in 1474 * response to the IP packet in mp. 
1475 * Free the message and return null if no 1476 * ICMP error packet should be sent. 1477 */ 1478 static mblk_t * 1479 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1480 { 1481 ill_t *ill = ira->ira_ill; 1482 ip_stack_t *ipst = ill->ill_ipst; 1483 boolean_t llbcast; 1484 ip6_t *ip6h; 1485 1486 if (!mp) 1487 return (NULL); 1488 1489 /* We view multicast and broadcast as the same.. */ 1490 llbcast = (ira->ira_flags & 1491 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1492 ip6h = (ip6_t *)mp->b_rptr; 1493 1494 /* Check if source address uniquely identifies the host */ 1495 1496 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1497 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1498 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1499 freemsg(mp); 1500 return (NULL); 1501 } 1502 1503 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1504 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1505 icmp6_t *icmp6; 1506 1507 if (mp->b_wptr - mp->b_rptr < len_needed) { 1508 if (!pullupmsg(mp, len_needed)) { 1509 BUMP_MIB(ill->ill_icmp6_mib, 1510 ipv6IfIcmpInErrors); 1511 freemsg(mp); 1512 return (NULL); 1513 } 1514 ip6h = (ip6_t *)mp->b_rptr; 1515 } 1516 icmp6 = (icmp6_t *)&ip6h[1]; 1517 /* Explicitly do not generate errors in response to redirects */ 1518 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1519 icmp6->icmp6_type == ND_REDIRECT) { 1520 freemsg(mp); 1521 return (NULL); 1522 } 1523 } 1524 /* 1525 * Check that the destination is not multicast and that the packet 1526 * was not sent on link layer broadcast or multicast. (Exception 1527 * is Packet too big message as per the draft - when mcast_ok is set.) 1528 */ 1529 if (!mcast_ok && 1530 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1531 freemsg(mp); 1532 return (NULL); 1533 } 1534 /* 1535 * If this is a labeled system, then check to see if we're allowed to 1536 * send a response to this particular sender. If not, then just drop. 
1537 */ 1538 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1539 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1540 freemsg(mp); 1541 return (NULL); 1542 } 1543 1544 if (icmp_err_rate_limit(ipst)) { 1545 /* 1546 * Only send ICMP error packets every so often. 1547 * This should be done on a per port/source basis, 1548 * but for now this will suffice. 1549 */ 1550 freemsg(mp); 1551 return (NULL); 1552 } 1553 return (mp); 1554 } 1555 1556 /* 1557 * Called when a packet was sent out the same link that it arrived on. 1558 * Check if it is ok to send a redirect and then send it. 1559 */ 1560 void 1561 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1562 ip_recv_attr_t *ira) 1563 { 1564 ill_t *ill = ira->ira_ill; 1565 ip_stack_t *ipst = ill->ill_ipst; 1566 in6_addr_t *v6targ; 1567 ire_t *src_ire_v6 = NULL; 1568 mblk_t *mp1; 1569 ire_t *nhop_ire = NULL; 1570 1571 /* 1572 * Don't send a redirect when forwarding a source 1573 * routed packet. 1574 */ 1575 if (ip_source_routed_v6(ip6h, mp, ipst)) 1576 return; 1577 1578 if (ire->ire_type & IRE_ONLINK) { 1579 /* Target is directly connected */ 1580 v6targ = &ip6h->ip6_dst; 1581 } else { 1582 /* Determine the most specific IRE used to send the packets */ 1583 nhop_ire = ire_nexthop(ire); 1584 if (nhop_ire == NULL) 1585 return; 1586 1587 /* 1588 * We won't send redirects to a router 1589 * that doesn't have a link local 1590 * address, but will forward. 
1591 */ 1592 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1593 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1594 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1595 ire_refrele(nhop_ire); 1596 return; 1597 } 1598 v6targ = &nhop_ire->ire_addr_v6; 1599 } 1600 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1601 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1602 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1603 1604 if (src_ire_v6 == NULL) { 1605 if (nhop_ire != NULL) 1606 ire_refrele(nhop_ire); 1607 return; 1608 } 1609 1610 /* 1611 * The source is directly connected. 1612 */ 1613 mp1 = copymsg(mp); 1614 if (mp1 != NULL) 1615 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1616 1617 if (nhop_ire != NULL) 1618 ire_refrele(nhop_ire); 1619 ire_refrele(src_ire_v6); 1620 } 1621 1622 /* 1623 * Generate an ICMPv6 redirect message. 1624 * Include target link layer address option if it exits. 1625 * Always include redirect header. 1626 */ 1627 static void 1628 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1629 ip_recv_attr_t *ira) 1630 { 1631 nd_redirect_t *rd; 1632 nd_opt_rd_hdr_t *rdh; 1633 uchar_t *buf; 1634 ncec_t *ncec = NULL; 1635 nd_opt_hdr_t *opt; 1636 int len; 1637 int ll_opt_len = 0; 1638 int max_redir_hdr_data_len; 1639 int pkt_len; 1640 in6_addr_t *srcp; 1641 ill_t *ill; 1642 boolean_t need_refrele; 1643 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1644 1645 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1646 if (mp == NULL) 1647 return; 1648 1649 if (IS_UNDER_IPMP(ira->ira_ill)) { 1650 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1651 if (ill == NULL) { 1652 ill = ira->ira_ill; 1653 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1654 ip_drop_output("no IPMP ill for sending redirect", 1655 mp, ill); 1656 freemsg(mp); 1657 return; 1658 } 1659 need_refrele = B_TRUE; 1660 } else { 1661 ill = ira->ira_ill; 1662 need_refrele = B_FALSE; 1663 } 1664 1665 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1666 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1667 ncec->ncec_lladdr != NULL) { 1668 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1669 ill->ill_phys_addr_length + 7)/8 * 8; 1670 } 1671 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1672 ASSERT(len % 4 == 0); 1673 buf = kmem_alloc(len, KM_NOSLEEP); 1674 if (buf == NULL) { 1675 if (ncec != NULL) 1676 ncec_refrele(ncec); 1677 if (need_refrele) 1678 ill_refrele(ill); 1679 freemsg(mp); 1680 return; 1681 } 1682 1683 rd = (nd_redirect_t *)buf; 1684 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1685 rd->nd_rd_code = 0; 1686 rd->nd_rd_reserved = 0; 1687 rd->nd_rd_target = *targetp; 1688 rd->nd_rd_dst = *dest; 1689 1690 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1691 if (ncec != NULL && ll_opt_len != 0) { 1692 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1693 opt->nd_opt_len = ll_opt_len/8; 1694 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1695 ill->ill_phys_addr_length); 1696 } 1697 if (ncec != NULL) 1698 ncec_refrele(ncec); 1699 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1700 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1701 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1702 max_redir_hdr_data_len = 1703 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1704 pkt_len = msgdsize(mp); 1705 /* Make sure mp is 8 byte aligned */ 1706 if (pkt_len > max_redir_hdr_data_len) { 1707 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1708 sizeof (nd_opt_rd_hdr_t))/8; 1709 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1710 } else { 1711 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1712 (void) adjmsg(mp, -(pkt_len % 8)); 1713 } 1714 rdh->nd_opt_rh_reserved1 = 0; 1715 rdh->nd_opt_rh_reserved2 = 0; 1716 /* ipif_v6lcl_addr contains the link-local source address */ 1717 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1718 1719 /* Redirects sent by router, and router is global zone */ 1720 ASSERT(ira->ira_zoneid == ALL_ZONES); 1721 
ira->ira_zoneid = GLOBAL_ZONEID; 1722 icmp_pkt_v6(mp, buf, len, srcp, ira); 1723 kmem_free(buf, len); 1724 if (need_refrele) 1725 ill_refrele(ill); 1726 } 1727 1728 1729 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1730 void 1731 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1732 ip_recv_attr_t *ira) 1733 { 1734 icmp6_t icmp6; 1735 1736 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1737 if (mp == NULL) 1738 return; 1739 1740 bzero(&icmp6, sizeof (icmp6_t)); 1741 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1742 icmp6.icmp6_code = code; 1743 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1744 } 1745 1746 /* 1747 * Generate an ICMP unreachable message. 1748 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1749 * constructed by the caller. 1750 */ 1751 void 1752 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1753 ip_recv_attr_t *ira) 1754 { 1755 icmp6_t icmp6; 1756 1757 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1758 if (mp == NULL) 1759 return; 1760 1761 bzero(&icmp6, sizeof (icmp6_t)); 1762 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1763 icmp6.icmp6_code = code; 1764 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1765 } 1766 1767 /* 1768 * Generate an ICMP pkt too big message. 1769 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1770 * constructed by the caller. 1771 */ 1772 void 1773 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1774 ip_recv_attr_t *ira) 1775 { 1776 icmp6_t icmp6; 1777 1778 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1779 if (mp == NULL) 1780 return; 1781 1782 bzero(&icmp6, sizeof (icmp6_t)); 1783 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1784 icmp6.icmp6_code = 0; 1785 icmp6.icmp6_mtu = htonl(mtu); 1786 1787 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1788 } 1789 1790 /* 1791 * Generate an ICMP parameter problem message. (May be called as writer.) 
1792 * 'offset' is the offset from the beginning of the packet in error. 1793 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1794 * constructed by the caller. 1795 */ 1796 static void 1797 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1798 boolean_t mcast_ok, ip_recv_attr_t *ira) 1799 { 1800 icmp6_t icmp6; 1801 1802 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1803 if (mp == NULL) 1804 return; 1805 1806 bzero((char *)&icmp6, sizeof (icmp6_t)); 1807 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1808 icmp6.icmp6_code = code; 1809 icmp6.icmp6_pptr = htonl(offset); 1810 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1811 } 1812 1813 void 1814 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1815 ip_recv_attr_t *ira) 1816 { 1817 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1818 uint16_t hdr_length; 1819 uint8_t *nexthdrp; 1820 uint32_t offset; 1821 ill_t *ill = ira->ira_ill; 1822 1823 /* Determine the offset of the bad nexthdr value */ 1824 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1825 /* Malformed packet */ 1826 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1827 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1828 freemsg(mp); 1829 return; 1830 } 1831 1832 offset = nexthdrp - mp->b_rptr; 1833 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1834 mcast_ok, ira); 1835 } 1836 1837 /* 1838 * Verify whether or not the IP address is a valid local address. 1839 * Could be a unicast, including one for a down interface. 1840 * If allow_mcbc then a multicast or broadcast address is also 1841 * acceptable. 1842 * 1843 * In the case of a multicast address, however, the 1844 * upper protocol is expected to reset the src address 1845 * to zero when we return IPVL_MCAST so that 1846 * no packets are emitted with multicast address as 1847 * source address. 
1848 * The addresses valid for bind are: 1849 * (1) - in6addr_any 1850 * (2) - IP address of an UP interface 1851 * (3) - IP address of a DOWN interface 1852 * (4) - a multicast address. In this case 1853 * the conn will only receive packets destined to 1854 * the specified multicast address. Note: the 1855 * application still has to issue an 1856 * IPV6_JOIN_GROUP socket option. 1857 * 1858 * In all the above cases, the bound address must be valid in the current zone. 1859 * When the address is loopback or multicast, there might be many matching IREs 1860 * so bind has to look up based on the zone. 1861 */ 1862 ip_laddr_t 1863 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1864 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1865 { 1866 ire_t *src_ire; 1867 uint_t match_flags; 1868 ill_t *ill = NULL; 1869 1870 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1871 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1872 1873 match_flags = MATCH_IRE_ZONEONLY; 1874 if (scopeid != 0) { 1875 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1876 if (ill == NULL) 1877 return (IPVL_BAD); 1878 match_flags |= MATCH_IRE_ILL; 1879 } 1880 1881 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1882 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1883 if (ill != NULL) 1884 ill_refrele(ill); 1885 1886 /* 1887 * If an address other than in6addr_any is requested, 1888 * we verify that it is a valid address for bind 1889 * Note: Following code is in if-else-if form for 1890 * readability compared to a condition check. 1891 */ 1892 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1893 /* 1894 * (2) Bind to address of local UP interface 1895 */ 1896 ire_refrele(src_ire); 1897 return (IPVL_UNICAST_UP); 1898 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1899 /* (4) bind to multicast address. 
*/ 1900 if (src_ire != NULL) 1901 ire_refrele(src_ire); 1902 1903 /* 1904 * Note: caller should take IPV6_MULTICAST_IF 1905 * into account when selecting a real source address. 1906 */ 1907 if (allow_mcbc) 1908 return (IPVL_MCAST); 1909 else 1910 return (IPVL_BAD); 1911 } else { 1912 ipif_t *ipif; 1913 1914 /* 1915 * (3) Bind to address of local DOWN interface? 1916 * (ipif_lookup_addr() looks up all interfaces 1917 * but we do not get here for UP interfaces 1918 * - case (2) above) 1919 */ 1920 if (src_ire != NULL) 1921 ire_refrele(src_ire); 1922 1923 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1924 if (ipif == NULL) 1925 return (IPVL_BAD); 1926 1927 /* Not a useful source? */ 1928 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1929 ipif_refrele(ipif); 1930 return (IPVL_BAD); 1931 } 1932 ipif_refrele(ipif); 1933 return (IPVL_UNICAST_DOWN); 1934 } 1935 } 1936 1937 /* 1938 * Verify that both the source and destination addresses are valid. If 1939 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1940 * i.e. have no route to it. Protocols like TCP want to verify destination 1941 * reachability, while tunnels do not. 1942 * 1943 * Determine the route, the interface, and (optionally) the source address 1944 * to use to reach a given destination. 1945 * Note that we allow connect to broadcast and multicast addresses when 1946 * IPDF_ALLOW_MCBC is set. 1947 * first_hop and dst_addr are normally the same, but if source routing 1948 * they will differ; in that case the first_hop is what we'll use for the 1949 * routing lookup but the dce and label checks will be done on dst_addr, 1950 * 1951 * If uinfo is set, then we fill in the best available information 1952 * we have for the destination. This is based on (in priority order) any 1953 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1954 * ill_mtu. 1955 * 1956 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1957 * always do the label check on dst_addr. 1958 * 1959 * Assumes that the caller has set ixa_scopeid for link-local communication. 1960 */ 1961 int 1962 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1963 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1964 uint32_t flags, uint_t mac_mode) 1965 { 1966 ire_t *ire; 1967 int error = 0; 1968 in6_addr_t setsrc; /* RTF_SETSRC */ 1969 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1970 ip_stack_t *ipst = ixa->ixa_ipst; 1971 dce_t *dce; 1972 uint_t pmtu; 1973 uint_t ifindex; 1974 uint_t generation; 1975 nce_t *nce; 1976 ill_t *ill = NULL; 1977 boolean_t multirt = B_FALSE; 1978 1979 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1980 1981 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1982 1983 /* 1984 * We never send to zero; the ULPs map it to the loopback address. 1985 * We can't allow it since we use zero to mean unitialized in some 1986 * places. 1987 */ 1988 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1989 1990 if (is_system_labeled()) { 1991 ts_label_t *tsl = NULL; 1992 1993 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 1994 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 1995 if (error != 0) 1996 return (error); 1997 if (tsl != NULL) { 1998 /* Update the label */ 1999 ip_xmit_attr_replace_tsl(ixa, tsl); 2000 } 2001 } 2002 2003 setsrc = ipv6_all_zeros; 2004 /* 2005 * Select a route; For IPMP interfaces, we would only select 2006 * a "hidden" route (i.e., going through a specific under_ill) 2007 * if ixa_ifindex has been specified. 2008 */ 2009 ire = ip_select_route_v6(firsthop, ixa, &generation, &setsrc, &error, 2010 &multirt); 2011 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2012 if (error != 0) 2013 goto bad_addr; 2014 2015 /* 2016 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2017 * If IPDF_VERIFY_DST is set, the destination must be reachable. 2018 * Otherwise the destination needn't be reachable. 
2019 * 2020 * If we match on a reject or black hole, then we've got a 2021 * local failure. May as well fail out the connect() attempt, 2022 * since it's never going to succeed. 2023 */ 2024 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2025 /* 2026 * If we're verifying destination reachability, we always want 2027 * to complain here. 2028 * 2029 * If we're not verifying destination reachability but the 2030 * destination has a route, we still want to fail on the 2031 * temporary address and broadcast address tests. 2032 * 2033 * In both cases do we let the code continue so some reasonable 2034 * information is returned to the caller. That enables the 2035 * caller to use (and even cache) the IRE. conn_ip_ouput will 2036 * use the generation mismatch path to check for the unreachable 2037 * case thereby avoiding any specific check in the main path. 2038 */ 2039 ASSERT(generation == IRE_GENERATION_VERIFY); 2040 if (flags & IPDF_VERIFY_DST) { 2041 /* 2042 * Set errno but continue to set up ixa_ire to be 2043 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2044 * That allows callers to use ip_output to get an 2045 * ICMP error back. 2046 */ 2047 if (!(ire->ire_type & IRE_HOST)) 2048 error = ENETUNREACH; 2049 else 2050 error = EHOSTUNREACH; 2051 } 2052 } 2053 2054 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2055 !(flags & IPDF_ALLOW_MCBC)) { 2056 ire_refrele(ire); 2057 ire = ire_reject(ipst, B_FALSE); 2058 generation = IRE_GENERATION_VERIFY; 2059 error = ENETUNREACH; 2060 } 2061 2062 /* Cache things */ 2063 if (ixa->ixa_ire != NULL) 2064 ire_refrele_notr(ixa->ixa_ire); 2065 #ifdef DEBUG 2066 ire_refhold_notr(ire); 2067 ire_refrele(ire); 2068 #endif 2069 ixa->ixa_ire = ire; 2070 ixa->ixa_ire_generation = generation; 2071 2072 /* 2073 * For multicast with multirt we have a flag passed back from 2074 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2075 * possible multicast address. 
2076 * We also need a flag for multicast since we can't check 2077 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2078 */ 2079 if (multirt) { 2080 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2081 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2082 } else { 2083 ixa->ixa_postfragfn = ire->ire_postfragfn; 2084 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2085 } 2086 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2087 /* Get an nce to cache. */ 2088 nce = ire_to_nce(ire, NULL, firsthop); 2089 if (nce == NULL) { 2090 /* Allocation failure? */ 2091 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2092 } else { 2093 if (ixa->ixa_nce != NULL) 2094 nce_refrele(ixa->ixa_nce); 2095 ixa->ixa_nce = nce; 2096 } 2097 } 2098 2099 /* 2100 * We use use ire_nexthop_ill to avoid the under ipmp 2101 * interface for source address selection. Note that for ipmp 2102 * probe packets, ixa_ifindex would have been specified, and 2103 * the ip_select_route() invocation would have picked an ire 2104 * will ire_ill pointing at an under interface. 2105 */ 2106 ill = ire_nexthop_ill(ire); 2107 2108 /* 2109 * If the source address is a loopback address, the 2110 * destination had best be local or multicast. 2111 * If we are sending to an IRE_LOCAL using a loopback source then 2112 * it had better be the same zoneid. 2113 */ 2114 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2115 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2116 ire = NULL; /* Stored in ixa_ire */ 2117 error = EADDRNOTAVAIL; 2118 goto bad_addr; 2119 } 2120 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2121 ire = NULL; /* Stored in ixa_ire */ 2122 error = EADDRNOTAVAIL; 2123 goto bad_addr; 2124 } 2125 } 2126 2127 /* 2128 * Does the caller want us to pick a source address? 
2129 */ 2130 if (flags & IPDF_SELECT_SRC) { 2131 in6_addr_t src_addr; 2132 2133 /* If unreachable we have no ill but need some source */ 2134 if (ill == NULL) { 2135 src_addr = ipv6_loopback; 2136 /* Make sure we look for a better source address */ 2137 generation = SRC_GENERATION_VERIFY; 2138 } else { 2139 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2140 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2141 &src_addr, &generation, NULL); 2142 if (error != 0) { 2143 ire = NULL; /* Stored in ixa_ire */ 2144 goto bad_addr; 2145 } 2146 } 2147 2148 /* 2149 * We allow the source address to to down. 2150 * However, we check that we don't use the loopback address 2151 * as a source when sending out on the wire. 2152 */ 2153 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2154 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2155 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2156 ire = NULL; /* Stored in ixa_ire */ 2157 error = EADDRNOTAVAIL; 2158 goto bad_addr; 2159 } 2160 2161 *src_addrp = src_addr; 2162 ixa->ixa_src_generation = generation; 2163 } 2164 2165 /* 2166 * Make sure we don't leave an unreachable ixa_nce in place 2167 * since ip_select_route is used when we unplumb i.e., remove 2168 * references on ixa_ire, ixa_nce, and ixa_dce. 
2169 */ 2170 nce = ixa->ixa_nce; 2171 if (nce != NULL && nce->nce_is_condemned) { 2172 nce_refrele(nce); 2173 ixa->ixa_nce = NULL; 2174 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2175 } 2176 2177 2178 ifindex = 0; 2179 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2180 /* If we are creating a DCE we'd better have an ifindex */ 2181 if (ill != NULL) 2182 ifindex = ill->ill_phyint->phyint_ifindex; 2183 else 2184 flags &= ~IPDF_UNIQUE_DCE; 2185 } 2186 2187 if (flags & IPDF_UNIQUE_DCE) { 2188 /* Fallback to the default dce if allocation fails */ 2189 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2190 if (dce != NULL) { 2191 generation = dce->dce_generation; 2192 } else { 2193 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2194 &generation); 2195 } 2196 } else { 2197 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2198 } 2199 ASSERT(dce != NULL); 2200 if (ixa->ixa_dce != NULL) 2201 dce_refrele_notr(ixa->ixa_dce); 2202 #ifdef DEBUG 2203 dce_refhold_notr(dce); 2204 dce_refrele(dce); 2205 #endif 2206 ixa->ixa_dce = dce; 2207 ixa->ixa_dce_generation = generation; 2208 2209 /* 2210 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2211 * multicast. But pmtu discovery is only enabled for connected 2212 * sockets in general. 2213 */ 2214 2215 /* 2216 * Set initial value for fragmentation limit. Either conn_ip_output 2217 * or ULP might updates it when there are routing changes. 2218 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2219 */ 2220 pmtu = ip_get_pmtu(ixa); 2221 ixa->ixa_fragsize = pmtu; 2222 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2223 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2224 ixa->ixa_pmtu = pmtu; 2225 2226 /* 2227 * Extract information useful for some transports. 2228 * First we look for DCE metrics. Then we take what we have in 2229 * the metrics in the route, where the offlink is used if we have 2230 * one. 
2231 */ 2232 if (uinfo != NULL) { 2233 bzero(uinfo, sizeof (*uinfo)); 2234 2235 if (dce->dce_flags & DCEF_UINFO) 2236 *uinfo = dce->dce_uinfo; 2237 2238 rts_merge_metrics(uinfo, &ire->ire_metrics); 2239 2240 /* Allow ire_metrics to decrease the path MTU from above */ 2241 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2242 uinfo->iulp_mtu = pmtu; 2243 2244 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2245 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2246 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2247 } 2248 2249 if (ill != NULL) 2250 ill_refrele(ill); 2251 2252 return (error); 2253 2254 bad_addr: 2255 if (ire != NULL) 2256 ire_refrele(ire); 2257 2258 if (ill != NULL) 2259 ill_refrele(ill); 2260 2261 /* 2262 * Make sure we don't leave an unreachable ixa_nce in place 2263 * since ip_select_route is used when we unplumb i.e., remove 2264 * references on ixa_ire, ixa_nce, and ixa_dce. 2265 */ 2266 nce = ixa->ixa_nce; 2267 if (nce != NULL && nce->nce_is_condemned) { 2268 nce_refrele(nce); 2269 ixa->ixa_nce = NULL; 2270 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2271 } 2272 2273 return (error); 2274 } 2275 2276 /* 2277 * Handle protocols with which IP is less intimate. There 2278 * can be more than one stream bound to a particular 2279 * protocol. When this is the case, normally each one gets a copy 2280 * of any incoming packets. 2281 * 2282 * Zones notes: 2283 * Packets will be distributed to conns in all zones. This is really only 2284 * useful for ICMPv6 as only applications in the global zone can create raw 2285 * sockets for other protocols. 
2286 */ 2287 void 2288 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2289 { 2290 mblk_t *mp1; 2291 in6_addr_t laddr = ip6h->ip6_dst; 2292 conn_t *connp, *first_connp, *next_connp; 2293 connf_t *connfp; 2294 ill_t *ill = ira->ira_ill; 2295 ip_stack_t *ipst = ill->ill_ipst; 2296 2297 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2298 mutex_enter(&connfp->connf_lock); 2299 connp = connfp->connf_head; 2300 for (connp = connfp->connf_head; connp != NULL; 2301 connp = connp->conn_next) { 2302 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2303 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2304 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2305 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2306 break; 2307 } 2308 2309 if (connp == NULL) { 2310 /* 2311 * No one bound to this port. Is 2312 * there a client that wants all 2313 * unclaimed datagrams? 2314 */ 2315 mutex_exit(&connfp->connf_lock); 2316 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2317 ICMP6_PARAMPROB_NEXTHEADER, ira); 2318 return; 2319 } 2320 2321 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2322 2323 CONN_INC_REF(connp); 2324 first_connp = connp; 2325 2326 /* 2327 * XXX: Fix the multiple protocol listeners case. We should not 2328 * be walking the conn->conn_next list here. 
2329 */ 2330 connp = connp->conn_next; 2331 for (;;) { 2332 while (connp != NULL) { 2333 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2334 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2335 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2336 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2337 ira, connp))) 2338 break; 2339 connp = connp->conn_next; 2340 } 2341 2342 if (connp == NULL) { 2343 /* No more interested clients */ 2344 connp = first_connp; 2345 break; 2346 } 2347 if (((mp1 = dupmsg(mp)) == NULL) && 2348 ((mp1 = copymsg(mp)) == NULL)) { 2349 /* Memory allocation failed */ 2350 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2351 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2352 connp = first_connp; 2353 break; 2354 } 2355 2356 CONN_INC_REF(connp); 2357 mutex_exit(&connfp->connf_lock); 2358 2359 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2360 ira); 2361 2362 mutex_enter(&connfp->connf_lock); 2363 /* Follow the next pointer before releasing the conn. */ 2364 next_connp = connp->conn_next; 2365 CONN_DEC_REF(connp); 2366 connp = next_connp; 2367 } 2368 2369 /* Last one. Send it upstream. */ 2370 mutex_exit(&connfp->connf_lock); 2371 2372 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2373 2374 CONN_DEC_REF(connp); 2375 } 2376 2377 /* 2378 * Called when it is conceptually a ULP that would sent the packet 2379 * e.g., port unreachable and nexthdr unknown. Check that the packet 2380 * would have passed the IPsec global policy before sending the error. 2381 * 2382 * Send an ICMP error after patching up the packet appropriately. 2383 * Uses ip_drop_input and bumps the appropriate MIB. 2384 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2385 */ 2386 void 2387 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2388 ip_recv_attr_t *ira) 2389 { 2390 ip6_t *ip6h; 2391 boolean_t secure; 2392 ill_t *ill = ira->ira_ill; 2393 ip_stack_t *ipst = ill->ill_ipst; 2394 netstack_t *ns = ipst->ips_netstack; 2395 ipsec_stack_t *ipss = ns->netstack_ipsec; 2396 2397 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2398 2399 /* 2400 * We are generating an icmp error for some inbound packet. 2401 * Called from all ip_fanout_(udp, tcp, proto) functions. 2402 * Before we generate an error, check with global policy 2403 * to see whether this is allowed to enter the system. As 2404 * there is no "conn", we are checking with global policy. 2405 */ 2406 ip6h = (ip6_t *)mp->b_rptr; 2407 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2408 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2409 if (mp == NULL) 2410 return; 2411 } 2412 2413 /* We never send errors for protocols that we do implement */ 2414 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2415 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2416 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2417 freemsg(mp); 2418 return; 2419 } 2420 2421 switch (icmp_type) { 2422 case ICMP6_DST_UNREACH: 2423 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2424 2425 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2426 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2427 2428 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2429 break; 2430 case ICMP6_PARAM_PROB: 2431 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2432 2433 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2434 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2435 2436 /* Let the system determine the offset for this one */ 2437 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2438 break; 2439 default: 2440 #ifdef DEBUG 2441 panic("ip_fanout_send_icmp_v6: wrong type"); 2442 /*NOTREACHED*/ 2443 #else 2444 freemsg(mp); 2445 break; 2446 #endif 2447 } 2448 } 2449 2450 /* 2451 * 
Fanout for UDP packets that are multicast or ICMP errors. 2452 * (Unicast fanout is handled in ip_input_v6.) 2453 * 2454 * If SO_REUSEADDR is set all multicast packets 2455 * will be delivered to all conns bound to the same port. 2456 * 2457 * Fanout for UDP packets. 2458 * The caller puts <fport, lport> in the ports parameter. 2459 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2460 * 2461 * If SO_REUSEADDR is set all multicast and broadcast packets 2462 * will be delivered to all conns bound to the same port. 2463 * 2464 * Zones notes: 2465 * Earlier in ip_input on a system with multiple shared-IP zones we 2466 * duplicate the multicast and broadcast packets and send them up 2467 * with each explicit zoneid that exists on that ill. 2468 * This means that here we can match the zoneid with SO_ALLZONES being special. 2469 */ 2470 void 2471 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2472 ip_recv_attr_t *ira) 2473 { 2474 in6_addr_t laddr; 2475 conn_t *connp; 2476 connf_t *connfp; 2477 in6_addr_t faddr; 2478 ill_t *ill = ira->ira_ill; 2479 ip_stack_t *ipst = ill->ill_ipst; 2480 2481 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2482 2483 laddr = ip6h->ip6_dst; 2484 faddr = ip6h->ip6_src; 2485 2486 /* Attempt to find a client stream based on destination port. 
*/ 2487 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2488 mutex_enter(&connfp->connf_lock); 2489 connp = connfp->connf_head; 2490 while (connp != NULL) { 2491 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2492 conn_wantpacket_v6(connp, ira, ip6h) && 2493 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2494 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2495 break; 2496 connp = connp->conn_next; 2497 } 2498 2499 if (connp == NULL) 2500 goto notfound; 2501 2502 CONN_INC_REF(connp); 2503 2504 if (connp->conn_reuseaddr) { 2505 conn_t *first_connp = connp; 2506 conn_t *next_connp; 2507 mblk_t *mp1; 2508 2509 connp = connp->conn_next; 2510 for (;;) { 2511 while (connp != NULL) { 2512 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2513 fport, faddr) && 2514 conn_wantpacket_v6(connp, ira, ip6h) && 2515 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2516 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2517 ira, connp))) 2518 break; 2519 connp = connp->conn_next; 2520 } 2521 if (connp == NULL) { 2522 /* No more interested clients */ 2523 connp = first_connp; 2524 break; 2525 } 2526 if (((mp1 = dupmsg(mp)) == NULL) && 2527 ((mp1 = copymsg(mp)) == NULL)) { 2528 /* Memory allocation failed */ 2529 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2530 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2531 connp = first_connp; 2532 break; 2533 } 2534 2535 CONN_INC_REF(connp); 2536 mutex_exit(&connfp->connf_lock); 2537 2538 IP6_STAT(ipst, ip6_udp_fanmb); 2539 ip_fanout_udp_conn(connp, mp1, NULL, 2540 (ip6_t *)mp1->b_rptr, ira); 2541 2542 mutex_enter(&connfp->connf_lock); 2543 /* Follow the next pointer before releasing the conn. */ 2544 next_connp = connp->conn_next; 2545 IP6_STAT(ipst, ip6_udp_fanmb); 2546 CONN_DEC_REF(connp); 2547 connp = next_connp; 2548 } 2549 } 2550 2551 /* Last one. Send it upstream. 
*/ 2552 mutex_exit(&connfp->connf_lock); 2553 2554 IP6_STAT(ipst, ip6_udp_fanmb); 2555 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2556 CONN_DEC_REF(connp); 2557 return; 2558 2559 notfound: 2560 mutex_exit(&connfp->connf_lock); 2561 /* 2562 * No one bound to this port. Is 2563 * there a client that wants all 2564 * unclaimed datagrams? 2565 */ 2566 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2567 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2568 ip_fanout_proto_v6(mp, ip6h, ira); 2569 } else { 2570 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2571 ICMP6_DST_UNREACH_NOPORT, ira); 2572 } 2573 } 2574 2575 /* 2576 * int ip_find_hdr_v6() 2577 * 2578 * This routine is used by the upper layer protocols, iptun, and IPsec: 2579 * - Set extension header pointers to appropriate locations 2580 * - Determine IPv6 header length and return it 2581 * - Return a pointer to the last nexthdr value 2582 * 2583 * The caller must initialize ipp_fields. 2584 * The upper layer protocols normally set label_separate which makes the 2585 * routine put the TX label in ipp_label_v6. If this is not set then 2586 * the hop-by-hop options including the label are placed in ipp_hopopts. 2587 * 2588 * NOTE: If multiple extension headers of the same type are present, 2589 * ip_find_hdr_v6() will set the respective extension header pointers 2590 * to the first one that it encounters in the IPv6 header. It also 2591 * skips fragment headers. This routine deals with malformed packets 2592 * of various sorts in which case the returned length is up to the 2593 * malformed part. 
2594 */ 2595 int 2596 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2597 uint8_t *nexthdrp) 2598 { 2599 uint_t length, ehdrlen; 2600 uint8_t nexthdr; 2601 uint8_t *whereptr, *endptr; 2602 ip6_dest_t *tmpdstopts; 2603 ip6_rthdr_t *tmprthdr; 2604 ip6_hbh_t *tmphopopts; 2605 ip6_frag_t *tmpfraghdr; 2606 2607 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2608 ipp->ipp_hoplimit = ip6h->ip6_hops; 2609 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2610 ipp->ipp_addr = ip6h->ip6_dst; 2611 2612 length = IPV6_HDR_LEN; 2613 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2614 endptr = mp->b_wptr; 2615 2616 nexthdr = ip6h->ip6_nxt; 2617 while (whereptr < endptr) { 2618 /* Is there enough left for len + nexthdr? */ 2619 if (whereptr + MIN_EHDR_LEN > endptr) 2620 goto done; 2621 2622 switch (nexthdr) { 2623 case IPPROTO_HOPOPTS: { 2624 /* We check for any CIPSO */ 2625 uchar_t *secopt; 2626 boolean_t hbh_needed; 2627 uchar_t *after_secopt; 2628 2629 tmphopopts = (ip6_hbh_t *)whereptr; 2630 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2631 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2632 goto done; 2633 nexthdr = tmphopopts->ip6h_nxt; 2634 2635 if (!label_separate) { 2636 secopt = NULL; 2637 after_secopt = whereptr; 2638 } else { 2639 /* 2640 * We have dropped packets with bad options in 2641 * ip6_input. No need to check return value 2642 * here. 
2643 */ 2644 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2645 &secopt, &after_secopt, &hbh_needed); 2646 } 2647 if (secopt != NULL && after_secopt - whereptr > 0) { 2648 ipp->ipp_fields |= IPPF_LABEL_V6; 2649 ipp->ipp_label_v6 = secopt; 2650 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2651 } else { 2652 ipp->ipp_label_len_v6 = 0; 2653 after_secopt = whereptr; 2654 hbh_needed = B_TRUE; 2655 } 2656 /* return only 1st hbh */ 2657 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2658 ipp->ipp_fields |= IPPF_HOPOPTS; 2659 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2660 ipp->ipp_hopoptslen = ehdrlen - 2661 ipp->ipp_label_len_v6; 2662 } 2663 break; 2664 } 2665 case IPPROTO_DSTOPTS: 2666 tmpdstopts = (ip6_dest_t *)whereptr; 2667 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2668 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2669 goto done; 2670 nexthdr = tmpdstopts->ip6d_nxt; 2671 /* 2672 * ipp_dstopts is set to the destination header after a 2673 * routing header. 2674 * Assume it is a post-rthdr destination header 2675 * and adjust when we find an rthdr. 2676 */ 2677 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2678 ipp->ipp_fields |= IPPF_DSTOPTS; 2679 ipp->ipp_dstopts = tmpdstopts; 2680 ipp->ipp_dstoptslen = ehdrlen; 2681 } 2682 break; 2683 case IPPROTO_ROUTING: 2684 tmprthdr = (ip6_rthdr_t *)whereptr; 2685 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2686 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2687 goto done; 2688 nexthdr = tmprthdr->ip6r_nxt; 2689 /* return only 1st rthdr */ 2690 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2691 ipp->ipp_fields |= IPPF_RTHDR; 2692 ipp->ipp_rthdr = tmprthdr; 2693 ipp->ipp_rthdrlen = ehdrlen; 2694 } 2695 /* 2696 * Make any destination header we've seen be a 2697 * pre-rthdr destination header. 
2698 */ 2699 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2700 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2701 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2702 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2703 ipp->ipp_dstopts = NULL; 2704 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2705 ipp->ipp_dstoptslen = 0; 2706 } 2707 break; 2708 case IPPROTO_FRAGMENT: 2709 tmpfraghdr = (ip6_frag_t *)whereptr; 2710 ehdrlen = sizeof (ip6_frag_t); 2711 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2712 goto done; 2713 nexthdr = tmpfraghdr->ip6f_nxt; 2714 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2715 ipp->ipp_fields |= IPPF_FRAGHDR; 2716 ipp->ipp_fraghdr = tmpfraghdr; 2717 ipp->ipp_fraghdrlen = ehdrlen; 2718 } 2719 break; 2720 case IPPROTO_NONE: 2721 default: 2722 goto done; 2723 } 2724 length += ehdrlen; 2725 whereptr += ehdrlen; 2726 } 2727 done: 2728 if (nexthdrp != NULL) 2729 *nexthdrp = nexthdr; 2730 return (length); 2731 } 2732 2733 /* 2734 * Try to determine where and what are the IPv6 header length and 2735 * pointer to nexthdr value for the upper layer protocol (or an 2736 * unknown next hdr). 2737 * 2738 * Parameters returns a pointer to the nexthdr value; 2739 * Must handle malformed packets of various sorts. 2740 * Function returns failure for malformed cases. 2741 */ 2742 boolean_t 2743 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 2744 uint8_t **nexthdrpp) 2745 { 2746 uint16_t length; 2747 uint_t ehdrlen; 2748 uint8_t *nexthdrp; 2749 uint8_t *whereptr; 2750 uint8_t *endptr; 2751 ip6_dest_t *desthdr; 2752 ip6_rthdr_t *rthdr; 2753 ip6_frag_t *fraghdr; 2754 2755 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 2756 length = IPV6_HDR_LEN; 2757 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2758 endptr = mp->b_wptr; 2759 2760 nexthdrp = &ip6h->ip6_nxt; 2761 while (whereptr < endptr) { 2762 /* Is there enough left for len + nexthdr? 
*/ 2763 if (whereptr + MIN_EHDR_LEN > endptr) 2764 break; 2765 2766 switch (*nexthdrp) { 2767 case IPPROTO_HOPOPTS: 2768 case IPPROTO_DSTOPTS: 2769 /* Assumes the headers are identical for hbh and dst */ 2770 desthdr = (ip6_dest_t *)whereptr; 2771 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2772 if ((uchar_t *)desthdr + ehdrlen > endptr) 2773 return (B_FALSE); 2774 nexthdrp = &desthdr->ip6d_nxt; 2775 break; 2776 case IPPROTO_ROUTING: 2777 rthdr = (ip6_rthdr_t *)whereptr; 2778 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2779 if ((uchar_t *)rthdr + ehdrlen > endptr) 2780 return (B_FALSE); 2781 nexthdrp = &rthdr->ip6r_nxt; 2782 break; 2783 case IPPROTO_FRAGMENT: 2784 fraghdr = (ip6_frag_t *)whereptr; 2785 ehdrlen = sizeof (ip6_frag_t); 2786 if ((uchar_t *)&fraghdr[1] > endptr) 2787 return (B_FALSE); 2788 nexthdrp = &fraghdr->ip6f_nxt; 2789 break; 2790 case IPPROTO_NONE: 2791 /* No next header means we're finished */ 2792 default: 2793 *hdr_length_ptr = length; 2794 *nexthdrpp = nexthdrp; 2795 return (B_TRUE); 2796 } 2797 length += ehdrlen; 2798 whereptr += ehdrlen; 2799 *hdr_length_ptr = length; 2800 *nexthdrpp = nexthdrp; 2801 } 2802 switch (*nexthdrp) { 2803 case IPPROTO_HOPOPTS: 2804 case IPPROTO_DSTOPTS: 2805 case IPPROTO_ROUTING: 2806 case IPPROTO_FRAGMENT: 2807 /* 2808 * If any know extension headers are still to be processed, 2809 * the packet's malformed (or at least all the IP header(s) are 2810 * not in the same mblk - and that should never happen. 2811 */ 2812 return (B_FALSE); 2813 2814 default: 2815 /* 2816 * If we get here, we know that all of the IP headers were in 2817 * the same mblk, even if the ULP header is in the next mblk. 2818 */ 2819 *hdr_length_ptr = length; 2820 *nexthdrpp = nexthdrp; 2821 return (B_TRUE); 2822 } 2823 } 2824 2825 /* 2826 * Return the length of the IPv6 related headers (including extension headers) 2827 * Returns a length even if the packet is malformed. 
2828 */ 2829 int 2830 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2831 { 2832 uint16_t hdr_len; 2833 uint8_t *nexthdrp; 2834 2835 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 2836 return (hdr_len); 2837 } 2838 2839 /* 2840 * Parse and process any hop-by-hop or destination options. 2841 * 2842 * Assumes that q is an ill read queue so that ICMP errors for link-local 2843 * destinations are sent out the correct interface. 2844 * 2845 * Returns -1 if there was an error and mp has been consumed. 2846 * Returns 0 if no special action is needed. 2847 * Returns 1 if the packet contained a router alert option for this node 2848 * which is verified to be "interesting/known" for our implementation. 2849 * 2850 * XXX Note: In future as more hbh or dest options are defined, 2851 * it may be better to have different routines for hbh and dest 2852 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2853 * may have same value in different namespaces. Or is it same namespace ?? 2854 * Current code checks for each opt_type (other than pads) if it is in 2855 * the expected nexthdr (hbh or dest) 2856 */ 2857 int 2858 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2859 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2860 { 2861 uint8_t opt_type; 2862 uint_t optused; 2863 int ret = 0; 2864 const char *errtype; 2865 ill_t *ill = ira->ira_ill; 2866 ip_stack_t *ipst = ill->ill_ipst; 2867 2868 while (optlen != 0) { 2869 opt_type = *optptr; 2870 if (opt_type == IP6OPT_PAD1) { 2871 optused = 1; 2872 } else { 2873 if (optlen < 2) 2874 goto bad_opt; 2875 errtype = "malformed"; 2876 if (opt_type == ip6opt_ls) { 2877 optused = 2 + optptr[1]; 2878 if (optused > optlen) 2879 goto bad_opt; 2880 } else switch (opt_type) { 2881 case IP6OPT_PADN: 2882 /* 2883 * Note:We don't verify that (N-2) pad octets 2884 * are zero as required by spec. Adhere to 2885 * "be liberal in what you accept..." 
part of 2886 * implementation philosophy (RFC791,RFC1122) 2887 */ 2888 optused = 2 + optptr[1]; 2889 if (optused > optlen) 2890 goto bad_opt; 2891 break; 2892 2893 case IP6OPT_JUMBO: 2894 if (hdr_type != IPPROTO_HOPOPTS) 2895 goto opt_error; 2896 goto opt_error; /* XXX Not implemented! */ 2897 2898 case IP6OPT_ROUTER_ALERT: { 2899 struct ip6_opt_router *or; 2900 2901 if (hdr_type != IPPROTO_HOPOPTS) 2902 goto opt_error; 2903 optused = 2 + optptr[1]; 2904 if (optused > optlen) 2905 goto bad_opt; 2906 or = (struct ip6_opt_router *)optptr; 2907 /* Check total length and alignment */ 2908 if (optused != sizeof (*or) || 2909 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2910 goto opt_error; 2911 /* Check value */ 2912 switch (*((uint16_t *)or->ip6or_value)) { 2913 case IP6_ALERT_MLD: 2914 case IP6_ALERT_RSVP: 2915 ret = 1; 2916 } 2917 break; 2918 } 2919 case IP6OPT_HOME_ADDRESS: { 2920 /* 2921 * Minimal support for the home address option 2922 * (which is required by all IPv6 nodes). 2923 * Implement by just swapping the home address 2924 * and source address. 2925 * XXX Note: this has IPsec implications since 2926 * AH needs to take this into account. 2927 * Also, when IPsec is used we need to ensure 2928 * that this is only processed once 2929 * in the received packet (to avoid swapping 2930 * back and forth). 2931 * NOTE:This option processing is considered 2932 * to be unsafe and prone to a denial of 2933 * service attack. 2934 * The current processing is not safe even with 2935 * IPsec secured IP packets. Since the home 2936 * address option processing requirement still 2937 * is in the IETF draft and in the process of 2938 * being redefined for its usage, it has been 2939 * decided to turn off the option by default. 2940 * If this section of code needs to be executed, 2941 * ndd variable ip6_ignore_home_address_opt 2942 * should be set to 0 at the user's own risk. 
2943 */ 2944 struct ip6_opt_home_address *oh; 2945 in6_addr_t tmp; 2946 2947 if (ipst->ips_ipv6_ignore_home_address_opt) 2948 goto opt_error; 2949 2950 if (hdr_type != IPPROTO_DSTOPTS) 2951 goto opt_error; 2952 optused = 2 + optptr[1]; 2953 if (optused > optlen) 2954 goto bad_opt; 2955 2956 /* 2957 * We did this dest. opt the first time 2958 * around (i.e. before AH processing). 2959 * If we've done AH... stop now. 2960 */ 2961 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2962 ira->ira_ipsec_ah_sa != NULL) 2963 break; 2964 2965 oh = (struct ip6_opt_home_address *)optptr; 2966 /* Check total length and alignment */ 2967 if (optused < sizeof (*oh) || 2968 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2969 goto opt_error; 2970 /* Swap ip6_src and the home address */ 2971 tmp = ip6h->ip6_src; 2972 /* XXX Note: only 8 byte alignment option */ 2973 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2974 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2975 break; 2976 } 2977 2978 case IP6OPT_TUNNEL_LIMIT: 2979 if (hdr_type != IPPROTO_DSTOPTS) { 2980 goto opt_error; 2981 } 2982 optused = 2 + optptr[1]; 2983 if (optused > optlen) { 2984 goto bad_opt; 2985 } 2986 if (optused != 3) { 2987 goto opt_error; 2988 } 2989 break; 2990 2991 default: 2992 errtype = "unknown"; 2993 /* FALLTHROUGH */ 2994 opt_error: 2995 /* Determine which zone should send error */ 2996 switch (IP6OPT_TYPE(opt_type)) { 2997 case IP6OPT_TYPE_SKIP: 2998 optused = 2 + optptr[1]; 2999 if (optused > optlen) 3000 goto bad_opt; 3001 ip1dbg(("ip_process_options_v6: %s " 3002 "opt 0x%x skipped\n", 3003 errtype, opt_type)); 3004 break; 3005 case IP6OPT_TYPE_DISCARD: 3006 ip1dbg(("ip_process_options_v6: %s " 3007 "opt 0x%x; packet dropped\n", 3008 errtype, opt_type)); 3009 BUMP_MIB(ill->ill_ip_mib, 3010 ipIfStatsInHdrErrors); 3011 ip_drop_input("ipIfStatsInHdrErrors", 3012 mp, ill); 3013 freemsg(mp); 3014 return (-1); 3015 case IP6OPT_TYPE_ICMP: 3016 BUMP_MIB(ill->ill_ip_mib, 3017 ipIfStatsInHdrErrors); 3018 
ip_drop_input("ipIfStatsInHdrErrors", 3019 mp, ill); 3020 icmp_param_problem_v6(mp, 3021 ICMP6_PARAMPROB_OPTION, 3022 (uint32_t)(optptr - 3023 (uint8_t *)ip6h), 3024 B_FALSE, ira); 3025 return (-1); 3026 case IP6OPT_TYPE_FORCEICMP: 3027 BUMP_MIB(ill->ill_ip_mib, 3028 ipIfStatsInHdrErrors); 3029 ip_drop_input("ipIfStatsInHdrErrors", 3030 mp, ill); 3031 icmp_param_problem_v6(mp, 3032 ICMP6_PARAMPROB_OPTION, 3033 (uint32_t)(optptr - 3034 (uint8_t *)ip6h), 3035 B_TRUE, ira); 3036 return (-1); 3037 default: 3038 ASSERT(0); 3039 } 3040 } 3041 } 3042 optlen -= optused; 3043 optptr += optused; 3044 } 3045 return (ret); 3046 3047 bad_opt: 3048 /* Determine which zone should send error */ 3049 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3050 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 3051 (uint32_t)(optptr - (uint8_t *)ip6h), 3052 B_FALSE, ira); 3053 return (-1); 3054 } 3055 3056 /* 3057 * Process a routing header that is not yet empty. 3058 * Because of RFC 5095, we now reject all route headers. 3059 */ 3060 void 3061 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 3062 ip_recv_attr_t *ira) 3063 { 3064 ill_t *ill = ira->ira_ill; 3065 ip_stack_t *ipst = ill->ill_ipst; 3066 3067 ASSERT(rth->ip6r_segleft != 0); 3068 3069 if (!ipst->ips_ipv6_forward_src_routed) { 3070 /* XXX Check for source routed out same interface? */ 3071 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 3072 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 3073 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 3074 freemsg(mp); 3075 return; 3076 } 3077 3078 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3079 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3080 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 3081 B_FALSE, ira); 3082 } 3083 3084 /* 3085 * Read side put procedure for IPv6 module. 
 */
void
ip_rput_v6(queue_t *q, mblk_t *mp)
{
	ill_t *ill;

	/* The ill this read queue belongs to is kept in the queue's q_ptr. */
	ill = (ill_t *)q->q_ptr;
	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
		union DL_primitives *dl;

		dl = (union DL_primitives *)mp->b_rptr;
		/*
		 * Things are opening or closing - only accept DLPI
		 * ack messages.  If the stream is closing and ip_wsrv
		 * has completed, ip_close is out of the qwait, but has
		 * not yet completed qprocsoff.  Don't proceed any further
		 * because the ill has been cleaned up and things hanging
		 * off the ill have been freed.
		 *
		 * dl is only dereferenced when the message is M_PCPROTO
		 * (the || short-circuits for every other message type).
		 */
		if ((mp->b_datap->db_type != M_PCPROTO) ||
		    (dl->dl_primitive == DL_UNITDATA_IND)) {
			inet_freemsg(mp);
			return;
		}
	}
	if (DB_TYPE(mp) == M_DATA) {
		struct mac_header_info_s mhi;

		/* Data message: fill in mhi from the mblk, then run input. */
		ip_mdata_to_mhi(ill, mp, &mhi);
		ip_input_v6(ill, NULL, mp, &mhi);
	} else {
		/* Everything that is not M_DATA goes to the non-data handler. */
		ip_rput_notdata(ill, mp);
	}
}

/*
 * Walk through the IPv6 packet in mp and see if there's an AH header
 * in it.  See if the AH header needs to get done before other headers in
 * the packet.  (Worker function for ipsec_early_ah_v6().)
 *
 * Return values:
 *	IPSEC_HDR_PROCESS	an AH header was reached after walking only
 *				hop-by-hop headers and routing headers with
 *				no segments left; *nexthdr is set to
 *				IPPROTO_AH.
 *	IPSEC_HDR_DONT_PROCESS	AH does not need early processing (a
 *				destination options header, a routing header
 *				with segments left, a fragment header, or any
 *				other protocol was found first).
 *	IPSEC_MEMORY_ERROR	pullupmsg() failed or an extension header
 *				extends past the end of the data.
 *
 * *nexthdr is written only in the IPSEC_HDR_PROCESS case.  The message is
 * pulled up into a single mblk as a side effect.
 */
#define	IPSEC_HDR_DONT_PROCESS	0
#define	IPSEC_HDR_PROCESS	1
#define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
static int
ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
{
	uint_t length;
	uint_t ehdrlen;
	uint8_t *whereptr;
	uint8_t *endptr;
	uint8_t *nexthdrp;
	ip6_dest_t *desthdr;
	ip6_rthdr_t *rthdr;
	ip6_t *ip6h;

	/*
	 * For now just pullup everything.  In general, the less pullups,
	 * the better, but there's so much squirrelling through anyway,
	 * it's just easier this way.
	 */
	if (!pullupmsg(mp, -1)) {
		return (IPSEC_MEMORY_ERROR);
	}

	ip6h = (ip6_t *)mp->b_rptr;
	/* NOTE(review): length is accumulated below but never read. */
	length = IPV6_HDR_LEN;
	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
	endptr = mp->b_wptr;

	/*
	 * We can't just use the argument nexthdr in the place
	 * of nexthdrp because we don't dereference nexthdrp
	 * till we confirm whether it is a valid address.
	 */
	nexthdrp = &ip6h->ip6_nxt;
	while (whereptr < endptr) {
		/* Is there enough left for len + nexthdr? */
		if (whereptr + MIN_EHDR_LEN > endptr)
			return (IPSEC_MEMORY_ERROR);

		switch (*nexthdrp) {
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS:
			/* Assumes the headers are identical for hbh and dst */
			desthdr = (ip6_dest_t *)whereptr;
			ehdrlen = 8 * (desthdr->ip6d_len + 1);
			if ((uchar_t *)desthdr + ehdrlen > endptr)
				return (IPSEC_MEMORY_ERROR);
			/*
			 * Return DONT_PROCESS because the destination
			 * options header may be for each hop in a
			 * routing-header, and we only want AH if we're
			 * finished with routing headers.
			 */
			if (*nexthdrp == IPPROTO_DSTOPTS)
				return (IPSEC_HDR_DONT_PROCESS);
			nexthdrp = &desthdr->ip6d_nxt;
			break;
		case IPPROTO_ROUTING:
			rthdr = (ip6_rthdr_t *)whereptr;

			/*
			 * If there's more hops left on the routing header,
			 * return now with DON'T PROCESS.
			 */
			if (rthdr->ip6r_segleft > 0)
				return (IPSEC_HDR_DONT_PROCESS);

			ehdrlen = 8 * (rthdr->ip6r_len + 1);
			if ((uchar_t *)rthdr + ehdrlen > endptr)
				return (IPSEC_MEMORY_ERROR);
			nexthdrp = &rthdr->ip6r_nxt;
			break;
		case IPPROTO_FRAGMENT:
			/* Wait for reassembly */
			return (IPSEC_HDR_DONT_PROCESS);
		case IPPROTO_AH:
			*nexthdr = IPPROTO_AH;
			return (IPSEC_HDR_PROCESS);
		case IPPROTO_NONE:
			/* No next header means we're finished */
		default:
			return (IPSEC_HDR_DONT_PROCESS);
		}
		/* Step over the extension header that was just validated. */
		length += ehdrlen;
		whereptr += ehdrlen;
	}
	/*
	 * Malformed/truncated packet.
	 */
	return (IPSEC_MEMORY_ERROR);
}

/*
 * Path for AH if options are present.
 * Returns NULL if the mblk was consumed.
 *
 * Sometimes AH needs to be done before other IPv6 headers for security
 * reasons.  This function (and its ipsec_needs_processing_v6() above)
 * indicates if that is so, and fans out to the appropriate IPsec protocol
 * for the datagram passed in.
 *
 * mp is returned untouched (apart from the pullup done by
 * ipsec_needs_processing_v6()) when AH does not need early processing.
 * In every other case NULL is returned: the packet was dropped, handed to
 * ip_proto_not_sup(), consumed by the AH input function, or passed on to
 * ip_input_post_ipsec().
 */
mblk_t *
ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
{
	uint8_t nexthdr;
	ah_t *ah;
	ill_t *ill = ira->ira_ill;
	ip_stack_t *ipst = ill->ill_ipst;
	ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;

	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
	case IPSEC_MEMORY_ERROR:
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ipIfStatsInDiscards", mp, ill);
		freemsg(mp);
		return (NULL);
	case IPSEC_HDR_DONT_PROCESS:
		return (mp);
	}

	/* Default means send it to AH! */
	ASSERT(nexthdr == IPPROTO_AH);

	if (!ipsec_loaded(ipss)) {
		ip_proto_not_sup(mp, ira);
		return (NULL);
	}

	/* Look up the inbound AH SA; a NULL return means mp is gone. */
	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
	if (mp == NULL)
		return (NULL);
	ASSERT(ah != NULL);
	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
	ASSERT(ira->ira_ipsec_ah_sa != NULL);
	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);

	if (mp == NULL) {
		/*
		 * Either it failed or is pending. In the former case
		 * ipIfStatsInDiscards was increased.
		 */
		return (NULL);
	}

	/* we're done with IPsec processing, send it up */
	ip_input_post_ipsec(mp, ira);
	return (NULL);
}

/*
 * Reassemble fragment.
 * When it returns a completed message the first mblk will only contain
 * the headers prior to the fragment header, with the nexthdr value updated
 * to be the header after the fragment header.
 *
 * Returns NULL when the fragment was queued for later reassembly, was
 * dropped and freed, or was handed to icmp_param_problem_v6().  Otherwise
 * returns the complete packet with the fragment header removed, the
 * payload length and ECN bits rewritten in the IPv6 header, and
 * ira_pktlen, ira_ip_hdr_length and ira_protocol updated.
 *
 * remlen is added to the fragment offset to obtain the end offset of this
 * fragment (presumably the number of payload bytes that follow the
 * fragment header - confirm against the caller).
 */
mblk_t *
ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
    ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
{
	uint32_t ident = ntohl(fraghdr->ip6f_ident);
	uint16_t offset;
	boolean_t more_frags;
	uint8_t nexthdr = fraghdr->ip6f_nxt;
	in6_addr_t *v6dst_ptr;
	in6_addr_t *v6src_ptr;
	uint_t end;
	uint_t hdr_length;
	size_t count;
	ipf_t *ipf;
	ipf_t **ipfp;
	ipfb_t *ipfb;
	mblk_t *mp1;
	uint8_t ecn_info = 0;
	size_t msg_len;
	mblk_t *tail_mp;
	mblk_t *t_mp;
	boolean_t pruned = B_FALSE;
	uint32_t sum_val;
	uint16_t sum_flags;
	ill_t *ill = ira->ira_ill;
	ip_stack_t *ipst = ill->ill_ipst;
	uint_t prev_nexthdr_offset;
	uint8_t prev_nexthdr;
	uint8_t *ptr;
	uint32_t packet_size;

	/*
	 * We utilize hardware computed checksum info only for UDP since
	 * IP fragmentation is a normal occurrence for the protocol.  In
	 * addition, checksum offload support for IP fragments carrying
	 * UDP payload is commonly implemented across network adapters.
	 */
	ASSERT(ira->ira_rill != NULL);
	if (nexthdr == IPPROTO_UDP && dohwcksum &&
	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
		/* NOTE: this mp1 shadows the function-scope mp1. */
		mblk_t *mp1 = mp->b_cont;
		int32_t len;

		/* Record checksum information from the packet */
		sum_val = (uint32_t)DB_CKSUM16(mp);
		sum_flags = DB_CKSUMFLAGS(mp);

		/*
		 * fragmented payload offset from beginning of mblk
		 * (offset is only borrowed here; it is reassigned to the
		 * real fragment offset further down).
		 */
		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);

		if ((sum_flags & HCK_PARTIALCKSUM) &&
		    (mp1 == NULL || mp1->b_cont == NULL) &&
		    offset >= DB_CKSUMSTART(mp) &&
		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
			uint32_t adj;
			/*
			 * Partial checksum has been calculated by hardware
			 * and attached to the packet; in addition, any
			 * prepended extraneous data is even byte aligned.
			 * If any such data exists, we adjust the checksum;
			 * this would also handle any postpended data.
			 */
			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
			    mp, mp1, len, adj);

			/* One's complement subtract extraneous checksum */
			if (adj >= sum_val)
				sum_val = ~(adj - sum_val) & 0xFFFF;
			else
				sum_val -= adj;
		}
	} else {
		sum_val = 0;
		sum_flags = 0;
	}

	/* Clear hardware checksumming flag */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Determine the offset (from the beginning of the IP header)
	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
	 * this when removing the fragment header from the packet.
	 * This packet consists of the IPv6 header, a potential
	 * hop-by-hop options header, a potential pre-routing-header
	 * destination options header, and a potential routing header.
	 */
	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
	prev_nexthdr = ip6h->ip6_nxt;
	ptr = (uint8_t *)&ip6h[1];

	if (prev_nexthdr == IPPROTO_HOPOPTS) {
		ip6_hbh_t *hbh_hdr;
		uint_t hdr_len;

		hbh_hdr = (ip6_hbh_t *)ptr;
		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
		prev_nexthdr = hbh_hdr->ip6h_nxt;
		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (prev_nexthdr == IPPROTO_DSTOPTS) {
		ip6_dest_t *dest_hdr;
		uint_t hdr_len;

		dest_hdr = (ip6_dest_t *)ptr;
		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
		prev_nexthdr = dest_hdr->ip6d_nxt;
		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
		    - (uint8_t *)ip6h;
		ptr += hdr_len;
	}
	if (prev_nexthdr == IPPROTO_ROUTING) {
		ip6_rthdr_t *rthdr;
		uint_t hdr_len;

		rthdr = (ip6_rthdr_t *)ptr;
		prev_nexthdr = rthdr->ip6r_nxt;
		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
		    - (uint8_t *)ip6h;
		hdr_len = 8 * (rthdr->ip6r_len + 1);
		ptr += hdr_len;
	}
	if (prev_nexthdr != IPPROTO_FRAGMENT) {
		/* Can't handle other headers before the fragment header */
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Note: Fragment offset in header is in 8-octet units.
	 * Clearing least significant 3 bits not only extracts
	 * it but also gets it in units of octets.
	 */
	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
	/* Non-zero (not necessarily B_TRUE) when the M flag is set. */
	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);

	/*
	 * Is the more frags flag on and the payload length not a multiple
	 * of eight?
	 */
	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&ip6h->ip6_plen -
		    (char *)ip6h), B_FALSE, ira);
		return (NULL);
	}

	v6src_ptr = &ip6h->ip6_src;
	v6dst_ptr = &ip6h->ip6_dst;
	end = remlen;

	/* Bytes from the start of the IPv6 header through the frag header. */
	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
	end += offset;

	/*
	 * Would fragment cause reassembled packet to have a payload length
	 * greater than IP_MAXPACKET - the max payload size?
	 */
	if (end > IP_MAXPACKET) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("Reassembled packet too large", mp, ill);
		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
		    (char *)ip6h), B_FALSE, ira);
		return (NULL);
	}

	/*
	 * This packet just has one fragment. Reassembly not
	 * needed.
	 */
	if (!more_frags && offset == 0) {
		goto reass_done;
	}

	/*
	 * Drop the fragment as early as possible, if
	 * we don't have resource(s) to re-assemble.
	 */
	if (ipst->ips_ip_reass_queue_bytes == 0) {
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Record the ECN field info.  ~0xFFCFFFFF is 0x00300000, so this
	 * isolates a two-bit field of ip6_vcf and shifts it down to bit 0.
	 */
	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
	/*
	 * If this is not the first fragment, dump the unfragmentable
	 * portion of the packet.
	 */
	if (offset)
		mp->b_rptr = (uchar_t *)&fraghdr[1];

	/*
	 * Fragmentation reassembly.  Each ILL has a hash table for
	 * queueing packets undergoing reassembly for all IPIFs
	 * associated with the ILL.  The hash is based on the packet
	 * IP ident field.  The ILL frag hash table was allocated
	 * as a timer block at the time the ILL was created.  Whenever
	 * there is anything on the reassembly queue, the timer will
	 * be running.
	 */
	/*
	 * Handle vnic loopback of fragments: mblks whose data block has
	 * more than two references are not charged to msg_len.
	 */
	if (mp->b_datap->db_ref > 2)
		msg_len = 0;
	else
		msg_len = MBLKSIZE(mp);

	/* Find the last mblk of the fragment while summing msg_len. */
	tail_mp = mp;
	while (tail_mp->b_cont != NULL) {
		tail_mp = tail_mp->b_cont;
		if (tail_mp->b_datap->db_ref <= 2)
			msg_len += MBLKSIZE(tail_mp);
	}
	/*
	 * If the reassembly list for this ILL will get too big
	 * prune it.
	 */

	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
	    ipst->ips_ip_reass_queue_bytes) {
		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
		    uint_t, ill->ill_frag_count,
		    uint_t, ipst->ips_ip_reass_queue_bytes);
		ill_frag_prune(ill,
		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
		    (ipst->ips_ip_reass_queue_bytes - msg_len));
		pruned = B_TRUE;
	}

	/* The bucket lock is held from here until every exit below. */
	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
	mutex_enter(&ipfb->ipfb_lock);

	ipfp = &ipfb->ipfb_ipf;
	/* Try to find an existing fragment queue for this packet. */
	for (;;) {
		ipf = ipfp[0];
		if (ipf) {
			/*
			 * It has to match on ident, source address, and
			 * dest address.
			 */
			if (ipf->ipf_ident == ident &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {

				/*
				 * If we have received too many
				 * duplicate fragments for this packet
				 * free it.
				 */
				if (ipf->ipf_num_dups > ip_max_frag_dups) {
					ill_frag_free_pkts(ill, ipfb, ipf, 1);
					freemsg(mp);
					mutex_exit(&ipfb->ipfb_lock);
					return (NULL);
				}

				break;
			}
			ipfp = &ipf->ipf_hash_next;
			continue;
		}


		/*
		 * If we pruned the list, do we want to store this new
		 * fragment?. We apply an optimization here based on the
		 * fact that most fragments will be received in order.
		 * So if the offset of this incoming fragment is zero,
		 * it is the first fragment of a new packet. We will
		 * keep it.  Otherwise drop the fragment, as we have
		 * probably pruned the packet already (since the
		 * packet cannot be found).
		 */

		if (pruned && offset != 0) {
			mutex_exit(&ipfb->ipfb_lock);
			freemsg(mp);
			return (NULL);
		}

		/* New guy.  Allocate a frag message. */
		mp1 = allocb(sizeof (*ipf), BPRI_MED);
		if (!mp1) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
	/*
	 * Shared exit for every path that leaves with the bucket lock held
	 * and no completed packet to return; other paths goto this label.
	 */
	partial_reass_done:
			mutex_exit(&ipfb->ipfb_lock);
			return (NULL);
		}

		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) {
			/*
			 * Too many fragmented packets in this hash bucket.
			 * Free the oldest.
			 */
			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
		}

		/* The ipf_t lives in mp1; the fragment hangs off b_cont. */
		mp1->b_cont = mp;

		/* Initialize the fragment header. */
		ipf = (ipf_t *)mp1->b_rptr;
		ipf->ipf_mp = mp1;
		ipf->ipf_ptphn = ipfp;
		ipfp[0] = ipf;
		ipf->ipf_hash_next = NULL;
		ipf->ipf_ident = ident;
		ipf->ipf_v6src = *v6src_ptr;
		ipf->ipf_v6dst = *v6dst_ptr;
		/* Record reassembly start time. */
		ipf->ipf_timestamp = gethrestime_sec();
		/* Record ipf generation and account for frag header */
		ipf->ipf_gen = ill->ill_ipf_gen++;
		ipf->ipf_count = MBLKSIZE(mp1);
		ipf->ipf_protocol = nexthdr;
		ipf->ipf_nf_hdr_len = 0;
		ipf->ipf_prev_nexthdr_offset = 0;
		ipf->ipf_last_frag_seen = B_FALSE;
		ipf->ipf_ecn = ecn_info;
		ipf->ipf_num_dups = 0;
		ipfb->ipfb_frag_pkts++;
		ipf->ipf_checksum = 0;
		ipf->ipf_checksum_flags = 0;

		/* Store checksum value in fragment header */
		if (sum_flags != 0) {
			/* Fold the 32-bit sum down to 16 bits (twice). */
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
			ipf->ipf_checksum = sum_val;
			ipf->ipf_checksum_flags = sum_flags;
		}

		/*
		 * We handle reassembly two ways.  In the easy case,
		 * where all the fragments show up in order, we do
		 * minimal bookkeeping, and just clip new pieces on
		 * the end.  If we ever see a hole, then we go off
		 * to ip_reassemble which has to mark the pieces and
		 * keep track of the number of holes, etc.  Obviously,
		 * the point of having both mechanisms is so we can
		 * handle the easy case as efficiently as possible.
		 */
		if (offset == 0) {
			/* Easy case, in-order reassembly so far. */
			/* Update the byte count */
			ipf->ipf_count += msg_len;
			ipf->ipf_tail_mp = tail_mp;
			/*
			 * Keep track of next expected offset in
			 * ipf_end.
			 */
			ipf->ipf_end = end;
			ipf->ipf_nf_hdr_len = hdr_length;
			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
		} else {
			/* Hard case, hole at the beginning. */
			ipf->ipf_tail_mp = NULL;
			/*
			 * ipf_end == 0 means that we have given up
			 * on easy reassembly.
			 */
			ipf->ipf_end = 0;

			/* Forget checksum offload from now on */
			ipf->ipf_checksum_flags = 0;

			/*
			 * ipf_hole_cnt is set by ip_reassemble.
			 * ipf_count is updated by ip_reassemble.
			 * No need to check for return value here
			 * as we don't expect reassembly to complete or
			 * fail for the first fragment itself.
			 */
			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
			    msg_len);
		}
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += ipf->ipf_count;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
		/* If the frag timer wasn't already going, start it. */
		mutex_enter(&ill->ill_lock);
		ill_frag_timer_start(ill);
		mutex_exit(&ill->ill_lock);
		goto partial_reass_done;
	}

	/*
	 * If the packet's flag has changed (it could be coming up
	 * from an interface different than the previous, therefore
	 * possibly different checksum capability), then forget about
	 * any stored checksum states.  Otherwise add the value to
	 * the existing one stored in the fragment header.
	 */
	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
		sum_val += ipf->ipf_checksum;
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
		ipf->ipf_checksum = sum_val;
	} else if (ipf->ipf_checksum_flags != 0) {
		/* Forget checksum offload from now on */
		ipf->ipf_checksum_flags = 0;
	}

	/*
	 * We have a new piece of a datagram which is already being
	 * reassembled.  Update the ECN info if all IP fragments
	 * are ECN capable.  If there is one which is not, clear
	 * all the info.  If there is at least one which has CE
	 * code point, IP needs to report that up to transport.
	 */
	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
		if (ecn_info == IPH_ECN_CE)
			ipf->ipf_ecn = IPH_ECN_CE;
	} else {
		ipf->ipf_ecn = IPH_ECN_NECT;
	}

	if (offset && ipf->ipf_end == offset) {
		/* The new fragment fits at the end */
		ipf->ipf_tail_mp->b_cont = mp;
		/* Update the byte count */
		ipf->ipf_count += msg_len;
		/* Update per ipfb and ill byte counts */
		ipfb->ipfb_count += msg_len;
		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
		atomic_add_32(&ill->ill_frag_count, msg_len);
		if (more_frags) {
			/* More to come. */
			ipf->ipf_end = end;
			ipf->ipf_tail_mp = tail_mp;
			goto partial_reass_done;
		}
		/* Last fragment arrived in order: fall through to finish. */
	} else {
		/*
		 * Go do the hard cases.
		 * Call ip_reassemble().
		 */
		int ret;

		if (offset == 0) {
			/*
			 * First fragment: record the unfragmentable header
			 * details unless an earlier one already did.
			 */
			if (ipf->ipf_prev_nexthdr_offset == 0) {
				ipf->ipf_nf_hdr_len = hdr_length;
				ipf->ipf_prev_nexthdr_offset =
				    prev_nexthdr_offset;
			}
		}
		/* Save current byte count */
		count = ipf->ipf_count;
		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);

		/* Count of bytes added and subtracted (freeb()ed) */
		count = ipf->ipf_count - count;
		if (count) {
			/* Update per ipfb and ill byte counts */
			ipfb->ipfb_count += count;
			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
			atomic_add_32(&ill->ill_frag_count, count);
		}
		if (ret == IP_REASS_PARTIAL) {
			goto partial_reass_done;
		} else if (ret == IP_REASS_FAILED) {
			/* Reassembly failed. Free up all resources */
			ill_frag_free_pkts(ill, ipfb, ipf, 1);
			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
				IP_REASS_SET_START(t_mp, 0);
				IP_REASS_SET_END(t_mp, 0);
			}
			freemsg(mp);
			goto partial_reass_done;
		}

		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
	}
	/*
	 * We have completed reassembly.  Unhook the frag header from
	 * the reassembly list.
	 *
	 * Grab the unfragmentable header length next header value out
	 * of the first fragment
	 */
	ASSERT(ipf->ipf_nf_hdr_len != 0);
	hdr_length = ipf->ipf_nf_hdr_len;

	/*
	 * Before we free the frag header, record the ECN info
	 * to report back to the transport.
	 */
	ecn_info = ipf->ipf_ecn;

	/*
	 * Store the nextheader field in the header preceding the fragment
	 * header
	 */
	nexthdr = ipf->ipf_protocol;
	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
	ipfp = ipf->ipf_ptphn;

	/* We need to supply these to caller */
	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
		sum_val = ipf->ipf_checksum;
	else
		sum_val = 0;

	/* Unlink ipf from the bucket's hash chain and undo its accounting. */
	mp1 = ipf->ipf_mp;
	count = ipf->ipf_count;
	ipf = ipf->ipf_hash_next;
	if (ipf)
		ipf->ipf_ptphn = ipfp;
	ipfp[0] = ipf;
	atomic_add_32(&ill->ill_frag_count, -count);
	ASSERT(ipfb->ipfb_count >= count);
	ipfb->ipfb_count -= count;
	ipfb->ipfb_frag_pkts--;
	mutex_exit(&ipfb->ipfb_lock);
	/* Ditch the frag header. */
	mp = mp1->b_cont;
	freeb(mp1);

	/*
	 * Make sure the packet is good by doing some sanity
	 * check. If bad we can silently drop the packet.
	 */
reass_done:
	if (hdr_length < sizeof (ip6_frag_t)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
		freemsg(mp);
		return (NULL);
	}

	/*
	 * Remove the fragment header from the initial header by
	 * splitting the mblk into the non-fragmentable header and
	 * everything after the fragment extension header.  This has the
	 * side effect of putting all the headers that need destination
	 * processing into the b_cont block-- on return this fact is
	 * used in order to avoid having to look at the extensions
	 * already processed.
	 *
	 * Note that this code assumes that the unfragmentable portion
	 * of the header is in the first mblk and increments
	 * the read pointer past it.  If this assumption is broken
	 * this code fails badly.
	 */
	if (mp->b_rptr + hdr_length != mp->b_wptr) {
		mblk_t *nmp;

		if (!(nmp = dupb(mp))) {
			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, ill);
			freemsg(mp);
			return (NULL);
		}
		nmp->b_cont = mp->b_cont;
		mp->b_cont = nmp;
		nmp->b_rptr += hdr_length;
	}
	/* Trim the first mblk so that it ends just before the frag header. */
	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);

	/* Overwrite the IPPROTO_FRAGMENT next-header byte found earlier. */
	ip6h = (ip6_t *)mp->b_rptr;
	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;

	/* Restore original IP length in header. */
	packet_size = msgdsize(mp);
	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
	/* Record the ECN info. */
	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
	ip6h->ip6_vcf |= htonl(ecn_info << 20);

	/* Update the receive attributes */
	ira->ira_pktlen = packet_size;
	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
	ira->ira_protocol = nexthdr;

	/* Reassembly is successful; set checksum information in packet */
	DB_CKSUM16(mp) = (uint16_t)sum_val;
	DB_CKSUMFLAGS(mp) = sum_flags;
	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;

	return (mp);
}

/*
 * Given an mblk and a ptr, find the destination address in an IPv6 routing
 * header.
3880 */ 3881 static in6_addr_t 3882 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3883 { 3884 ip6_rthdr0_t *rt0; 3885 int segleft, numaddr; 3886 in6_addr_t *ap, rv = oldrv; 3887 3888 rt0 = (ip6_rthdr0_t *)whereptr; 3889 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3890 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3891 uint8_t *, whereptr); 3892 return (rv); 3893 } 3894 segleft = rt0->ip6r0_segleft; 3895 numaddr = rt0->ip6r0_len / 2; 3896 3897 if ((rt0->ip6r0_len & 0x1) || 3898 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3899 (segleft > rt0->ip6r0_len / 2)) { 3900 /* 3901 * Corrupt packet. Either the routing header length is odd 3902 * (can't happen) or mismatched compared to the packet, or the 3903 * number of addresses is. Return what we can. This will 3904 * only be a problem on forwarded packets that get squeezed 3905 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3906 */ 3907 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3908 whereptr); 3909 return (rv); 3910 } 3911 3912 if (segleft != 0) { 3913 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3914 rv = ap[numaddr - 1]; 3915 } 3916 3917 return (rv); 3918 } 3919 3920 /* 3921 * Walk through the options to see if there is a routing header. 3922 * If present get the destination which is the last address of 3923 * the option. 3924 * mp needs to be provided in cases when the extension headers might span 3925 * b_cont; mp is never modified by this function. 3926 */ 3927 in6_addr_t 3928 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3929 { 3930 const mblk_t *current_mp = mp; 3931 uint8_t nexthdr; 3932 uint8_t *whereptr; 3933 int ehdrlen; 3934 in6_addr_t rv; 3935 3936 whereptr = (uint8_t *)ip6h; 3937 ehdrlen = sizeof (ip6_t); 3938 3939 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3940 ASSERT(mp == NULL || 3941 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3942 3943 rv = ip6h->ip6_dst; 3944 nexthdr = ip6h->ip6_nxt; 3945 if (is_fragment != NULL) 3946 *is_fragment = B_FALSE; 3947 3948 /* 3949 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3950 * no extension headers will be split across mblks. 3951 */ 3952 3953 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3954 nexthdr == IPPROTO_ROUTING) { 3955 if (nexthdr == IPPROTO_ROUTING) 3956 rv = pluck_out_dst(current_mp, whereptr, rv); 3957 3958 /* 3959 * All IPv6 extension headers have the next-header in byte 3960 * 0, and the (length - 8) in 8-byte-words. 3961 */ 3962 while (current_mp != NULL && 3963 whereptr + ehdrlen >= current_mp->b_wptr) { 3964 ehdrlen -= (current_mp->b_wptr - whereptr); 3965 current_mp = current_mp->b_cont; 3966 if (current_mp == NULL) { 3967 /* Bad packet. Return what we can. */ 3968 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3969 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3970 goto done; 3971 } 3972 whereptr = current_mp->b_rptr; 3973 } 3974 whereptr += ehdrlen; 3975 3976 nexthdr = *whereptr; 3977 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3978 ehdrlen = (*(whereptr + 1) + 1) * 8; 3979 } 3980 3981 done: 3982 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3983 *is_fragment = B_TRUE; 3984 return (rv); 3985 } 3986 3987 /* 3988 * ip_source_routed_v6: 3989 * This function is called by redirect code (called from ip_input_v6) to 3990 * know whether this packet is source routed through this node i.e 3991 * whether this node (router) is part of the journey. This 3992 * function is called under two cases : 3993 * 3994 * case 1 : Routing header was processed by this node and 3995 * ip_process_rthdr replaced ip6_dst with the next hop 3996 * and we are forwarding the packet to the next hop. 
3997 * 3998 * case 2 : Routing header was not processed by this node and we 3999 * are just forwarding the packet. 4000 * 4001 * For case (1) we don't want to send redirects. For case(2) we 4002 * want to send redirects. 4003 */ 4004 static boolean_t 4005 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 4006 { 4007 uint8_t nexthdr; 4008 in6_addr_t *addrptr; 4009 ip6_rthdr0_t *rthdr; 4010 uint8_t numaddr; 4011 ip6_hbh_t *hbhhdr; 4012 uint_t ehdrlen; 4013 uint8_t *byteptr; 4014 4015 ip2dbg(("ip_source_routed_v6\n")); 4016 nexthdr = ip6h->ip6_nxt; 4017 ehdrlen = IPV6_HDR_LEN; 4018 4019 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 4020 while (nexthdr == IPPROTO_HOPOPTS || 4021 nexthdr == IPPROTO_DSTOPTS) { 4022 byteptr = (uint8_t *)ip6h + ehdrlen; 4023 /* 4024 * Check if we have already processed 4025 * packets or we are just a forwarding 4026 * router which only pulled up msgs up 4027 * to IPV6HDR and one HBH ext header 4028 */ 4029 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4030 ip2dbg(("ip_source_routed_v6: Extension" 4031 " headers not processed\n")); 4032 return (B_FALSE); 4033 } 4034 hbhhdr = (ip6_hbh_t *)byteptr; 4035 nexthdr = hbhhdr->ip6h_nxt; 4036 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 4037 } 4038 switch (nexthdr) { 4039 case IPPROTO_ROUTING: 4040 byteptr = (uint8_t *)ip6h + ehdrlen; 4041 /* 4042 * If for some reason, we haven't pulled up 4043 * the routing hdr data mblk, then we must 4044 * not have processed it at all. So for sure 4045 * we are not part of the source routed journey. 4046 */ 4047 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4048 ip2dbg(("ip_source_routed_v6: Routing" 4049 " header not processed\n")); 4050 return (B_FALSE); 4051 } 4052 rthdr = (ip6_rthdr0_t *)byteptr; 4053 /* 4054 * Either we are an intermediate router or the 4055 * last hop before destination and we have 4056 * already processed the routing header. 
4057 * If segment_left is greater than or equal to zero, 4058 * then we must be the (numaddr - segleft) entry 4059 * of the routing header. Although ip6r0_segleft 4060 * is a unit8_t variable, we still check for zero 4061 * or greater value, if in case the data type 4062 * is changed someday in future. 4063 */ 4064 if (rthdr->ip6r0_segleft > 0 || 4065 rthdr->ip6r0_segleft == 0) { 4066 numaddr = rthdr->ip6r0_len / 2; 4067 addrptr = (in6_addr_t *)((char *)rthdr + 4068 sizeof (*rthdr)); 4069 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 4070 if (addrptr != NULL) { 4071 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 4072 return (B_TRUE); 4073 ip1dbg(("ip_source_routed_v6: Not local\n")); 4074 } 4075 } 4076 /* FALLTHRU */ 4077 default: 4078 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 4079 return (B_FALSE); 4080 } 4081 } 4082 4083 /* 4084 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 4085 * We have not optimized this in terms of number of mblks 4086 * allocated. For instance, for each fragment sent we always allocate a 4087 * mblk to hold the IPv6 header and fragment header. 4088 * 4089 * Assumes that all the extension headers are contained in the first mblk 4090 * and that the fragment header has has already been added by calling 4091 * ip_fraghdr_add_v6. 
4092 */ 4093 int 4094 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4095 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4096 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4097 { 4098 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4099 ip6_t *fip6h; 4100 mblk_t *hmp; 4101 mblk_t *hmp0; 4102 mblk_t *dmp; 4103 ip6_frag_t *fraghdr; 4104 size_t unfragmentable_len; 4105 size_t mlen; 4106 size_t max_chunk; 4107 uint16_t off_flags; 4108 uint16_t offset = 0; 4109 ill_t *ill = nce->nce_ill; 4110 uint8_t nexthdr; 4111 uint8_t *ptr; 4112 ip_stack_t *ipst = ill->ill_ipst; 4113 uint_t priority = mp->b_band; 4114 int error = 0; 4115 4116 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4117 if (max_frag == 0) { 4118 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4119 ip_drop_output("FragFails: zero max_frag", mp, ill); 4120 freemsg(mp); 4121 return (EINVAL); 4122 } 4123 4124 /* 4125 * Caller should have added fraghdr_t to pkt_len, and also 4126 * updated ip6_plen. 4127 */ 4128 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4129 ASSERT(msgdsize(mp) == pkt_len); 4130 4131 /* 4132 * Determine the length of the unfragmentable portion of this 4133 * datagram. This consists of the IPv6 header, a potential 4134 * hop-by-hop options header, a potential pre-routing-header 4135 * destination options header, and a potential routing header. 
4136 */ 4137 nexthdr = ip6h->ip6_nxt; 4138 ptr = (uint8_t *)&ip6h[1]; 4139 4140 if (nexthdr == IPPROTO_HOPOPTS) { 4141 ip6_hbh_t *hbh_hdr; 4142 uint_t hdr_len; 4143 4144 hbh_hdr = (ip6_hbh_t *)ptr; 4145 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4146 nexthdr = hbh_hdr->ip6h_nxt; 4147 ptr += hdr_len; 4148 } 4149 if (nexthdr == IPPROTO_DSTOPTS) { 4150 ip6_dest_t *dest_hdr; 4151 uint_t hdr_len; 4152 4153 dest_hdr = (ip6_dest_t *)ptr; 4154 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4155 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4156 nexthdr = dest_hdr->ip6d_nxt; 4157 ptr += hdr_len; 4158 } 4159 } 4160 if (nexthdr == IPPROTO_ROUTING) { 4161 ip6_rthdr_t *rthdr; 4162 uint_t hdr_len; 4163 4164 rthdr = (ip6_rthdr_t *)ptr; 4165 nexthdr = rthdr->ip6r_nxt; 4166 hdr_len = 8 * (rthdr->ip6r_len + 1); 4167 ptr += hdr_len; 4168 } 4169 if (nexthdr != IPPROTO_FRAGMENT) { 4170 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4171 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4172 freemsg(mp); 4173 return (EINVAL); 4174 } 4175 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4176 unfragmentable_len += sizeof (ip6_frag_t); 4177 4178 max_chunk = (max_frag - unfragmentable_len) & ~7; 4179 4180 /* 4181 * Allocate an mblk with enough room for the link-layer 4182 * header and the unfragmentable part of the datagram, which includes 4183 * the fragment header. This (or a copy) will be used as the 4184 * first mblk for each fragment we send. 4185 */ 4186 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4187 if (hmp == NULL) { 4188 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4189 ip_drop_output("FragFails: no hmp", mp, ill); 4190 freemsg(mp); 4191 return (ENOBUFS); 4192 } 4193 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4194 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4195 4196 fip6h = (ip6_t *)hmp->b_rptr; 4197 bcopy(ip6h, fip6h, unfragmentable_len); 4198 4199 /* 4200 * pkt_len is set to the total length of the fragmentable data in this 4201 * datagram. 
For each fragment sent, we will decrement pkt_len 4202 * by the amount of fragmentable data sent in that fragment 4203 * until len reaches zero. 4204 */ 4205 pkt_len -= unfragmentable_len; 4206 4207 /* 4208 * Move read ptr past unfragmentable portion, we don't want this part 4209 * of the data in our fragments. 4210 */ 4211 mp->b_rptr += unfragmentable_len; 4212 if (mp->b_rptr == mp->b_wptr) { 4213 mblk_t *mp1 = mp->b_cont; 4214 freeb(mp); 4215 mp = mp1; 4216 } 4217 4218 while (pkt_len != 0) { 4219 mlen = MIN(pkt_len, max_chunk); 4220 pkt_len -= mlen; 4221 if (pkt_len != 0) { 4222 /* Not last */ 4223 hmp0 = copyb(hmp); 4224 if (hmp0 == NULL) { 4225 BUMP_MIB(ill->ill_ip_mib, 4226 ipIfStatsOutFragFails); 4227 ip_drop_output("FragFails: copyb failed", 4228 mp, ill); 4229 freeb(hmp); 4230 freemsg(mp); 4231 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4232 return (ENOBUFS); 4233 } 4234 off_flags = IP6F_MORE_FRAG; 4235 } else { 4236 /* Last fragment */ 4237 hmp0 = hmp; 4238 hmp = NULL; 4239 off_flags = 0; 4240 } 4241 fip6h = (ip6_t *)(hmp0->b_rptr); 4242 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4243 sizeof (ip6_frag_t)); 4244 4245 fip6h->ip6_plen = htons((uint16_t)(mlen + 4246 unfragmentable_len - IPV6_HDR_LEN)); 4247 /* 4248 * Note: Optimization alert. 4249 * In IPv6 (and IPv4) protocol header, Fragment Offset 4250 * ("offset") is 13 bits wide and in 8-octet units. 4251 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4252 * it occupies the most significant 13 bits. 4253 * (least significant 13 bits in IPv4). 4254 * We do not do any shifts here. Not shifting is same effect 4255 * as taking offset value in octet units, dividing by 8 and 4256 * then shifting 3 bits left to line it up in place in proper 4257 * place protocol header. 
4258 */ 4259 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4260 4261 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4262 /* mp has already been freed by ip_carve_mp() */ 4263 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4264 ip_drop_output("FragFails: could not carve mp", 4265 hmp0, ill); 4266 if (hmp != NULL) 4267 freeb(hmp); 4268 freeb(hmp0); 4269 ip1dbg(("ip_carve_mp: failed\n")); 4270 return (ENOBUFS); 4271 } 4272 hmp0->b_cont = dmp; 4273 /* Get the priority marking, if any */ 4274 hmp0->b_band = priority; 4275 4276 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4277 4278 error = postfragfn(hmp0, nce, ixaflags, 4279 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4280 ixa_cookie); 4281 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4282 /* No point in sending the other fragments */ 4283 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4284 ip_drop_output("FragFails: postfragfn failed", 4285 hmp, ill); 4286 freeb(hmp); 4287 freemsg(mp); 4288 return (error); 4289 } 4290 /* No need to redo state machine in loop */ 4291 ixaflags &= ~IXAF_REACH_CONF; 4292 4293 offset += mlen; 4294 } 4295 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4296 return (error); 4297 } 4298 4299 /* 4300 * Add a fragment header to an IPv6 packet. 4301 * Assumes that all the extension headers are contained in the first mblk. 4302 * 4303 * The fragment header is inserted after an hop-by-hop options header 4304 * and after [an optional destinations header followed by] a routing header. 4305 */ 4306 mblk_t * 4307 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4308 { 4309 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4310 ip6_t *fip6h; 4311 mblk_t *hmp; 4312 ip6_frag_t *fraghdr; 4313 size_t unfragmentable_len; 4314 uint8_t nexthdr; 4315 uint_t prev_nexthdr_offset; 4316 uint8_t *ptr; 4317 uint_t priority = mp->b_band; 4318 ip_stack_t *ipst = ixa->ixa_ipst; 4319 4320 /* 4321 * Determine the length of the unfragmentable portion of this 4322 * datagram. 
This consists of the IPv6 header, a potential 4323 * hop-by-hop options header, a potential pre-routing-header 4324 * destination options header, and a potential routing header. 4325 */ 4326 nexthdr = ip6h->ip6_nxt; 4327 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4328 ptr = (uint8_t *)&ip6h[1]; 4329 4330 if (nexthdr == IPPROTO_HOPOPTS) { 4331 ip6_hbh_t *hbh_hdr; 4332 uint_t hdr_len; 4333 4334 hbh_hdr = (ip6_hbh_t *)ptr; 4335 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4336 nexthdr = hbh_hdr->ip6h_nxt; 4337 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4338 - (uint8_t *)ip6h; 4339 ptr += hdr_len; 4340 } 4341 if (nexthdr == IPPROTO_DSTOPTS) { 4342 ip6_dest_t *dest_hdr; 4343 uint_t hdr_len; 4344 4345 dest_hdr = (ip6_dest_t *)ptr; 4346 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4347 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4348 nexthdr = dest_hdr->ip6d_nxt; 4349 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4350 - (uint8_t *)ip6h; 4351 ptr += hdr_len; 4352 } 4353 } 4354 if (nexthdr == IPPROTO_ROUTING) { 4355 ip6_rthdr_t *rthdr; 4356 uint_t hdr_len; 4357 4358 rthdr = (ip6_rthdr_t *)ptr; 4359 nexthdr = rthdr->ip6r_nxt; 4360 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4361 - (uint8_t *)ip6h; 4362 hdr_len = 8 * (rthdr->ip6r_len + 1); 4363 ptr += hdr_len; 4364 } 4365 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4366 4367 /* 4368 * Allocate an mblk with enough room for the link-layer 4369 * header, the unfragmentable part of the datagram, and the 4370 * fragment header. 
4371 */ 4372 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4373 ipst->ips_ip_wroff_extra, mp); 4374 if (hmp == NULL) { 4375 ill_t *ill = ixa->ixa_nce->nce_ill; 4376 4377 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4378 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4379 freemsg(mp); 4380 return (NULL); 4381 } 4382 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4383 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4384 4385 fip6h = (ip6_t *)hmp->b_rptr; 4386 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4387 4388 bcopy(ip6h, fip6h, unfragmentable_len); 4389 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4390 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4391 4392 fraghdr->ip6f_nxt = nexthdr; 4393 fraghdr->ip6f_reserved = 0; 4394 fraghdr->ip6f_offlg = 0; 4395 fraghdr->ip6f_ident = htonl(ident); 4396 4397 /* Get the priority marking, if any */ 4398 hmp->b_band = priority; 4399 4400 /* 4401 * Move read ptr past unfragmentable portion, we don't want this part 4402 * of the data in our fragments. 4403 */ 4404 mp->b_rptr += unfragmentable_len; 4405 hmp->b_cont = mp; 4406 return (hmp); 4407 } 4408 4409 /* 4410 * Determine if the ill and multicast aspects of that packets 4411 * "matches" the conn. 4412 */ 4413 boolean_t 4414 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4415 { 4416 ill_t *ill = ira->ira_rill; 4417 zoneid_t zoneid = ira->ira_zoneid; 4418 uint_t in_ifindex; 4419 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4420 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4421 4422 /* 4423 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4424 * scopeid. This is used to limit 4425 * unicast and multicast reception to conn_incoming_ifindex. 4426 * conn_wantpacket_v6 is called both for unicast and 4427 * multicast packets. 
4428 */ 4429 in_ifindex = connp->conn_incoming_ifindex; 4430 4431 /* mpathd can bind to the under IPMP interface, which we allow */ 4432 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4433 if (!IS_UNDER_IPMP(ill)) 4434 return (B_FALSE); 4435 4436 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4437 return (B_FALSE); 4438 } 4439 4440 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4441 return (B_FALSE); 4442 4443 if (!(ira->ira_flags & IRAF_MULTICAST)) 4444 return (B_TRUE); 4445 4446 if (connp->conn_multi_router) 4447 return (B_TRUE); 4448 4449 if (ira->ira_protocol == IPPROTO_RSVP) 4450 return (B_TRUE); 4451 4452 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4453 ira->ira_ill)); 4454 } 4455 4456 /* 4457 * pr_addr_dbg function provides the needed buffer space to call 4458 * inet_ntop() function's 3rd argument. This function should be 4459 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4460 * stack buffer space in it's own stack frame. This function uses 4461 * a buffer from it's own stack and prints the information. 4462 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4463 * 4464 * Note: This function can call inet_ntop() once. 4465 */ 4466 void 4467 pr_addr_dbg(char *fmt1, int af, const void *addr) 4468 { 4469 char buf[INET6_ADDRSTRLEN]; 4470 4471 if (fmt1 == NULL) { 4472 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4473 return; 4474 } 4475 4476 /* 4477 * This does not compare debug level and just prints 4478 * out. Thus it is the responsibility of the caller 4479 * to check the appropriate debug-level before calling 4480 * this function. 4481 */ 4482 if (ip_debug > 0) { 4483 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4484 } 4485 4486 4487 } 4488 4489 4490 /* 4491 * Return the length in bytes of the IPv6 headers (base header 4492 * extension headers) that will be needed based on the 4493 * ip_pkt_t structure passed by the caller. 
4494 * 4495 * The returned length does not include the length of the upper level 4496 * protocol (ULP) header. 4497 */ 4498 int 4499 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4500 { 4501 int len; 4502 4503 len = IPV6_HDR_LEN; 4504 4505 /* 4506 * If there's a security label here, then we ignore any hop-by-hop 4507 * options the user may try to set. 4508 */ 4509 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4510 uint_t hopoptslen; 4511 /* 4512 * Note that ipp_label_len_v6 is just the option - not 4513 * the hopopts extension header. It also needs to be padded 4514 * to a multiple of 8 bytes. 4515 */ 4516 ASSERT(ipp->ipp_label_len_v6 != 0); 4517 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4518 hopoptslen = (hopoptslen + 7)/8 * 8; 4519 len += hopoptslen; 4520 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4521 ASSERT(ipp->ipp_hopoptslen != 0); 4522 len += ipp->ipp_hopoptslen; 4523 } 4524 4525 /* 4526 * En-route destination options 4527 * Only do them if there's a routing header as well 4528 */ 4529 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4530 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4531 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4532 len += ipp->ipp_rthdrdstoptslen; 4533 } 4534 if (ipp->ipp_fields & IPPF_RTHDR) { 4535 ASSERT(ipp->ipp_rthdrlen != 0); 4536 len += ipp->ipp_rthdrlen; 4537 } 4538 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4539 ASSERT(ipp->ipp_dstoptslen != 0); 4540 len += ipp->ipp_dstoptslen; 4541 } 4542 return (len); 4543 } 4544 4545 /* 4546 * All-purpose routine to build a header chain of an IPv6 header 4547 * followed by any required extension headers and a proto header. 4548 * 4549 * The caller has to set the source and destination address as well as 4550 * ip6_plen. The caller has to massage any routing header and compensate 4551 * for the ULP pseudo-header checksum due to the source route. 4552 * 4553 * The extension headers will all be fully filled in. 
4554 */ 4555 void 4556 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4557 uint8_t protocol, uint32_t flowinfo) 4558 { 4559 uint8_t *nxthdr_ptr; 4560 uint8_t *cp; 4561 ip6_t *ip6h = (ip6_t *)buf; 4562 4563 /* Initialize IPv6 header */ 4564 ip6h->ip6_vcf = 4565 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4566 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4567 4568 if (ipp->ipp_fields & IPPF_TCLASS) { 4569 /* Overrides the class part of flowinfo */ 4570 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4571 ipp->ipp_tclass); 4572 } 4573 4574 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4575 ip6h->ip6_hops = ipp->ipp_hoplimit; 4576 else 4577 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4578 4579 if ((ipp->ipp_fields & IPPF_ADDR) && 4580 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4581 ip6h->ip6_src = ipp->ipp_addr; 4582 4583 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4584 cp = (uint8_t *)&ip6h[1]; 4585 /* 4586 * Here's where we have to start stringing together 4587 * any extension headers in the right order: 4588 * Hop-by-hop, destination, routing, and final destination opts. 4589 */ 4590 /* 4591 * If there's a security label here, then we ignore any hop-by-hop 4592 * options the user may try to set. 4593 */ 4594 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4595 /* 4596 * Hop-by-hop options with the label. 4597 * Note that ipp_label_v6 is just the option - not 4598 * the hopopts extension header. It also needs to be padded 4599 * to a multiple of 8 bytes. 
4600 */ 4601 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4602 uint_t hopoptslen; 4603 uint_t padlen; 4604 4605 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4606 hopoptslen = (padlen + 7)/8 * 8; 4607 padlen = hopoptslen - padlen; 4608 4609 *nxthdr_ptr = IPPROTO_HOPOPTS; 4610 nxthdr_ptr = &hbh->ip6h_nxt; 4611 hbh->ip6h_len = hopoptslen/8 - 1; 4612 cp += sizeof (ip6_hbh_t); 4613 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4614 cp += ipp->ipp_label_len_v6; 4615 4616 ASSERT(padlen <= 7); 4617 switch (padlen) { 4618 case 0: 4619 break; 4620 case 1: 4621 cp[0] = IP6OPT_PAD1; 4622 break; 4623 default: 4624 cp[0] = IP6OPT_PADN; 4625 cp[1] = padlen - 2; 4626 bzero(&cp[2], padlen - 2); 4627 break; 4628 } 4629 cp += padlen; 4630 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4631 /* Hop-by-hop options */ 4632 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4633 4634 *nxthdr_ptr = IPPROTO_HOPOPTS; 4635 nxthdr_ptr = &hbh->ip6h_nxt; 4636 4637 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4638 cp += ipp->ipp_hopoptslen; 4639 } 4640 /* 4641 * En-route destination options 4642 * Only do them if there's a routing header as well 4643 */ 4644 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4645 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4646 ip6_dest_t *dst = (ip6_dest_t *)cp; 4647 4648 *nxthdr_ptr = IPPROTO_DSTOPTS; 4649 nxthdr_ptr = &dst->ip6d_nxt; 4650 4651 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4652 cp += ipp->ipp_rthdrdstoptslen; 4653 } 4654 /* 4655 * Routing header next 4656 */ 4657 if (ipp->ipp_fields & IPPF_RTHDR) { 4658 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4659 4660 *nxthdr_ptr = IPPROTO_ROUTING; 4661 nxthdr_ptr = &rt->ip6r_nxt; 4662 4663 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4664 cp += ipp->ipp_rthdrlen; 4665 } 4666 /* 4667 * Do ultimate destination options 4668 */ 4669 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4670 ip6_dest_t *dest = (ip6_dest_t *)cp; 4671 4672 *nxthdr_ptr = IPPROTO_DSTOPTS; 4673 nxthdr_ptr = &dest->ip6d_nxt; 4674 4675 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4676 cp += ipp->ipp_dstoptslen; 4677 } 4678 /* 4679 * Now set the last header pointer to the proto passed in 4680 */ 4681 *nxthdr_ptr = protocol; 4682 ASSERT((int)(cp - buf) == buf_len); 4683 } 4684 4685 /* 4686 * Return a pointer to the routing header extension header 4687 * in the IPv6 header(s) chain passed in. 4688 * If none found, return NULL 4689 * Assumes that all extension headers are in same mblk as the v6 header 4690 */ 4691 ip6_rthdr_t * 4692 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4693 { 4694 ip6_dest_t *desthdr; 4695 ip6_frag_t *fraghdr; 4696 uint_t hdrlen; 4697 uint8_t nexthdr; 4698 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4699 4700 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4701 return ((ip6_rthdr_t *)ptr); 4702 4703 /* 4704 * The routing header will precede all extension headers 4705 * other than the hop-by-hop and destination options 4706 * extension headers, so if we see anything other than those, 4707 * we're done and didn't find it. 4708 * We could see a destination options header alone but no 4709 * routing header, in which case we'll return NULL as soon as 4710 * we see anything after that. 4711 * Hop-by-hop and destination option headers are identical, 4712 * so we can use either one we want as a template. 4713 */ 4714 nexthdr = ip6h->ip6_nxt; 4715 while (ptr < endptr) { 4716 /* Is there enough left for len + nexthdr? 
*/ 4717 if (ptr + MIN_EHDR_LEN > endptr) 4718 return (NULL); 4719 4720 switch (nexthdr) { 4721 case IPPROTO_HOPOPTS: 4722 case IPPROTO_DSTOPTS: 4723 /* Assumes the headers are identical for hbh and dst */ 4724 desthdr = (ip6_dest_t *)ptr; 4725 hdrlen = 8 * (desthdr->ip6d_len + 1); 4726 nexthdr = desthdr->ip6d_nxt; 4727 break; 4728 4729 case IPPROTO_ROUTING: 4730 return ((ip6_rthdr_t *)ptr); 4731 4732 case IPPROTO_FRAGMENT: 4733 fraghdr = (ip6_frag_t *)ptr; 4734 hdrlen = sizeof (ip6_frag_t); 4735 nexthdr = fraghdr->ip6f_nxt; 4736 break; 4737 4738 default: 4739 return (NULL); 4740 } 4741 ptr += hdrlen; 4742 } 4743 return (NULL); 4744 } 4745 4746 /* 4747 * Called for source-routed packets originating on this node. 4748 * Manipulates the original routing header by moving every entry up 4749 * one slot, placing the first entry in the v6 header's v6_dst field, 4750 * and placing the ultimate destination in the routing header's last 4751 * slot. 4752 * 4753 * Returns the checksum diference between the ultimate destination 4754 * (last hop in the routing header when the packet is sent) and 4755 * the first hop (ip6_dst when the packet is sent) 4756 */ 4757 /* ARGSUSED2 */ 4758 uint32_t 4759 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4760 { 4761 uint_t numaddr; 4762 uint_t i; 4763 in6_addr_t *addrptr; 4764 in6_addr_t tmp; 4765 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4766 uint32_t cksm; 4767 uint32_t addrsum = 0; 4768 uint16_t *ptr; 4769 4770 /* 4771 * Perform any processing needed for source routing. 4772 * We know that all extension headers will be in the same mblk 4773 * as the IPv6 header. 4774 */ 4775 4776 /* 4777 * If no segments left in header, or the header length field is zero, 4778 * don't move hop addresses around; 4779 * Checksum difference is zero. 
4780 */ 4781 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4782 return (0); 4783 4784 ptr = (uint16_t *)&ip6h->ip6_dst; 4785 cksm = 0; 4786 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4787 cksm += ptr[i]; 4788 } 4789 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4790 4791 /* 4792 * Here's where the fun begins - we have to 4793 * move all addresses up one spot, take the 4794 * first hop and make it our first ip6_dst, 4795 * and place the ultimate destination in the 4796 * newly-opened last slot. 4797 */ 4798 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4799 numaddr = rthdr->ip6r0_len / 2; 4800 tmp = *addrptr; 4801 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4802 *addrptr = addrptr[1]; 4803 } 4804 *addrptr = ip6h->ip6_dst; 4805 ip6h->ip6_dst = tmp; 4806 4807 /* 4808 * From the checksummed ultimate destination subtract the checksummed 4809 * current ip6_dst (the first hop address). Return that number. 4810 * (In the v4 case, the second part of this is done in each routine 4811 * that calls ip_massage_options(). We do it all in this one place 4812 * for v6). 
4813 */ 4814 ptr = (uint16_t *)&ip6h->ip6_dst; 4815 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4816 addrsum += ptr[i]; 4817 } 4818 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4819 if ((int)cksm < 0) 4820 cksm--; 4821 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4822 4823 return (cksm); 4824 } 4825 4826 void 4827 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4828 { 4829 kstat_t *ksp; 4830 4831 ip6_stat_t template = { 4832 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4833 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4834 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4835 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4836 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4837 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4838 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4839 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4840 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4841 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4842 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4843 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4844 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4845 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4846 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4847 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4848 }; 4849 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4850 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4851 KSTAT_FLAG_VIRTUAL, stackid); 4852 4853 if (ksp == NULL) 4854 return (NULL); 4855 4856 bcopy(&template, ip6_statisticsp, sizeof (template)); 4857 ksp->ks_data = (void *)ip6_statisticsp; 4858 ksp->ks_private = (void *)(uintptr_t)stackid; 4859 4860 kstat_install(ksp); 4861 return (ksp); 4862 } 4863 4864 void 4865 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4866 { 4867 if (ksp != NULL) { 4868 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4869 kstat_delete_netstack(ksp, stackid); 4870 } 4871 } 4872 4873 /* 4874 * The following two functions set and get 
the value for the 4875 * IPV6_SRC_PREFERENCES socket option. 4876 */ 4877 int 4878 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4879 { 4880 /* 4881 * We only support preferences that are covered by 4882 * IPV6_PREFER_SRC_MASK. 4883 */ 4884 if (prefs & ~IPV6_PREFER_SRC_MASK) 4885 return (EINVAL); 4886 4887 /* 4888 * Look for conflicting preferences or default preferences. If 4889 * both bits of a related pair are clear, the application wants the 4890 * system's default value for that pair. Both bits in a pair can't 4891 * be set. 4892 */ 4893 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4894 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4895 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4896 IPV6_PREFER_SRC_MIPMASK) { 4897 return (EINVAL); 4898 } 4899 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4900 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4901 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4902 IPV6_PREFER_SRC_TMPMASK) { 4903 return (EINVAL); 4904 } 4905 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4906 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4907 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4908 IPV6_PREFER_SRC_CGAMASK) { 4909 return (EINVAL); 4910 } 4911 4912 ixa->ixa_src_preferences = prefs; 4913 return (0); 4914 } 4915 4916 size_t 4917 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4918 { 4919 *val = ixa->ixa_src_preferences; 4920 return (sizeof (ixa->ixa_src_preferences)); 4921 } 4922 4923 /* 4924 * Get the size of the IP options (including the IP headers size) 4925 * without including the AH header's size. If till_ah is B_FALSE, 4926 * and if AH header is present, dest options beyond AH header will 4927 * also be included in the returned size. 
4928 */ 4929 int 4930 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4931 { 4932 ip6_t *ip6h; 4933 uint8_t nexthdr; 4934 uint8_t *whereptr; 4935 ip6_hbh_t *hbhhdr; 4936 ip6_dest_t *dsthdr; 4937 ip6_rthdr_t *rthdr; 4938 int ehdrlen; 4939 int size; 4940 ah_t *ah; 4941 4942 ip6h = (ip6_t *)mp->b_rptr; 4943 size = IPV6_HDR_LEN; 4944 nexthdr = ip6h->ip6_nxt; 4945 whereptr = (uint8_t *)&ip6h[1]; 4946 for (;;) { 4947 /* Assume IP has already stripped it */ 4948 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4949 switch (nexthdr) { 4950 case IPPROTO_HOPOPTS: 4951 hbhhdr = (ip6_hbh_t *)whereptr; 4952 nexthdr = hbhhdr->ip6h_nxt; 4953 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4954 break; 4955 case IPPROTO_DSTOPTS: 4956 dsthdr = (ip6_dest_t *)whereptr; 4957 nexthdr = dsthdr->ip6d_nxt; 4958 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4959 break; 4960 case IPPROTO_ROUTING: 4961 rthdr = (ip6_rthdr_t *)whereptr; 4962 nexthdr = rthdr->ip6r_nxt; 4963 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4964 break; 4965 default : 4966 if (till_ah) { 4967 ASSERT(nexthdr == IPPROTO_AH); 4968 return (size); 4969 } 4970 /* 4971 * If we don't have a AH header to traverse, 4972 * return now. This happens normally for 4973 * outbound datagrams where we have not inserted 4974 * the AH header. 4975 */ 4976 if (nexthdr != IPPROTO_AH) { 4977 return (size); 4978 } 4979 4980 /* 4981 * We don't include the AH header's size 4982 * to be symmetrical with other cases where 4983 * we either don't have a AH header (outbound) 4984 * or peek into the AH header yet (inbound and 4985 * not pulled up yet). 4986 */ 4987 ah = (ah_t *)whereptr; 4988 nexthdr = ah->ah_nexthdr; 4989 ehdrlen = (ah->ah_length << 2) + 8; 4990 4991 if (nexthdr == IPPROTO_DSTOPTS) { 4992 if (whereptr + ehdrlen >= mp->b_wptr) { 4993 /* 4994 * The destination options header 4995 * is not part of the first mblk. 
4996 */ 4997 whereptr = mp->b_cont->b_rptr; 4998 } else { 4999 whereptr += ehdrlen; 5000 } 5001 5002 dsthdr = (ip6_dest_t *)whereptr; 5003 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 5004 size += ehdrlen; 5005 } 5006 return (size); 5007 } 5008 whereptr += ehdrlen; 5009 size += ehdrlen; 5010 } 5011 } 5012 5013 /* 5014 * Utility routine that checks if `v6srcp' is a valid address on underlying 5015 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 5016 * associated with `v6srcp' on success. NOTE: if this is not called from 5017 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 5018 * group during or after this lookup. 5019 */ 5020 boolean_t 5021 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 5022 { 5023 ipif_t *ipif; 5024 5025 5026 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 5027 if (ipif != NULL) { 5028 if (ipifp != NULL) 5029 *ipifp = ipif; 5030 else 5031 ipif_refrele(ipif); 5032 return (B_TRUE); 5033 } 5034 5035 if (ip_debug > 2) { 5036 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 5037 "src %s\n", AF_INET6, v6srcp); 5038 } 5039 return (B_FALSE); 5040 } 5041