1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved
24 *
25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2019 Joyent, Inc.
27 * Copyright 2024 Oxide Computer Company
28 */
29 /* Copyright (c) 1990 Mentat Inc. */
30
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/dlpi.h>
34 #include <sys/stropts.h>
35 #include <sys/sysmacros.h>
36 #include <sys/strsubr.h>
37 #include <sys/strlog.h>
38 #include <sys/strsun.h>
39 #include <sys/zone.h>
40 #define _SUN_TPI_VERSION 2
41 #include <sys/tihdr.h>
42 #include <sys/xti_inet.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/kobj.h>
48 #include <sys/modctl.h>
49 #include <sys/atomic.h>
50 #include <sys/policy.h>
51 #include <sys/priv.h>
52
53 #include <sys/systm.h>
54 #include <sys/param.h>
55 #include <sys/kmem.h>
56 #include <sys/sdt.h>
57 #include <sys/socket.h>
58 #include <sys/vtrace.h>
59 #include <sys/isa_defs.h>
60 #include <sys/mac.h>
61 #include <net/if.h>
62 #include <net/if_arp.h>
63 #include <net/route.h>
64 #include <sys/sockio.h>
65 #include <netinet/in.h>
66 #include <net/if_dl.h>
67
68 #include <inet/common.h>
69 #include <inet/mi.h>
70 #include <inet/mib2.h>
71 #include <inet/nd.h>
72 #include <inet/arp.h>
73 #include <inet/snmpcom.h>
74 #include <inet/kstatcom.h>
75
76 #include <netinet/igmp_var.h>
77 #include <netinet/ip6.h>
78 #include <netinet/icmp6.h>
79 #include <netinet/sctp.h>
80
81 #include <inet/ip.h>
82 #include <inet/ip_impl.h>
83 #include <inet/ip6.h>
84 #include <inet/ip6_asp.h>
85 #include <inet/optcom.h>
86 #include <inet/tcp.h>
87 #include <inet/tcp_impl.h>
88 #include <inet/ip_multi.h>
89 #include <inet/ip_if.h>
90 #include <inet/ip_ire.h>
91 #include <inet/ip_ftable.h>
92 #include <inet/ip_rts.h>
93 #include <inet/ip_ndp.h>
94 #include <inet/ip_listutils.h>
95 #include <netinet/igmp.h>
96 #include <netinet/ip_mroute.h>
97 #include <inet/ipp_common.h>
98
99 #include <net/pfkeyv2.h>
100 #include <inet/sadb.h>
101 #include <inet/ipsec_impl.h>
102 #include <inet/ipdrop.h>
103 #include <inet/ip_netinfo.h>
104 #include <inet/ilb_ip.h>
105 #include <sys/squeue_impl.h>
106 #include <sys/squeue.h>
107
108 #include <sys/ethernet.h>
109 #include <net/if_types.h>
110 #include <sys/cpuvar.h>
111
112 #include <ipp/ipp.h>
113 #include <ipp/ipp_impl.h>
114 #include <ipp/ipgpc/ipgpc.h>
115
116 #include <sys/pattr.h>
117 #include <inet/ipclassifier.h>
118 #include <inet/sctp_ip.h>
119 #include <inet/sctp/sctp_impl.h>
120 #include <inet/udp_impl.h>
121 #include <sys/sunddi.h>
122
123 #include <sys/tsol/label.h>
124 #include <sys/tsol/tnet.h>
125
126 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
127
#ifdef DEBUG
/* Only declared for DEBUG builds; the definition is not in the visible code. */
extern boolean_t skip_sctp_cksum;
#endif

/* Forward declarations of file-static routines. */
static void ip_input_local_v6(ire_t *, mblk_t *, ip6_t *, ip_recv_attr_t *);

static void ip_input_multicast_v6(ire_t *, mblk_t *, ip6_t *,
    ip_recv_attr_t *);

/* Request inlining of these routines. */
#pragma inline(ip_input_common_v6, ip_input_local_v6, ip_forward_xmit_v6)
138
139 /*
140 * Direct read side procedure capable of dealing with chains. GLDv3 based
141 * drivers call this function directly with mblk chains while STREAMS
142 * read side procedure ip_rput() calls this for single packet with ip_ring
143 * set to NULL to process one packet at a time.
144 *
145 * The ill will always be valid if this function is called directly from
146 * the driver.
147 *
148 * If this chain is part of a VLAN stream, then the VLAN tag is
149 * stripped from the MAC header before being delivered to this
150 * function.
151 *
152 * If the IP header in packet is not 32-bit aligned, every message in the
153 * chain will be aligned before further operations. This is required on SPARC
154 * platform.
155 */
156 void
ip_input_v6(ill_t * ill,ill_rx_ring_t * ip_ring,mblk_t * mp_chain,struct mac_header_info_s * mhip)157 ip_input_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
158 struct mac_header_info_s *mhip)
159 {
160 (void) ip_input_common_v6(ill, ip_ring, mp_chain, mhip, NULL, NULL,
161 NULL);
162 }
163
164 /*
165 * ip_accept_tcp_v6() - This function is called by the squeue when it retrieves
166 * a chain of packets in the poll mode. The packets have gone through the
167 * data link processing but not IP processing. For performance and latency
168 * reasons, the squeue wants to process the chain in line instead of feeding
169 * it back via ip_input path.
170 *
171 * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v6
172 * will pass back any TCP packets matching the target sqp to
173 * ip_input_common_v6 using ira_target_sqp_mp. Other packets are handled by
174 * ip_input_v6 and ip_fanout_v6 as normal.
175 * The TCP packets that match the target squeue are returned to the caller
176 * as a b_next chain after each packet has been prepend with an mblk
177 * from ip_recv_attr_to_mblk.
178 */
179 mblk_t *
ip_accept_tcp_v6(ill_t * ill,ill_rx_ring_t * ip_ring,squeue_t * target_sqp,mblk_t * mp_chain,mblk_t ** last,uint_t * cnt)180 ip_accept_tcp_v6(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp,
181 mblk_t *mp_chain, mblk_t **last, uint_t *cnt)
182 {
183 return (ip_input_common_v6(ill, ip_ring, mp_chain, NULL, target_sqp,
184 last, cnt));
185 }
186
187 /*
188 * Used by ip_input_v6 and ip_accept_tcp_v6
189 * The last three arguments are only used by ip_accept_tcp_v6, and mhip is
190 * only used by ip_input_v6.
191 */
192 mblk_t *
ip_input_common_v6(ill_t * ill,ill_rx_ring_t * ip_ring,mblk_t * mp_chain,struct mac_header_info_s * mhip,squeue_t * target_sqp,mblk_t ** last,uint_t * cnt)193 ip_input_common_v6(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain,
194 struct mac_header_info_s *mhip, squeue_t *target_sqp,
195 mblk_t **last, uint_t *cnt)
196 {
197 mblk_t *mp;
198 ip6_t *ip6h;
199 ip_recv_attr_t iras; /* Receive attributes */
200 rtc_t rtc;
201 iaflags_t chain_flags = 0; /* Fixed for chain */
202 mblk_t *ahead = NULL; /* Accepted head */
203 mblk_t *atail = NULL; /* Accepted tail */
204 uint_t acnt = 0; /* Accepted count */
205
206 ASSERT(mp_chain != NULL);
207 ASSERT(ill != NULL);
208
209 /* These ones do not change as we loop over packets */
210 iras.ira_ill = iras.ira_rill = ill;
211 iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
212 iras.ira_rifindex = iras.ira_ruifindex;
213 iras.ira_sqp = NULL;
214 iras.ira_ring = ip_ring;
215 /* For ECMP and outbound transmit ring selection */
216 iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring);
217
218 iras.ira_target_sqp = target_sqp;
219 iras.ira_target_sqp_mp = NULL;
220 if (target_sqp != NULL)
221 chain_flags |= IRAF_TARGET_SQP;
222
223 /*
224 * We try to have a mhip pointer when possible, but
225 * it might be NULL in some cases. In those cases we
226 * have to assume unicast.
227 */
228 iras.ira_mhip = mhip;
229 iras.ira_flags = 0;
230 if (mhip != NULL) {
231 switch (mhip->mhi_dsttype) {
232 case MAC_ADDRTYPE_MULTICAST :
233 chain_flags |= IRAF_L2DST_MULTICAST;
234 break;
235 case MAC_ADDRTYPE_BROADCAST :
236 chain_flags |= IRAF_L2DST_BROADCAST;
237 break;
238 }
239 }
240
241 /*
242 * Initialize the one-element route cache.
243 *
244 * We do ire caching from one iteration to
245 * another. In the event the packet chain contains
246 * all packets from the same dst, this caching saves
247 * an ire_route_recursive for each of the succeeding
248 * packets in a packet chain.
249 */
250 rtc.rtc_ire = NULL;
251 rtc.rtc_ip6addr = ipv6_all_zeros;
252
253 /* Loop over b_next */
254 for (mp = mp_chain; mp != NULL; mp = mp_chain) {
255 mp_chain = mp->b_next;
256 mp->b_next = NULL;
257
258 /*
259 * if db_ref > 1 then copymsg and free original. Packet
260 * may be changed and we do not want the other entity
261 * who has a reference to this message to trip over the
262 * changes. This is a blind change because trying to
263 * catch all places that might change the packet is too
264 * difficult.
265 *
266 * This corresponds to the fast path case, where we have
267 * a chain of M_DATA mblks. We check the db_ref count
268 * of only the 1st data block in the mblk chain. There
269 * doesn't seem to be a reason why a device driver would
270 * send up data with varying db_ref counts in the mblk
271 * chain. In any case the Fast path is a private
272 * interface, and our drivers don't do such a thing.
273 * Given the above assumption, there is no need to walk
274 * down the entire mblk chain (which could have a
275 * potential performance problem)
276 *
277 * The "(DB_REF(mp) > 1)" check was moved from ip_rput()
278 * to here because of exclusive ip stacks and vnics.
279 * Packets transmitted from exclusive stack over vnic
280 * can have db_ref > 1 and when it gets looped back to
281 * another vnic in a different zone, you have ip_input()
282 * getting dblks with db_ref > 1. So if someone
283 * complains of TCP performance under this scenario,
284 * take a serious look here on the impact of copymsg().
285 */
286 if (DB_REF(mp) > 1) {
287 if ((mp = ip_fix_dbref(mp, &iras)) == NULL)
288 continue;
289 }
290
291 /*
292 * IP header ptr not aligned?
293 * OR IP header not complete in first mblk
294 */
295 ip6h = (ip6_t *)mp->b_rptr;
296 if (!OK_32PTR(ip6h) || MBLKL(mp) < IPV6_HDR_LEN) {
297 mp = ip_check_and_align_header(mp, IPV6_HDR_LEN, &iras);
298 if (mp == NULL)
299 continue;
300 ip6h = (ip6_t *)mp->b_rptr;
301 }
302
303 /* Protect against a mix of Ethertypes and IP versions */
304 if (IPH_HDR_VERSION(ip6h) != IPV6_VERSION) {
305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
306 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
307 freemsg(mp);
308 /* mhip might point into 1st packet in the chain. */
309 iras.ira_mhip = NULL;
310 continue;
311 }
312
313 /*
314 * Check for Martian addrs; we have to explicitly
315 * test for for zero dst since this is also used as
316 * an indication that the rtc is not used.
317 */
318 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_dst)) {
319 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
320 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
321 freemsg(mp);
322 /* mhip might point into 1st packet in the chain. */
323 iras.ira_mhip = NULL;
324 continue;
325 }
326 /*
327 * Keep L2SRC from a previous packet in chain since mhip
328 * might point into an earlier packet in the chain.
329 */
330 chain_flags |= (iras.ira_flags & IRAF_L2SRC_SET);
331
332 iras.ira_flags = IRAF_VERIFY_ULP_CKSUM | chain_flags;
333 iras.ira_free_flags = 0;
334 iras.ira_cred = NULL;
335 iras.ira_cpid = NOPID;
336 iras.ira_tsl = NULL;
337 iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */
338
339 /*
340 * We must count all incoming packets, even if they end
341 * up being dropped later on. Defer counting bytes until
342 * we have the whole IP header in first mblk.
343 */
344 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
345
346 iras.ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
347 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets,
348 iras.ira_pktlen);
349 iras.ira_ttl = ip6h->ip6_hlim;
350
351 /*
352 * Call one of:
353 * ill_input_full_v6
354 * ill_input_short_v6
355 * The former is used in the case of TX. See ill_set_inputfn().
356 */
357 (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
358
359 /* Any references to clean up? No hold on ira_ill */
360 if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
361 ira_cleanup(&iras, B_FALSE);
362
363 if (iras.ira_target_sqp_mp != NULL) {
364 /* Better be called from ip_accept_tcp */
365 ASSERT(target_sqp != NULL);
366
367 /* Found one packet to accept */
368 mp = iras.ira_target_sqp_mp;
369 iras.ira_target_sqp_mp = NULL;
370 ASSERT(ip_recv_attr_is_mblk(mp));
371
372 if (atail != NULL)
373 atail->b_next = mp;
374 else
375 ahead = mp;
376 atail = mp;
377 acnt++;
378 mp = NULL;
379 }
380 /* mhip might point into 1st packet in the chain. */
381 iras.ira_mhip = NULL;
382 }
383 /* Any remaining references to the route cache? */
384 if (rtc.rtc_ire != NULL) {
385 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
386 ire_refrele(rtc.rtc_ire);
387 }
388
389 if (ahead != NULL) {
390 /* Better be called from ip_accept_tcp */
391 ASSERT(target_sqp != NULL);
392 *last = atail;
393 *cnt = acnt;
394 return (ahead);
395 }
396
397 return (NULL);
398 }
399
400 /*
401 * This input function is used when
402 * - is_system_labeled()
403 *
404 * Note that for IPv6 CGTP filtering is handled only when receiving fragment
405 * headers, and RSVP uses router alert options, thus we don't need anything
406 * extra for them.
407 */
408 void
ill_input_full_v6(mblk_t * mp,void * iph_arg,void * nexthop_arg,ip_recv_attr_t * ira,rtc_t * rtc)409 ill_input_full_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
410 ip_recv_attr_t *ira, rtc_t *rtc)
411 {
412 ip6_t *ip6h = (ip6_t *)iph_arg;
413 in6_addr_t *nexthop = (in6_addr_t *)nexthop_arg;
414 ill_t *ill = ira->ira_ill;
415
416 ASSERT(ira->ira_tsl == NULL);
417
418 /*
419 * Attach any necessary label information to
420 * this packet
421 */
422 if (is_system_labeled()) {
423 ira->ira_flags |= IRAF_SYSTEM_LABELED;
424
425 /*
426 * This updates ira_cred, ira_tsl and ira_free_flags based
427 * on the label.
428 */
429 if (!tsol_get_pkt_label(mp, IPV6_VERSION, ira)) {
430 if (ip6opt_ls != 0)
431 ip0dbg(("tsol_get_pkt_label v6 failed\n"));
432 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
433 ip_drop_input("ipIfStatsInDiscards", mp, ill);
434 freemsg(mp);
435 return;
436 }
437 /* Note that ira_tsl can be NULL here. */
438
439 /* tsol_get_pkt_label sometimes does pullupmsg */
440 ip6h = (ip6_t *)mp->b_rptr;
441 }
442 ill_input_short_v6(mp, ip6h, nexthop, ira, rtc);
443 }
444
445 /*
446 * Check for IPv6 addresses that should not appear on the wire
447 * as either source or destination.
448 * If we ever implement Stateless IPv6 Translators (SIIT) we'd have
449 * to revisit the IPv4-mapped part.
450 */
451 static boolean_t
ip6_bad_address(in6_addr_t * addr,boolean_t is_src)452 ip6_bad_address(in6_addr_t *addr, boolean_t is_src)
453 {
454 if (IN6_IS_ADDR_V4MAPPED(addr)) {
455 ip1dbg(("ip_input_v6: pkt with IPv4-mapped addr"));
456 return (B_TRUE);
457 }
458 if (IN6_IS_ADDR_LOOPBACK(addr)) {
459 ip1dbg(("ip_input_v6: pkt with loopback addr"));
460 return (B_TRUE);
461 }
462 if (!is_src && IN6_IS_ADDR_UNSPECIFIED(addr)) {
463 /*
464 * having :: in the src is ok: it's used for DAD.
465 */
466 ip1dbg(("ip_input_v6: pkt with unspecified addr"));
467 return (B_TRUE);
468 }
469 return (B_FALSE);
470 }
471
472 /*
473 * Routing lookup for IPv6 link-locals.
474 * First we look on the inbound interface, then we check for IPMP and
475 * look on the upper interface.
476 * We update ira_ruifindex if we find the IRE on the upper interface.
477 */
478 static ire_t *
ire_linklocal(const in6_addr_t * nexthop,ill_t * ill,ip_recv_attr_t * ira,uint_t irr_flags,ip_stack_t * ipst)479 ire_linklocal(const in6_addr_t *nexthop, ill_t *ill, ip_recv_attr_t *ira,
480 uint_t irr_flags, ip_stack_t *ipst)
481 {
482 int match_flags = MATCH_IRE_SECATTR | MATCH_IRE_ILL;
483 ire_t *ire;
484
485 ASSERT(IN6_IS_ADDR_LINKLOCAL(nexthop));
486 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
487 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
488 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
489 !IS_UNDER_IPMP(ill))
490 return (ire);
491
492 /*
493 * When we are using IMP we need to look for an IRE on both the
494 * under and upper interfaces since there are different
495 * link-local addresses for the under and upper.
496 */
497 ill = ipmp_ill_hold_ipmp_ill(ill);
498 if (ill == NULL)
499 return (ire);
500
501 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
502
503 ire_refrele(ire);
504 ire = ire_route_recursive_v6(nexthop, 0, ill, ALL_ZONES, ira->ira_tsl,
505 match_flags, irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
506 ill_refrele(ill);
507 return (ire);
508 }
509
510 /*
511 * This is the tail-end of the full receive side packet handling.
512 * It can be used directly when the configuration is simple.
513 */
514 void
ill_input_short_v6(mblk_t * mp,void * iph_arg,void * nexthop_arg,ip_recv_attr_t * ira,rtc_t * rtc)515 ill_input_short_v6(mblk_t *mp, void *iph_arg, void *nexthop_arg,
516 ip_recv_attr_t *ira, rtc_t *rtc)
517 {
518 ire_t *ire;
519 ill_t *ill = ira->ira_ill;
520 ip_stack_t *ipst = ill->ill_ipst;
521 uint_t pkt_len;
522 ssize_t len;
523 ip6_t *ip6h = (ip6_t *)iph_arg;
524 in6_addr_t nexthop = *(in6_addr_t *)nexthop_arg;
525 ilb_stack_t *ilbs = ipst->ips_netstack->netstack_ilb;
526 uint_t irr_flags;
527 #define rptr ((uchar_t *)ip6h)
528
529 ASSERT(DB_TYPE(mp) == M_DATA);
530
531 /*
532 * Check for source/dest being a bad address: loopback, any, or
533 * v4mapped. All of them start with a 64 bits of zero.
534 */
535 if (ip6h->ip6_src.s6_addr32[0] == 0 &&
536 ip6h->ip6_src.s6_addr32[1] == 0) {
537 if (ip6_bad_address(&ip6h->ip6_src, B_TRUE)) {
538 ip1dbg(("ip_input_v6: pkt with bad src addr\n"));
539 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
540 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
541 freemsg(mp);
542 return;
543 }
544 }
545 if (ip6h->ip6_dst.s6_addr32[0] == 0 &&
546 ip6h->ip6_dst.s6_addr32[1] == 0) {
547 if (ip6_bad_address(&ip6h->ip6_dst, B_FALSE)) {
548 ip1dbg(("ip_input_v6: pkt with bad dst addr\n"));
549 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
550 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
551 freemsg(mp);
552 return;
553 }
554 }
555
556 len = mp->b_wptr - rptr;
557 pkt_len = ira->ira_pktlen;
558
559 /* multiple mblk or too short */
560 len -= pkt_len;
561 if (len != 0) {
562 mp = ip_check_length(mp, rptr, len, pkt_len, IPV6_HDR_LEN, ira);
563 if (mp == NULL)
564 return;
565 ip6h = (ip6_t *)mp->b_rptr;
566 }
567
568 DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
569 ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h,
570 int, 0);
571 /*
572 * The event for packets being received from a 'physical'
573 * interface is placed after validation of the source and/or
574 * destination address as being local so that packets can be
575 * redirected to loopback addresses using ipnat.
576 */
577 DTRACE_PROBE4(ip6__physical__in__start,
578 ill_t *, ill, ill_t *, NULL,
579 ip6_t *, ip6h, mblk_t *, mp);
580
581 if (HOOKS6_INTERESTED_PHYSICAL_IN(ipst)) {
582 int ll_multicast = 0;
583 int error;
584 in6_addr_t orig_dst = ip6h->ip6_dst;
585
586 if (ira->ira_flags & IRAF_L2DST_MULTICAST)
587 ll_multicast = HPE_MULTICAST;
588 else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
589 ll_multicast = HPE_BROADCAST;
590
591 FW_HOOKS6(ipst->ips_ip6_physical_in_event,
592 ipst->ips_ipv6firewall_physical_in,
593 ill, NULL, ip6h, mp, mp, ll_multicast, ipst, error);
594
595 DTRACE_PROBE1(ip6__physical__in__end, mblk_t *, mp);
596
597 if (mp == NULL)
598 return;
599
600 /* The length could have changed */
601 ip6h = (ip6_t *)mp->b_rptr;
602 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
603 pkt_len = ira->ira_pktlen;
604
605 /*
606 * In case the destination changed we override any previous
607 * change to nexthop.
608 */
609 if (!IN6_ARE_ADDR_EQUAL(&orig_dst, &ip6h->ip6_dst))
610 nexthop = ip6h->ip6_dst;
611
612 if (IN6_IS_ADDR_UNSPECIFIED(&nexthop)) {
613 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
614 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
615 freemsg(mp);
616 return;
617 }
618
619 }
620
621 if (ipst->ips_ip6_observe.he_interested) {
622 zoneid_t dzone;
623
624 /*
625 * On the inbound path the src zone will be unknown as
626 * this packet has come from the wire.
627 */
628 dzone = ip_get_zoneid_v6(&nexthop, mp, ill, ira, ALL_ZONES);
629 ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
630 }
631
632 if ((ip6h->ip6_vcf & IPV6_VERS_AND_FLOW_MASK) !=
633 IPV6_DEFAULT_VERS_AND_FLOW) {
634 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
635 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInWrongIPVersion);
636 ip_drop_input("ipIfStatsInWrongIPVersion", mp, ill);
637 freemsg(mp);
638 return;
639 }
640
641 /*
642 * For IPv6 we update ira_ip_hdr_length and ira_protocol as
643 * we parse the headers, starting with the hop-by-hop options header.
644 */
645 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
646 if ((ira->ira_protocol = ip6h->ip6_nxt) == IPPROTO_HOPOPTS) {
647 ip6_hbh_t *hbhhdr;
648 uint_t ehdrlen;
649 uint8_t *optptr;
650
651 if (pkt_len < IPV6_HDR_LEN + MIN_EHDR_LEN) {
652 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
653 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
654 freemsg(mp);
655 return;
656 }
657 if (mp->b_cont != NULL &&
658 rptr + IPV6_HDR_LEN + MIN_EHDR_LEN > mp->b_wptr) {
659 ip6h = ip_pullup(mp, IPV6_HDR_LEN + MIN_EHDR_LEN, ira);
660 if (ip6h == NULL) {
661 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
662 ip_drop_input("ipIfStatsInDiscards", mp, ill);
663 freemsg(mp);
664 return;
665 }
666 }
667 hbhhdr = (ip6_hbh_t *)&ip6h[1];
668 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
669
670 if (pkt_len < IPV6_HDR_LEN + ehdrlen) {
671 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
672 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
673 freemsg(mp);
674 return;
675 }
676 if (mp->b_cont != NULL &&
677 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
678 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
679 if (ip6h == NULL) {
680 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
681 ip_drop_input("ipIfStatsInDiscards", mp, ill);
682 freemsg(mp);
683 return;
684 }
685 hbhhdr = (ip6_hbh_t *)&ip6h[1];
686 }
687
688 /*
689 * Update ira_ip_hdr_length to skip the hop-by-hop header
690 * once we get to ip_fanout_v6
691 */
692 ira->ira_ip_hdr_length += ehdrlen;
693 ira->ira_protocol = hbhhdr->ip6h_nxt;
694
695 optptr = (uint8_t *)&hbhhdr[1];
696 switch (ip_process_options_v6(mp, ip6h, optptr,
697 ehdrlen - 2, IPPROTO_HOPOPTS, ira)) {
698 case -1:
699 /*
700 * Packet has been consumed and any
701 * needed ICMP messages sent.
702 */
703 return;
704 case 0:
705 /* no action needed */
706 break;
707 case 1:
708 /*
709 * Known router alert. Make use handle it as local
710 * by setting the nexthop to be the all-host multicast
711 * address, and skip multicast membership filter by
712 * marking as a router alert.
713 */
714 ira->ira_flags |= IRAF_ROUTER_ALERT;
715 nexthop = ipv6_all_hosts_mcast;
716 break;
717 }
718 }
719
720 /*
721 * Here we check to see if we machine is setup as
722 * L3 loadbalancer and if the incoming packet is for a VIP
723 *
724 * Check the following:
725 * - there is at least a rule
726 * - protocol of the packet is supported
727 *
728 * We don't load balance IPv6 link-locals.
729 */
730 if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ira->ira_protocol) &&
731 !IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
732 in6_addr_t lb_dst;
733 int lb_ret;
734
735 /* For convenience, we just pull up the mblk. */
736 if (mp->b_cont != NULL) {
737 if (pullupmsg(mp, -1) == 0) {
738 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
739 ip_drop_input("ipIfStatsInDiscards - pullupmsg",
740 mp, ill);
741 freemsg(mp);
742 return;
743 }
744 ip6h = (ip6_t *)mp->b_rptr;
745 }
746 lb_ret = ilb_check_v6(ilbs, ill, mp, ip6h, ira->ira_protocol,
747 (uint8_t *)ip6h + ira->ira_ip_hdr_length, &lb_dst);
748 if (lb_ret == ILB_DROPPED) {
749 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
750 ip_drop_input("ILB_DROPPED", mp, ill);
751 freemsg(mp);
752 return;
753 }
754 if (lb_ret == ILB_BALANCED) {
755 /* Set the dst to that of the chosen server */
756 nexthop = lb_dst;
757 DB_CKSUMFLAGS(mp) = 0;
758 }
759 }
760
761 if (ill->ill_flags & ILLF_ROUTER)
762 irr_flags = IRR_ALLOCATE;
763 else
764 irr_flags = IRR_NONE;
765
766 /* Can not use route cache with TX since the labels can differ */
767 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
768 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
769 ire = ire_multicast(ill);
770 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
771 ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
772 ipst);
773 } else {
774 /* Match destination and label */
775 ire = ire_route_recursive_v6(&nexthop, 0, NULL,
776 ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
777 irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
778 NULL);
779 }
780 /* Update the route cache so we do the ire_refrele */
781 ASSERT(ire != NULL);
782 if (rtc->rtc_ire != NULL)
783 ire_refrele(rtc->rtc_ire);
784 rtc->rtc_ire = ire;
785 rtc->rtc_ip6addr = nexthop;
786 } else if (IN6_ARE_ADDR_EQUAL(&nexthop, &rtc->rtc_ip6addr) &&
787 rtc->rtc_ire != NULL) {
788 /* Use the route cache */
789 ire = rtc->rtc_ire;
790 } else {
791 /* Update the route cache */
792 if (IN6_IS_ADDR_MULTICAST(&nexthop)) {
793 ire = ire_multicast(ill);
794 } else if (IN6_IS_ADDR_LINKLOCAL(&nexthop)) {
795 ire = ire_linklocal(&nexthop, ill, ira, irr_flags,
796 ipst);
797 } else {
798 ire = ire_route_recursive_dstonly_v6(&nexthop,
799 irr_flags, ira->ira_xmit_hint, ipst);
800 }
801 ASSERT(ire != NULL);
802 if (rtc->rtc_ire != NULL)
803 ire_refrele(rtc->rtc_ire);
804 rtc->rtc_ire = ire;
805 rtc->rtc_ip6addr = nexthop;
806 }
807
808 ire->ire_ib_pkt_count++;
809
810 /*
811 * Based on ire_type and ire_flags call one of:
812 * ire_recv_local_v6 - for IRE_LOCAL
813 * ire_recv_loopback_v6 - for IRE_LOOPBACK
814 * ire_recv_multirt_v6 - if RTF_MULTIRT
815 * ire_recv_noroute_v6 - if RTF_REJECT or RTF_BLACHOLE
816 * ire_recv_multicast_v6 - for IRE_MULTICAST
817 * ire_recv_noaccept_v6 - for ire_noaccept ones
818 * ire_recv_forward_v6 - for the rest.
819 */
820
821 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
822 }
823 #undef rptr
824
825 /*
826 * ire_recvfn for IREs that need forwarding
827 */
828 void
ire_recv_forward_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)829 ire_recv_forward_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
830 {
831 ip6_t *ip6h = (ip6_t *)iph_arg;
832 ill_t *ill = ira->ira_ill;
833 ip_stack_t *ipst = ill->ill_ipst;
834 iaflags_t iraflags = ira->ira_flags;
835 ill_t *dst_ill;
836 nce_t *nce;
837 uint32_t added_tx_len;
838 uint32_t mtu, iremtu;
839
840 if (iraflags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
841 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
842 ip_drop_input("l2 multicast not forwarded", mp, ill);
843 freemsg(mp);
844 return;
845 }
846
847 if (!(ill->ill_flags & ILLF_ROUTER)) {
848 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
849 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
850 freemsg(mp);
851 return;
852 }
853
854 /*
855 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
856 * when it is found by ire_route_recursive, but that some other thread
857 * could have changed the routes with the effect of clearing
858 * ire_dep_parent. In that case we'd end up dropping the packet, or
859 * finding a new nce below.
860 * Get, allocate, or update the nce.
861 * We get a refhold on ire_nce_cache as a result of this to avoid races
862 * where ire_nce_cache is deleted.
863 *
864 * This ensures that we don't forward if the interface is down since
865 * ipif_down removes all the nces.
866 */
867 mutex_enter(&ire->ire_lock);
868 nce = ire->ire_nce_cache;
869 if (nce == NULL) {
870 /* Not yet set up - try to set one up */
871 mutex_exit(&ire->ire_lock);
872 (void) ire_revalidate_nce(ire);
873 mutex_enter(&ire->ire_lock);
874 nce = ire->ire_nce_cache;
875 if (nce == NULL) {
876 mutex_exit(&ire->ire_lock);
877 /* The ire_dep_parent chain went bad, or no memory */
878 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
879 ip_drop_input("No ire_dep_parent", mp, ill);
880 freemsg(mp);
881 return;
882 }
883 }
884 nce_refhold(nce);
885 mutex_exit(&ire->ire_lock);
886
887 if (nce->nce_is_condemned) {
888 nce_t *nce1;
889
890 nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_FALSE);
891 nce_refrele(nce);
892 if (nce1 == NULL) {
893 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
894 ip_drop_input("No nce", mp, ill);
895 freemsg(mp);
896 return;
897 }
898 nce = nce1;
899 }
900 dst_ill = nce->nce_ill;
901
902 /*
903 * Unless we are forwarding, drop the packet.
904 * Unlike IPv4 we don't allow source routed packets out the same
905 * interface when we are not a router.
906 * Note that ill_forward_set() will set the ILLF_ROUTER on
907 * all the group members when it gets an ipmp-ill or under-ill.
908 */
909 if (!(dst_ill->ill_flags & ILLF_ROUTER)) {
910 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
911 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
912 freemsg(mp);
913 nce_refrele(nce);
914 return;
915 }
916
917 if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
918 ire->ire_ib_pkt_count--;
919 /*
920 * Should only use IREs that are visible from the
921 * global zone for forwarding.
922 * For IPv6 any source route would have already been
923 * advanced in ip_fanout_v6
924 */
925 ire = ire_route_recursive_v6(&ip6h->ip6_dst, 0, NULL,
926 GLOBAL_ZONEID, ira->ira_tsl, MATCH_IRE_SECATTR,
927 (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
928 ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
929 ire->ire_ib_pkt_count++;
930 (*ire->ire_recvfn)(ire, mp, ip6h, ira);
931 ire_refrele(ire);
932 nce_refrele(nce);
933 return;
934 }
	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
940 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
941
942 /* Initiate Read side IPPF processing */
943 if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
944 /* ip_process translates an IS_UNDER_IPMP */
945 mp = ip_process(IPP_FWD_IN, mp, ill, ill);
946 if (mp == NULL) {
947 /* ip_drop_packet and MIB done */
948 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred "
949 "during IPPF processing\n"));
950 nce_refrele(nce);
951 return;
952 }
953 }
954
955 DTRACE_PROBE4(ip6__forwarding__start,
956 ill_t *, ill, ill_t *, dst_ill, ip6_t *, ip6h, mblk_t *, mp);
957
958 if (HOOKS6_INTERESTED_FORWARDING(ipst)) {
959 int error;
960
961 FW_HOOKS(ipst->ips_ip6_forwarding_event,
962 ipst->ips_ipv6firewall_forwarding,
963 ill, dst_ill, ip6h, mp, mp, 0, ipst, error);
964
965 DTRACE_PROBE1(ip6__forwarding__end, mblk_t *, mp);
966
967 if (mp == NULL) {
968 nce_refrele(nce);
969 return;
970 }
971 /*
972 * Even if the destination was changed by the filter we use the
973 * forwarding decision that was made based on the address
974 * in ip_input.
975 */
976
977 /* Might have changed */
978 ip6h = (ip6_t *)mp->b_rptr;
979 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
980 }
981
982 /* Packet is being forwarded. Turning off hwcksum flag. */
983 DB_CKSUMFLAGS(mp) = 0;
984
985 /*
986 * Per RFC 3513 section 2.5.2, we must not forward packets with
987 * an unspecified source address.
988 * The loopback address check for both src and dst has already
989 * been checked in ip_input_v6
990 * In the future one can envision adding RPF checks using number 3.
991 */
992 switch (ipst->ips_src_check) {
993 case 0:
994 break;
995 case 1:
996 case 2:
997 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) ||
998 IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
999 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1000 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1001 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1002 nce_refrele(nce);
1003 freemsg(mp);
1004 return;
1005 }
1006 break;
1007 }
1008
1009 /*
1010 * Check to see if we're forwarding the packet to a
1011 * different link from which it came. If so, check the
1012 * source and destination addresses since routers must not
1013 * forward any packets with link-local source or
1014 * destination addresses to other links. Otherwise (if
1015 * we're forwarding onto the same link), conditionally send
1016 * a redirect message.
1017 */
1018 if (!IS_ON_SAME_LAN(dst_ill, ill)) {
1019 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst) ||
1020 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) {
1021 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1022 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1023 freemsg(mp);
1024 nce_refrele(nce);
1025 return;
1026 }
1027 /* TBD add site-local check at site boundary? */
1028 } else if (ipst->ips_ipv6_send_redirects) {
1029 ip_send_potential_redirect_v6(mp, ip6h, ire, ira);
1030 }
1031
1032 added_tx_len = 0;
1033 if (iraflags & IRAF_SYSTEM_LABELED) {
1034 mblk_t *mp1;
1035 uint32_t old_pkt_len = ira->ira_pktlen;
1036
1037 /*
1038 * Check if it can be forwarded and add/remove
1039 * CIPSO options as needed.
1040 */
1041 if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
1042 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1043 ip_drop_input("tsol_ip_forward", mp, ill);
1044 freemsg(mp);
1045 nce_refrele(nce);
1046 return;
1047 }
1048 /*
1049 * Size may have changed. Remember amount added in case
1050 * ip_fragment needs to send an ICMP too big.
1051 */
1052 mp = mp1;
1053 ip6h = (ip6_t *)mp->b_rptr;
1054 ira->ira_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN;
1055 ira->ira_ip_hdr_length = IPV6_HDR_LEN;
1056 ira->ira_ttl = ip6h->ip6_hlim;
1057 if (ira->ira_pktlen > old_pkt_len)
1058 added_tx_len = ira->ira_pktlen - old_pkt_len;
1059 }
1060
1061 mtu = dst_ill->ill_mtu;
1062 if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
1063 mtu = iremtu;
1064 ip_forward_xmit_v6(nce, mp, ip6h, ira, mtu, added_tx_len);
1065 nce_refrele(nce);
1066 return;
1067
1068 }
1069
1070 /*
1071 * Used for sending out unicast and multicast packets that are
1072 * forwarded.
1073 */
1074 void
ip_forward_xmit_v6(nce_t * nce,mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira,uint32_t mtu,uint32_t added_tx_len)1075 ip_forward_xmit_v6(nce_t *nce, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira,
1076 uint32_t mtu, uint32_t added_tx_len)
1077 {
1078 ill_t *dst_ill = nce->nce_ill;
1079 uint32_t pkt_len;
1080 iaflags_t iraflags = ira->ira_flags;
1081 ip_stack_t *ipst = dst_ill->ill_ipst;
1082
1083 if (ip6h->ip6_hops-- <= 1) {
1084 BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
1085 ip_drop_input("ICMP6_TIME_EXCEED_TRANSIT", mp, ira->ira_ill);
1086 icmp_time_exceeded_v6(mp, ICMP6_TIME_EXCEED_TRANSIT, B_FALSE,
1087 ira);
1088 return;
1089 }
1090
1091 /* Initiate Write side IPPF processing before any fragmentation */
1092 if (IPP_ENABLED(IPP_FWD_OUT, ipst)) {
1093 /* ip_process translates an IS_UNDER_IPMP */
1094 mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill);
1095 if (mp == NULL) {
1096 /* ip_drop_packet and MIB done */
1097 ip2dbg(("ire_recv_forward_v6: pkt dropped/deferred" \
1098 " during IPPF processing\n"));
1099 return;
1100 }
1101 }
1102
1103 pkt_len = ira->ira_pktlen;
1104
1105 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams);
1106
1107 if (pkt_len > mtu) {
1108 BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails);
1109 ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill);
1110 if (iraflags & IRAF_SYSTEM_LABELED) {
1111 /*
1112 * Remove any CIPSO option added by
1113 * tsol_ip_forward, and make sure we report
1114 * a path MTU so that there
1115 * is room to add such a CIPSO option for future
1116 * packets.
1117 */
1118 mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, AF_INET6);
1119 }
1120 icmp_pkt2big_v6(mp, mtu, B_TRUE, ira);
1121 return;
1122 }
1123
1124 ASSERT(pkt_len ==
1125 ntohs(((ip6_t *)mp->b_rptr)->ip6_plen) + IPV6_HDR_LEN);
1126
1127 if (iraflags & IRAF_LOOPBACK_COPY) {
1128 /*
1129 * IXAF_NO_LOOP_ZONEID is not set hence 6th arg
1130 * is don't care
1131 */
1132 (void) ip_postfrag_loopcheck(mp, nce,
1133 (IXAF_LOOPBACK_COPY | IXAF_NO_DEV_FLOW_CTL),
1134 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1135 } else {
1136 (void) ip_xmit(mp, nce, IXAF_NO_DEV_FLOW_CTL,
1137 pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL);
1138 }
1139 }
1140
1141 /*
1142 * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE,
1143 * which is what ire_route_recursive returns when there is no matching ire.
1144 * Send ICMP unreachable unless blackhole.
1145 */
1146 void
ire_recv_noroute_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)1147 ire_recv_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1148 {
1149 ip6_t *ip6h = (ip6_t *)iph_arg;
1150 ill_t *ill = ira->ira_ill;
1151 ip_stack_t *ipst = ill->ill_ipst;
1152
1153 /* Would we have forwarded this packet if we had a route? */
1154 if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
1155 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1156 ip_drop_input("l2 multicast not forwarded", mp, ill);
1157 freemsg(mp);
1158 return;
1159 }
1160
1161 if (!(ill->ill_flags & ILLF_ROUTER)) {
1162 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1163 ip_drop_input("ipIfStatsForwProhibits", mp, ill);
1164 freemsg(mp);
1165 return;
1166 }
1167 /*
1168 * If we had a route this could have been forwarded. Count as such.
1169 *
1170 * ipIfStatsHCInForwDatagrams should only be increment if there
1171 * will be an attempt to forward the packet, which is why we
1172 * increment after the above condition has been checked.
1173 */
1174 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);
1175
1176 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1177
1178 ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, RTA_DST,
1179 ipst);
1180
1181 if (ire->ire_flags & RTF_BLACKHOLE) {
1182 ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill);
1183 freemsg(mp);
1184 } else {
1185 ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill);
1186
1187 icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE,
1188 ira);
1189 }
1190 }
1191
1192 /*
1193 * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for
1194 * VRRP when in noaccept mode.
1195 * We silently drop packets except for Neighbor Solicitations and
1196 * Neighbor Advertisements.
1197 */
1198 void
ire_recv_noaccept_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)1199 ire_recv_noaccept_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1200 ip_recv_attr_t *ira)
1201 {
1202 ip6_t *ip6h = (ip6_t *)iph_arg;
1203 ill_t *ill = ira->ira_ill;
1204 icmp6_t *icmp6;
1205 int ip_hdr_length;
1206
1207 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
1208 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1209 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1210 freemsg(mp);
1211 return;
1212 }
1213 ip_hdr_length = ira->ira_ip_hdr_length;
1214 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
1215 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
1216 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
1217 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
1218 freemsg(mp);
1219 return;
1220 }
1221 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
1222 if (ip6h == NULL) {
1223 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1224 freemsg(mp);
1225 return;
1226 }
1227 }
1228 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
1229
1230 if (icmp6->icmp6_type != ND_NEIGHBOR_SOLICIT &&
1231 icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) {
1232 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1233 ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill);
1234 freemsg(mp);
1235 return;
1236 }
1237 ire_recv_local_v6(ire, mp, ip6h, ira);
1238 }
1239
1240 /*
1241 * ire_recvfn for IRE_MULTICAST.
1242 */
1243 void
ire_recv_multicast_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)1244 ire_recv_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg,
1245 ip_recv_attr_t *ira)
1246 {
1247 ip6_t *ip6h = (ip6_t *)iph_arg;
1248 ill_t *ill = ira->ira_ill;
1249
1250 ASSERT(ire->ire_ill == ira->ira_ill);
1251
1252 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts);
1253 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen);
1254
1255 /* Tag for higher-level protocols */
1256 ira->ira_flags |= IRAF_MULTICAST;
1257
1258 /*
1259 * So that we don't end up with dups, only one ill an IPMP group is
1260 * nominated to receive multicast traffic.
1261 * If we have no cast_ill we are liberal and accept everything.
1262 */
1263 if (IS_UNDER_IPMP(ill)) {
1264 ip_stack_t *ipst = ill->ill_ipst;
1265
1266 /* For an under ill_grp can change under lock */
1267 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1268 if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
1269 ill->ill_grp->ig_cast_ill != NULL) {
1270 rw_exit(&ipst->ips_ill_g_lock);
1271 ip_drop_input("not on cast ill", mp, ill);
1272 freemsg(mp);
1273 return;
1274 }
1275 rw_exit(&ipst->ips_ill_g_lock);
1276 /*
1277 * We switch to the upper ill so that mrouter and hasmembers
1278 * can operate on upper here and in ip_input_multicast.
1279 */
1280 ill = ipmp_ill_hold_ipmp_ill(ill);
1281 if (ill != NULL) {
1282 ASSERT(ill != ira->ira_ill);
1283 ASSERT(ire->ire_ill == ira->ira_ill);
1284 ira->ira_ill = ill;
1285 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1286 } else {
1287 ill = ira->ira_ill;
1288 }
1289 }
1290
1291 #ifdef notdef
1292 /*
1293 * Check if we are a multicast router - send ip_mforward a copy of
1294 * the packet.
1295 * Due to mroute_decap tunnels we consider forwarding packets even if
1296 * mrouted has not joined the allmulti group on this interface.
1297 */
1298 if (ipst->ips_ip_g_mrouter) {
1299 int retval;
1300
1301 /*
1302 * Clear the indication that this may have hardware
1303 * checksum as we are not using it for forwarding.
1304 */
1305 DB_CKSUMFLAGS(mp) = 0;
1306
1307 /*
1308 * ip_mforward helps us make these distinctions: If received
1309 * on tunnel and not IGMP, then drop.
1310 * If IGMP packet, then don't check membership
1311 * If received on a phyint and IGMP or PIM, then
1312 * don't check membership
1313 */
1314 retval = ip_mforward_v6(mp, ira);
1315 /* ip_mforward updates mib variables if needed */
1316
1317 switch (retval) {
1318 case 0:
1319 /*
1320 * pkt is okay and arrived on phyint.
1321 */
1322 break;
1323 case -1:
1324 /* pkt is mal-formed, toss it */
1325 freemsg(mp);
1326 goto done;
1327 case 1:
1328 /*
1329 * pkt is okay and arrived on a tunnel
1330 *
1331 * If we are running a multicast router
1332 * we need to see all mld packets, which
1333 * are marked with router alerts.
1334 */
1335 if (ira->ira_flags & IRAF_ROUTER_ALERT)
1336 goto forus;
1337 ip_drop_input("Multicast on tunnel ignored", mp, ill);
1338 freemsg(mp);
1339 goto done;
1340 }
1341 }
1342 #endif /* notdef */
1343
1344 /*
1345 * If this was a router alert we skip the group membership check.
1346 */
1347 if (ira->ira_flags & IRAF_ROUTER_ALERT)
1348 goto forus;
1349
1350 /*
1351 * Check if we have members on this ill. This is not necessary for
1352 * correctness because even if the NIC/GLD had a leaky filter, we
1353 * filter before passing to each conn_t.
1354 */
1355 if (!ill_hasmembers_v6(ill, &ip6h->ip6_dst)) {
1356 /*
1357 * Nobody interested
1358 *
1359 * This might just be caused by the fact that
1360 * multiple IP Multicast addresses map to the same
1361 * link layer multicast - no need to increment counter!
1362 */
1363 ip_drop_input("Multicast with no members", mp, ill);
1364 freemsg(mp);
1365 goto done;
1366 }
1367 forus:
1368 ip2dbg(("ire_recv_multicast_v6: multicast for us\n"));
1369
1370 /*
1371 * After reassembly and IPsec we will need to duplicate the
1372 * multicast packet for all matching zones on the ill.
1373 */
1374 ira->ira_zoneid = ALL_ZONES;
1375
1376 /* Reassemble on the ill on which the packet arrived */
1377 ip_input_local_v6(ire, mp, ip6h, ira);
1378 done:
1379 if (ill != ire->ire_ill) {
1380 ill_refrele(ill);
1381 ira->ira_ill = ire->ire_ill;
1382 ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
1383 }
1384 }
1385
1386 /*
1387 * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT.
1388 * Drop packets since we don't forward out multirt routes.
1389 */
1390 /* ARGSUSED */
1391 void
ire_recv_multirt_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)1392 ire_recv_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1393 {
1394 ill_t *ill = ira->ira_ill;
1395
1396 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes);
1397 ip_drop_input("Not forwarding out MULTIRT", mp, ill);
1398 freemsg(mp);
1399 }
1400
1401 /*
1402 * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK
1403 * has rewritten the packet to have a loopback destination address (We
1404 * filter out packet with a loopback destination from arriving over the wire).
1405 * We don't know what zone to use, thus we always use the GLOBAL_ZONEID.
1406 */
1407 void
ire_recv_loopback_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)1408 ire_recv_loopback_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1409 {
1410 ip6_t *ip6h = (ip6_t *)iph_arg;
1411 ill_t *ill = ira->ira_ill;
1412 ill_t *ire_ill = ire->ire_ill;
1413
1414 ira->ira_zoneid = GLOBAL_ZONEID;
1415
1416 /* Switch to the lo0 ill for further processing */
1417 if (ire_ill != ill) {
1418 /*
1419 * Update ira_ill to be the ILL on which the IP address
1420 * is hosted.
1421 * No need to hold the ill since we have a hold on the ire
1422 */
1423 ASSERT(ira->ira_ill == ira->ira_rill);
1424 ira->ira_ill = ire_ill;
1425
1426 ip_input_local_v6(ire, mp, ip6h, ira);
1427
1428 /* Restore */
1429 ASSERT(ira->ira_ill == ire_ill);
1430 ira->ira_ill = ill;
1431 return;
1432
1433 }
1434 ip_input_local_v6(ire, mp, ip6h, ira);
1435 }
1436
1437 /*
1438 * ire_recvfn for IRE_LOCAL.
1439 */
1440 void
ire_recv_local_v6(ire_t * ire,mblk_t * mp,void * iph_arg,ip_recv_attr_t * ira)1441 ire_recv_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
1442 {
1443 ip6_t *ip6h = (ip6_t *)iph_arg;
1444 ill_t *ill = ira->ira_ill;
1445 ill_t *ire_ill = ire->ire_ill;
1446
1447 /* Make a note for DAD that this address is in use */
1448 ire->ire_last_used_time = LBOLT_FASTPATH;
1449
1450 /* Only target the IRE_LOCAL with the right zoneid. */
1451 ira->ira_zoneid = ire->ire_zoneid;
1452
1453 /*
1454 * If the packet arrived on the wrong ill, we check that
1455 * this is ok.
1456 * If it is, then we ensure that we do the reassembly on
1457 * the ill on which the address is hosted. We keep ira_rill as
1458 * the one on which the packet arrived, so that IP_PKTINFO and
1459 * friends can report this.
1460 */
1461 if (ire_ill != ill) {
1462 ire_t *new_ire;
1463
1464 new_ire = ip_check_multihome(&ip6h->ip6_dst, ire, ill);
1465 if (new_ire == NULL) {
1466 /* Drop packet */
1467 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
1468 ip_drop_input("ipIfStatsInForwProhibits", mp, ill);
1469 freemsg(mp);
1470 return;
1471 }
1472 /*
1473 * Update ira_ill to be the ILL on which the IP address
1474 * is hosted. No need to hold the ill since we have a
1475 * hold on the ire. Note that we do the switch even if
1476 * new_ire == ire (for IPMP, ire would be the one corresponding
1477 * to the IPMP ill).
1478 */
1479 ASSERT(ira->ira_ill == ira->ira_rill);
1480 ira->ira_ill = new_ire->ire_ill;
1481
1482 /* ira_ruifindex tracks the upper for ira_rill */
1483 if (IS_UNDER_IPMP(ill))
1484 ira->ira_ruifindex = ill_get_upper_ifindex(ill);
1485
1486 ip_input_local_v6(new_ire, mp, ip6h, ira);
1487
1488 /* Restore */
1489 ASSERT(ira->ira_ill == new_ire->ire_ill);
1490 ira->ira_ill = ill;
1491 ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex;
1492
1493 if (new_ire != ire)
1494 ire_refrele(new_ire);
1495 return;
1496 }
1497
1498 ip_input_local_v6(ire, mp, ip6h, ira);
1499 }
1500
1501 /*
1502 * Common function for packets arriving for the host. Handles
1503 * checksum verification, reassembly checks, etc.
1504 */
1505 static void
ip_input_local_v6(ire_t * ire,mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)1506 ip_input_local_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1507 {
1508 iaflags_t iraflags = ira->ira_flags;
1509
1510 /*
1511 * For multicast we need some extra work before
1512 * we call ip_fanout_v6(), since in the case of shared-IP zones
1513 * we need to pretend that a packet arrived for each zoneid.
1514 */
1515 if (iraflags & IRAF_MULTICAST) {
1516 ip_input_multicast_v6(ire, mp, ip6h, ira);
1517 return;
1518 }
1519 ip_fanout_v6(mp, ip6h, ira);
1520 }
1521
1522 /*
1523 * Handle multiple zones which want to receive the same multicast packets
1524 * on this ill by delivering a packet to each of them.
1525 *
1526 * Note that for packets delivered to transports we could instead do this
1527 * as part of the fanout code, but since we need to handle icmp_inbound
1528 * it is simpler to have multicast work the same as IPv4 broadcast.
1529 *
1530 * The ip_fanout matching for multicast matches based on ilm independent of
1531 * zoneid since the zoneid restriction is applied when joining a multicast
1532 * group.
1533 */
1534 /* ARGSUSED */
1535 static void
ip_input_multicast_v6(ire_t * ire,mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)1536 ip_input_multicast_v6(ire_t *ire, mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1537 {
1538 ill_t *ill = ira->ira_ill;
1539 iaflags_t iraflags = ira->ira_flags;
1540 ip_stack_t *ipst = ill->ill_ipst;
1541 netstack_t *ns = ipst->ips_netstack;
1542 zoneid_t zoneid;
1543 mblk_t *mp1;
1544 ip6_t *ip6h1;
1545 uint_t ira_pktlen = ira->ira_pktlen;
1546 uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length;
1547
1548 /* ire_recv_multicast has switched to the upper ill for IPMP */
1549 ASSERT(!IS_UNDER_IPMP(ill));
1550
1551 /*
1552 * If we don't have more than one shared-IP zone, or if
1553 * there are no members in anything but the global zone,
1554 * then just set the zoneid and proceed.
1555 */
1556 if (ns->netstack_numzones == 1 ||
1557 !ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst,
1558 GLOBAL_ZONEID)) {
1559 ira->ira_zoneid = GLOBAL_ZONEID;
1560
1561 /* If sender didn't want this zone to receive it, drop */
1562 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1563 ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1564 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1565 freemsg(mp);
1566 return;
1567 }
1568 ip_fanout_v6(mp, ip6h, ira);
1569 return;
1570 }
1571
1572 /*
1573 * Here we loop over all zoneids that have members in the group
1574 * and deliver a packet to ip_fanout for each zoneid.
1575 *
1576 * First find any members in the lowest numeric zoneid by looking for
1577 * first zoneid larger than -1 (ALL_ZONES).
1578 * We terminate the loop when we receive -1 (ALL_ZONES).
1579 */
1580 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, ALL_ZONES);
1581 for (; zoneid != ALL_ZONES;
1582 zoneid = ill_hasmembers_nextzone_v6(ill, &ip6h->ip6_dst, zoneid)) {
1583 /*
1584 * Avoid an extra copymsg/freemsg by skipping global zone here
1585 * and doing that at the end.
1586 */
1587 if (zoneid == GLOBAL_ZONEID)
1588 continue;
1589
1590 ira->ira_zoneid = zoneid;
1591
1592 /* If sender didn't want this zone to receive it, skip */
1593 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1594 ira->ira_no_loop_zoneid == ira->ira_zoneid)
1595 continue;
1596
1597 mp1 = copymsg(mp);
1598 if (mp1 == NULL) {
1599 /* Failed to deliver to one zone */
1600 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1601 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1602 continue;
1603 }
1604 ip6h1 = (ip6_t *)mp1->b_rptr;
1605 ip_fanout_v6(mp1, ip6h1, ira);
1606 /*
1607 * IPsec might have modified ira_pktlen and ira_ip_hdr_length
1608 * so we restore them for a potential next iteration
1609 */
1610 ira->ira_pktlen = ira_pktlen;
1611 ira->ira_ip_hdr_length = ira_ip_hdr_length;
1612 }
1613
1614 /* Do the main ire */
1615 ira->ira_zoneid = GLOBAL_ZONEID;
1616 /* If sender didn't want this zone to receive it, drop */
1617 if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) &&
1618 ira->ira_no_loop_zoneid == ira->ira_zoneid) {
1619 ip_drop_input("Multicast but wrong zoneid", mp, ill);
1620 freemsg(mp);
1621 } else {
1622 ip_fanout_v6(mp, ip6h, ira);
1623 }
1624 }
1625
1626
1627 /*
1628 * Determine the zoneid and IRAF_TX_MAC_EXEMPTABLE if trusted extensions
1629 * is in use. Updates ira_zoneid and ira_flags as a result.
1630 */
1631 static void
ip_fanout_tx_v6(mblk_t * mp,ip6_t * ip6h,uint8_t protocol,uint_t ip_hdr_length,ip_recv_attr_t * ira)1632 ip_fanout_tx_v6(mblk_t *mp, ip6_t *ip6h, uint8_t protocol, uint_t ip_hdr_length,
1633 ip_recv_attr_t *ira)
1634 {
1635 uint16_t *up;
1636 uint16_t lport;
1637 zoneid_t zoneid;
1638
1639 ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED);
1640
1641 /*
1642 * If the packet is unlabeled we might allow read-down
1643 * for MAC_EXEMPT. Below we clear this if it is a multi-level
1644 * port (MLP).
1645 * Note that ira_tsl can be NULL here.
1646 */
1647 if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED)
1648 ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE;
1649
1650 if (ira->ira_zoneid != ALL_ZONES)
1651 return;
1652
1653 ira->ira_flags |= IRAF_TX_SHARED_ADDR;
1654
1655 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
1656 switch (protocol) {
1657 case IPPROTO_TCP:
1658 case IPPROTO_SCTP:
1659 case IPPROTO_UDP:
1660 /* Caller ensures this */
1661 ASSERT(((uchar_t *)ip6h) + ip_hdr_length +4 <= mp->b_wptr);
1662
1663 /*
1664 * Only these transports support MLP.
1665 * We know their destination port numbers is in
1666 * the same place in the header.
1667 */
1668 lport = up[1];
1669
1670 /*
1671 * No need to handle exclusive-stack zones
1672 * since ALL_ZONES only applies to the shared IP instance.
1673 */
1674 zoneid = tsol_mlp_findzone(protocol, lport);
1675 /*
1676 * If no shared MLP is found, tsol_mlp_findzone returns
1677 * ALL_ZONES. In that case, we assume it's SLP, and
1678 * search for the zone based on the packet label.
1679 *
1680 * If there is such a zone, we prefer to find a
1681 * connection in it. Otherwise, we look for a
1682 * MAC-exempt connection in any zone whose label
1683 * dominates the default label on the packet.
1684 */
1685 if (zoneid == ALL_ZONES)
1686 zoneid = tsol_attr_to_zoneid(ira);
1687 else
1688 ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE;
1689 break;
1690 default:
1691 /* Handle shared address for other protocols */
1692 zoneid = tsol_attr_to_zoneid(ira);
1693 break;
1694 }
1695 ira->ira_zoneid = zoneid;
1696 }
1697
1698 /*
1699 * Increment checksum failure statistics
1700 */
1701 static void
ip_input_cksum_err_v6(uint8_t protocol,uint16_t hck_flags,ill_t * ill)1702 ip_input_cksum_err_v6(uint8_t protocol, uint16_t hck_flags, ill_t *ill)
1703 {
1704 ip_stack_t *ipst = ill->ill_ipst;
1705
1706 switch (protocol) {
1707 case IPPROTO_TCP:
1708 BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs);
1709
1710 if (hck_flags & HCK_FULLCKSUM)
1711 IP6_STAT(ipst, ip6_tcp_in_full_hw_cksum_err);
1712 else if (hck_flags & HCK_PARTIALCKSUM)
1713 IP6_STAT(ipst, ip6_tcp_in_part_hw_cksum_err);
1714 else
1715 IP6_STAT(ipst, ip6_tcp_in_sw_cksum_err);
1716 break;
1717 case IPPROTO_UDP:
1718 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1719 if (hck_flags & HCK_FULLCKSUM)
1720 IP6_STAT(ipst, ip6_udp_in_full_hw_cksum_err);
1721 else if (hck_flags & HCK_PARTIALCKSUM)
1722 IP6_STAT(ipst, ip6_udp_in_part_hw_cksum_err);
1723 else
1724 IP6_STAT(ipst, ip6_udp_in_sw_cksum_err);
1725 break;
1726 case IPPROTO_ICMPV6:
1727 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
1728 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1729 break;
1730 default:
1731 ASSERT(0);
1732 break;
1733 }
1734 }
1735
1736 /* Calculate the IPv6 pseudo-header checksum for TCP, UDP, and ICMPV6 */
1737 uint32_t
ip_input_cksum_pseudo_v6(ip6_t * ip6h,ip_recv_attr_t * ira)1738 ip_input_cksum_pseudo_v6(ip6_t *ip6h, ip_recv_attr_t *ira)
1739 {
1740 uint_t ulp_len;
1741 uint32_t cksum;
1742 uint8_t protocol = ira->ira_protocol;
1743 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
1744
1745 #define iphs ((uint16_t *)ip6h)
1746
1747 switch (protocol) {
1748 case IPPROTO_TCP:
1749 ulp_len = ira->ira_pktlen - ip_hdr_length;
1750
1751 /* Protocol and length */
1752 cksum = htons(ulp_len) + IP_TCP_CSUM_COMP;
1753 /* IP addresses */
1754 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1755 iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1756 iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1757 iphs[16] + iphs[17] + iphs[18] + iphs[19];
1758 break;
1759
1760 case IPPROTO_UDP: {
1761 udpha_t *udpha;
1762
1763 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length);
1764
1765 /* Protocol and length */
1766 cksum = udpha->uha_length + IP_UDP_CSUM_COMP;
1767 /* IP addresses */
1768 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1769 iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1770 iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1771 iphs[16] + iphs[17] + iphs[18] + iphs[19];
1772 break;
1773 }
1774 case IPPROTO_ICMPV6:
1775 ulp_len = ira->ira_pktlen - ip_hdr_length;
1776
1777 /* Protocol and length */
1778 cksum = htons(ulp_len) + IP_ICMPV6_CSUM_COMP;
1779 /* IP addresses */
1780 cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] +
1781 iphs[8] + iphs[9] + iphs[10] + iphs[11] +
1782 iphs[12] + iphs[13] + iphs[14] + iphs[15] +
1783 iphs[16] + iphs[17] + iphs[18] + iphs[19];
1784 break;
1785 default:
1786 cksum = 0;
1787 break;
1788 }
1789 #undef iphs
1790 return (cksum);
1791 }
1792
1793
1794 /*
1795 * Software verification of the ULP checksums.
1796 * Returns B_TRUE if ok.
1797 * Increments statistics of failed.
1798 */
1799 static boolean_t
ip_input_sw_cksum_v6(mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)1800 ip_input_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1801 {
1802 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
1803 uint32_t cksum;
1804 uint8_t protocol = ira->ira_protocol;
1805 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
1806
1807 IP6_STAT(ipst, ip6_in_sw_cksum);
1808
1809 ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP ||
1810 protocol == IPPROTO_ICMPV6);
1811
1812 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1813 cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1814 if (cksum == 0)
1815 return (B_TRUE);
1816
1817 ip_input_cksum_err_v6(protocol, 0, ira->ira_ill);
1818 return (B_FALSE);
1819 }
1820
1821 /*
1822 * Verify the ULP checksums.
1823 * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum
1824 * algorithm.
1825 * Increments statistics if failed.
1826 */
1827 static boolean_t
ip_input_cksum_v6(iaflags_t iraflags,mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)1828 ip_input_cksum_v6(iaflags_t iraflags, mblk_t *mp, ip6_t *ip6h,
1829 ip_recv_attr_t *ira)
1830 {
1831 ill_t *ill = ira->ira_rill;
1832 uint16_t hck_flags;
1833 uint32_t cksum;
1834 mblk_t *mp1;
1835 uint_t len;
1836 uint8_t protocol = ira->ira_protocol;
1837 uint16_t ip_hdr_length = ira->ira_ip_hdr_length;
1838
1839
1840 switch (protocol) {
1841 case IPPROTO_TCP:
1842 case IPPROTO_ICMPV6:
1843 break;
1844
1845 case IPPROTO_UDP: {
1846 udpha_t *udpha;
1847
1848 udpha = (udpha_t *)((uchar_t *)ip6h + ip_hdr_length);
1849 /*
1850 * Before going through the regular checksum
1851 * calculation, make sure the received checksum
1852 * is non-zero. RFC 2460 says, a 0x0000 checksum
1853 * in a UDP packet (within IPv6 packet) is invalid
1854 * and should be replaced by 0xffff. This makes
1855 * sense as regular checksum calculation will
1856 * pass for both the cases i.e. 0x0000 and 0xffff.
1857 * Removing one of the case makes error detection
1858 * stronger.
1859 */
1860 if (udpha->uha_checksum == 0) {
1861 /* 0x0000 checksum is invalid */
1862 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs);
1863 return (B_FALSE);
1864 }
1865 break;
1866 }
1867 case IPPROTO_SCTP: {
1868 sctp_hdr_t *sctph;
1869 uint32_t pktsum;
1870
1871 sctph = (sctp_hdr_t *)((uchar_t *)ip6h + ip_hdr_length);
1872 #ifdef DEBUG
1873 if (skip_sctp_cksum)
1874 return (B_TRUE);
1875 #endif
1876 pktsum = sctph->sh_chksum;
1877 sctph->sh_chksum = 0;
1878 cksum = sctp_cksum(mp, ip_hdr_length);
1879 sctph->sh_chksum = pktsum;
1880 if (cksum == pktsum)
1881 return (B_TRUE);
1882
1883 /*
1884 * Defer until later whether a bad checksum is ok
1885 * in order to allow RAW sockets to use Adler checksum
1886 * with SCTP.
1887 */
1888 ira->ira_flags |= IRAF_SCTP_CSUM_ERR;
1889 return (B_TRUE);
1890 }
1891
1892 default:
1893 /* No ULP checksum to verify. */
1894 return (B_TRUE);
1895 }
1896
1897 /*
1898 * Revert to software checksum calculation if the interface
1899 * isn't capable of checksum offload.
1900 * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout.
1901 * Note: IRAF_NO_HW_CKSUM is not currently used.
1902 */
1903 ASSERT(!IS_IPMP(ill));
1904 if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1905 !dohwcksum) {
1906 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1907 }
1908
1909 hck_flags = DB_CKSUMFLAGS(mp);
1910
1911 /*
1912 * We apply this for all ULP protocols. Does the HW know to
1913 * not set the flags for SCTP and other protocols.
1914 */
1915 if (hck_flags & HCK_FULLCKSUM_OK) {
1916 /*
1917 * Hardware has already verified the checksum.
1918 */
1919 return (B_TRUE);
1920 }
1921
1922 if (hck_flags & HCK_FULLCKSUM) {
1923 /*
1924 * Full checksum has been computed by the hardware
1925 * and has been attached. If the driver wants us to
1926 * verify the correctness of the attached value, in
1927 * order to protect against faulty hardware, compare
1928 * it against -0 (0xFFFF) to see if it's valid.
1929 */
1930 cksum = DB_CKSUM16(mp);
1931 if (cksum == 0xFFFF)
1932 return (B_TRUE);
1933 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1934 return (B_FALSE);
1935 }
1936
1937 mp1 = mp->b_cont;
1938 if ((hck_flags & HCK_PARTIALCKSUM) &&
1939 (mp1 == NULL || mp1->b_cont == NULL) &&
1940 ip_hdr_length >= DB_CKSUMSTART(mp) &&
1941 ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) {
1942 uint32_t adj;
1943 uchar_t *cksum_start;
1944
1945 cksum = ip_input_cksum_pseudo_v6(ip6h, ira);
1946
1947 cksum_start = ((uchar_t *)ip6h + DB_CKSUMSTART(mp));
1948
1949 /*
1950 * Partial checksum has been calculated by hardware
1951 * and attached to the packet; in addition, any
1952 * prepended extraneous data is even byte aligned,
1953 * and there are at most two mblks associated with
1954 * the packet. If any such data exists, we adjust
1955 * the checksum; also take care any postpended data.
1956 */
1957 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj);
1958 /*
1959 * One's complement subtract extraneous checksum
1960 */
1961 cksum += DB_CKSUM16(mp);
1962 if (adj >= cksum)
1963 cksum = ~(adj - cksum) & 0xFFFF;
1964 else
1965 cksum -= adj;
1966 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1967 cksum = (cksum & 0xFFFF) + ((int)cksum >> 16);
1968 if (!(~cksum & 0xFFFF))
1969 return (B_TRUE);
1970
1971 ip_input_cksum_err_v6(protocol, hck_flags, ira->ira_ill);
1972 return (B_FALSE);
1973 }
1974 return (ip_input_sw_cksum_v6(mp, ip6h, ira));
1975 }
1976
1977
1978 /*
1979 * Handle fanout of received packets.
1980 * Unicast packets that are looped back (from ire_send_local_v6) and packets
1981 * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM.
1982 *
1983 * IPQoS Notes
1984 * Before sending it to the client, invoke IPPF processing. Policy processing
1985 * takes place only if the callout_position, IPP_LOCAL_IN, is enabled.
1986 */
1987 void
ip_fanout_v6(mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)1988 ip_fanout_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
1989 {
1990 ill_t *ill = ira->ira_ill;
1991 iaflags_t iraflags = ira->ira_flags;
1992 ip_stack_t *ipst = ill->ill_ipst;
1993 uint8_t protocol;
1994 conn_t *connp;
1995 #define rptr ((uchar_t *)ip6h)
1996 uint_t ip_hdr_length;
1997 uint_t min_ulp_header_length;
1998 int offset;
1999 ssize_t len;
2000 netstack_t *ns = ipst->ips_netstack;
2001 ipsec_stack_t *ipss = ns->netstack_ipsec;
2002 ill_t *rill = ira->ira_rill;
2003
2004 ASSERT(ira->ira_pktlen == ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN);
2005
2006 /*
2007 * We repeat this as we parse over destination options header and
2008 * fragment headers (earlier we've handled any hop-by-hop options
2009 * header.)
2010 * We update ira_protocol and ira_ip_hdr_length as we skip past
2011 * the intermediate headers; they already point past any
2012 * hop-by-hop header.
2013 */
2014 repeat:
2015 protocol = ira->ira_protocol;
2016 ip_hdr_length = ira->ira_ip_hdr_length;
2017
2018 /*
2019 * Time for IPP once we've done reassembly and IPsec.
2020 * We skip this for loopback packets since we don't do IPQoS
2021 * on loopback.
2022 */
2023 if (IPP_ENABLED(IPP_LOCAL_IN, ipst) &&
2024 !(iraflags & IRAF_LOOPBACK) &&
2025 (protocol != IPPROTO_ESP && protocol != IPPROTO_AH &&
2026 protocol != IPPROTO_DSTOPTS && protocol != IPPROTO_ROUTING &&
2027 protocol != IPPROTO_FRAGMENT)) {
2028 /*
2029 * Use the interface on which the packet arrived - not where
2030 * the IP address is hosted.
2031 */
2032 /* ip_process translates an IS_UNDER_IPMP */
2033 mp = ip_process(IPP_LOCAL_IN, mp, rill, ill);
2034 if (mp == NULL) {
2035 /* ip_drop_packet and MIB done */
2036 return;
2037 }
2038 }
2039
2040 /* Determine the minimum required size of the upper-layer header */
2041 /* Need to do this for at least the set of ULPs that TX handles. */
2042 switch (protocol) {
2043 case IPPROTO_TCP:
2044 min_ulp_header_length = TCP_MIN_HEADER_LENGTH;
2045 break;
2046 case IPPROTO_SCTP:
2047 min_ulp_header_length = SCTP_COMMON_HDR_LENGTH;
2048 break;
2049 case IPPROTO_UDP:
2050 min_ulp_header_length = UDPH_SIZE;
2051 break;
2052 case IPPROTO_ICMP:
2053 case IPPROTO_ICMPV6:
2054 min_ulp_header_length = ICMPH_SIZE;
2055 break;
2056 case IPPROTO_FRAGMENT:
2057 case IPPROTO_DSTOPTS:
2058 case IPPROTO_ROUTING:
2059 min_ulp_header_length = MIN_EHDR_LEN;
2060 break;
2061 default:
2062 min_ulp_header_length = 0;
2063 break;
2064 }
2065 /* Make sure we have the min ULP header length */
2066 len = mp->b_wptr - rptr;
2067 if (len < ip_hdr_length + min_ulp_header_length) {
2068 if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length)
2069 goto pkt_too_short;
2070
2071 IP6_STAT(ipst, ip6_recv_pullup);
2072 ip6h = ip_pullup(mp, ip_hdr_length + min_ulp_header_length,
2073 ira);
2074 if (ip6h == NULL)
2075 goto discard;
2076 len = mp->b_wptr - rptr;
2077 }
2078
2079 /*
2080 * If trusted extensions then determine the zoneid and TX specific
2081 * ira_flags.
2082 */
2083 if (iraflags & IRAF_SYSTEM_LABELED) {
2084 /* This can update ira->ira_flags and ira->ira_zoneid */
2085 ip_fanout_tx_v6(mp, ip6h, protocol, ip_hdr_length, ira);
2086 iraflags = ira->ira_flags;
2087 }
2088
2089
2090 /* Verify ULP checksum. Handles TCP, UDP, and SCTP */
2091 if (iraflags & IRAF_VERIFY_ULP_CKSUM) {
2092 if (!ip_input_cksum_v6(iraflags, mp, ip6h, ira)) {
2093 /* Bad checksum. Stats are already incremented */
2094 ip_drop_input("Bad ULP checksum", mp, ill);
2095 freemsg(mp);
2096 return;
2097 }
2098 /* IRAF_SCTP_CSUM_ERR could have been set */
2099 iraflags = ira->ira_flags;
2100 }
2101 switch (protocol) {
2102 case IPPROTO_TCP:
2103 /* For TCP, discard multicast packets. */
2104 if (iraflags & IRAF_MULTIBROADCAST)
2105 goto discard;
2106
2107 /* First mblk contains IP+TCP headers per above check */
2108 ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH);
2109
2110 /* TCP options present? */
2111 offset = ((uchar_t *)ip6h)[ip_hdr_length + 12] >> 4;
2112 if (offset != 5) {
2113 if (offset < 5)
2114 goto discard;
2115
2116 /*
2117 * There must be TCP options.
2118 * Make sure we can grab them.
2119 */
2120 offset <<= 2;
2121 offset += ip_hdr_length;
2122 if (len < offset) {
2123 if (ira->ira_pktlen < offset)
2124 goto pkt_too_short;
2125
2126 IP6_STAT(ipst, ip6_recv_pullup);
2127 ip6h = ip_pullup(mp, offset, ira);
2128 if (ip6h == NULL)
2129 goto discard;
2130 len = mp->b_wptr - rptr;
2131 }
2132 }
2133
2134 /*
2135 * Pass up a squeue hint to tcp.
2136 * If ira_sqp is already set (this is loopback) we leave it
2137 * alone.
2138 */
2139 if (ira->ira_sqp == NULL) {
2140 ira->ira_sqp = ip_squeue_get(ira->ira_ring);
2141 }
2142
2143 /* Look for AF_INET or AF_INET6 that matches */
2144 connp = ipcl_classify_v6(mp, IPPROTO_TCP, ip_hdr_length,
2145 ira, ipst);
2146 if (connp == NULL) {
2147 /* Send the TH_RST */
2148 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2149 tcp_xmit_listeners_reset(mp, ira, ipst, NULL);
2150 return;
2151 }
2152 if (connp->conn_min_ttl != 0 &&
2153 connp->conn_min_ttl > ira->ira_ttl) {
2154 CONN_DEC_REF(connp);
2155 goto discard;
2156 }
2157 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2158 (iraflags & IRAF_IPSEC_SECURE)) {
2159 mp = ipsec_check_inbound_policy(mp, connp,
2160 NULL, ip6h, ira);
2161 if (mp == NULL) {
2162 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2163 /* Note that mp is NULL */
2164 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2165 CONN_DEC_REF(connp);
2166 return;
2167 }
2168 }
2169 /* Found a client; up it goes */
2170 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2171 ira->ira_ill = ira->ira_rill = NULL;
2172 if (!IPCL_IS_TCP(connp)) {
2173 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
2174 (connp->conn_recv)(connp, mp, NULL, ira);
2175 CONN_DEC_REF(connp);
2176 ira->ira_ill = ill;
2177 ira->ira_rill = rill;
2178 return;
2179 }
2180
2181 /*
2182 * We do different processing whether called from
2183 * ip_accept_tcp and we match the target, don't match
2184 * the target, and when we are called by ip_input.
2185 */
2186 if (iraflags & IRAF_TARGET_SQP) {
2187 if (ira->ira_target_sqp == connp->conn_sqp) {
2188 mblk_t *attrmp;
2189
2190 attrmp = ip_recv_attr_to_mblk(ira);
2191 if (attrmp == NULL) {
2192 BUMP_MIB(ill->ill_ip_mib,
2193 ipIfStatsInDiscards);
2194 ip_drop_input("ipIfStatsInDiscards",
2195 mp, ill);
2196 freemsg(mp);
2197 CONN_DEC_REF(connp);
2198 } else {
2199 SET_SQUEUE(attrmp, connp->conn_recv,
2200 connp);
2201 attrmp->b_cont = mp;
2202 ASSERT(ira->ira_target_sqp_mp == NULL);
2203 ira->ira_target_sqp_mp = attrmp;
2204 /*
2205 * Conn ref release when drained from
2206 * the squeue.
2207 */
2208 }
2209 } else {
2210 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
2211 connp->conn_recv, connp, ira, SQ_FILL,
2212 SQTAG_IP6_TCP_INPUT);
2213 }
2214 } else {
2215 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv,
2216 connp, ira, ip_squeue_flag, SQTAG_IP6_TCP_INPUT);
2217 }
2218 ira->ira_ill = ill;
2219 ira->ira_rill = rill;
2220 return;
2221
2222 case IPPROTO_SCTP: {
2223 sctp_hdr_t *sctph;
2224 uint32_t ports; /* Source and destination ports */
2225 sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp;
2226
2227 /* For SCTP, discard multicast packets. */
2228 if (iraflags & IRAF_MULTIBROADCAST)
2229 goto discard;
2230
2231 /*
2232 * Since there is no SCTP h/w cksum support yet, just
2233 * clear the flag.
2234 */
2235 DB_CKSUMFLAGS(mp) = 0;
2236
2237 /* Length ensured above */
2238 ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH);
2239 sctph = (sctp_hdr_t *)(rptr + ip_hdr_length);
2240
2241 /* get the ports */
2242 ports = *(uint32_t *)&sctph->sh_sport;
2243
2244 if (iraflags & IRAF_SCTP_CSUM_ERR) {
2245 /*
2246 * No potential sctp checksum errors go to the Sun
2247 * sctp stack however they might be Adler-32 summed
2248 * packets a userland stack bound to a raw IP socket
2249 * could reasonably use. Note though that Adler-32 is
2250 * a long deprecated algorithm and customer sctp
2251 * networks should eventually migrate to CRC-32 at
2252 * which time this facility should be removed.
2253 */
2254 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2255 return;
2256 }
2257 connp = sctp_fanout(&ip6h->ip6_src, &ip6h->ip6_dst, ports,
2258 ira, mp, sctps, sctph);
2259 if (connp == NULL) {
2260 /* Check for raw socket or OOTB handling */
2261 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2262 return;
2263 }
2264 if (connp->conn_incoming_ifindex != 0 &&
2265 connp->conn_incoming_ifindex != ira->ira_ruifindex) {
2266 CONN_DEC_REF(connp);
2267
2268 /* Check for raw socket or OOTB handling */
2269 ip_fanout_sctp_raw(mp, NULL, ip6h, ports, ira);
2270 return;
2271 }
2272
2273 /* Found a client; up it goes */
2274 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2275 sctp_input(connp, NULL, ip6h, mp, ira);
2276 /* sctp_input does a rele of the sctp_t */
2277 return;
2278 }
2279
2280 case IPPROTO_UDP:
2281 /* First mblk contains IP+UDP headers as checked above */
2282 ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE);
2283
2284 if (iraflags & IRAF_MULTIBROADCAST) {
2285 uint16_t *up; /* Pointer to ports in ULP header */
2286
2287 up = (uint16_t *)((uchar_t *)ip6h + ip_hdr_length);
2288
2289 ip_fanout_udp_multi_v6(mp, ip6h, up[1], up[0], ira);
2290 return;
2291 }
2292
2293 /* Look for AF_INET or AF_INET6 that matches */
2294 connp = ipcl_classify_v6(mp, IPPROTO_UDP, ip_hdr_length,
2295 ira, ipst);
2296 if (connp == NULL) {
2297 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].
2298 connf_head != NULL) {
2299 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2300 ip_fanout_proto_v6(mp, ip6h, ira);
2301 } else {
2302 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2303 ICMP6_DST_UNREACH_NOPORT, ira);
2304 }
2305 return;
2306
2307 }
2308 if (connp->conn_min_ttl != 0 &&
2309 connp->conn_min_ttl > ira->ira_ttl) {
2310 CONN_DEC_REF(connp);
2311 goto discard;
2312 }
2313 if (IPCL_IS_NONSTR(connp) ? connp->conn_flow_cntrld :
2314 !canputnext(connp->conn_rq)) {
2315 CONN_DEC_REF(connp);
2316 BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows);
2317 ip_drop_input("udpIfStatsInOverflows", mp, ill);
2318 freemsg(mp);
2319 return;
2320 }
2321 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
2322 (iraflags & IRAF_IPSEC_SECURE)) {
2323 mp = ipsec_check_inbound_policy(mp, connp,
2324 NULL, ip6h, ira);
2325 if (mp == NULL) {
2326 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2327 /* Note that mp is NULL */
2328 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2329 CONN_DEC_REF(connp);
2330 return;
2331 }
2332 }
2333
2334 /* Found a client; up it goes */
2335 IP6_STAT(ipst, ip6_udp_fannorm);
2336 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2337 ira->ira_ill = ira->ira_rill = NULL;
2338 (connp->conn_recv)(connp, mp, NULL, ira);
2339 CONN_DEC_REF(connp);
2340 ira->ira_ill = ill;
2341 ira->ira_rill = rill;
2342 return;
2343 default:
2344 break;
2345 }
2346
2347 /*
2348 * Clear hardware checksumming flag as it is currently only
2349 * used by TCP and UDP.
2350 */
2351 DB_CKSUMFLAGS(mp) = 0;
2352
2353 switch (protocol) {
2354 case IPPROTO_ICMPV6:
2355 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
2356
2357 /* Check variable for testing applications */
2358 if (ipst->ips_ipv6_drop_inbound_icmpv6) {
2359 ip_drop_input("ipv6_drop_inbound_icmpv6", mp, ill);
2360 freemsg(mp);
2361 return;
2362 }
2363 /*
2364 * We need to accomodate icmp messages coming in clear
2365 * until we get everything secure from the wire. If
2366 * icmp_accept_clear_messages is zero we check with
2367 * the global policy and act accordingly. If it is
2368 * non-zero, we accept the message without any checks.
2369 * But *this does not mean* that this will be delivered
2370 * to RAW socket clients. By accepting we might send
2371 * replies back, change our MTU value etc.,
2372 * but delivery to the ULP/clients depends on their
2373 * policy dispositions.
2374 */
2375 if (ipst->ips_icmp_accept_clear_messages == 0) {
2376 mp = ipsec_check_global_policy(mp, NULL,
2377 NULL, ip6h, ira, ns);
2378 if (mp == NULL)
2379 return;
2380 }
2381
2382 /*
2383 * On a labeled system, we have to check whether the zone
2384 * itself is permitted to receive raw traffic.
2385 */
2386 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2387 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2388 BUMP_MIB(ill->ill_icmp6_mib,
2389 ipv6IfIcmpInErrors);
2390 ip_drop_input("tsol_can_accept_raw", mp, ill);
2391 freemsg(mp);
2392 return;
2393 }
2394 }
2395
2396 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2397 mp = icmp_inbound_v6(mp, ira);
2398 if (mp == NULL) {
2399 /* No need to pass to RAW sockets */
2400 return;
2401 }
2402 break;
2403
2404 case IPPROTO_DSTOPTS: {
2405 ip6_dest_t *desthdr;
2406 uint_t ehdrlen;
2407 uint8_t *optptr;
2408
2409 /* We already check for MIN_EHDR_LEN above */
2410
2411 /* Check if AH is present and needs to be processed. */
2412 mp = ipsec_early_ah_v6(mp, ira);
2413 if (mp == NULL)
2414 return;
2415
2416 /*
2417 * Reinitialize pointers, as ipsec_early_ah_v6() does
2418 * complete pullups. We don't have to do more pullups
2419 * as a result.
2420 */
2421 ip6h = (ip6_t *)mp->b_rptr;
2422
2423 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2424 goto pkt_too_short;
2425
2426 if (mp->b_cont != NULL &&
2427 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2428 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2429 if (ip6h == NULL)
2430 goto discard;
2431 }
2432 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2433 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2434 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2435 goto pkt_too_short;
2436 if (mp->b_cont != NULL &&
2437 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2438 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2439 if (ip6h == NULL)
2440 goto discard;
2441
2442 desthdr = (ip6_dest_t *)(rptr + ip_hdr_length);
2443 }
2444 optptr = (uint8_t *)&desthdr[1];
2445
2446 /*
2447 * Update ira_ip_hdr_length to skip the destination header
2448 * when we repeat.
2449 */
2450 ira->ira_ip_hdr_length += ehdrlen;
2451
2452 ira->ira_protocol = desthdr->ip6d_nxt;
2453
2454 /*
2455 * Note: XXX This code does not seem to make
2456 * distinction between Destination Options Header
2457 * being before/after Routing Header which can
2458 * happen if we are at the end of source route.
2459 * This may become significant in future.
2460 * (No real significant Destination Options are
2461 * defined/implemented yet ).
2462 */
2463 switch (ip_process_options_v6(mp, ip6h, optptr,
2464 ehdrlen - 2, IPPROTO_DSTOPTS, ira)) {
2465 case -1:
2466 /*
2467 * Packet has been consumed and any needed
2468 * ICMP errors sent.
2469 */
2470 return;
2471 case 0:
2472 /* No action needed continue */
2473 break;
2474 case 1:
2475 /*
2476 * Unnexpected return value
2477 * (Router alert is a Hop-by-Hop option)
2478 */
2479 #ifdef DEBUG
2480 panic("ip_fanout_v6: router "
2481 "alert hbh opt indication in dest opt");
2482 /*NOTREACHED*/
2483 #else
2484 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2485 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2486 freemsg(mp);
2487 return;
2488 #endif
2489 }
2490 goto repeat;
2491 }
2492 case IPPROTO_FRAGMENT: {
2493 ip6_frag_t *fraghdr;
2494
2495 if (ira->ira_pktlen - ip_hdr_length < sizeof (ip6_frag_t))
2496 goto pkt_too_short;
2497
2498 if (mp->b_cont != NULL &&
2499 rptr + ip_hdr_length + sizeof (ip6_frag_t) > mp->b_wptr) {
2500 ip6h = ip_pullup(mp,
2501 ip_hdr_length + sizeof (ip6_frag_t), ira);
2502 if (ip6h == NULL)
2503 goto discard;
2504 }
2505
2506 fraghdr = (ip6_frag_t *)(rptr + ip_hdr_length);
2507 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmReqds);
2508
2509 /*
2510 * Invoke the CGTP (multirouting) filtering module to
2511 * process the incoming packet. Packets identified as
2512 * duplicates must be discarded. Filtering is active
2513 * only if the ip_cgtp_filter ndd variable is
2514 * non-zero.
2515 */
2516 if (ipst->ips_ip_cgtp_filter &&
2517 ipst->ips_ip_cgtp_filter_ops != NULL) {
2518 int cgtp_flt_pkt;
2519 netstackid_t stackid;
2520
2521 stackid = ipst->ips_netstack->netstack_stackid;
2522
2523 /*
2524 * CGTP and IPMP are mutually exclusive so
2525 * phyint_ifindex is fine here.
2526 */
2527 cgtp_flt_pkt =
2528 ipst->ips_ip_cgtp_filter_ops->cfo_filter_v6(
2529 stackid, ill->ill_phyint->phyint_ifindex,
2530 ip6h, fraghdr);
2531 if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) {
2532 ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill);
2533 freemsg(mp);
2534 return;
2535 }
2536 }
2537
2538 /*
2539 * Update ip_hdr_length to skip the frag header
2540 * ip_input_fragment_v6 will determine the extension header
2541 * prior to the fragment header and update its nexthdr value,
2542 * and also set ira_protocol to the nexthdr that follows the
2543 * completed fragment.
2544 */
2545 ip_hdr_length += sizeof (ip6_frag_t);
2546
2547 /*
2548 * Make sure we have ira_l2src before we loose the original
2549 * mblk
2550 */
2551 if (!(ira->ira_flags & IRAF_L2SRC_SET))
2552 ip_setl2src(mp, ira, ira->ira_rill);
2553
2554 mp = ip_input_fragment_v6(mp, ip6h, fraghdr,
2555 ira->ira_pktlen - ip_hdr_length, ira);
2556 if (mp == NULL) {
2557 /* Reassembly is still pending */
2558 return;
2559 }
2560 BUMP_MIB(ill->ill_ip_mib, ipIfStatsReasmOKs);
2561
2562 /*
2563 * The mblk chain has the frag header removed and
2564 * ira_protocol, ira_pktlen, ira_ip_hdr_length as well as the
2565 * IP header has been updated to refleact the result.
2566 */
2567 ip6h = (ip6_t *)mp->b_rptr;
2568 ip_hdr_length = ira->ira_ip_hdr_length;
2569 goto repeat;
2570 }
2571 case IPPROTO_HOPOPTS:
2572 /*
2573 * Illegal header sequence.
2574 * (Hop-by-hop headers are processed above
2575 * and required to immediately follow IPv6 header)
2576 */
2577 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2578 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2579 return;
2580
2581 case IPPROTO_ROUTING: {
2582 uint_t ehdrlen;
2583 ip6_rthdr_t *rthdr;
2584
2585 /* Check if AH is present and needs to be processed. */
2586 mp = ipsec_early_ah_v6(mp, ira);
2587 if (mp == NULL)
2588 return;
2589
2590 /*
2591 * Reinitialize pointers, as ipsec_early_ah_v6() does
2592 * complete pullups. We don't have to do more pullups
2593 * as a result.
2594 */
2595 ip6h = (ip6_t *)mp->b_rptr;
2596
2597 if (ira->ira_pktlen - ip_hdr_length < MIN_EHDR_LEN)
2598 goto pkt_too_short;
2599
2600 if (mp->b_cont != NULL &&
2601 rptr + ip_hdr_length + MIN_EHDR_LEN > mp->b_wptr) {
2602 ip6h = ip_pullup(mp, ip_hdr_length + MIN_EHDR_LEN, ira);
2603 if (ip6h == NULL)
2604 goto discard;
2605 }
2606 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2607 protocol = ira->ira_protocol = rthdr->ip6r_nxt;
2608 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2609 if (ira->ira_pktlen - ip_hdr_length < ehdrlen)
2610 goto pkt_too_short;
2611 if (mp->b_cont != NULL &&
2612 rptr + IPV6_HDR_LEN + ehdrlen > mp->b_wptr) {
2613 ip6h = ip_pullup(mp, IPV6_HDR_LEN + ehdrlen, ira);
2614 if (ip6h == NULL)
2615 goto discard;
2616 rthdr = (ip6_rthdr_t *)(rptr + ip_hdr_length);
2617 }
2618 if (rthdr->ip6r_segleft != 0) {
2619 /* Not end of source route */
2620 if (ira->ira_flags &
2621 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
2622 BUMP_MIB(ill->ill_ip_mib,
2623 ipIfStatsForwProhibits);
2624 ip_drop_input("ipIfStatsInForwProhibits",
2625 mp, ill);
2626 freemsg(mp);
2627 return;
2628 }
2629 ip_process_rthdr(mp, ip6h, rthdr, ira);
2630 return;
2631 }
2632 ira->ira_ip_hdr_length += ehdrlen;
2633 goto repeat;
2634 }
2635
2636 case IPPROTO_AH:
2637 case IPPROTO_ESP: {
2638 /*
2639 * Fast path for AH/ESP.
2640 */
2641 netstack_t *ns = ipst->ips_netstack;
2642 ipsec_stack_t *ipss = ns->netstack_ipsec;
2643
2644 IP_STAT(ipst, ipsec_proto_ahesp);
2645
2646 if (!ipsec_loaded(ipss)) {
2647 ip_proto_not_sup(mp, ira);
2648 return;
2649 }
2650
2651 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2652 /* select inbound SA and have IPsec process the pkt */
2653 if (protocol == IPPROTO_ESP) {
2654 esph_t *esph;
2655
2656 mp = ipsec_inbound_esp_sa(mp, ira, &esph);
2657 if (mp == NULL)
2658 return;
2659
2660 ASSERT(esph != NULL);
2661 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2662 ASSERT(ira->ira_ipsec_esp_sa != NULL);
2663 ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL);
2664
2665 mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph,
2666 ira);
2667 } else {
2668 ah_t *ah;
2669
2670 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
2671 if (mp == NULL)
2672 return;
2673
2674 ASSERT(ah != NULL);
2675 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
2676 ASSERT(ira->ira_ipsec_ah_sa != NULL);
2677 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
2678 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah,
2679 ira);
2680 }
2681
2682 if (mp == NULL) {
2683 /*
2684 * Either it failed or is pending. In the former case
2685 * ipIfStatsInDiscards was increased.
2686 */
2687 return;
2688 }
2689 /* we're done with IPsec processing, send it up */
2690 ip_input_post_ipsec(mp, ira);
2691 return;
2692 }
2693 case IPPROTO_NONE:
2694 /* All processing is done. Count as "delivered". */
2695 freemsg(mp);
2696 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2697 return;
2698
2699 case IPPROTO_ENCAP:
2700 case IPPROTO_IPV6:
2701 /* iptun will verify trusted label */
2702 connp = ipcl_classify_v6(mp, protocol, ip_hdr_length,
2703 ira, ipst);
2704 if (connp != NULL) {
2705 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers);
2706 ira->ira_ill = ira->ira_rill = NULL;
2707 connp->conn_recv(connp, mp, NULL, ira);
2708 CONN_DEC_REF(connp);
2709 ira->ira_ill = ill;
2710 ira->ira_rill = rill;
2711 return;
2712 }
2713 /* FALLTHRU */
2714 default:
2715 /*
2716 * On a labeled system, we have to check whether the zone
2717 * itself is permitted to receive raw traffic.
2718 */
2719 if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
2720 if (!tsol_can_accept_raw(mp, ira, B_FALSE)) {
2721 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2722 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2723 freemsg(mp);
2724 return;
2725 }
2726 }
2727 break;
2728 }
2729
2730 /*
2731 * The above input functions may have returned the pulled up message.
2732 * So ip6h need to be reinitialized.
2733 */
2734 ip6h = (ip6_t *)mp->b_rptr;
2735 ira->ira_protocol = protocol;
2736 if (ipst->ips_ipcl_proto_fanout_v6[protocol].connf_head == NULL) {
2737 /* No user-level listener for these packets packets */
2738 ip_proto_not_sup(mp, ira);
2739 return;
2740 }
2741
2742 /*
2743 * Handle fanout to raw sockets. There
2744 * can be more than one stream bound to a particular
2745 * protocol. When this is the case, each one gets a copy
2746 * of any incoming packets.
2747 */
2748 ASSERT(ira->ira_protocol == protocol);
2749 ip_fanout_proto_v6(mp, ip6h, ira);
2750 return;
2751
2752 pkt_too_short:
2753 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
2754 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
2755 freemsg(mp);
2756 return;
2757
2758 discard:
2759 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2760 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2761 freemsg(mp);
2762 #undef rptr
2763 }
2764