/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * Internet Group Management Protocol (IGMP) routines.
 * Multicast Listener Discovery Protocol (MLD) routines.
 *
 * Written by Steve Deering, Stanford, May 1988.
 * Modified by Rosen Sharma, Stanford, Aug 1994.
 * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
 *
 * MULTICAST 3.5.1.1
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/zone.h>
#include <sys/callb.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <inet/ipclassifier.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ipsec_impl.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/tunables.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_multi.h>
#include <inet/ip_listutils.h>

#include <netinet/igmp.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>

static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill);
static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
static void igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
    slist_t *srclist, mrec_t *next);
static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
    mcast_record_t rtype, slist_t *flist);
static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);

/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define DSEC_TO_MSEC(dsec)  ((dsec) * 100)
#define SEC_TO_MSEC(sec)    ((sec) * 1000)
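
/*
 * E.g. an IGMPv2 Max Response Time of 100 deciseconds from the wire
 * becomes DSEC_TO_MSEC(100) == 10000 ms, and the 10 second RFC 1112
 * report delay becomes SEC_TO_MSEC(10) == 10000 ms.
 */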

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is, the
 * interval between the time the timer was scheduled and the time it was
 * cancelled.  This can cause delays in sending out multicast membership
 * reports.  To resolve this problem, wallclock time (absolute time) is
 * used instead of deltas (relative time) to track timers.
 *
 * The macro below gets the lbolt value, used for proper timer scheduling
 * and firing, so that multicast membership reports are sent on time.
 * The timer does not fire at exactly the time it was scheduled to fire;
 * a difference of a few milliseconds has been observed.  An offset is
 * used to account for this difference.
 */

#define CURRENT_MSTIME  ((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define CURRENT_OFFSET  (999)
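
/*
 * The resulting idiom, used throughout this file, stores absolute
 * deadlines and allows for that firing error when testing expiry.
 * A minimal sketch of how the two macros combine:
 *
 *     ilm->ilm_timer = CURRENT_MSTIME + delay;                 (arm)
 *     ...
 *     if (ilm->ilm_timer <= (current + CURRENT_OFFSET))        (expired)
 */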

/*
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
    int time_left;
    int ret;
    timeout_id_t id;

    ASSERT(next != 0 && next != INFINITY);

    mutex_enter(&ipst->ips_igmp_timer_lock);

    if (ipst->ips_igmp_timer_setter_active) {
        /*
         * Serialize timer setters, one at a time. If the
         * timer is currently being set by someone,
         * just record the next time when it has to be
         * invoked and return. The current setter will
         * take care of it.
         */
        ipst->ips_igmp_time_to_next =
            MIN(ipst->ips_igmp_time_to_next, next);
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    } else {
        ipst->ips_igmp_timer_setter_active = B_TRUE;
    }
    if (ipst->ips_igmp_timeout_id == 0) {
        /*
         * The timer is inactive. We need to start a timer if we haven't
         * been asked to quiesce.
         */
        ipst->ips_igmp_time_to_next = next;
        if (ipst->ips_igmp_timer_quiesce != B_TRUE) {
            ipst->ips_igmp_timeout_id =
                timeout(igmp_timeout_handler, (void *)ipst,
                MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
            ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    }

    /*
     * The timer was scheduled sometime back for firing in
     * 'igmp_time_to_next' ms and is active. We need to
     * reschedule the timeout if the new 'next' will happen
     * earlier than the currently scheduled timeout.
     */
    time_left = ipst->ips_igmp_timer_scheduled_last +
        MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
    if (time_left < MSEC_TO_TICK(next)) {
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
        return;
    }
    id = ipst->ips_igmp_timeout_id;

    mutex_exit(&ipst->ips_igmp_timer_lock);
    ret = untimeout(id);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    /*
     * The timeout was cancelled, or the timeout handler
     * completed, while we were blocked in the untimeout.
     * No other thread could have set the timer meanwhile
     * since we serialized all the timer setters. Thus
     * no timer is currently active nor executing nor will
     * any timer fire in the future. We start the timer now
     * if needed.
     */
    if (ret == -1) {
        ASSERT(ipst->ips_igmp_timeout_id == 0);
    } else {
        ASSERT(ipst->ips_igmp_timeout_id != 0);
        ipst->ips_igmp_timeout_id = 0;
    }
    if (ipst->ips_igmp_time_to_next != 0 &&
        ipst->ips_igmp_timer_quiesce != B_TRUE) {
        ipst->ips_igmp_time_to_next =
            MIN(ipst->ips_igmp_time_to_next, next);
        ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
            (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
        ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
    }
    ipst->ips_igmp_timer_setter_active = B_FALSE;
    mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
    int time_left;
    int ret;
    timeout_id_t id;

    ASSERT(next != 0 && next != INFINITY);

    mutex_enter(&ipst->ips_mld_timer_lock);
    if (ipst->ips_mld_timer_setter_active) {
        /*
         * Serialize timer setters, one at a time. If the
         * timer is currently being set by someone,
         * just record the next time when it has to be
         * invoked and return. The current setter will
         * take care of it.
         */
        ipst->ips_mld_time_to_next =
            MIN(ipst->ips_mld_time_to_next, next);
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    } else {
        ipst->ips_mld_timer_setter_active = B_TRUE;
    }
    if (ipst->ips_mld_timeout_id == 0) {
        /*
         * The timer is inactive. We need to start a timer, if we
         * haven't been asked to quiesce.
         */
        ipst->ips_mld_time_to_next = next;
        if (ipst->ips_mld_timer_quiesce != B_TRUE) {
            ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
                (void *)ipst,
                MSEC_TO_TICK(ipst->ips_mld_time_to_next));
            ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    }

    /*
     * The timer was scheduled sometime back for firing in
     * 'mld_time_to_next' ms and is active. We need to
     * reschedule the timeout if the new 'next' will happen
     * earlier than the currently scheduled timeout.
     */
    time_left = ipst->ips_mld_timer_scheduled_last +
        MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
    if (time_left < MSEC_TO_TICK(next)) {
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
        return;
    }
    id = ipst->ips_mld_timeout_id;

    mutex_exit(&ipst->ips_mld_timer_lock);
    ret = untimeout(id);
    mutex_enter(&ipst->ips_mld_timer_lock);
    /*
     * The timeout was cancelled, or the timeout handler
     * completed, while we were blocked in the untimeout.
     * No other thread could have set the timer meanwhile
     * since we serialized all the timer setters. Thus
     * no timer is currently active nor executing nor will
     * any timer fire in the future. We start the timer now
     * if needed.
     */
    if (ret == -1) {
        ASSERT(ipst->ips_mld_timeout_id == 0);
    } else {
        ASSERT(ipst->ips_mld_timeout_id != 0);
        ipst->ips_mld_timeout_id = 0;
    }
    if (ipst->ips_mld_time_to_next != 0 &&
        ipst->ips_mld_timer_quiesce == B_FALSE) {
        ipst->ips_mld_time_to_next =
            MIN(ipst->ips_mld_time_to_next, next);
        ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
            (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
        ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
    }
    ipst->ips_mld_timer_setter_active = B_FALSE;
    mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
    igmpa_t *igmpa;
    ipha_t *ipha = (ipha_t *)(mp->b_rptr);
    int iphlen, igmplen, mblklen;
    ilm_t *ilm;
    uint32_t src, dst;
    uint32_t group;
    in6_addr_t v6group;
    uint_t next;
    ipif_t *ipif;
    ill_t *ill = ira->ira_ill;
    ip_stack_t *ipst = ill->ill_ipst;

    ASSERT(!ill->ill_isv6);
    ++ipst->ips_igmpstat.igps_rcv_total;

    mblklen = MBLKL(mp);
    iphlen = ira->ira_ip_hdr_length;
    if (mblklen < 1 || mblklen < iphlen) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        goto bad_pkt;
    }
    igmplen = ira->ira_pktlen - iphlen;
    /*
     * Since msg sizes are more variable with v3, just pullup the
     * whole thing now.
     */
    if (MBLKL(mp) < (igmplen + iphlen)) {
        mblk_t *mp1;
        if ((mp1 = msgpullup(mp, -1)) == NULL) {
            ++ipst->ips_igmpstat.igps_rcv_tooshort;
            goto bad_pkt;
        }
        freemsg(mp);
        mp = mp1;
        ipha = (ipha_t *)(mp->b_rptr);
    }

    /*
     * Validate lengths
     */
    if (igmplen < IGMP_MINLEN) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        goto bad_pkt;
    }

    igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
    src = ipha->ipha_src;
    dst = ipha->ipha_dst;
    if (ip_debug > 1)
        (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
            "igmp_input: src 0x%x, dst 0x%x on %s\n",
            (int)ntohl(src), (int)ntohl(dst),
            ill->ill_name);

    switch (igmpa->igmpa_type) {
    case IGMP_MEMBERSHIP_QUERY:
        /*
         * packet length differentiates between v1/v2 and v3
         * v1/v2 should be exactly 8 octets long; v3 is >= 12
         */
        if ((igmplen == IGMP_MINLEN) ||
            (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
            next = igmp_query_in(ipha, igmpa, ill);
        } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
            next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
                igmplen);
        } else {
            ++ipst->ips_igmpstat.igps_rcv_tooshort;
            goto bad_pkt;
        }
        if (next == 0)
            goto bad_pkt;

        if (next != INFINITY)
            igmp_start_timers(next, ipst);

        break;

    case IGMP_V1_MEMBERSHIP_REPORT:
    case IGMP_V2_MEMBERSHIP_REPORT:
        /*
         * For fast leave to work, we have to know that we are the
         * last person to send a report for this group. Reports
         * generated by us are looped back since we could potentially
         * be a multicast router, so discard reports sourced by me.
         */
        mutex_enter(&ill->ill_lock);
        for (ipif = ill->ill_ipif; ipif != NULL;
            ipif = ipif->ipif_next) {
            if (ipif->ipif_lcl_addr == src) {
                if (ip_debug > 1) {
                    (void) mi_strlog(ill->ill_rq,
                        1,
                        SL_TRACE,
                        "igmp_input: we are only "
                        "member src 0x%x\n",
                        (int)ntohl(src));
                }
                mutex_exit(&ill->ill_lock);
                return (mp);
            }
        }
        mutex_exit(&ill->ill_lock);

        ++ipst->ips_igmpstat.igps_rcv_reports;
        group = igmpa->igmpa_group;
        if (!CLASSD(group)) {
            ++ipst->ips_igmpstat.igps_rcv_badreports;
            goto bad_pkt;
        }

        /*
         * KLUDGE: if the IP source address of the report has an
         * unspecified (i.e., zero) subnet number, as is allowed for
         * a booting host, replace it with the correct subnet number
         * so that a process-level multicast routing daemon can
         * determine which subnet it arrived from. This is necessary
         * to compensate for the lack of any way for a process to
         * determine the arrival interface of an incoming packet.
         *
         * Requires that a copy of *this* message is passed up
         * to the raw interface, which is done by our caller.
         */
        if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */
            /* Pick the first ipif on this ill */
            mutex_enter(&ill->ill_lock);
            src = ill->ill_ipif->ipif_subnet;
            mutex_exit(&ill->ill_lock);
            ip1dbg(("igmp_input: changed src to 0x%x\n",
                (int)ntohl(src)));
            ipha->ipha_src = src;
        }

        /*
         * If our ill has ILMs that belong to the group being
         * reported, and we are a 'Delaying Member' in the RFC
         * terminology, stop our timer for that group and 'clear
         * flag' i.e. mark as IGMP_OTHERMEMBER.
         */
        rw_enter(&ill->ill_mcast_lock, RW_WRITER);
        IN6_IPADDR_TO_V4MAPPED(group, &v6group);
        for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
            if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
                continue;

            ++ipst->ips_igmpstat.igps_rcv_ourreports;
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_OTHERMEMBER;
        } /* for */
        rw_exit(&ill->ill_mcast_lock);
        ill_mcast_timer_start(ill->ill_ipst);
        break;

    case IGMP_V3_MEMBERSHIP_REPORT:
        /*
         * Currently nothing to do here; IGMP router is not
         * implemented in ip, and v3 hosts don't pay attention
         * to membership reports.
         */
        break;
    }
    /*
     * Pass all valid IGMP packets up to any process(es) listening
     * on a raw IGMP socket. Do not free the packet.
     */
    return (mp);

bad_pkt:
    freemsg(mp);
    return (NULL);
}

static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
    ilm_t *ilm;
    int timer;
    uint_t next, current;
    ip_stack_t *ipst;

    ipst = ill->ill_ipst;
    ++ipst->ips_igmpstat.igps_rcv_queries;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    /*
     * In the IGMPv2 specification, there are 3 states and a flag.
     *
     * In Non-Member state, we simply don't have a membership record.
     * In Delaying Member state, our timer is running (ilm->ilm_timer
     * < INFINITY). In Idle Member state, our timer is not running
     * (ilm->ilm_timer == INFINITY).
     *
     * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
     * we have heard a report from another member, or IGMP_IREPORTEDLAST
     * if I sent the last report.
     */
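    /*
     * In field terms: Delaying Member means ilm_timer < INFINITY;
     * hearing another member's report moves us to Idle Member, with
     * ilm_timer reset to INFINITY and ilm_state set to
     * IGMP_OTHERMEMBER, exactly as igmp_input() does above.
     */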
    if ((igmpa->igmpa_code == 0) ||
        (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
        /*
         * Query from an old router.
         * Remember that the querier on this interface is old,
         * and set the timer to the value in RFC 1112.
         */
        ill->ill_mcast_v1_time = 0;
        ill->ill_mcast_v1_tset = 1;
        if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
            ip1dbg(("Received IGMPv1 Query on %s, switching mode "
                "to IGMP_V1_ROUTER\n", ill->ill_name));
            atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
            ill->ill_mcast_type = IGMP_V1_ROUTER;
        }

        timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

        if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
            igmpa->igmpa_group != 0) {
            ++ipst->ips_igmpstat.igps_rcv_badqueries;
            rw_exit(&ill->ill_mcast_lock);
            ill_mcast_timer_start(ill->ill_ipst);
            return (0);
        }

    } else {
        in_addr_t group;

        /*
         * Query from a new router.
         * Simply do a validity check.
         */
        group = igmpa->igmpa_group;
        if (group != 0 && (!CLASSD(group))) {
            ++ipst->ips_igmpstat.igps_rcv_badqueries;
            rw_exit(&ill->ill_mcast_lock);
            ill_mcast_timer_start(ill->ill_ipst);
            return (0);
        }

        /*
         * Switch interface state to v2 on receipt of a v2 query
         * ONLY IF current state is v3.  Let things be if current
         * state is v1, but do reset the v2-querier-present timer.
         */
        if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
            ip1dbg(("Received IGMPv2 Query on %s, switching mode "
                "to IGMP_V2_ROUTER", ill->ill_name));
            atomic_inc_16(&ill->ill_ifptr->illif_mcast_v2);
            ill->ill_mcast_type = IGMP_V2_ROUTER;
        }
        ill->ill_mcast_v2_time = 0;
        ill->ill_mcast_v2_tset = 1;

        timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
    }

    if (ip_debug > 1) {
        (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
            "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
            (int)ntohs(igmpa->igmpa_code),
            (int)ntohs(igmpa->igmpa_type));
    }

    /*
     * -Start the timers in all of our membership records
     *  for the physical interface on which the query
     *  arrived, excluding those that belong to the "all
     *  hosts" group (224.0.0.1).
     *
     * -Restart any timer that is already running but has
     *  a value longer than the requested timeout.
     *
     * -Use the value specified in the query message as
     *  the maximum timeout.
     */
    next = (unsigned)INFINITY;

    current = CURRENT_MSTIME;
    for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {

        /*
         * A multicast router joins INADDR_ANY address
         * to enable promiscuous reception of all
         * mcasts from the interface. This INADDR_ANY
         * is stored in the ilm_v6addr as V6 unspec addr.
         */
        if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
            continue;
        if (ilm->ilm_addr == htonl(INADDR_ANY))
            continue;
        if ((ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
            igmpa->igmpa_group == 0) ||
            (igmpa->igmpa_group == ilm->ilm_addr)) {
            if (ilm->ilm_timer > timer) {
                MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
                if (ilm->ilm_timer < next)
                    next = ilm->ilm_timer;
                ilm->ilm_timer += current;
            }
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}

static uint_t
igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
{
    uint_t i, next, mrd, qqi, timer, delay, numsrc;
    uint_t current;
    ilm_t *ilm;
    ipaddr_t *src_array;
    uint8_t qrv;
    ip_stack_t *ipst;

    ipst = ill->ill_ipst;
    /* make sure numsrc matches packet size */
    numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
    if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
        ++ipst->ips_igmpstat.igps_rcv_tooshort;
        return (0);
    }
    src_array = (ipaddr_t *)&igmp3qa[1];

    ++ipst->ips_igmpstat.igps_rcv_queries;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
        uint_t hdrval, mant, exp;
        hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
        mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
        exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
        mrd = (mant | 0x10) << (exp + 3);
    }
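    /*
     * Worked example of the fixed-point decode above (1-bit flag,
     * 3-bit exponent, 4-bit mantissa; RFC 3376 section 4.1.1):
     * mxrc = 0x8A gives mant = 0xA and exp = 0, so
     * mrd = (0xA | 0x10) << 3 = 208 deciseconds, i.e. 20.8 seconds.
     */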
    if (mrd == 0)
        mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
    timer = DSEC_TO_MSEC(mrd);
    MCAST_RANDOM_DELAY(delay, timer);
    next = (unsigned)INFINITY;
    current = CURRENT_MSTIME;

    if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
        ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
    else
        ill->ill_mcast_rv = qrv;

    if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
        uint_t hdrval, mant, exp;
        hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
        mant = hdrval & IGMP_V3_QQI_MANT_MASK;
        exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
        qqi = (mant | 0x10) << (exp + 3);
    }
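    /*
     * Same fixed-point encoding as the Max Resp Code, but in whole
     * seconds: e.g. qqic = 0x99 gives mant = 0x9 and exp = 1, so
     * qqi = (0x9 | 0x10) << 4 = 400 seconds.
     */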
    ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

    /*
     * If we have a pending general query response that's scheduled
     * sooner than the delay we calculated for this response, then
     * no action is required (RFC 3376 section 5.2 rule 1).
     */
    if (ill->ill_global_timer < (current + delay)) {
        rw_exit(&ill->ill_mcast_lock);
        ill_mcast_timer_start(ill->ill_ipst);
        return (next);
    }

    /*
     * Now take action depending upon query type:
     * general, group specific, or group/source specific.
     */
    if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
        /*
         * general query
         * We know global timer is either not running or is
         * greater than our calculated delay, so reset it to
         * our delay (random value in range [0, response time]).
         */
        ill->ill_global_timer = current + delay;
        next = delay;
    } else {
        /* group or group/source specific query */
        for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
            if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
                (ilm->ilm_addr == htonl(INADDR_ANY)) ||
                (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
                (igmp3qa->igmp3qa_group != ilm->ilm_addr))
                continue;
            /*
             * If the query is group specific or we have a
             * pending group specific query, the response is
             * group specific (pending sources list should be
             * empty).  Otherwise, need to update the pending
             * sources list for the group and source specific
             * response.
             */
            if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
                SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
            } else {
                boolean_t overflow;
                slist_t *pktl;
                if (numsrc > MAX_FILTER_SIZE ||
                    (ilm->ilm_pendsrcs == NULL &&
                    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
                    /*
                     * We've been sent more sources than
                     * we can deal with; or we can't deal
                     * with a source list at all.  Revert
                     * to a group specific query.
                     */
                    goto group_query;
                }
                if ((pktl = l_alloc()) == NULL)
                    goto group_query;
                pktl->sl_numsrc = numsrc;
                for (i = 0; i < numsrc; i++)
                    IN6_IPADDR_TO_V4MAPPED(src_array[i],
                        &(pktl->sl_addr[i]));
                l_union_in_a(ilm->ilm_pendsrcs, pktl,
                    &overflow);
                l_free(pktl);
                if (overflow)
                    goto group_query;
            }

            ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
                INFINITY : (ilm->ilm_timer - current);
            /* choose soonest timer */
            ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
            if (ilm->ilm_timer < next)
                next = ilm->ilm_timer;
            ilm->ilm_timer += current;
        }
    }
    rw_exit(&ill->ill_mcast_lock);
    /*
     * No packets have been sent above - no
     * ill_mcast_send_queued is needed.
     */
    ill_mcast_timer_start(ill->ill_ipst);

    return (next);
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_joingroup(ilm_t *ilm)
{
    uint_t timer;
    ill_t *ill;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ill = ilm->ilm_ill;

    ASSERT(!ill->ill_isv6);
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
        ilm->ilm_rtx.rtx_timer = timer = INFINITY;
        ilm->ilm_state = IGMP_OTHERMEMBER;
    } else {
        ip1dbg(("Querier mode %d, sending report, group %x\n",
            ill->ill_mcast_type, htonl(ilm->ilm_addr)));
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
            mrec_t *rp;
            mcast_record_t rtype;
            /*
             * The possible state changes we need to handle here:
             *  Old State       New State       Report
             *
             *  INCLUDE(0)      INCLUDE(X)      ALLOW(X),BLOCK(0)
             *  INCLUDE(0)      EXCLUDE(X)      TO_EX(X)
             *
             * No need to send the BLOCK(0) report; ALLOW(X)
             * is enough.
             */
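            /*
             * E.g. a join with an INCLUDE {S} source filter sends
             * ALLOW({S}), while a join in exclude mode (including
             * the common exclude-nothing join) sends TO_EX,
             * matching the rtype selection just below.
             */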
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
            rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
            igmpv3_sendrpt(ill, rp);
            /*
             * Set up retransmission state.  Timer is set below,
             * for both v3 and older versions.
             */
            mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                ilm->ilm_filter);
        }

        /* Set the ilm timer value */
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
        timer = ilm->ilm_rtx.rtx_timer;
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        ilm->ilm_state = IGMP_IREPORTEDLAST;

        /*
         * We are holding ill_mcast_lock here and the timeout
         * handler (igmp_timeout_handler_per_ill) acquires that
         * lock. Hence we can't call igmp_start_timers since it could
         * deadlock in untimeout().
         * Instead the thread which drops ill_mcast_lock will have
         * to call ill_mcast_timer_start().
         */
        mutex_enter(&ipst->ips_igmp_timer_lock);
        ipst->ips_igmp_deferred_next = MIN(timer,
            ipst->ips_igmp_deferred_next);
        mutex_exit(&ipst->ips_igmp_timer_lock);
    }

    if (ip_debug > 1) {
        (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
            "igmp_joingroup: multicast_type %d timer %d",
            (ilm->ilm_ill->ill_mcast_type),
            (int)ntohl(timer));
    }
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_joingroup(ilm_t *ilm)
{
    uint_t timer;
    ill_t *ill;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ill = ilm->ilm_ill;

    ASSERT(ill->ill_isv6);
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
        ilm->ilm_rtx.rtx_timer = timer = INFINITY;
        ilm->ilm_state = IGMP_OTHERMEMBER;
    } else {
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
        } else {
            mrec_t *rp;
            mcast_record_t rtype;
            /*
             * The possible state changes we need to handle here:
             *  Old State       New State       Report
             *
             *  INCLUDE(0)      INCLUDE(X)      ALLOW(X),BLOCK(0)
             *  INCLUDE(0)      EXCLUDE(X)      TO_EX(X)
             *
             * No need to send the BLOCK(0) report; ALLOW(X)
             * is enough.
             */
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
            rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
            mldv2_sendrpt(ill, rp);
            /*
             * Set up retransmission state.  Timer is set below,
             * for both v2 and v1.
             */
            mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                ilm->ilm_filter);
        }

        /* Set the ilm timer value */
        ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
            ilm->ilm_rtx.rtx_cnt > 0);

        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        timer = ilm->ilm_rtx.rtx_timer;
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        ilm->ilm_state = IGMP_IREPORTEDLAST;

        /*
         * We are holding ill_mcast_lock here and the timeout
         * handler (mld_timeout_handler_per_ill) acquires that
         * lock. Hence we can't call mld_start_timers since it could
         * deadlock in untimeout().
         * Instead the thread which drops ill_mcast_lock will have
         * to call ill_mcast_timer_start().
         */
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next = MIN(timer,
            ipst->ips_mld_deferred_next);
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    if (ip_debug > 1) {
        (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
            "mld_joingroup: multicast_type %d timer %d",
            (ilm->ilm_ill->ill_mcast_type),
            (int)ntohl(timer));
    }
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;

    ASSERT(!ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
        ill->ill_mcast_type == IGMP_V2_ROUTER &&
        (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
        igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
            (htonl(INADDR_ALLRTRS_GROUP)));
        return;
    }
    if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
        (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
        mrec_t *rp;
        /*
         * The possible state changes we need to handle here:
         *  Old State       New State       Report
         *
         *  INCLUDE(X)      INCLUDE(0)      ALLOW(0),BLOCK(X)
         *  EXCLUDE(X)      INCLUDE(0)      TO_IN(0)
         *
         * No need to send the ALLOW(0) report; BLOCK(X) is enough.
         */
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
        } else {
            rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                NULL, NULL);
        }
        igmpv3_sendrpt(ill, rp);
        return;
    }
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_leavegroup(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;

    ASSERT(ill->ill_isv6);

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
        ill->ill_mcast_type == MLD_V1_ROUTER &&
        (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
        mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
        return;
    }
    if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
        (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
        mrec_t *rp;
        /*
         * The possible state changes we need to handle here:
         *  Old State       New State       Report
         *
         *  INCLUDE(X)      INCLUDE(0)      ALLOW(0),BLOCK(X)
         *  EXCLUDE(X)      INCLUDE(0)      TO_IN(0)
         *
         * No need to send the ALLOW(0) report; BLOCK(X) is enough.
         */
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                ilm->ilm_filter, NULL);
        } else {
            rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                NULL, NULL);
        }
        mldv2_sendrpt(ill, rp);
        return;
    }
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
    ill_t *ill;
    mrec_t *rp;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ASSERT(ilm != NULL);

    /* state change reports should only be sent if the router is v3 */
    if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
        return;

    ill = ilm->ilm_ill;
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /*
     * Compare existing (old) state with the new state and prepare
     * State Change Report, according to the rules in RFC 3376:
     *
     *  Old State       New State       State Change Report
     *
     *  INCLUDE(A)      INCLUDE(B)      ALLOW(B-A),BLOCK(A-B)
     *  EXCLUDE(A)      EXCLUDE(B)      ALLOW(A-B),BLOCK(B-A)
     *  INCLUDE(A)      EXCLUDE(B)      TO_EX(B)
     *  EXCLUDE(A)      INCLUDE(B)      TO_IN(B)
     */
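    /*
     * For example, moving from INCLUDE {S1, S2} to INCLUDE {S2, S3}
     * gives A-B = {S1} and B-A = {S3}, so the report built below
     * carries an ALLOW({S3}) record and a BLOCK({S1}) record.
     */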

    if (ilm->ilm_fmode == fmode) {
        slist_t *a_minus_b = NULL, *b_minus_a = NULL;
        slist_t *allow, *block;
        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        rp = NULL;
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, the caller will start it when dropping ill_mcast_lock.
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_igmp_timer_lock);
        ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
            ilm->ilm_rtx.rtx_timer);
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_igmp_timer_lock);
    }

    igmpv3_sendrpt(ill, rp);
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
    ill_t *ill;
    mrec_t *rp = NULL;
    ip_stack_t *ipst = ilm->ilm_ipst;

    ASSERT(ilm != NULL);

    ill = ilm->ilm_ill;
    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /* only need to send if we have an mldv2-capable router */
    if (ill->ill_mcast_type != MLD_V2_ROUTER) {
        return;
    }

    /*
     * Compare existing (old) state with the new state passed in
     * and send appropriate MLDv2 State Change Report.
     *
     *  Old State       New State       State Change Report
     *
     *  INCLUDE(A)      INCLUDE(B)      ALLOW(B-A),BLOCK(A-B)
     *  EXCLUDE(A)      EXCLUDE(B)      ALLOW(A-B),BLOCK(B-A)
     *  INCLUDE(A)      EXCLUDE(B)      TO_EX(B)
     *  EXCLUDE(A)      INCLUDE(B)      TO_IN(B)
     */
    if (ilm->ilm_fmode == fmode) {
        slist_t *a_minus_b = NULL, *b_minus_a = NULL;
        slist_t *allow, *block;
        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, the caller will start it when dropping ill_mcast_lock.
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next =
            MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    mldv2_sendrpt(ill, rp);
}

uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
    uint_t next = INFINITY, current;
    ilm_t *ilm;
    mrec_t *rp = NULL;
    mrec_t *rtxrp = NULL;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    current = CURRENT_MSTIME;
    /* First check the global timer on this interface */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v3 general
         * query), need to skip the all hosts addr (224.0.0.1), per
         * RFC 3376 section 5.
         */
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        igmpv3_sendrpt(ill, rp);
        rp = NULL;
    } else {
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            if (ip_debug > 1) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_timo_hlr 2: ilm_timr %d "
                    "typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer - current),
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                /*
                 * Either the pending request is just group-
                 * specific, or we couldn't get the resources
                 * (rsp) to build a source-specific reply.
                 */
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
            igmpv3_sendrpt(ill, rp);
            rp = NULL;
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
            continue;
        }
        if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * IGMPv3.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            ASSERT(rtxp->rtx_timer == INFINITY);
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
        igmpv3_sendrpt(ill, rtxrp);
        rtxrp = NULL;
    }

    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    /* Defer ill_mcast_timer_start() until the caller is done */

    return (next);
}

/*
 * igmp_timeout_handler:
 * Called when the IGMP timer fires: handles the timeout events on every
 * IPv4 ill and restarts the timer for the earliest pending event.
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_mcast_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer, i.e. it calls igmp_start_timers().
 * Later the igmp timer fires and the timeout handler igmp_timeout_handler()
 * performs the action exclusively after acquiring ill_mcast_lock.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id.
 */
void
igmp_timeout_handler(void *arg)
{
    ill_t *ill;
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    ip_stack_t *ipst = arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timeout_id = 0;
    ipst->ips_igmp_timer_scheduled_last = 0;
    ipst->ips_igmp_time_to_next = 0;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V4(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(!ill->ill_isv6);
        /* Make sure the ill isn't going away. */
        if (!ill_check_and_refhold(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        next = igmp_timeout_handler_per_ill(ill);
        if (next < global_next)
            global_next = next;
        ill_refrele(ill);
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    }
    rw_exit(&ipst->ips_ill_g_lock);
    if (global_next != INFINITY)
        igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Handles the timeout events on one ill, and returns the number of
 * milliseconds until the next event (or INFINITY if none).
 */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
    ilm_t *ilm;
    uint_t next = INFINITY, current;
    mrec_t *rp, *rtxrp;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);

    current = CURRENT_MSTIME;
    /*
     * First check the global timer on this interface; the global timer
     * is not used for MLDv1, so if it's set we can assume we're v2.
     */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v2 general
         * query), need to skip the all hosts addr (ff02::1), per
         * RFC 3810 section 6.
         */
        rp = NULL;
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
                &ipv6_all_hosts_mcast))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        mldv2_sendrpt(ill, rp);
    } else {
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    rp = rtxrp = NULL;
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            if (ip_debug > 1) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "mld_timo_hlr 2: ilm_timr"
                    " %d typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer - current),
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * MLDv2.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            ASSERT(rtxp->rtx_timer == INFINITY);
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
    }

    if (ill->ill_mcast_type == MLD_V2_ROUTER) {
        mldv2_sendrpt(ill, rp);
        mldv2_sendrpt(ill, rtxrp);
    }
    rw_exit(&ill->ill_mcast_lock);
    /* Send any deferred/queued IP packets */
    ill_mcast_send_queued(ill);
    /* Defer ill_mcast_timer_start() until the caller is done */

    return (next);
}

/*
 * mld_timeout_handler:
 * Called when the MLD timer fires: handles the timeout events on every
 * IPv6 ill and restarts the timer for the earliest pending event.
 * MT issues are same as igmp_timeout_handler.
 */
void
mld_timeout_handler(void *arg)
{
    ill_t *ill;
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    ip_stack_t *ipst = arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timeout_id = 0;
    ipst->ips_mld_timer_scheduled_last = 0;
    ipst->ips_mld_time_to_next = 0;
    mutex_exit(&ipst->ips_mld_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V6(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(ill->ill_isv6);
        /* Make sure the ill isn't going away. */
        if (!ill_check_and_refhold(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        next = mld_timeout_handler_per_ill(ill);
        if (next < global_next)
            global_next = next;
        ill_refrele(ill);
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    }
    rw_exit(&ipst->ips_ill_g_lock);
    if (global_next != INFINITY)
        mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define OVQP(ill) \
    ((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
    + MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
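
/*
 * E.g., assuming ill_mcast_rv == 2 and ill_mcast_qi == 125 (the RFC 3376
 * defaults, in seconds) and a 10 second query response interval, this is
 * 2 * 125 + 10 = 260 seconds' worth of MCAST_SLOWTIMO_INTERVAL-millisecond
 * slowtimo ticks, matching the RFC's Older Version Querier Present timeout
 * of (Robustness Variable * Query Interval) + Query Response Interval.
 */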

/*
 * igmp_slowtimo:
 * - Resets to a newer version if we didn't hear from the older version
 *   router in IGMP_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * The check against ips_igmp_max_version ensures that we don't revert to
 * a higher IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);

    /*
     * The ill_if_t list is circular, hence the odd loop parameters.
     *
     * We can't use the ILL_START_WALK and ill_next() wrappers for this
     * walk, as we need to check the illif_mcast_* fields in the ill_if_t
     * structure (allowing us to skip if none of the instances have timers
     * running).
     */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V4_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        /*
         * illif_mcast_v[12] are set using atomics. If an ill hears
         * a V1 or V2 query now and we miss seeing the count now,
         * we will see it the next time igmp_slowtimo is called.
         */
        if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            /* Make sure the ill isn't going away. */
            if (!ill_check_and_refhold(ill))
                continue;
            rw_exit(&ipst->ips_ill_g_lock);
            rw_enter(&ill->ill_mcast_lock, RW_WRITER);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if (ill->ill_mcast_v2_tset == 1)
                ill->ill_mcast_v2_time++;
            if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                if ((ill->ill_mcast_v2_tset > 0) ||
                    (ipst->ips_igmp_max_version ==
                    IGMP_V2_ROUTER)) {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V2\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V2_ROUTER;
                } else {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V3\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V3_ROUTER;
                }
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v1);
            }
            if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
                (ill->ill_mcast_v2_time >= OVQP(ill))) {
                ip1dbg(("V2 query timer expired on "
                    "%s; switching mode to IGMP_V3\n",
                    ill->ill_name));
                ill->ill_mcast_type = IGMP_V3_ROUTER;
                ill->ill_mcast_v2_time = 0;
                ill->ill_mcast_v2_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v2);
            }
            rw_exit(&ill->ill_mcast_lock);
            ill_refrele(ill);
            rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    ill_mcast_timer_start(ipst);
    mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
    if (ipst->ips_igmp_slowtimeout_quiesce != B_TRUE) {
        ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo,
            (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    } else {
        ipst->ips_igmp_slowtimeout_id = 0;
    }
    mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to a newer version if we didn't hear from the older version
 *   router in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * The check against ips_mld_max_version ensures that we don't revert to
 * a higher MLD version than configured.
 */
void
mld_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* See comments in igmp_slowtimo() above... */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V6_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        if (ifp->illif_mcast_v1 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            /* Make sure the ill isn't going away. */
            if (!ill_check_and_refhold(ill))
                continue;
            rw_exit(&ipst->ips_ill_g_lock);
            rw_enter(&ill->ill_mcast_lock, RW_WRITER);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
                (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                ip1dbg(("MLD query timer expired on"
                    " %s; switching mode to MLD_V2\n",
                    ill->ill_name));
                ill->ill_mcast_type = MLD_V2_ROUTER;
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_dec_16(&ifp->illif_mcast_v1);
            }
            rw_exit(&ill->ill_mcast_lock);
            ill_refrele(ill);
            rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    ill_mcast_timer_start(ipst);
    mutex_enter(&ipst->ips_mld_slowtimeout_lock);
    if (ipst->ips_mld_slowtimeout_quiesce != B_TRUE) {
        ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo,
            (void *)ipst, MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    } else {
        ipst->ips_mld_slowtimeout_id = 0;
    }
    mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
1820
1821 /*
1822 * igmp_sendpkt:
1823 * This will send to ip_output_simple just like icmp_inbound.
1824 */
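/*
 * Layout of the message built below (sizes follow from the structures
 * actually used):
 *
 *	+------------------+----------------------+-------------------+
 *	| ipha_t (20 bytes)| router alert option  | igmpa_t (8 bytes) |
 *	|                  | (RTRALERT_LEN = 4)   |                   |
 *	+------------------+----------------------+-------------------+
 *
 * so size = 20 + 4 + 8 = 32 bytes, and the IGMP checksum is computed
 * over the 8 IGMP bytes starting at offset hdrlen = 24.
 */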
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t	*rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size = hdrlen + sizeof (igmpa_t);
	ill_t	*ill = ilm->ilm_ill;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		return;
	}
	mp->b_wptr = mp->b_rptr + size;

	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type = type;
	igmpa->igmpa_code = 0;
	igmpa->igmpa_group = ilm->ilm_addr;
	igmpa->igmpa_cksum = 0;
	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_hdr_checksum = 0;
	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src = INADDR_ANY;

	ill_mcast_queue(ill, mp);

	++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill. The report
 * will contain one group record for each element of reclist. If this
 * causes packet length to exceed ill->ill_mc_mtu, multiple reports are
 * sent. reclist is assumed to be made up of buffers allocated by
 * mcast_bldmrec(), and those buffers are freed here.
 */
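/*
 * Sizing example (editor's sketch, using the usual wire sizes of
 * sizeof (igmp3ra_t) == 8 and sizeof (grphdra_t) == 8): a report
 * carrying one group record with three sources occupies
 * 20 (ipha_t) + 4 (router alert) + 8 (igmp3ra_t) + 8 (grphdra_t) +
 * 3 * 4 (sources) = 52 bytes. Records are appended until the next one
 * would push the total past ill_mc_mtu, at which point a new packet
 * is started.
 */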
static void
igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
{
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_mc_mtu) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e. the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_mc_mtu - (size +
				    sizeof (grphdra_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 */
				if (srcspace <= sizeof (ipaddr_t))
					continue;

				srcsperpkt = srcspace / sizeof (ipaddr_t);
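				/*
				 * E.g. with ill_mc_mtu = 1500 and an empty
				 * report so far (size = 32), srcspace =
				 * 1500 - (32 + 8) = 1460, so srcsperpkt =
				 * 1460 / 4 = 365 sources fit in this
				 * packet's record.
				 */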
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);

	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = INADDR_ANY;

	ill_mcast_queue(ill, mp);

	++ipst->ips_igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of mld_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
mblk_t *
mld_input(mblk_t *mp, ip_recv_attr_t *ira)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr;
	uint_t		next;
	int		mldlen;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return (NULL);
	}

	/* Get to the icmp header part */
	hdr_length = ira->ira_ip_hdr_length;
	exthdr_length = hdr_length - IPV6_HDR_LEN;

	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return (NULL);
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * Packet length differentiates between v1 and v2: a v1
		 * query is exactly 24 octets long; v2 is >= 28.
		 */
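		/*
		 * Concretely: an MLDv1 query is just the 24-byte mld_hdr_t
		 * (4 ICMPv6 header bytes, 2 max-delay, 2 reserved, 16 group
		 * address); an MLDv2 query appends at least 4 more bytes
		 * (S/QRV flags, QQIC, number of sources), which is why
		 * MLD_V2_QUERY_MINLEN is 28.
		 */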
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return (NULL);
		}
		if (next == 0) {
			return (mp);
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by us.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    &ip6h->ip6_src)) {
				if (ip_debug > 1) {
					char buf1[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "mld_input: we are only "
					    "member src %s\n",
					    inet_ntop(AF_INET6, &ip6h->ip6_src,
					    buf1, sizeof (buf1)));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		rw_enter(&ill->ill_mcast_lock, RW_WRITER);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		rw_exit(&ill->ill_mcast_lock);
		/*
		 * No packets have been sent above - no
		 * ill_mcast_send_queued is needed.
		 */
		ill_mcast_timer_start(ill->ill_ipst);
		break;

	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	return (mp);
}

/*
 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate
 * (non-zero, unsigned) timer value to be set on success.
 */
static uint_t
mld_query_in(mld_hdr_t *mldh, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next, current;
	in6_addr_t *v6group;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
	 * INFINITY)
	 *
	 * The flag is ilm->ilm_state; it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if we sent the last report.
	 */
	v6group = &mldh->mld_addr;
	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
		return (0);
	}

	/* Need to do compatibility mode checking */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	ill->ill_mcast_v1_time = 0;
	ill->ill_mcast_v1_tset = 1;
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
		    "MLD_V1_ROUTER\n", ill->ill_name));
		atomic_inc_16(&ill->ill_ifptr->illif_mcast_v1);
		ill->ill_mcast_type = MLD_V1_ROUTER;
	}

	timer = (int)ntohs(mldh->mld_maxdelay);
	if (ip_debug > 1) {
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
		    timer, (int)mldh->mld_type);
	}

	/*
	 * - Start the timers in all of our membership records for
	 *   the physical interface on which the query arrived,
	 *   excluding:
	 *   1. those that belong to the "all hosts" group, and
	 *   2. those with 0 scope, or 1 node-local scope.
	 *
	 * - Restart any timer that is already running but has a value
	 *   longer than the requested timeout.
	 * - Use the value specified in the query message as the
	 *   maximum timeout.
	 */
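	/*
	 * Worked example (editor's sketch, assuming MCAST_RANDOM_DELAY(t, max)
	 * stores a uniformly distributed value in [0, max) milliseconds into
	 * t): for a query with mld_maxdelay of 10000 ms, a matching ilm whose
	 * timer isn't already shorter gets ilm_timer = current +
	 * rand(0..10000). "next" tracks the smallest relative delay seen, so
	 * the caller can schedule the timer wheel accordingly.
	 */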
	next = INFINITY;

	current = CURRENT_MSTIME;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
			continue;
		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
		    &ipv6_all_hosts_mcast)) &&
		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
			if (timer == 0) {
				/* Respond immediately */
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_IREPORTEDLAST;
				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
				break;
			}
			if (ilm->ilm_timer > timer) {
				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
				if (ilm->ilm_timer < next)
					next = ilm->ilm_timer;
				ilm->ilm_timer += current;
			}
			break;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}

/*
 * Handles an MLDv2 Listener Query. On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t	*ilm;
	in6_addr_t *v6group, *src_array;
	uint_t	next, numsrc, i, mrd, delay, qqi, current;
	uint8_t	qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/* extract Maximum Response Delay from code in header */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
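	/*
	 * Decoding example for the fixed-point form above (RFC 3810
	 * section 5.1.3: codes >= 32768 are encoded as 1|exp(3)|mant(12)):
	 * mrd = 0x8000 gives mant = 0, exp = 0, so
	 * mrd = (0x0000 | 0x1000) << (0 + 3) = 32768 ms; the maximum,
	 * 0xFFFF, gives (0x0FFF | 0x1000) << (7 + 3) = 8387584 ms.
	 */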

	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		/* the exponent occupies bits 4-6 of the 8-bit QQIC field */
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 4;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
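	/*
	 * E.g. (per RFC 3810 section 5.1.9, and assuming the usual mask
	 * values of 0x0f for the mantissa and 0x70 for the exponent):
	 * QQIC = 0x80 gives mant = 0, exp = 0, so
	 * qqi = (0x00 | 0x10) << (0 + 3) = 128 seconds; QQIC values below
	 * 128 are used as-is.
	 */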

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	if (ill->ill_global_timer < (current + delay)) {
		rw_exit(&ill->ill_mcast_lock);
		return (next);
	}

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		ill->ill_global_timer = current + delay;
		next = delay;
	} else {
		/* group or group/source specific query */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty). Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
			break;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/*
	 * No packets have been sent above - no
	 * ill_mcast_send_queued is needed.
	 */
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}

/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
	mblk_t		*mp;
	mld_hdr_t	*mldh;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
	ill_t		*ill = ilm->ilm_ill;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/*
	 * We need to place a router alert option in this packet. The length
	 * of the options must be a multiple of 8. The hbh option header is 2
	 * bytes followed by the 4 byte router alert option. That leaves
	 * 2 bytes of pad for a total of 8 bytes.
	 */
	const int router_alert_length = 8;
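	/*
	 * The 8 option bytes laid out below (IP6_ALERT_MLD is the MLD
	 * router alert value, 0):
	 *
	 *	ip6h_nxt ip6h_len type=0x05 len=2 value(2 bytes) pad pad
	 *
	 * where the two trailing pad bytes are Pad1 options left as zero
	 * by the bzero() of the whole packet.
	 */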

	ASSERT(ill->ill_isv6);

	size += router_alert_length;
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		return;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (struct ip6_hbh *)&ip6h[1];
	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
	/*
	 * A zero is a pad option of length 1. The bzero of the whole packet
	 * above will pad between ip6router and mld.
	 */
	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

	mldh->mld_type = type;
	mldh->mld_addr = ilm->ilm_v6addr;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	ip6hbh->ip6h_len = 0;

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	if (v6addr == NULL)
		ip6h->ip6_dst = ilm->ilm_v6addr;
	else
		ip6h->ip6_dst = *v6addr;

	ip6h->ip6_src = ipv6_all_zeros;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output.
	 */
	mldh->mld_cksum = htons(sizeof (*mldh));

	ill_mcast_queue(ill, mp);
}

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The
 * report will contain one multicast address record for each element of
 * reclist. If this causes packet length to exceed ill->ill_mc_mtu,
 * multiple reports are sent. reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
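/*
 * Sizing example (editor's sketch, using the usual wire sizes of
 * sizeof (mld2r_t) == 8 and a 20-byte fixed part per mld2mar_t):
 * a report with one multicast address record and three sources is
 * 40 (ip6_t) + 8 (hop-by-hop + router alert) + 8 (mld2r_t) +
 * 20 (mld2mar_t) + 3 * 16 (sources) = 124 bytes.
 */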
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t		*mp;
	mld2r_t		*mld2r;
	mld2mar_t	*mld2mar;
	in6_addr_t	*srcarray;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size, optlen, padlen, icmpsize, rsize;
	int		i, numrec, more_src_cnt;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);
	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes. We assume here that optlen <= 8, so the total option
	 * length will be 8. Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
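	/*
	 * With the usual 2-byte ip6_hbh_t and 4-byte struct ip6_opt_router,
	 * optlen = 6 and padlen = 2, giving the required 8-byte option area.
	 */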
nextpkt:
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_mc_mtu) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e. the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_mc_mtu -
				    (size + sizeof (mld2mar_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 */
				if (srcspace <= sizeof (in6_addr_t))
					continue;

				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	ip6h->ip6_src = ipv6_all_zeros;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_output_simple.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	ill_mcast_queue(ill, mp);

	if (morepkts) {
		if (more_src_cnt > 0) {
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
	mrec_t *rp;
	int i;

	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
	    SLIST_IS_EMPTY(srclist))
		return (next);

	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
	if (rp == NULL)
		return (next);

	rp->mrec_next = next;
	rp->mrec_type = type;
	rp->mrec_auxlen = 0;
	rp->mrec_group = *grp;
	if (srclist == NULL) {
		rp->mrec_srcs.sl_numsrc = 0;
	} else {
		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
		for (i = 0; i < srclist->sl_numsrc; i++)
			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
	}

	return (rp);
}

/*
 * Set up initial retransmit state. If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
 */
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
    slist_t *flist)
{
	/*
	 * There are only three possibilities for rtype:
	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
	 *		=> rtype is ALLOW_NEW_SOURCES
	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
	 *		=> rtype is CHANGE_TO_EXCLUDE
	 *	State change that involves a filter mode change
	 *		=> rtype is either CHANGE_TO_INCLUDE or
	 *		   CHANGE_TO_EXCLUDE
	 */
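	/*
	 * For example (editor's illustration): a new join that moves the
	 * filter from INCLUDE {} to INCLUDE {S1, S2} arrives here with
	 * rtype == ALLOW_NEW_SOURCES and flist == {S1, S2}; the switch
	 * below leaves rtx_fmode_cnt at 0 (there is no filter mode change
	 * to retransmit) and copies {S1, S2} into rtx_allow.
	 */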
	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
	    rtype == ALLOW_NEW_SOURCES);

	rtxp->rtx_cnt = ill->ill_mcast_rv;

	switch (rtype) {
	case CHANGE_TO_EXCLUDE:
		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_allow);
		COPY_SLIST(flist, rtxp->rtx_block);
		break;
	case ALLOW_NEW_SOURCES:
	case CHANGE_TO_INCLUDE:
		rtxp->rtx_fmode_cnt =
		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_block);
		COPY_SLIST(flist, rtxp->rtx_allow);
		break;
	}
}

/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0
 *	Merge existing rtx allow and block lists with new state:
 *		rtx_allow = (new allow + rtx_allow) - new block
 *		rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
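/*
 * Worked example of the third case (editor's illustration): suppose
 * rtx_allow = {a, b}, rtx_block = {c}, and the new change produces
 * mrec_allow = {c} and mrec_block = {b}. Then
 *	rtx_allow = {c} + ({a, b} - {b}) = {a, c}
 *	rtx_block = {b} + ({c} - {c}) = {b}
 * and ALLOW {a, c} / BLOCK {b} records are sent.
 */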
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t		*ill;
	rtx_state_t	*rtxp = &ilm->ilm_rtx;
	mcast_record_t	txtype;
	mrec_t		*rp, *rpnext, *rtnmrec;
	boolean_t	ovf;

	ill = ilm->ilm_ill;

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX. In this case, we just need to set new
	 * retransmit state as if this were an initial join. There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *	new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *	new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *	rtx_allow = rtx_allow - mrec_block;
		 *	new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's
		 * okay. Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}