1e11c3f44Smeem /*
2e11c3f44Smeem * CDDL HEADER START
3e11c3f44Smeem *
4e11c3f44Smeem * The contents of this file are subject to the terms of the
5e11c3f44Smeem * Common Development and Distribution License (the "License").
6e11c3f44Smeem * You may not use this file except in compliance with the License.
7e11c3f44Smeem *
8e11c3f44Smeem * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9e11c3f44Smeem * or http://www.opensolaris.org/os/licensing.
10e11c3f44Smeem * See the License for the specific language governing permissions
11e11c3f44Smeem * and limitations under the License.
12e11c3f44Smeem *
13e11c3f44Smeem * When distributing Covered Code, include this CDDL HEADER in each
14e11c3f44Smeem * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15e11c3f44Smeem * If applicable, add the following below this CDDL HEADER, with the
16e11c3f44Smeem * fields enclosed by brackets "[]" replaced with your own identifying
17e11c3f44Smeem * information: Portions Copyright [yyyy] [name of copyright owner]
18e11c3f44Smeem *
19e11c3f44Smeem * CDDL HEADER END
20e11c3f44Smeem *
211f19738eSmeem * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
22e11c3f44Smeem */
23e11c3f44Smeem
24e11c3f44Smeem #include <inet/ip.h>
25e11c3f44Smeem #include <inet/ip6.h>
26e11c3f44Smeem #include <inet/ip_if.h>
27e11c3f44Smeem #include <inet/ip_ire.h>
28e11c3f44Smeem #include <inet/ip_multi.h>
29bd670b35SErik Nordmark #include <inet/ip_ndp.h>
30e11c3f44Smeem #include <inet/ip_rts.h>
31e11c3f44Smeem #include <inet/mi.h>
32e11c3f44Smeem #include <net/if_types.h>
33e11c3f44Smeem #include <sys/dlpi.h>
34e11c3f44Smeem #include <sys/kmem.h>
35e11c3f44Smeem #include <sys/modhash.h>
36e11c3f44Smeem #include <sys/sdt.h>
37e11c3f44Smeem #include <sys/strsun.h>
38e11c3f44Smeem #include <sys/sunddi.h>
39e11c3f44Smeem #include <sys/types.h>
40e11c3f44Smeem
/*
 * Shorthand for reaching the ip_stack_t behind an ipmp_grp_t (by way of the
 * group's phyint) or behind an ipmp_illgrp_t (by way of its IPMP ill).
 */
#define	IPMP_GRP_TO_IPST(grp)		PHYINT_TO_IPST((grp)->gr_phyint)
#define	IPMP_ILLGRP_TO_IPST(illg)	((illg)->ig_ipmp_ill->ill_ipst)

/*
 * Fixed parameters that are not worth exposing as tunables.
 */
#define	IPMP_GRP_HASH_SIZE		64
#define	IPMP_ILL_REFRESH_TIMEOUT	120	/* seconds */
53e11c3f44Smeem
54e11c3f44Smeem /*
55e11c3f44Smeem * IPMP meta-interface kstats (based on those in PSARC/1997/198).
56e11c3f44Smeem */
57e11c3f44Smeem static const kstat_named_t ipmp_kstats[IPMP_KSTAT_MAX] = {
58e11c3f44Smeem { "obytes", KSTAT_DATA_UINT32 },
59e11c3f44Smeem { "obytes64", KSTAT_DATA_UINT64 },
60e11c3f44Smeem { "rbytes", KSTAT_DATA_UINT32 },
61e11c3f44Smeem { "rbytes64", KSTAT_DATA_UINT64 },
62e11c3f44Smeem { "opackets", KSTAT_DATA_UINT32 },
63e11c3f44Smeem { "opackets64", KSTAT_DATA_UINT64 },
64e11c3f44Smeem { "oerrors", KSTAT_DATA_UINT32 },
65e11c3f44Smeem { "ipackets", KSTAT_DATA_UINT32 },
66e11c3f44Smeem { "ipackets64", KSTAT_DATA_UINT64 },
67e11c3f44Smeem { "ierrors", KSTAT_DATA_UINT32 },
68e11c3f44Smeem { "multircv", KSTAT_DATA_UINT32 },
69e11c3f44Smeem { "multixmt", KSTAT_DATA_UINT32 },
70e11c3f44Smeem { "brdcstrcv", KSTAT_DATA_UINT32 },
71e11c3f44Smeem { "brdcstxmt", KSTAT_DATA_UINT32 },
72e11c3f44Smeem { "link_up", KSTAT_DATA_UINT32 }
73e11c3f44Smeem };
74e11c3f44Smeem
75e11c3f44Smeem static void ipmp_grp_insert(ipmp_grp_t *, mod_hash_hndl_t);
76e11c3f44Smeem static int ipmp_grp_create_kstats(ipmp_grp_t *);
77e11c3f44Smeem static int ipmp_grp_update_kstats(kstat_t *, int);
78e11c3f44Smeem static void ipmp_grp_destroy_kstats(ipmp_grp_t *);
79e11c3f44Smeem static ill_t *ipmp_illgrp_min_ill(ipmp_illgrp_t *);
80e11c3f44Smeem static ill_t *ipmp_illgrp_max_ill(ipmp_illgrp_t *);
81e11c3f44Smeem static void ipmp_illgrp_set_cast(ipmp_illgrp_t *, ill_t *);
82*1eee170aSErik Nordmark static void ipmp_illgrp_set_mtu(ipmp_illgrp_t *, uint_t, uint_t);
83e11c3f44Smeem static boolean_t ipmp_ill_activate(ill_t *);
84e11c3f44Smeem static void ipmp_ill_deactivate(ill_t *);
85e11c3f44Smeem static void ipmp_ill_ire_mark_testhidden(ire_t *, char *);
86e11c3f44Smeem static void ipmp_ill_ire_clear_testhidden(ire_t *, char *);
87e11c3f44Smeem static void ipmp_ill_refresh_active_timer_start(ill_t *);
88e11c3f44Smeem static void ipmp_ill_rtsaddrmsg(ill_t *, int);
89e11c3f44Smeem static void ipmp_ill_bind_ipif(ill_t *, ipif_t *, enum ip_resolver_action);
90e11c3f44Smeem static ipif_t *ipmp_ill_unbind_ipif(ill_t *, ipif_t *, boolean_t);
91e11c3f44Smeem static void ipmp_phyint_get_kstats(phyint_t *, uint64_t *);
92e11c3f44Smeem static boolean_t ipmp_ipif_is_up_dataaddr(const ipif_t *);
931f19738eSmeem static void ipmp_ncec_delete_nonlocal(ncec_t *, uchar_t *);
94e11c3f44Smeem
95e11c3f44Smeem /*
96e11c3f44Smeem * Initialize IPMP state for IP stack `ipst'; called from ip_stack_init().
97e11c3f44Smeem */
98e11c3f44Smeem void
ipmp_init(ip_stack_t * ipst)99e11c3f44Smeem ipmp_init(ip_stack_t *ipst)
100e11c3f44Smeem {
101e11c3f44Smeem ipst->ips_ipmp_grp_hash = mod_hash_create_extended("ipmp_grp_hash",
102e11c3f44Smeem IPMP_GRP_HASH_SIZE, mod_hash_null_keydtor, mod_hash_null_valdtor,
103e11c3f44Smeem mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
104e11c3f44Smeem rw_init(&ipst->ips_ipmp_lock, NULL, RW_DEFAULT, 0);
105e11c3f44Smeem }
106e11c3f44Smeem
107e11c3f44Smeem /*
108e11c3f44Smeem * Destroy IPMP state for IP stack `ipst'; called from ip_stack_fini().
109e11c3f44Smeem */
110e11c3f44Smeem void
ipmp_destroy(ip_stack_t * ipst)111e11c3f44Smeem ipmp_destroy(ip_stack_t *ipst)
112e11c3f44Smeem {
113e11c3f44Smeem mod_hash_destroy_hash(ipst->ips_ipmp_grp_hash);
114e11c3f44Smeem rw_destroy(&ipst->ips_ipmp_lock);
115e11c3f44Smeem }
116e11c3f44Smeem
117e11c3f44Smeem /*
118e11c3f44Smeem * Create an IPMP group named `grname', associate it with IPMP phyint `phyi',
119e11c3f44Smeem * and add it to the hash. On success, return a pointer to the created group.
120e11c3f44Smeem * Caller must ensure `grname' is not yet in the hash. Assumes that the IPMP
121e11c3f44Smeem * meta-interface associated with the group also has the same name (but they
122e11c3f44Smeem * may differ later via ipmp_grp_rename()).
123e11c3f44Smeem */
124e11c3f44Smeem ipmp_grp_t *
ipmp_grp_create(const char * grname,phyint_t * phyi)125e11c3f44Smeem ipmp_grp_create(const char *grname, phyint_t *phyi)
126e11c3f44Smeem {
127e11c3f44Smeem ipmp_grp_t *grp;
128e11c3f44Smeem ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
129e11c3f44Smeem mod_hash_hndl_t mh;
130e11c3f44Smeem
131e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
132e11c3f44Smeem
133e11c3f44Smeem if ((grp = kmem_zalloc(sizeof (ipmp_grp_t), KM_NOSLEEP)) == NULL)
134e11c3f44Smeem return (NULL);
135e11c3f44Smeem
136e11c3f44Smeem (void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name));
137e11c3f44Smeem (void) strlcpy(grp->gr_ifname, grname, sizeof (grp->gr_ifname));
138e11c3f44Smeem
139e11c3f44Smeem /*
140e11c3f44Smeem * Cache the group's phyint. This is safe since a phyint_t will
141e11c3f44Smeem * outlive its ipmp_grp_t.
142e11c3f44Smeem */
143e11c3f44Smeem grp->gr_phyint = phyi;
144e11c3f44Smeem
145e11c3f44Smeem /*
146e11c3f44Smeem * Create IPMP group kstats.
147e11c3f44Smeem */
148e11c3f44Smeem if (ipmp_grp_create_kstats(grp) != 0) {
149e11c3f44Smeem kmem_free(grp, sizeof (ipmp_grp_t));
150e11c3f44Smeem return (NULL);
151e11c3f44Smeem }
152e11c3f44Smeem
153e11c3f44Smeem /*
154e11c3f44Smeem * Insert the group into the hash.
155e11c3f44Smeem */
156e11c3f44Smeem if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0) {
157e11c3f44Smeem ipmp_grp_destroy_kstats(grp);
158e11c3f44Smeem kmem_free(grp, sizeof (ipmp_grp_t));
159e11c3f44Smeem return (NULL);
160e11c3f44Smeem }
161e11c3f44Smeem ipmp_grp_insert(grp, mh);
162e11c3f44Smeem
163e11c3f44Smeem return (grp);
164e11c3f44Smeem }
165e11c3f44Smeem
166e11c3f44Smeem /*
167e11c3f44Smeem * Create IPMP kstat structures for `grp'. Return an errno upon failure.
168e11c3f44Smeem */
169e11c3f44Smeem static int
ipmp_grp_create_kstats(ipmp_grp_t * grp)170e11c3f44Smeem ipmp_grp_create_kstats(ipmp_grp_t *grp)
171e11c3f44Smeem {
172e11c3f44Smeem kstat_t *ksp;
173e11c3f44Smeem netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid;
174e11c3f44Smeem
175e11c3f44Smeem ksp = kstat_create_netstack("ipmp", 0, grp->gr_ifname, "net",
176e11c3f44Smeem KSTAT_TYPE_NAMED, IPMP_KSTAT_MAX, 0, id);
177e11c3f44Smeem if (ksp == NULL)
178e11c3f44Smeem return (ENOMEM);
179e11c3f44Smeem
180e11c3f44Smeem ksp->ks_update = ipmp_grp_update_kstats;
181e11c3f44Smeem ksp->ks_private = grp;
182e11c3f44Smeem bcopy(ipmp_kstats, ksp->ks_data, sizeof (ipmp_kstats));
183e11c3f44Smeem
184e11c3f44Smeem kstat_install(ksp);
185e11c3f44Smeem grp->gr_ksp = ksp;
186e11c3f44Smeem return (0);
187e11c3f44Smeem }
188e11c3f44Smeem
189e11c3f44Smeem /*
190e11c3f44Smeem * Update the IPMP kstats tracked by `ksp'; called by the kstats framework.
191e11c3f44Smeem */
192e11c3f44Smeem static int
ipmp_grp_update_kstats(kstat_t * ksp,int rw)193e11c3f44Smeem ipmp_grp_update_kstats(kstat_t *ksp, int rw)
194e11c3f44Smeem {
195e11c3f44Smeem uint_t i;
196e11c3f44Smeem kstat_named_t *kn = KSTAT_NAMED_PTR(ksp);
197e11c3f44Smeem ipmp_grp_t *grp = ksp->ks_private;
198e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
199e11c3f44Smeem ipsq_t *ipsq, *grp_ipsq = grp->gr_phyint->phyint_ipsq;
200e11c3f44Smeem phyint_t *phyi;
201e11c3f44Smeem uint64_t phyi_kstats[IPMP_KSTAT_MAX];
202e11c3f44Smeem
203e11c3f44Smeem if (rw == KSTAT_WRITE)
204e11c3f44Smeem return (EACCES);
205e11c3f44Smeem
206e11c3f44Smeem /*
207e11c3f44Smeem * Start with the group's baseline values.
208e11c3f44Smeem */
209e11c3f44Smeem for (i = 0; i < IPMP_KSTAT_MAX; i++) {
210e11c3f44Smeem if (kn[i].data_type == KSTAT_DATA_UINT32) {
211e11c3f44Smeem kn[i].value.ui32 = grp->gr_kstats0[i];
212e11c3f44Smeem } else {
213e11c3f44Smeem ASSERT(kn[i].data_type == KSTAT_DATA_UINT64);
214e11c3f44Smeem kn[i].value.ui64 = grp->gr_kstats0[i];
215e11c3f44Smeem }
216e11c3f44Smeem }
217e11c3f44Smeem
218e11c3f44Smeem /*
219e11c3f44Smeem * Add in the stats of each phyint currently in the group. Since we
220e11c3f44Smeem * don't directly track the phyints in a group, we cheat by walking
221e11c3f44Smeem * the IPSQ set under ill_g_lock. (The IPSQ list cannot change while
222e11c3f44Smeem * ill_g_lock is held.)
223e11c3f44Smeem */
224e11c3f44Smeem rw_enter(&ipst->ips_ill_g_lock, RW_READER);
225e11c3f44Smeem ipsq = grp_ipsq->ipsq_next;
226e11c3f44Smeem for (; ipsq != grp_ipsq; ipsq = ipsq->ipsq_next) {
227e11c3f44Smeem phyi = ipsq->ipsq_phyint;
228e11c3f44Smeem
229e11c3f44Smeem /*
230e11c3f44Smeem * If a phyint in a group is being unplumbed, it's possible
231e11c3f44Smeem * that ill_glist_delete() -> phyint_free() already freed the
232e11c3f44Smeem * phyint (and set ipsq_phyint to NULL), but the unplumb
233e11c3f44Smeem * operation has yet to complete (and thus ipsq_dq() has yet
234e11c3f44Smeem * to remove the phyint's IPSQ from the group IPSQ's phyint
235e11c3f44Smeem * list). We skip those phyints here (note that their kstats
236e11c3f44Smeem * have already been added to gr_kstats0[]).
237e11c3f44Smeem */
238e11c3f44Smeem if (phyi == NULL)
239e11c3f44Smeem continue;
240e11c3f44Smeem
241e11c3f44Smeem ipmp_phyint_get_kstats(phyi, phyi_kstats);
242e11c3f44Smeem
243e11c3f44Smeem for (i = 0; i < IPMP_KSTAT_MAX; i++) {
244e11c3f44Smeem phyi_kstats[i] -= phyi->phyint_kstats0[i];
245e11c3f44Smeem if (kn[i].data_type == KSTAT_DATA_UINT32)
246e11c3f44Smeem kn[i].value.ui32 += phyi_kstats[i];
247e11c3f44Smeem else
248e11c3f44Smeem kn[i].value.ui64 += phyi_kstats[i];
249e11c3f44Smeem }
250e11c3f44Smeem }
251e11c3f44Smeem
252e11c3f44Smeem kn[IPMP_KSTAT_LINK_UP].value.ui32 =
253e11c3f44Smeem (grp->gr_phyint->phyint_flags & PHYI_RUNNING) != 0;
254e11c3f44Smeem
255e11c3f44Smeem rw_exit(&ipst->ips_ill_g_lock);
256e11c3f44Smeem return (0);
257e11c3f44Smeem }
258e11c3f44Smeem
259e11c3f44Smeem /*
260e11c3f44Smeem * Destroy IPMP kstat structures for `grp'.
261e11c3f44Smeem */
262e11c3f44Smeem static void
ipmp_grp_destroy_kstats(ipmp_grp_t * grp)263e11c3f44Smeem ipmp_grp_destroy_kstats(ipmp_grp_t *grp)
264e11c3f44Smeem {
265e11c3f44Smeem netstackid_t id = IPMP_GRP_TO_IPST(grp)->ips_netstack->netstack_stackid;
266e11c3f44Smeem
267e11c3f44Smeem kstat_delete_netstack(grp->gr_ksp, id);
268e11c3f44Smeem bzero(grp->gr_kstats0, sizeof (grp->gr_kstats0));
269e11c3f44Smeem grp->gr_ksp = NULL;
270e11c3f44Smeem }
271e11c3f44Smeem
272e11c3f44Smeem /*
273e11c3f44Smeem * Look up an IPMP group named `grname' on IP stack `ipst'. Return NULL if it
274e11c3f44Smeem * does not exist.
275e11c3f44Smeem */
276e11c3f44Smeem ipmp_grp_t *
ipmp_grp_lookup(const char * grname,ip_stack_t * ipst)277e11c3f44Smeem ipmp_grp_lookup(const char *grname, ip_stack_t *ipst)
278e11c3f44Smeem {
279e11c3f44Smeem ipmp_grp_t *grp;
280e11c3f44Smeem
281e11c3f44Smeem ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
282e11c3f44Smeem
283e11c3f44Smeem if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname,
284e11c3f44Smeem (mod_hash_val_t *)&grp) == 0)
285e11c3f44Smeem return (grp);
286e11c3f44Smeem
287e11c3f44Smeem return (NULL);
288e11c3f44Smeem }
289e11c3f44Smeem
290e11c3f44Smeem /*
291e11c3f44Smeem * Place information about group `grp' into `lifgr'.
292e11c3f44Smeem */
293e11c3f44Smeem void
ipmp_grp_info(const ipmp_grp_t * grp,lifgroupinfo_t * lifgr)294e11c3f44Smeem ipmp_grp_info(const ipmp_grp_t *grp, lifgroupinfo_t *lifgr)
295e11c3f44Smeem {
296e11c3f44Smeem ill_t *ill;
297e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
298e11c3f44Smeem
299e11c3f44Smeem ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
300e11c3f44Smeem
301e11c3f44Smeem lifgr->gi_v4 = (grp->gr_v4 != NULL);
302e11c3f44Smeem lifgr->gi_v6 = (grp->gr_v6 != NULL);
303e11c3f44Smeem lifgr->gi_nv4 = grp->gr_nv4 + grp->gr_pendv4;
304e11c3f44Smeem lifgr->gi_nv6 = grp->gr_nv6 + grp->gr_pendv6;
305e11c3f44Smeem lifgr->gi_mactype = grp->gr_nif > 0 ? grp->gr_mactype : SUNW_DL_IPMP;
306e11c3f44Smeem (void) strlcpy(lifgr->gi_grifname, grp->gr_ifname, LIFNAMSIZ);
307e11c3f44Smeem lifgr->gi_m4ifname[0] = '\0';
308e11c3f44Smeem lifgr->gi_m6ifname[0] = '\0';
309e11c3f44Smeem lifgr->gi_bcifname[0] = '\0';
310e11c3f44Smeem
311e11c3f44Smeem if (grp->gr_v4 != NULL && (ill = grp->gr_v4->ig_cast_ill) != NULL) {
312e11c3f44Smeem (void) strlcpy(lifgr->gi_m4ifname, ill->ill_name, LIFNAMSIZ);
313e11c3f44Smeem (void) strlcpy(lifgr->gi_bcifname, ill->ill_name, LIFNAMSIZ);
314e11c3f44Smeem }
315e11c3f44Smeem
316e11c3f44Smeem if (grp->gr_v6 != NULL && (ill = grp->gr_v6->ig_cast_ill) != NULL)
317e11c3f44Smeem (void) strlcpy(lifgr->gi_m6ifname, ill->ill_name, LIFNAMSIZ);
318e11c3f44Smeem }
319e11c3f44Smeem
320e11c3f44Smeem /*
321e11c3f44Smeem * Insert `grp' into the hash using the reserved hash entry `mh'.
322e11c3f44Smeem * Caller must ensure `grp' is not yet in the hash.
323e11c3f44Smeem */
324e11c3f44Smeem static void
ipmp_grp_insert(ipmp_grp_t * grp,mod_hash_hndl_t mh)325e11c3f44Smeem ipmp_grp_insert(ipmp_grp_t *grp, mod_hash_hndl_t mh)
326e11c3f44Smeem {
327e11c3f44Smeem int err;
328e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
329e11c3f44Smeem
330e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
331e11c3f44Smeem
332e11c3f44Smeem /*
333e11c3f44Smeem * Since grp->gr_name will exist at least as long as `grp' is in the
334e11c3f44Smeem * hash, we use it directly as the key.
335e11c3f44Smeem */
336e11c3f44Smeem err = mod_hash_insert_reserve(ipst->ips_ipmp_grp_hash,
337e11c3f44Smeem (mod_hash_key_t)grp->gr_name, (mod_hash_val_t)grp, mh);
338e11c3f44Smeem if (err != 0) {
339e11c3f44Smeem /*
340e11c3f44Smeem * This should never happen since `mh' was preallocated.
341e11c3f44Smeem */
342e11c3f44Smeem panic("cannot insert IPMP group \"%s\" (err %d)",
343e11c3f44Smeem grp->gr_name, err);
344e11c3f44Smeem }
345e11c3f44Smeem }
346e11c3f44Smeem
347e11c3f44Smeem /*
348e11c3f44Smeem * Remove `grp' from the hash. Caller must ensure `grp' is in it.
349e11c3f44Smeem */
350e11c3f44Smeem static void
ipmp_grp_remove(ipmp_grp_t * grp)351e11c3f44Smeem ipmp_grp_remove(ipmp_grp_t *grp)
352e11c3f44Smeem {
353e11c3f44Smeem int err;
354e11c3f44Smeem mod_hash_val_t val;
355e11c3f44Smeem mod_hash_key_t key = (mod_hash_key_t)grp->gr_name;
356e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
357e11c3f44Smeem
358e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
359e11c3f44Smeem
360e11c3f44Smeem err = mod_hash_remove(ipst->ips_ipmp_grp_hash, key, &val);
361e11c3f44Smeem if (err != 0 || val != grp) {
362e11c3f44Smeem panic("cannot remove IPMP group \"%s\" (err %d)",
363e11c3f44Smeem grp->gr_name, err);
364e11c3f44Smeem }
365e11c3f44Smeem }
366e11c3f44Smeem
367e11c3f44Smeem /*
368e11c3f44Smeem * Attempt to rename `grp' to new name `grname'. Return an errno if the new
369e11c3f44Smeem * group name already exists or is invalid, or if there isn't enough memory.
370e11c3f44Smeem */
371e11c3f44Smeem int
ipmp_grp_rename(ipmp_grp_t * grp,const char * grname)372e11c3f44Smeem ipmp_grp_rename(ipmp_grp_t *grp, const char *grname)
373e11c3f44Smeem {
374e11c3f44Smeem mod_hash_hndl_t mh;
375e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
376e11c3f44Smeem
377e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
378e11c3f44Smeem
379e11c3f44Smeem if (grname[0] == '\0')
380e11c3f44Smeem return (EINVAL);
381e11c3f44Smeem
382e11c3f44Smeem if (mod_hash_find(ipst->ips_ipmp_grp_hash, (mod_hash_key_t)grname,
383e11c3f44Smeem (mod_hash_val_t *)&grp) != MH_ERR_NOTFOUND)
384e11c3f44Smeem return (EEXIST);
385e11c3f44Smeem
386e11c3f44Smeem /*
387e11c3f44Smeem * Before we remove the group from the hash, ensure we'll be able to
388e11c3f44Smeem * re-insert it by reserving space.
389e11c3f44Smeem */
390e11c3f44Smeem if (mod_hash_reserve_nosleep(ipst->ips_ipmp_grp_hash, &mh) != 0)
391e11c3f44Smeem return (ENOMEM);
392e11c3f44Smeem
393e11c3f44Smeem ipmp_grp_remove(grp);
394e11c3f44Smeem (void) strlcpy(grp->gr_name, grname, sizeof (grp->gr_name));
395e11c3f44Smeem ipmp_grp_insert(grp, mh);
396e11c3f44Smeem
397e11c3f44Smeem return (0);
398e11c3f44Smeem }
399e11c3f44Smeem
400e11c3f44Smeem /*
401e11c3f44Smeem * Destroy `grp' and remove it from the hash. Caller must ensure `grp' is in
402e11c3f44Smeem * the hash, and that there are no interfaces on it.
403e11c3f44Smeem */
404e11c3f44Smeem void
ipmp_grp_destroy(ipmp_grp_t * grp)405e11c3f44Smeem ipmp_grp_destroy(ipmp_grp_t *grp)
406e11c3f44Smeem {
407e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
408e11c3f44Smeem
409e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
410e11c3f44Smeem
411e11c3f44Smeem /*
412e11c3f44Smeem * If there are still interfaces using this group, panic before things
413e11c3f44Smeem * go really off the rails.
414e11c3f44Smeem */
415e11c3f44Smeem if (grp->gr_nif != 0)
416e11c3f44Smeem panic("cannot destroy IPMP group \"%s\": in use", grp->gr_name);
417e11c3f44Smeem
418e11c3f44Smeem ipmp_grp_remove(grp);
419e11c3f44Smeem ipmp_grp_destroy_kstats(grp);
420e11c3f44Smeem
421e11c3f44Smeem ASSERT(grp->gr_v4 == NULL);
422e11c3f44Smeem ASSERT(grp->gr_v6 == NULL);
423e11c3f44Smeem ASSERT(grp->gr_nv4 == 0);
424e11c3f44Smeem ASSERT(grp->gr_nv6 == 0);
425e11c3f44Smeem ASSERT(grp->gr_nactif == 0);
426e11c3f44Smeem ASSERT(grp->gr_linkdownmp == NULL);
427e11c3f44Smeem grp->gr_phyint = NULL;
428e11c3f44Smeem
429e11c3f44Smeem kmem_free(grp, sizeof (ipmp_grp_t));
430e11c3f44Smeem }
431e11c3f44Smeem
432e11c3f44Smeem /*
433e11c3f44Smeem * Check whether `ill' is suitable for inclusion into `grp', and return an
434e11c3f44Smeem * errno describing the problem (if any). NOTE: many of these errno values
435e11c3f44Smeem * are interpreted by ifconfig, which will take corrective action and retry
436e11c3f44Smeem * the SIOCSLIFGROUPNAME, so please exercise care when changing them.
437e11c3f44Smeem */
438e11c3f44Smeem static int
ipmp_grp_vet_ill(ipmp_grp_t * grp,ill_t * ill)439e11c3f44Smeem ipmp_grp_vet_ill(ipmp_grp_t *grp, ill_t *ill)
440e11c3f44Smeem {
441e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
442e11c3f44Smeem
443e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
444e11c3f44Smeem ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
445e11c3f44Smeem
446e11c3f44Smeem /*
447e11c3f44Smeem * To sidestep complicated address migration logic in the kernel and
448e11c3f44Smeem * to force the kernel's all-hosts multicast memberships to be blown
449e11c3f44Smeem * away, all addresses that had been brought up must be brought back
450e11c3f44Smeem * down prior to adding an interface to a group. (This includes
451e11c3f44Smeem * addresses currently down due to DAD.) Once the interface has been
452e11c3f44Smeem * added to the group, its addresses can then be brought back up, at
453e11c3f44Smeem * which point they will be moved to the IPMP meta-interface.
454e11c3f44Smeem * NOTE: we do this before ill_appaddr_cnt() since bringing down the
455e11c3f44Smeem * link-local causes in.ndpd to remove its ADDRCONF'd addresses.
456e11c3f44Smeem */
457e11c3f44Smeem if (ill->ill_ipif_up_count + ill->ill_ipif_dup_count > 0)
458e11c3f44Smeem return (EADDRINUSE);
459e11c3f44Smeem
460e11c3f44Smeem /*
461e11c3f44Smeem * To avoid confusing applications by changing addresses that are
462e11c3f44Smeem * under their control, all such control must be removed prior to
463e11c3f44Smeem * adding an interface into a group.
464e11c3f44Smeem */
465e11c3f44Smeem if (ill_appaddr_cnt(ill) != 0)
466e11c3f44Smeem return (EADDRNOTAVAIL);
467e11c3f44Smeem
468e11c3f44Smeem /*
469e11c3f44Smeem * Since PTP addresses do not share the same broadcast domain, they
470e11c3f44Smeem * are not allowed to be in an IPMP group.
471e11c3f44Smeem */
472e11c3f44Smeem if (ill_ptpaddr_cnt(ill) != 0)
473e11c3f44Smeem return (EINVAL);
474e11c3f44Smeem
475e11c3f44Smeem /*
476e11c3f44Smeem * An ill must support multicast to be allowed into a group.
477e11c3f44Smeem */
478e11c3f44Smeem if (!(ill->ill_flags & ILLF_MULTICAST))
479e11c3f44Smeem return (ENOTSUP);
480e11c3f44Smeem
481e11c3f44Smeem /*
482e11c3f44Smeem * An ill must strictly be using ARP and/or ND for address
483e11c3f44Smeem * resolution for it to be allowed into a group.
484e11c3f44Smeem */
485bd670b35SErik Nordmark if (ill->ill_flags & (ILLF_NONUD | ILLF_NOARP))
486e11c3f44Smeem return (ENOTSUP);
487e11c3f44Smeem
488e11c3f44Smeem /*
489e11c3f44Smeem * An ill cannot also be using usesrc groups. (Although usesrc uses
490e11c3f44Smeem * ill_g_usesrc_lock, we don't need to grab it since usesrc also does
491e11c3f44Smeem * all its modifications as writer.)
492e11c3f44Smeem */
493e11c3f44Smeem if (IS_USESRC_ILL(ill) || IS_USESRC_CLI_ILL(ill))
494e11c3f44Smeem return (ENOTSUP);
495e11c3f44Smeem
496e11c3f44Smeem /*
497e11c3f44Smeem * All ills in a group must be the same mactype.
498e11c3f44Smeem */
499e11c3f44Smeem if (grp->gr_nif > 0 && grp->gr_mactype != ill->ill_mactype)
500e11c3f44Smeem return (EINVAL);
501e11c3f44Smeem
502e11c3f44Smeem return (0);
503e11c3f44Smeem }
504e11c3f44Smeem
505e11c3f44Smeem /*
506e11c3f44Smeem * Check whether `phyi' is suitable for inclusion into `grp', and return an
507e11c3f44Smeem * errno describing the problem (if any). See comment above ipmp_grp_vet_ill()
508e11c3f44Smeem * regarding errno values.
509e11c3f44Smeem */
510e11c3f44Smeem int
ipmp_grp_vet_phyint(ipmp_grp_t * grp,phyint_t * phyi)511e11c3f44Smeem ipmp_grp_vet_phyint(ipmp_grp_t *grp, phyint_t *phyi)
512e11c3f44Smeem {
513e11c3f44Smeem int err = 0;
514e11c3f44Smeem ip_stack_t *ipst = IPMP_GRP_TO_IPST(grp);
515e11c3f44Smeem
516e11c3f44Smeem ASSERT(IAM_WRITER_IPSQ(phyi->phyint_ipsq));
517e11c3f44Smeem ASSERT(RW_LOCK_HELD(&ipst->ips_ipmp_lock));
518e11c3f44Smeem
519e11c3f44Smeem /*
520e11c3f44Smeem * An interface cannot have address families plumbed that are not
521e11c3f44Smeem * configured in the group.
522e11c3f44Smeem */
523e11c3f44Smeem if (phyi->phyint_illv4 != NULL && grp->gr_v4 == NULL ||
524e11c3f44Smeem phyi->phyint_illv6 != NULL && grp->gr_v6 == NULL)
525e11c3f44Smeem return (EAFNOSUPPORT);
526e11c3f44Smeem
527e11c3f44Smeem if (phyi->phyint_illv4 != NULL)
528e11c3f44Smeem err = ipmp_grp_vet_ill(grp, phyi->phyint_illv4);
529e11c3f44Smeem if (err == 0 && phyi->phyint_illv6 != NULL)
530e11c3f44Smeem err = ipmp_grp_vet_ill(grp, phyi->phyint_illv6);
531e11c3f44Smeem
532e11c3f44Smeem return (err);
533e11c3f44Smeem }
534e11c3f44Smeem
535e11c3f44Smeem /*
536e11c3f44Smeem * Create a new illgrp on IPMP meta-interface `ill'.
537e11c3f44Smeem */
538e11c3f44Smeem ipmp_illgrp_t *
ipmp_illgrp_create(ill_t * ill)539e11c3f44Smeem ipmp_illgrp_create(ill_t *ill)
540e11c3f44Smeem {
541e11c3f44Smeem uint_t mtu = ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU;
542e11c3f44Smeem ipmp_illgrp_t *illg;
543e11c3f44Smeem
544e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
545e11c3f44Smeem ASSERT(IS_IPMP(ill));
546e11c3f44Smeem ASSERT(ill->ill_grp == NULL);
547e11c3f44Smeem
548e11c3f44Smeem if ((illg = kmem_zalloc(sizeof (ipmp_illgrp_t), KM_NOSLEEP)) == NULL)
549e11c3f44Smeem return (NULL);
550e11c3f44Smeem
551e11c3f44Smeem list_create(&illg->ig_if, sizeof (ill_t), offsetof(ill_t, ill_grpnode));
552e11c3f44Smeem list_create(&illg->ig_actif, sizeof (ill_t),
553e11c3f44Smeem offsetof(ill_t, ill_actnode));
554e11c3f44Smeem list_create(&illg->ig_arpent, sizeof (ipmp_arpent_t),
555e11c3f44Smeem offsetof(ipmp_arpent_t, ia_node));
556e11c3f44Smeem
557e11c3f44Smeem illg->ig_ipmp_ill = ill;
558e11c3f44Smeem ill->ill_grp = illg;
559*1eee170aSErik Nordmark ipmp_illgrp_set_mtu(illg, mtu, mtu);
560e11c3f44Smeem
561e11c3f44Smeem return (illg);
562e11c3f44Smeem }
563e11c3f44Smeem
564e11c3f44Smeem /*
565e11c3f44Smeem * Destroy illgrp `illg', and disconnect it from its IPMP meta-interface.
566e11c3f44Smeem */
567e11c3f44Smeem void
ipmp_illgrp_destroy(ipmp_illgrp_t * illg)568e11c3f44Smeem ipmp_illgrp_destroy(ipmp_illgrp_t *illg)
569e11c3f44Smeem {
570e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
571e11c3f44Smeem ASSERT(IS_IPMP(illg->ig_ipmp_ill));
572e11c3f44Smeem
573e11c3f44Smeem /*
574e11c3f44Smeem * Verify `illg' is empty.
575e11c3f44Smeem */
576e11c3f44Smeem ASSERT(illg->ig_next_ill == NULL);
577e11c3f44Smeem ASSERT(illg->ig_cast_ill == NULL);
578e11c3f44Smeem ASSERT(list_is_empty(&illg->ig_arpent));
579e11c3f44Smeem ASSERT(list_is_empty(&illg->ig_if));
580e11c3f44Smeem ASSERT(list_is_empty(&illg->ig_actif));
581e11c3f44Smeem ASSERT(illg->ig_nactif == 0);
582e11c3f44Smeem
583e11c3f44Smeem /*
584e11c3f44Smeem * Destroy `illg'.
585e11c3f44Smeem */
586e11c3f44Smeem illg->ig_ipmp_ill->ill_grp = NULL;
587e11c3f44Smeem illg->ig_ipmp_ill = NULL;
588e11c3f44Smeem list_destroy(&illg->ig_if);
589e11c3f44Smeem list_destroy(&illg->ig_actif);
590e11c3f44Smeem list_destroy(&illg->ig_arpent);
591e11c3f44Smeem kmem_free(illg, sizeof (ipmp_illgrp_t));
592e11c3f44Smeem }
593e11c3f44Smeem
594e11c3f44Smeem /*
595e11c3f44Smeem * Add `ipif' to the pool of usable data addresses on `illg' and attempt to
596e11c3f44Smeem * bind it to an underlying ill, while keeping an even address distribution.
597e11c3f44Smeem * If the bind is successful, return a pointer to the bound ill.
598e11c3f44Smeem */
599e11c3f44Smeem ill_t *
ipmp_illgrp_add_ipif(ipmp_illgrp_t * illg,ipif_t * ipif)600e11c3f44Smeem ipmp_illgrp_add_ipif(ipmp_illgrp_t *illg, ipif_t *ipif)
601e11c3f44Smeem {
602e11c3f44Smeem ill_t *minill;
603e11c3f44Smeem ipmp_arpent_t *entp;
604e11c3f44Smeem
605e11c3f44Smeem ASSERT(IAM_WRITER_IPIF(ipif));
606e11c3f44Smeem ASSERT(ipmp_ipif_is_dataaddr(ipif));
607e11c3f44Smeem
608e11c3f44Smeem /*
609e11c3f44Smeem * IPMP data address mappings are internally managed by IP itself, so
610e11c3f44Smeem * delete any existing ARP entries associated with the address.
611e11c3f44Smeem */
612e11c3f44Smeem if (!ipif->ipif_isv6) {
613e11c3f44Smeem entp = ipmp_illgrp_lookup_arpent(illg, &ipif->ipif_lcl_addr);
614e11c3f44Smeem if (entp != NULL)
615e11c3f44Smeem ipmp_illgrp_destroy_arpent(illg, entp);
616e11c3f44Smeem }
617e11c3f44Smeem
618e11c3f44Smeem if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
619e11c3f44Smeem ipmp_ill_bind_ipif(minill, ipif, Res_act_none);
620e11c3f44Smeem
621e11c3f44Smeem return (ipif->ipif_bound ? ipif->ipif_bound_ill : NULL);
622e11c3f44Smeem }
623e11c3f44Smeem
624e11c3f44Smeem /*
625e11c3f44Smeem * Delete `ipif' from the pool of usable data addresses on `illg'. If it's
626e11c3f44Smeem * bound, unbind it from the underlying ill while keeping an even address
627e11c3f44Smeem * distribution.
628e11c3f44Smeem */
629e11c3f44Smeem void
ipmp_illgrp_del_ipif(ipmp_illgrp_t * illg,ipif_t * ipif)630e11c3f44Smeem ipmp_illgrp_del_ipif(ipmp_illgrp_t *illg, ipif_t *ipif)
631e11c3f44Smeem {
632e11c3f44Smeem ill_t *maxill, *boundill = ipif->ipif_bound_ill;
633e11c3f44Smeem
634e11c3f44Smeem ASSERT(IAM_WRITER_IPIF(ipif));
635e11c3f44Smeem
636e11c3f44Smeem if (boundill != NULL) {
637e11c3f44Smeem (void) ipmp_ill_unbind_ipif(boundill, ipif, B_FALSE);
638e11c3f44Smeem
639e11c3f44Smeem maxill = ipmp_illgrp_max_ill(illg);
640e11c3f44Smeem if (maxill->ill_bound_cnt > boundill->ill_bound_cnt + 1) {
641e11c3f44Smeem ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
642e11c3f44Smeem ipmp_ill_bind_ipif(boundill, ipif, Res_act_rebind);
643e11c3f44Smeem }
644e11c3f44Smeem }
645e11c3f44Smeem }
646e11c3f44Smeem
647e11c3f44Smeem /*
648e11c3f44Smeem * Return the active ill with the greatest number of data addresses in `illg'.
649e11c3f44Smeem */
650e11c3f44Smeem static ill_t *
ipmp_illgrp_max_ill(ipmp_illgrp_t * illg)651e11c3f44Smeem ipmp_illgrp_max_ill(ipmp_illgrp_t *illg)
652e11c3f44Smeem {
653e11c3f44Smeem ill_t *ill, *bestill = NULL;
654e11c3f44Smeem
655e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
656e11c3f44Smeem
657e11c3f44Smeem ill = list_head(&illg->ig_actif);
658e11c3f44Smeem for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
659e11c3f44Smeem if (bestill == NULL ||
660e11c3f44Smeem ill->ill_bound_cnt > bestill->ill_bound_cnt) {
661e11c3f44Smeem bestill = ill;
662e11c3f44Smeem }
663e11c3f44Smeem }
664e11c3f44Smeem return (bestill);
665e11c3f44Smeem }
666e11c3f44Smeem
667e11c3f44Smeem /*
668e11c3f44Smeem * Return the active ill with the fewest number of data addresses in `illg'.
669e11c3f44Smeem */
670e11c3f44Smeem static ill_t *
ipmp_illgrp_min_ill(ipmp_illgrp_t * illg)671e11c3f44Smeem ipmp_illgrp_min_ill(ipmp_illgrp_t *illg)
672e11c3f44Smeem {
673e11c3f44Smeem ill_t *ill, *bestill = NULL;
674e11c3f44Smeem
675e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
676e11c3f44Smeem
677e11c3f44Smeem ill = list_head(&illg->ig_actif);
678e11c3f44Smeem for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
679e11c3f44Smeem if (bestill == NULL ||
680e11c3f44Smeem ill->ill_bound_cnt < bestill->ill_bound_cnt) {
681e11c3f44Smeem if (ill->ill_bound_cnt == 0)
682e11c3f44Smeem return (ill); /* can't get better */
683e11c3f44Smeem bestill = ill;
684e11c3f44Smeem }
685e11c3f44Smeem }
686e11c3f44Smeem return (bestill);
687e11c3f44Smeem }
688e11c3f44Smeem
689e11c3f44Smeem /*
690e11c3f44Smeem * Return a pointer to IPMP meta-interface for `illg' (which must exist).
691e11c3f44Smeem * Since ig_ipmp_ill never changes for a given illg, no locks are needed.
692e11c3f44Smeem */
693e11c3f44Smeem ill_t *
ipmp_illgrp_ipmp_ill(ipmp_illgrp_t * illg)694e11c3f44Smeem ipmp_illgrp_ipmp_ill(ipmp_illgrp_t *illg)
695e11c3f44Smeem {
696e11c3f44Smeem return (illg->ig_ipmp_ill);
697e11c3f44Smeem }
698e11c3f44Smeem
699e11c3f44Smeem /*
700e11c3f44Smeem * Return a pointer to the next available underlying ill in `illg', or NULL if
701e11c3f44Smeem * one doesn't exist. Caller must be inside the IPSQ.
702e11c3f44Smeem */
703e11c3f44Smeem ill_t *
ipmp_illgrp_next_ill(ipmp_illgrp_t * illg)704e11c3f44Smeem ipmp_illgrp_next_ill(ipmp_illgrp_t *illg)
705e11c3f44Smeem {
706e11c3f44Smeem ill_t *ill;
707e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
708e11c3f44Smeem
709e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
710e11c3f44Smeem
711e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
712e11c3f44Smeem if ((ill = illg->ig_next_ill) != NULL) {
713e11c3f44Smeem illg->ig_next_ill = list_next(&illg->ig_actif, ill);
714e11c3f44Smeem if (illg->ig_next_ill == NULL)
715e11c3f44Smeem illg->ig_next_ill = list_head(&illg->ig_actif);
716e11c3f44Smeem }
717e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
718e11c3f44Smeem
719e11c3f44Smeem return (ill);
720e11c3f44Smeem }
721e11c3f44Smeem
722e11c3f44Smeem /*
723e11c3f44Smeem * Return a held pointer to the next available underlying ill in `illg', or
724e11c3f44Smeem * NULL if one doesn't exist. Caller need not be inside the IPSQ.
725e11c3f44Smeem */
726e11c3f44Smeem ill_t *
ipmp_illgrp_hold_next_ill(ipmp_illgrp_t * illg)727e11c3f44Smeem ipmp_illgrp_hold_next_ill(ipmp_illgrp_t *illg)
728e11c3f44Smeem {
729e11c3f44Smeem ill_t *ill;
730e11c3f44Smeem uint_t i;
731e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
732e11c3f44Smeem
733e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
734e11c3f44Smeem for (i = 0; i < illg->ig_nactif; i++) {
735e11c3f44Smeem ill = illg->ig_next_ill;
736e11c3f44Smeem illg->ig_next_ill = list_next(&illg->ig_actif, ill);
737e11c3f44Smeem if (illg->ig_next_ill == NULL)
738e11c3f44Smeem illg->ig_next_ill = list_head(&illg->ig_actif);
739e11c3f44Smeem
740bd670b35SErik Nordmark if (ill_check_and_refhold(ill)) {
741e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
742e11c3f44Smeem return (ill);
743e11c3f44Smeem }
744e11c3f44Smeem }
745e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
746e11c3f44Smeem
747e11c3f44Smeem return (NULL);
748e11c3f44Smeem }
749e11c3f44Smeem
750e11c3f44Smeem /*
751e11c3f44Smeem * Return a held pointer to the nominated multicast ill in `illg', or NULL if
752e11c3f44Smeem * one doesn't exist. Caller need not be inside the IPSQ.
753e11c3f44Smeem */
754e11c3f44Smeem ill_t *
ipmp_illgrp_hold_cast_ill(ipmp_illgrp_t * illg)755e11c3f44Smeem ipmp_illgrp_hold_cast_ill(ipmp_illgrp_t *illg)
756e11c3f44Smeem {
757e11c3f44Smeem ill_t *castill;
758e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
759e11c3f44Smeem
760e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_READER);
761e11c3f44Smeem castill = illg->ig_cast_ill;
762bd670b35SErik Nordmark if (castill != NULL && ill_check_and_refhold(castill)) {
763e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
764e11c3f44Smeem return (castill);
765e11c3f44Smeem }
766e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
767e11c3f44Smeem return (NULL);
768e11c3f44Smeem }
769e11c3f44Smeem
770e11c3f44Smeem /*
771e11c3f44Smeem * Set the nominated cast ill on `illg' to `castill'. If `castill' is NULL,
772e11c3f44Smeem * any existing nomination is removed. Caller must be inside the IPSQ.
773e11c3f44Smeem */
774e11c3f44Smeem static void
ipmp_illgrp_set_cast(ipmp_illgrp_t * illg,ill_t * castill)775e11c3f44Smeem ipmp_illgrp_set_cast(ipmp_illgrp_t *illg, ill_t *castill)
776e11c3f44Smeem {
777e11c3f44Smeem ill_t *ocastill = illg->ig_cast_ill;
778e11c3f44Smeem ill_t *ipmp_ill = illg->ig_ipmp_ill;
779e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
780e11c3f44Smeem
781e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ipmp_ill));
782e11c3f44Smeem
783e11c3f44Smeem /*
784e11c3f44Smeem * Disable old nominated ill (if any).
785e11c3f44Smeem */
786e11c3f44Smeem if (ocastill != NULL) {
787e11c3f44Smeem DTRACE_PROBE2(ipmp__illgrp__cast__disable, ipmp_illgrp_t *,
788e11c3f44Smeem illg, ill_t *, ocastill);
789e11c3f44Smeem ASSERT(ocastill->ill_nom_cast);
790e11c3f44Smeem ocastill->ill_nom_cast = B_FALSE;
791e11c3f44Smeem /*
792e11c3f44Smeem * If the IPMP meta-interface is down, we never did the join,
793e11c3f44Smeem * so we must not try to leave.
794e11c3f44Smeem */
795e11c3f44Smeem if (ipmp_ill->ill_dl_up)
796e11c3f44Smeem ill_leave_multicast(ipmp_ill);
797bd670b35SErik Nordmark
798bd670b35SErik Nordmark /*
799bd670b35SErik Nordmark * Delete any NCEs tied to the old nomination. We must do this
800bd670b35SErik Nordmark * last since ill_leave_multicast() may trigger IREs to be
801bd670b35SErik Nordmark * built using ig_cast_ill.
802bd670b35SErik Nordmark */
803bd670b35SErik Nordmark ncec_walk(ocastill, (pfi_t)ipmp_ncec_delete_nonlocal, ocastill,
804bd670b35SErik Nordmark ocastill->ill_ipst);
805e11c3f44Smeem }
806e11c3f44Smeem
807e11c3f44Smeem /*
808e11c3f44Smeem * Set new nomination.
809e11c3f44Smeem */
810e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
811e11c3f44Smeem illg->ig_cast_ill = castill;
812e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
813e11c3f44Smeem
814e11c3f44Smeem /*
815e11c3f44Smeem * Enable new nominated ill (if any).
816e11c3f44Smeem */
817e11c3f44Smeem if (castill != NULL) {
818e11c3f44Smeem DTRACE_PROBE2(ipmp__illgrp__cast__enable, ipmp_illgrp_t *,
819e11c3f44Smeem illg, ill_t *, castill);
820e11c3f44Smeem ASSERT(!castill->ill_nom_cast);
821e11c3f44Smeem castill->ill_nom_cast = B_TRUE;
822e11c3f44Smeem /*
823e11c3f44Smeem * If the IPMP meta-interface is down, the attempt to recover
824e11c3f44Smeem * will silently fail but ill_need_recover_multicast will be
825e11c3f44Smeem * erroneously cleared -- so check first.
826e11c3f44Smeem */
827e11c3f44Smeem if (ipmp_ill->ill_dl_up)
828e11c3f44Smeem ill_recover_multicast(ipmp_ill);
829e11c3f44Smeem }
830e11c3f44Smeem }
831e11c3f44Smeem
832e11c3f44Smeem /*
833e11c3f44Smeem * Create an IPMP ARP entry and add it to the set tracked on `illg'. If an
834e11c3f44Smeem * entry for the same IP address already exists, destroy it first. Return the
835e11c3f44Smeem * created IPMP ARP entry, or NULL on failure.
836e11c3f44Smeem */
837e11c3f44Smeem ipmp_arpent_t *
ipmp_illgrp_create_arpent(ipmp_illgrp_t * illg,boolean_t proxyarp,ipaddr_t ipaddr,uchar_t * lladdr,size_t lladdr_len,uint16_t flags)838bd670b35SErik Nordmark ipmp_illgrp_create_arpent(ipmp_illgrp_t *illg, boolean_t proxyarp,
839bd670b35SErik Nordmark ipaddr_t ipaddr, uchar_t *lladdr, size_t lladdr_len, uint16_t flags)
840e11c3f44Smeem {
841e11c3f44Smeem ipmp_arpent_t *entp, *oentp;
842e11c3f44Smeem
843e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
844e11c3f44Smeem
845bd670b35SErik Nordmark if ((entp = kmem_alloc(sizeof (ipmp_arpent_t) + lladdr_len,
846bd670b35SErik Nordmark KM_NOSLEEP)) == NULL)
847e11c3f44Smeem return (NULL);
848e11c3f44Smeem
849bd670b35SErik Nordmark /*
850bd670b35SErik Nordmark * Delete any existing ARP entry for this address.
851bd670b35SErik Nordmark */
852e11c3f44Smeem if ((oentp = ipmp_illgrp_lookup_arpent(illg, &entp->ia_ipaddr)) != NULL)
853e11c3f44Smeem ipmp_illgrp_destroy_arpent(illg, oentp);
854e11c3f44Smeem
855bd670b35SErik Nordmark /*
856bd670b35SErik Nordmark * Prepend the new entry.
857bd670b35SErik Nordmark */
858bd670b35SErik Nordmark entp->ia_ipaddr = ipaddr;
859bd670b35SErik Nordmark entp->ia_flags = flags;
860bd670b35SErik Nordmark entp->ia_lladdr_len = lladdr_len;
861bd670b35SErik Nordmark entp->ia_lladdr = (uchar_t *)&entp[1];
862bd670b35SErik Nordmark bcopy(lladdr, entp->ia_lladdr, lladdr_len);
863bd670b35SErik Nordmark entp->ia_proxyarp = proxyarp;
864bd670b35SErik Nordmark entp->ia_notified = B_TRUE;
865e11c3f44Smeem list_insert_head(&illg->ig_arpent, entp);
866e11c3f44Smeem return (entp);
867e11c3f44Smeem }
868e11c3f44Smeem
869e11c3f44Smeem /*
870e11c3f44Smeem * Remove IPMP ARP entry `entp' from the set tracked on `illg' and destroy it.
871e11c3f44Smeem */
872e11c3f44Smeem void
ipmp_illgrp_destroy_arpent(ipmp_illgrp_t * illg,ipmp_arpent_t * entp)873e11c3f44Smeem ipmp_illgrp_destroy_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp)
874e11c3f44Smeem {
875e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
876e11c3f44Smeem
877e11c3f44Smeem list_remove(&illg->ig_arpent, entp);
878bd670b35SErik Nordmark kmem_free(entp, sizeof (ipmp_arpent_t) + entp->ia_lladdr_len);
879e11c3f44Smeem }
880e11c3f44Smeem
881e11c3f44Smeem /*
882e11c3f44Smeem * Mark that ARP has been notified about the IP address on `entp'; `illg' is
883e11c3f44Smeem * taken as a debugging aid for DTrace FBT probes.
884e11c3f44Smeem */
885e11c3f44Smeem /* ARGSUSED */
886e11c3f44Smeem void
ipmp_illgrp_mark_arpent(ipmp_illgrp_t * illg,ipmp_arpent_t * entp)887e11c3f44Smeem ipmp_illgrp_mark_arpent(ipmp_illgrp_t *illg, ipmp_arpent_t *entp)
888e11c3f44Smeem {
889e11c3f44Smeem entp->ia_notified = B_TRUE;
890e11c3f44Smeem }
891e11c3f44Smeem
892e11c3f44Smeem /*
893e11c3f44Smeem * Look up the IPMP ARP entry for IP address `addrp' on `illg'; if `addrp' is
894e11c3f44Smeem * NULL, any IPMP ARP entry is requested. Return NULL if it does not exist.
895e11c3f44Smeem */
896e11c3f44Smeem ipmp_arpent_t *
ipmp_illgrp_lookup_arpent(ipmp_illgrp_t * illg,ipaddr_t * addrp)897e11c3f44Smeem ipmp_illgrp_lookup_arpent(ipmp_illgrp_t *illg, ipaddr_t *addrp)
898e11c3f44Smeem {
899e11c3f44Smeem ipmp_arpent_t *entp = list_head(&illg->ig_arpent);
900e11c3f44Smeem
901e11c3f44Smeem ASSERT(IAM_WRITER_ILL(illg->ig_ipmp_ill));
902e11c3f44Smeem
903e11c3f44Smeem if (addrp == NULL)
904e11c3f44Smeem return (entp);
905e11c3f44Smeem
906e11c3f44Smeem for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp))
907e11c3f44Smeem if (entp->ia_ipaddr == *addrp)
908e11c3f44Smeem break;
909e11c3f44Smeem return (entp);
910e11c3f44Smeem }
911e11c3f44Smeem
912e11c3f44Smeem /*
913e11c3f44Smeem * Refresh ARP entries on `illg' to be distributed across its active
914e11c3f44Smeem * interfaces. Entries that cannot be refreshed (e.g., because there are no
915e11c3f44Smeem * active interfaces) are marked so that subsequent calls can try again.
916e11c3f44Smeem */
917e11c3f44Smeem void
ipmp_illgrp_refresh_arpent(ipmp_illgrp_t * illg)918e11c3f44Smeem ipmp_illgrp_refresh_arpent(ipmp_illgrp_t *illg)
919e11c3f44Smeem {
920e11c3f44Smeem ill_t *ill, *ipmp_ill = illg->ig_ipmp_ill;
921e11c3f44Smeem uint_t paddrlen = ipmp_ill->ill_phys_addr_length;
922e11c3f44Smeem ipmp_arpent_t *entp;
923bd670b35SErik Nordmark ncec_t *ncec;
924bd670b35SErik Nordmark nce_t *nce;
925e11c3f44Smeem
926e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ipmp_ill));
927e11c3f44Smeem ASSERT(!ipmp_ill->ill_isv6);
928e11c3f44Smeem
929e11c3f44Smeem ill = list_head(&illg->ig_actif);
930e11c3f44Smeem entp = list_head(&illg->ig_arpent);
931e11c3f44Smeem for (; entp != NULL; entp = list_next(&illg->ig_arpent, entp)) {
932e11c3f44Smeem if (ill == NULL || ipmp_ill->ill_ipif_up_count == 0) {
933e11c3f44Smeem entp->ia_notified = B_FALSE;
934e11c3f44Smeem continue;
935e11c3f44Smeem }
936e11c3f44Smeem
937e11c3f44Smeem ASSERT(paddrlen == ill->ill_phys_addr_length);
938e11c3f44Smeem
939e11c3f44Smeem /*
940e11c3f44Smeem * If this is a proxy ARP entry, we can skip notifying ARP if
941e11c3f44Smeem * the entry is already up-to-date. If it has changed, we
942e11c3f44Smeem * update the entry's hardware address before notifying ARP.
943e11c3f44Smeem */
944e11c3f44Smeem if (entp->ia_proxyarp) {
945bd670b35SErik Nordmark if (bcmp(ill->ill_phys_addr, entp->ia_lladdr,
946bd670b35SErik Nordmark paddrlen) == 0 && entp->ia_notified)
947e11c3f44Smeem continue;
948bd670b35SErik Nordmark bcopy(ill->ill_phys_addr, entp->ia_lladdr, paddrlen);
949e11c3f44Smeem }
950e11c3f44Smeem
951bd670b35SErik Nordmark (void) nce_lookup_then_add_v4(ipmp_ill, entp->ia_lladdr,
952bd670b35SErik Nordmark paddrlen, &entp->ia_ipaddr, entp->ia_flags, ND_UNCHANGED,
953bd670b35SErik Nordmark &nce);
954bd670b35SErik Nordmark if (nce == NULL || !entp->ia_proxyarp) {
955bd670b35SErik Nordmark if (nce != NULL)
956bd670b35SErik Nordmark nce_refrele(nce);
957e11c3f44Smeem continue;
958e11c3f44Smeem }
959bd670b35SErik Nordmark ncec = nce->nce_common;
960bd670b35SErik Nordmark mutex_enter(&ncec->ncec_lock);
961bd670b35SErik Nordmark nce_update(ncec, ND_UNCHANGED, ill->ill_phys_addr);
962bd670b35SErik Nordmark mutex_exit(&ncec->ncec_lock);
963bd670b35SErik Nordmark nce_refrele(nce);
964e11c3f44Smeem ipmp_illgrp_mark_arpent(illg, entp);
965e11c3f44Smeem
966e11c3f44Smeem if ((ill = list_next(&illg->ig_actif, ill)) == NULL)
967e11c3f44Smeem ill = list_head(&illg->ig_actif);
968e11c3f44Smeem }
969e11c3f44Smeem }
970e11c3f44Smeem
971e11c3f44Smeem /*
972e11c3f44Smeem * Return an interface in `illg' with the specified `physaddr', or NULL if one
973e11c3f44Smeem * doesn't exist. Caller must hold ill_g_lock if it's not inside the IPSQ.
974e11c3f44Smeem */
975e11c3f44Smeem ill_t *
ipmp_illgrp_find_ill(ipmp_illgrp_t * illg,uchar_t * physaddr,uint_t paddrlen)976e11c3f44Smeem ipmp_illgrp_find_ill(ipmp_illgrp_t *illg, uchar_t *physaddr, uint_t paddrlen)
977e11c3f44Smeem {
978e11c3f44Smeem ill_t *ill;
979e11c3f44Smeem ill_t *ipmp_ill = illg->ig_ipmp_ill;
980e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
981e11c3f44Smeem
982e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ipmp_ill) || RW_LOCK_HELD(&ipst->ips_ill_g_lock));
983e11c3f44Smeem
984e11c3f44Smeem ill = list_head(&illg->ig_if);
985e11c3f44Smeem for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
986e11c3f44Smeem if (ill->ill_phys_addr_length == paddrlen &&
987e11c3f44Smeem bcmp(ill->ill_phys_addr, physaddr, paddrlen) == 0)
988e11c3f44Smeem return (ill);
989e11c3f44Smeem }
990e11c3f44Smeem return (NULL);
991e11c3f44Smeem }
992e11c3f44Smeem
993e11c3f44Smeem /*
994e11c3f44Smeem * Asynchronously update the MTU for an IPMP ill by injecting a DL_NOTIFY_IND.
995e11c3f44Smeem * Caller must be inside the IPSQ unless this is initialization.
996e11c3f44Smeem */
997e11c3f44Smeem static void
ipmp_illgrp_set_mtu(ipmp_illgrp_t * illg,uint_t mtu,uint_t mc_mtu)998*1eee170aSErik Nordmark ipmp_illgrp_set_mtu(ipmp_illgrp_t *illg, uint_t mtu, uint_t mc_mtu)
999e11c3f44Smeem {
1000e11c3f44Smeem ill_t *ill = illg->ig_ipmp_ill;
1001e11c3f44Smeem mblk_t *mp;
1002e11c3f44Smeem
1003e11c3f44Smeem ASSERT(illg->ig_mtu == 0 || IAM_WRITER_ILL(ill));
1004e11c3f44Smeem
1005e11c3f44Smeem /*
1006e11c3f44Smeem * If allocation fails, we have bigger problems than MTU.
1007e11c3f44Smeem */
1008*1eee170aSErik Nordmark if ((mp = ip_dlnotify_alloc2(DL_NOTE_SDU_SIZE2, mtu, mc_mtu)) != NULL) {
1009e11c3f44Smeem illg->ig_mtu = mtu;
1010*1eee170aSErik Nordmark illg->ig_mc_mtu = mc_mtu;
1011e11c3f44Smeem put(ill->ill_rq, mp);
1012e11c3f44Smeem }
1013e11c3f44Smeem }
1014e11c3f44Smeem
1015e11c3f44Smeem /*
1016e11c3f44Smeem * Recalculate the IPMP group MTU for `illg', and update its associated IPMP
1017e11c3f44Smeem * ill MTU if necessary.
1018e11c3f44Smeem */
1019e11c3f44Smeem void
ipmp_illgrp_refresh_mtu(ipmp_illgrp_t * illg)1020e11c3f44Smeem ipmp_illgrp_refresh_mtu(ipmp_illgrp_t *illg)
1021e11c3f44Smeem {
1022e11c3f44Smeem ill_t *ill;
1023e11c3f44Smeem ill_t *ipmp_ill = illg->ig_ipmp_ill;
1024e11c3f44Smeem uint_t mtu = 0;
1025*1eee170aSErik Nordmark uint_t mc_mtu = 0;
1026e11c3f44Smeem
1027e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ipmp_ill));
1028e11c3f44Smeem
1029e11c3f44Smeem /*
1030bd670b35SErik Nordmark * Since ill_mtu can only change under ill_lock, we hold ill_lock
1031e11c3f44Smeem * for each ill as we iterate through the list. Any changes to the
1032bd670b35SErik Nordmark * ill_mtu will also trigger an update, so even if we missed it
1033e11c3f44Smeem * this time around, the update will catch it.
1034e11c3f44Smeem */
1035e11c3f44Smeem ill = list_head(&illg->ig_if);
1036e11c3f44Smeem for (; ill != NULL; ill = list_next(&illg->ig_if, ill)) {
1037e11c3f44Smeem mutex_enter(&ill->ill_lock);
1038bd670b35SErik Nordmark if (mtu == 0 || ill->ill_mtu < mtu)
1039bd670b35SErik Nordmark mtu = ill->ill_mtu;
1040*1eee170aSErik Nordmark if (mc_mtu == 0 || ill->ill_mc_mtu < mc_mtu)
1041*1eee170aSErik Nordmark mc_mtu = ill->ill_mc_mtu;
1042e11c3f44Smeem mutex_exit(&ill->ill_lock);
1043e11c3f44Smeem }
1044e11c3f44Smeem
1045e11c3f44Smeem /*
1046e11c3f44Smeem * MTU must be at least the minimum MTU.
1047e11c3f44Smeem */
1048e11c3f44Smeem mtu = MAX(mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU);
1049*1eee170aSErik Nordmark mc_mtu = MAX(mc_mtu, ipmp_ill->ill_isv6 ? IPV6_MIN_MTU : IP_MIN_MTU);
1050*1eee170aSErik Nordmark if (illg->ig_mtu != mtu || illg->ig_mc_mtu != mc_mtu)
1051*1eee170aSErik Nordmark ipmp_illgrp_set_mtu(illg, mtu, mc_mtu);
1052e11c3f44Smeem }
1053e11c3f44Smeem
1054e11c3f44Smeem /*
1055e11c3f44Smeem * Link illgrp `illg' to IPMP group `grp'. To simplify the caller, silently
1056e11c3f44Smeem * allow the same link to be established more than once.
1057e11c3f44Smeem */
1058e11c3f44Smeem void
ipmp_illgrp_link_grp(ipmp_illgrp_t * illg,ipmp_grp_t * grp)1059e11c3f44Smeem ipmp_illgrp_link_grp(ipmp_illgrp_t *illg, ipmp_grp_t *grp)
1060e11c3f44Smeem {
1061e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
1062e11c3f44Smeem
1063e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
1064e11c3f44Smeem
1065e11c3f44Smeem if (illg->ig_ipmp_ill->ill_isv6) {
1066e11c3f44Smeem ASSERT(grp->gr_v6 == NULL || grp->gr_v6 == illg);
1067e11c3f44Smeem grp->gr_v6 = illg;
1068e11c3f44Smeem } else {
1069e11c3f44Smeem ASSERT(grp->gr_v4 == NULL || grp->gr_v4 == illg);
1070e11c3f44Smeem grp->gr_v4 = illg;
1071e11c3f44Smeem }
1072e11c3f44Smeem }
1073e11c3f44Smeem
1074e11c3f44Smeem /*
1075e11c3f44Smeem * Unlink illgrp `illg' from its IPMP group. Return an errno if the illgrp
1076e11c3f44Smeem * cannot be unlinked (e.g., because there are still interfaces using it).
1077e11c3f44Smeem */
1078e11c3f44Smeem int
ipmp_illgrp_unlink_grp(ipmp_illgrp_t * illg)1079e11c3f44Smeem ipmp_illgrp_unlink_grp(ipmp_illgrp_t *illg)
1080e11c3f44Smeem {
1081e11c3f44Smeem ipmp_grp_t *grp = illg->ig_ipmp_ill->ill_phyint->phyint_grp;
1082e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
1083e11c3f44Smeem
1084e11c3f44Smeem ASSERT(RW_WRITE_HELD(&ipst->ips_ipmp_lock));
1085e11c3f44Smeem
1086e11c3f44Smeem if (illg->ig_ipmp_ill->ill_isv6) {
1087e11c3f44Smeem if (grp->gr_nv6 + grp->gr_pendv6 != 0)
1088e11c3f44Smeem return (EBUSY);
1089e11c3f44Smeem grp->gr_v6 = NULL;
1090e11c3f44Smeem } else {
1091e11c3f44Smeem if (grp->gr_nv4 + grp->gr_pendv4 != 0)
1092e11c3f44Smeem return (EBUSY);
1093e11c3f44Smeem grp->gr_v4 = NULL;
1094e11c3f44Smeem }
1095e11c3f44Smeem return (0);
1096e11c3f44Smeem }
1097e11c3f44Smeem
1098e11c3f44Smeem /*
1099e11c3f44Smeem * Place `ill' into `illg', and rebalance the data addresses on `illg'
1100e11c3f44Smeem * to be spread evenly across the ills now in it. Also, adjust the IPMP
1101e11c3f44Smeem * ill as necessary to account for `ill' (e.g., MTU).
1102e11c3f44Smeem */
1103e11c3f44Smeem void
ipmp_ill_join_illgrp(ill_t * ill,ipmp_illgrp_t * illg)1104e11c3f44Smeem ipmp_ill_join_illgrp(ill_t *ill, ipmp_illgrp_t *illg)
1105e11c3f44Smeem {
1106e11c3f44Smeem ill_t *ipmp_ill;
1107e11c3f44Smeem ipif_t *ipif;
1108e11c3f44Smeem ip_stack_t *ipst = ill->ill_ipst;
1109e11c3f44Smeem
1110e11c3f44Smeem /* IS_UNDER_IPMP() requires ill_grp to be non-NULL */
1111e11c3f44Smeem ASSERT(!IS_IPMP(ill) && ill->ill_phyint->phyint_grp != NULL);
1112e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1113e11c3f44Smeem ASSERT(ill->ill_grp == NULL);
1114e11c3f44Smeem
1115e11c3f44Smeem ipmp_ill = illg->ig_ipmp_ill;
1116e11c3f44Smeem
1117e11c3f44Smeem /*
1118e11c3f44Smeem * Account for `ill' joining the illgrp.
1119e11c3f44Smeem */
1120e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1121e11c3f44Smeem if (ill->ill_isv6)
1122e11c3f44Smeem ill->ill_phyint->phyint_grp->gr_nv6++;
1123e11c3f44Smeem else
1124e11c3f44Smeem ill->ill_phyint->phyint_grp->gr_nv4++;
1125e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1126e11c3f44Smeem
1127e11c3f44Smeem /*
1128e11c3f44Smeem * Ensure the ILLF_ROUTER flag remains consistent across the group.
1129e11c3f44Smeem */
1130e11c3f44Smeem mutex_enter(&ill->ill_lock);
1131e11c3f44Smeem if (ipmp_ill->ill_flags & ILLF_ROUTER)
1132e11c3f44Smeem ill->ill_flags |= ILLF_ROUTER;
1133e11c3f44Smeem else
1134e11c3f44Smeem ill->ill_flags &= ~ILLF_ROUTER;
1135e11c3f44Smeem mutex_exit(&ill->ill_lock);
1136e11c3f44Smeem
1137e11c3f44Smeem /*
1138e11c3f44Smeem * Blow away all multicast memberships that currently exist on `ill'.
1139e11c3f44Smeem * This may seem odd, but it's consistent with the application view
1140e11c3f44Smeem * that `ill' no longer exists (e.g., due to ipmp_ill_rtsaddrmsg()).
1141f1c454b4SSowmini Varadhan * The ill_grp_pending bit prevents multicast group joins after
1142f1c454b4SSowmini Varadhan * update_conn_ill() and before ill_grp assignment.
1143e11c3f44Smeem */
1144f1c454b4SSowmini Varadhan mutex_enter(&ill->ill_mcast_serializer);
1145f1c454b4SSowmini Varadhan ill->ill_grp_pending = 1;
1146f1c454b4SSowmini Varadhan mutex_exit(&ill->ill_mcast_serializer);
1147bd670b35SErik Nordmark update_conn_ill(ill, ill->ill_ipst);
1148e11c3f44Smeem if (ill->ill_isv6) {
1149e11c3f44Smeem reset_mrt_ill(ill);
1150e11c3f44Smeem } else {
1151e11c3f44Smeem ipif = ill->ill_ipif;
1152e11c3f44Smeem for (; ipif != NULL; ipif = ipif->ipif_next) {
1153e11c3f44Smeem reset_mrt_vif_ipif(ipif);
1154e11c3f44Smeem }
1155e11c3f44Smeem }
1156e11c3f44Smeem ip_purge_allmulti(ill);
1157e11c3f44Smeem
1158e11c3f44Smeem /*
1159e11c3f44Smeem * Borrow the first ill's ill_phys_addr_length value for the illgrp's
1160e11c3f44Smeem * physical address length. All other ills must have the same value,
1161e11c3f44Smeem * since they are required to all be the same mactype. Also update
1162e11c3f44Smeem * the IPMP ill's MTU and CoS marking, if necessary.
1163e11c3f44Smeem */
1164e11c3f44Smeem if (list_is_empty(&illg->ig_if)) {
1165e11c3f44Smeem ASSERT(ipmp_ill->ill_phys_addr_length == 0);
1166e11c3f44Smeem /*
1167e11c3f44Smeem * NOTE: we leave ill_phys_addr NULL since the IPMP group
1168e11c3f44Smeem * doesn't have a physical address. This means that code must
1169e11c3f44Smeem * not assume that ill_phys_addr is non-NULL just because
1170e11c3f44Smeem * ill_phys_addr_length is non-zero. Likewise for ill_nd_lla.
1171e11c3f44Smeem */
1172e11c3f44Smeem ipmp_ill->ill_phys_addr_length = ill->ill_phys_addr_length;
1173e11c3f44Smeem ipmp_ill->ill_nd_lla_len = ill->ill_phys_addr_length;
1174e11c3f44Smeem ipmp_ill->ill_type = ill->ill_type;
1175e11c3f44Smeem
1176e11c3f44Smeem if (ill->ill_flags & ILLF_COS_ENABLED) {
1177e11c3f44Smeem mutex_enter(&ipmp_ill->ill_lock);
1178e11c3f44Smeem ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
1179e11c3f44Smeem mutex_exit(&ipmp_ill->ill_lock);
1180e11c3f44Smeem }
1181*1eee170aSErik Nordmark ipmp_illgrp_set_mtu(illg, ill->ill_mtu, ill->ill_mc_mtu);
1182e11c3f44Smeem } else {
1183e11c3f44Smeem ASSERT(ipmp_ill->ill_phys_addr_length ==
1184e11c3f44Smeem ill->ill_phys_addr_length);
1185e11c3f44Smeem ASSERT(ipmp_ill->ill_type == ill->ill_type);
1186e11c3f44Smeem
1187e11c3f44Smeem if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
1188e11c3f44Smeem mutex_enter(&ipmp_ill->ill_lock);
1189e11c3f44Smeem ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
1190e11c3f44Smeem mutex_exit(&ipmp_ill->ill_lock);
1191e11c3f44Smeem }
1192*1eee170aSErik Nordmark if (illg->ig_mtu > ill->ill_mtu ||
1193*1eee170aSErik Nordmark illg->ig_mc_mtu > ill->ill_mc_mtu) {
1194*1eee170aSErik Nordmark ipmp_illgrp_set_mtu(illg, ill->ill_mtu,
1195*1eee170aSErik Nordmark ill->ill_mc_mtu);
1196*1eee170aSErik Nordmark }
1197e11c3f44Smeem }
1198e11c3f44Smeem
1199e11c3f44Smeem rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
1200e11c3f44Smeem list_insert_tail(&illg->ig_if, ill);
1201e11c3f44Smeem ill->ill_grp = illg;
1202e11c3f44Smeem rw_exit(&ipst->ips_ill_g_lock);
1203e11c3f44Smeem
1204f1c454b4SSowmini Varadhan mutex_enter(&ill->ill_mcast_serializer);
1205f1c454b4SSowmini Varadhan ill->ill_grp_pending = 0;
1206f1c454b4SSowmini Varadhan mutex_exit(&ill->ill_mcast_serializer);
1207f1c454b4SSowmini Varadhan
1208e11c3f44Smeem /*
1209e11c3f44Smeem * Hide the IREs on `ill' so that we don't accidentally find them when
1210e11c3f44Smeem * sending data traffic.
1211e11c3f44Smeem */
1212e11c3f44Smeem ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_mark_testhidden, ill, ill);
1213e11c3f44Smeem
1214e11c3f44Smeem ipmp_ill_refresh_active(ill);
1215e11c3f44Smeem }
1216e11c3f44Smeem
1217e11c3f44Smeem /*
1218e11c3f44Smeem * Remove `ill' from its illgrp, and rebalance the data addresses in that
1219e11c3f44Smeem * illgrp to be spread evenly across the remaining ills. Also, adjust the
1220e11c3f44Smeem * IPMP ill as necessary now that `ill' is removed (e.g., MTU).
1221e11c3f44Smeem */
1222e11c3f44Smeem void
ipmp_ill_leave_illgrp(ill_t * ill)1223e11c3f44Smeem ipmp_ill_leave_illgrp(ill_t *ill)
1224e11c3f44Smeem {
1225e11c3f44Smeem ill_t *ipmp_ill;
1226e11c3f44Smeem ipif_t *ipif;
1227e11c3f44Smeem ipmp_arpent_t *entp;
1228e11c3f44Smeem ipmp_illgrp_t *illg = ill->ill_grp;
1229e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
1230e11c3f44Smeem
1231e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill));
1232e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1233e11c3f44Smeem ASSERT(illg != NULL);
1234e11c3f44Smeem
1235e11c3f44Smeem ipmp_ill = illg->ig_ipmp_ill;
1236e11c3f44Smeem
1237e11c3f44Smeem /*
1238e11c3f44Smeem * Cancel IPMP-specific ill timeouts.
1239e11c3f44Smeem */
1240e11c3f44Smeem (void) untimeout(ill->ill_refresh_tid);
1241e11c3f44Smeem
1242e11c3f44Smeem /*
1243e11c3f44Smeem * Expose any previously-hidden IREs on `ill'.
1244e11c3f44Smeem */
1245e11c3f44Smeem ire_walk_ill(MATCH_IRE_ILL, 0, ipmp_ill_ire_clear_testhidden, ill, ill);
1246e11c3f44Smeem
1247e11c3f44Smeem /*
1248e11c3f44Smeem * Ensure the multicast state for each ipif on `ill' is down so that
1249e11c3f44Smeem * our ipif_multicast_up() (once `ill' leaves the group) will rejoin
1250e11c3f44Smeem * all eligible groups.
1251e11c3f44Smeem */
1252e11c3f44Smeem for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
1253e11c3f44Smeem if (ipif->ipif_flags & IPIF_UP)
1254e11c3f44Smeem ipif_multicast_down(ipif);
1255e11c3f44Smeem
1256e11c3f44Smeem /*
1257e11c3f44Smeem * Account for `ill' leaving the illgrp.
1258e11c3f44Smeem */
1259e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1260e11c3f44Smeem if (ill->ill_isv6)
1261e11c3f44Smeem ill->ill_phyint->phyint_grp->gr_nv6--;
1262e11c3f44Smeem else
1263e11c3f44Smeem ill->ill_phyint->phyint_grp->gr_nv4--;
1264e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1265e11c3f44Smeem
1266e11c3f44Smeem /*
1267e11c3f44Smeem * Pull `ill' out of the interface lists.
1268e11c3f44Smeem */
1269e11c3f44Smeem if (list_link_active(&ill->ill_actnode))
1270e11c3f44Smeem ipmp_ill_deactivate(ill);
1271e11c3f44Smeem rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
1272e11c3f44Smeem list_remove(&illg->ig_if, ill);
1273e11c3f44Smeem ill->ill_grp = NULL;
1274e11c3f44Smeem rw_exit(&ipst->ips_ill_g_lock);
1275e11c3f44Smeem
1276e11c3f44Smeem /*
1277e11c3f44Smeem * Re-establish multicast memberships that were previously being
1278e11c3f44Smeem * handled by the IPMP meta-interface.
1279e11c3f44Smeem */
1280e11c3f44Smeem for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
1281e11c3f44Smeem if (ipif->ipif_flags & IPIF_UP)
1282e11c3f44Smeem ipif_multicast_up(ipif);
1283e11c3f44Smeem
1284e11c3f44Smeem /*
1285e11c3f44Smeem * Refresh the group MTU based on the new interface list.
1286e11c3f44Smeem */
1287e11c3f44Smeem ipmp_illgrp_refresh_mtu(illg);
1288e11c3f44Smeem
1289e11c3f44Smeem if (list_is_empty(&illg->ig_if)) {
1290e11c3f44Smeem /*
1291e11c3f44Smeem * No ills left in the illgrp; we no longer have a physical
1292e11c3f44Smeem * address length, nor can we support ARP, CoS, or anything
1293e11c3f44Smeem * else that depends on knowing the link layer type.
1294e11c3f44Smeem */
1295e11c3f44Smeem while ((entp = ipmp_illgrp_lookup_arpent(illg, NULL)) != NULL)
1296e11c3f44Smeem ipmp_illgrp_destroy_arpent(illg, entp);
1297e11c3f44Smeem
1298e11c3f44Smeem ipmp_ill->ill_phys_addr_length = 0;
1299e11c3f44Smeem ipmp_ill->ill_nd_lla_len = 0;
1300e11c3f44Smeem ipmp_ill->ill_type = IFT_OTHER;
1301e11c3f44Smeem mutex_enter(&ipmp_ill->ill_lock);
1302e11c3f44Smeem ipmp_ill->ill_flags &= ~ILLF_COS_ENABLED;
1303e11c3f44Smeem mutex_exit(&ipmp_ill->ill_lock);
1304e11c3f44Smeem } else {
1305e11c3f44Smeem /*
1306e11c3f44Smeem * If `ill' didn't support CoS, see if it can now be enabled.
1307e11c3f44Smeem */
1308e11c3f44Smeem if (!(ill->ill_flags & ILLF_COS_ENABLED)) {
1309e11c3f44Smeem ASSERT(!(ipmp_ill->ill_flags & ILLF_COS_ENABLED));
1310e11c3f44Smeem
1311e11c3f44Smeem ill = list_head(&illg->ig_if);
1312e11c3f44Smeem do {
1313e11c3f44Smeem if (!(ill->ill_flags & ILLF_COS_ENABLED))
1314e11c3f44Smeem break;
1315e11c3f44Smeem } while ((ill = list_next(&illg->ig_if, ill)) != NULL);
1316e11c3f44Smeem
1317e11c3f44Smeem if (ill == NULL) {
1318e11c3f44Smeem mutex_enter(&ipmp_ill->ill_lock);
1319e11c3f44Smeem ipmp_ill->ill_flags |= ILLF_COS_ENABLED;
1320e11c3f44Smeem mutex_exit(&ipmp_ill->ill_lock);
1321e11c3f44Smeem }
1322e11c3f44Smeem }
1323e11c3f44Smeem }
1324e11c3f44Smeem }
1325e11c3f44Smeem
1326e11c3f44Smeem /*
1327e11c3f44Smeem * Check if `ill' should be active, and activate or deactivate if need be.
1328e11c3f44Smeem * Return B_FALSE if a refresh was necessary but could not be performed.
1329e11c3f44Smeem */
1330e11c3f44Smeem static boolean_t
ipmp_ill_try_refresh_active(ill_t * ill)1331e11c3f44Smeem ipmp_ill_try_refresh_active(ill_t *ill)
1332e11c3f44Smeem {
1333e11c3f44Smeem boolean_t refreshed = B_TRUE;
1334e11c3f44Smeem
1335e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1336e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill));
1337e11c3f44Smeem
1338e11c3f44Smeem if (ipmp_ill_is_active(ill)) {
1339e11c3f44Smeem if (!list_link_active(&ill->ill_actnode))
1340e11c3f44Smeem refreshed = ipmp_ill_activate(ill);
1341e11c3f44Smeem } else {
1342e11c3f44Smeem if (list_link_active(&ill->ill_actnode))
1343e11c3f44Smeem ipmp_ill_deactivate(ill);
1344e11c3f44Smeem }
1345e11c3f44Smeem
1346e11c3f44Smeem return (refreshed);
1347e11c3f44Smeem }
1348e11c3f44Smeem
1349e11c3f44Smeem /*
1350e11c3f44Smeem * Check if `ill' should be active, and activate or deactivate if need be.
1351e11c3f44Smeem * If the refresh fails, schedule a timer to try again later.
1352e11c3f44Smeem */
1353e11c3f44Smeem void
ipmp_ill_refresh_active(ill_t * ill)1354e11c3f44Smeem ipmp_ill_refresh_active(ill_t *ill)
1355e11c3f44Smeem {
1356e11c3f44Smeem if (!ipmp_ill_try_refresh_active(ill))
1357e11c3f44Smeem ipmp_ill_refresh_active_timer_start(ill);
1358e11c3f44Smeem }
1359e11c3f44Smeem
1360e11c3f44Smeem /*
1361e11c3f44Smeem * Retry ipmp_ill_try_refresh_active() on the ill named by `ill_arg'.
1362e11c3f44Smeem */
1363e11c3f44Smeem static void
ipmp_ill_refresh_active_timer(void * ill_arg)1364e11c3f44Smeem ipmp_ill_refresh_active_timer(void *ill_arg)
1365e11c3f44Smeem {
1366e11c3f44Smeem ill_t *ill = ill_arg;
1367e11c3f44Smeem boolean_t refreshed = B_FALSE;
1368e11c3f44Smeem
1369e11c3f44Smeem /*
1370e11c3f44Smeem * Clear ill_refresh_tid to indicate that no timeout is pending
1371e11c3f44Smeem * (another thread could schedule a new timeout while we're still
1372e11c3f44Smeem * running, but that's harmless). If the ill is going away, bail.
1373e11c3f44Smeem */
1374e11c3f44Smeem mutex_enter(&ill->ill_lock);
1375e11c3f44Smeem ill->ill_refresh_tid = 0;
1376e11c3f44Smeem if (ill->ill_state_flags & ILL_CONDEMNED) {
1377e11c3f44Smeem mutex_exit(&ill->ill_lock);
1378e11c3f44Smeem return;
1379e11c3f44Smeem }
1380e11c3f44Smeem mutex_exit(&ill->ill_lock);
1381e11c3f44Smeem
1382e11c3f44Smeem if (ipsq_try_enter(NULL, ill, NULL, NULL, NULL, NEW_OP, B_FALSE)) {
1383e11c3f44Smeem refreshed = ipmp_ill_try_refresh_active(ill);
1384e11c3f44Smeem ipsq_exit(ill->ill_phyint->phyint_ipsq);
1385e11c3f44Smeem }
1386e11c3f44Smeem
1387e11c3f44Smeem /*
1388e11c3f44Smeem * If the refresh failed, schedule another attempt.
1389e11c3f44Smeem */
1390e11c3f44Smeem if (!refreshed)
1391e11c3f44Smeem ipmp_ill_refresh_active_timer_start(ill);
1392e11c3f44Smeem }
1393e11c3f44Smeem
1394e11c3f44Smeem /*
1395e11c3f44Smeem * Retry an ipmp_ill_try_refresh_active() on the ill named by `arg'.
1396e11c3f44Smeem */
1397e11c3f44Smeem static void
ipmp_ill_refresh_active_timer_start(ill_t * ill)1398e11c3f44Smeem ipmp_ill_refresh_active_timer_start(ill_t *ill)
1399e11c3f44Smeem {
1400e11c3f44Smeem mutex_enter(&ill->ill_lock);
1401e11c3f44Smeem
1402e11c3f44Smeem /*
1403e11c3f44Smeem * If the ill is going away or a refresh is already scheduled, bail.
1404e11c3f44Smeem */
1405e11c3f44Smeem if (ill->ill_refresh_tid != 0 ||
1406e11c3f44Smeem (ill->ill_state_flags & ILL_CONDEMNED)) {
1407e11c3f44Smeem mutex_exit(&ill->ill_lock);
1408e11c3f44Smeem return;
1409e11c3f44Smeem }
1410e11c3f44Smeem
1411e11c3f44Smeem ill->ill_refresh_tid = timeout(ipmp_ill_refresh_active_timer, ill,
1412e11c3f44Smeem SEC_TO_TICK(IPMP_ILL_REFRESH_TIMEOUT));
1413e11c3f44Smeem
1414e11c3f44Smeem mutex_exit(&ill->ill_lock);
1415e11c3f44Smeem }
1416e11c3f44Smeem
1417e11c3f44Smeem /*
1418e11c3f44Smeem * Activate `ill' so it will be used to send and receive data traffic. Return
1419e11c3f44Smeem * B_FALSE if `ill' cannot be activated. Note that we allocate any messages
1420e11c3f44Smeem * needed to deactivate `ill' here as well so that deactivation cannot fail.
1421e11c3f44Smeem */
1422e11c3f44Smeem static boolean_t
ipmp_ill_activate(ill_t * ill)1423e11c3f44Smeem ipmp_ill_activate(ill_t *ill)
1424e11c3f44Smeem {
1425e11c3f44Smeem ipif_t *ipif;
1426e11c3f44Smeem mblk_t *linkupmp = NULL, *linkdownmp = NULL;
1427e11c3f44Smeem ipmp_grp_t *grp = ill->ill_phyint->phyint_grp;
1428e11c3f44Smeem ipmp_illgrp_t *illg = ill->ill_grp;
1429e11c3f44Smeem ill_t *maxill;
1430e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
1431e11c3f44Smeem
1432e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1433e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill));
1434e11c3f44Smeem
1435e11c3f44Smeem /*
1436e11c3f44Smeem * If this will be the first active interface in the group, allocate
1437e11c3f44Smeem * the link-up and link-down messages.
1438e11c3f44Smeem */
1439e11c3f44Smeem if (grp->gr_nactif == 0) {
1440e11c3f44Smeem linkupmp = ip_dlnotify_alloc(DL_NOTE_LINK_UP, 0);
1441e11c3f44Smeem linkdownmp = ip_dlnotify_alloc(DL_NOTE_LINK_DOWN, 0);
1442e11c3f44Smeem if (linkupmp == NULL || linkdownmp == NULL)
1443e11c3f44Smeem goto fail;
1444e11c3f44Smeem }
1445e11c3f44Smeem
1446e11c3f44Smeem if (list_is_empty(&illg->ig_actif)) {
1447e11c3f44Smeem /*
1448e11c3f44Smeem * Now that we have an active ill, nominate it for multicast
1449e11c3f44Smeem * and broadcast duties. Do this before ipmp_ill_bind_ipif()
1450e11c3f44Smeem * since that may need to send multicast packets (e.g., IPv6
1451e11c3f44Smeem * neighbor discovery probes).
1452e11c3f44Smeem */
1453e11c3f44Smeem ipmp_illgrp_set_cast(illg, ill);
1454e11c3f44Smeem
1455e11c3f44Smeem /*
1456e11c3f44Smeem * This is the first active ill in the illgrp -- add 'em all.
1457e11c3f44Smeem * We can access/walk ig_ipmp_ill's ipif list since we're
1458e11c3f44Smeem * writer on its IPSQ as well.
1459e11c3f44Smeem */
1460e11c3f44Smeem ipif = illg->ig_ipmp_ill->ill_ipif;
1461e11c3f44Smeem for (; ipif != NULL; ipif = ipif->ipif_next)
1462e11c3f44Smeem if (ipmp_ipif_is_up_dataaddr(ipif))
1463e11c3f44Smeem ipmp_ill_bind_ipif(ill, ipif, Res_act_initial);
1464e11c3f44Smeem } else {
1465e11c3f44Smeem /*
1466e11c3f44Smeem * Redistribute the addresses by moving them from the ill with
1467e11c3f44Smeem * the most addresses until the ill being activated is at the
1468e11c3f44Smeem * same level as the rest of the ills.
1469e11c3f44Smeem */
1470e11c3f44Smeem for (;;) {
1471e11c3f44Smeem maxill = ipmp_illgrp_max_ill(illg);
1472e11c3f44Smeem ASSERT(maxill != NULL);
1473e11c3f44Smeem if (ill->ill_bound_cnt + 1 >= maxill->ill_bound_cnt)
1474e11c3f44Smeem break;
1475e11c3f44Smeem ipif = ipmp_ill_unbind_ipif(maxill, NULL, B_TRUE);
1476e11c3f44Smeem ipmp_ill_bind_ipif(ill, ipif, Res_act_rebind);
1477e11c3f44Smeem }
1478e11c3f44Smeem }
1479e11c3f44Smeem
1480e11c3f44Smeem /*
1481e11c3f44Smeem * Put the interface in the active list.
1482e11c3f44Smeem */
1483e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1484e11c3f44Smeem list_insert_tail(&illg->ig_actif, ill);
1485e11c3f44Smeem illg->ig_nactif++;
1486e11c3f44Smeem illg->ig_next_ill = ill;
1487e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1488e11c3f44Smeem
1489e11c3f44Smeem /*
1490bd670b35SErik Nordmark * Refresh static/proxy ARP entries to use `ill', if need be.
1491e11c3f44Smeem */
1492e11c3f44Smeem if (!ill->ill_isv6)
1493e11c3f44Smeem ipmp_illgrp_refresh_arpent(illg);
1494e11c3f44Smeem
1495e11c3f44Smeem /*
1496e11c3f44Smeem * Finally, mark the group link up, if necessary.
1497e11c3f44Smeem */
1498e11c3f44Smeem if (grp->gr_nactif++ == 0) {
1499e11c3f44Smeem ASSERT(grp->gr_linkdownmp == NULL);
1500e11c3f44Smeem grp->gr_linkdownmp = linkdownmp;
1501e11c3f44Smeem put(illg->ig_ipmp_ill->ill_rq, linkupmp);
1502e11c3f44Smeem }
1503e11c3f44Smeem return (B_TRUE);
1504e11c3f44Smeem fail:
1505e11c3f44Smeem freemsg(linkupmp);
1506e11c3f44Smeem freemsg(linkdownmp);
1507e11c3f44Smeem return (B_FALSE);
1508e11c3f44Smeem }
1509e11c3f44Smeem
1510e11c3f44Smeem /*
1511e11c3f44Smeem * Deactivate `ill' so it will not be used to send or receive data traffic.
1512e11c3f44Smeem */
1513e11c3f44Smeem static void
ipmp_ill_deactivate(ill_t * ill)1514e11c3f44Smeem ipmp_ill_deactivate(ill_t *ill)
1515e11c3f44Smeem {
15161f19738eSmeem ill_t *minill, *ipmp_ill;
1517e11c3f44Smeem ipif_t *ipif, *ubnextipif, *ubheadipif = NULL;
1518e11c3f44Smeem mblk_t *mp;
1519e11c3f44Smeem ipmp_grp_t *grp = ill->ill_phyint->phyint_grp;
1520e11c3f44Smeem ipmp_illgrp_t *illg = ill->ill_grp;
1521e11c3f44Smeem ip_stack_t *ipst = IPMP_ILLGRP_TO_IPST(illg);
1522e11c3f44Smeem
1523e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1524e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill));
1525e11c3f44Smeem
15261f19738eSmeem ipmp_ill = illg->ig_ipmp_ill;
15271f19738eSmeem
1528e11c3f44Smeem /*
1529e11c3f44Smeem * Pull the interface out of the active list.
1530e11c3f44Smeem */
1531e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1532e11c3f44Smeem list_remove(&illg->ig_actif, ill);
1533e11c3f44Smeem illg->ig_nactif--;
1534e11c3f44Smeem illg->ig_next_ill = list_head(&illg->ig_actif);
1535e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1536e11c3f44Smeem
1537e11c3f44Smeem /*
1538e11c3f44Smeem * If the ill that's being deactivated had been nominated for
1539e11c3f44Smeem * multicast/broadcast, nominate a new one.
1540e11c3f44Smeem */
1541e11c3f44Smeem if (ill == illg->ig_cast_ill)
1542e11c3f44Smeem ipmp_illgrp_set_cast(illg, list_head(&illg->ig_actif));
1543e11c3f44Smeem
1544e11c3f44Smeem /*
1545bd670b35SErik Nordmark * Delete all nce_t entries using this ill, so that the next attempt
1546bd670b35SErik Nordmark * to send data traffic will revalidate cached nce's.
1547bd670b35SErik Nordmark */
1548bd670b35SErik Nordmark nce_flush(ill, B_TRUE);
1549bd670b35SErik Nordmark
1550bd670b35SErik Nordmark /*
1551e11c3f44Smeem * Unbind all of the ipifs bound to this ill, and save 'em in a list;
1552e11c3f44Smeem * we'll rebind them after we tell the resolver the ill is no longer
1553e11c3f44Smeem * active. We must do things in this order or the resolver could
1554e11c3f44Smeem * accidentally rebind to the ill we're trying to remove if multiple
1555e11c3f44Smeem * ills in the group have the same hardware address (which is
1556e11c3f44Smeem * unsupported, but shouldn't lead to a wedged machine).
1557e11c3f44Smeem */
1558e11c3f44Smeem while ((ipif = ipmp_ill_unbind_ipif(ill, NULL, B_TRUE)) != NULL) {
1559e11c3f44Smeem ipif->ipif_bound_next = ubheadipif;
1560e11c3f44Smeem ubheadipif = ipif;
1561e11c3f44Smeem }
1562e11c3f44Smeem
15631f19738eSmeem if (!ill->ill_isv6) {
1564e11c3f44Smeem /*
1565bd670b35SErik Nordmark * Refresh static/proxy ARP entries that had been using `ill'.
1566e11c3f44Smeem */
1567e11c3f44Smeem ipmp_illgrp_refresh_arpent(illg);
1568e11c3f44Smeem }
1569e11c3f44Smeem
1570e11c3f44Smeem /*
1571e11c3f44Smeem * Rebind each ipif from the deactivated ill to the active ill with
1572e11c3f44Smeem * the fewest ipifs. If there are no active ills, the ipifs will
1573e11c3f44Smeem * remain unbound.
1574e11c3f44Smeem */
1575e11c3f44Smeem for (ipif = ubheadipif; ipif != NULL; ipif = ubnextipif) {
1576e11c3f44Smeem ubnextipif = ipif->ipif_bound_next;
1577e11c3f44Smeem ipif->ipif_bound_next = NULL;
1578e11c3f44Smeem
1579e11c3f44Smeem if ((minill = ipmp_illgrp_min_ill(illg)) != NULL)
1580e11c3f44Smeem ipmp_ill_bind_ipif(minill, ipif, Res_act_rebind);
1581e11c3f44Smeem }
1582e11c3f44Smeem
1583bd670b35SErik Nordmark /*
15841f19738eSmeem * Remove any IRE_IF_CLONEs for this ill since they might have an
15851f19738eSmeem * ire_nce_cache/nce_common which refers to another ill in the group.
1586bd670b35SErik Nordmark */
15871f19738eSmeem ire_walk_ill(MATCH_IRE_TYPE, IRE_IF_CLONE, ill_downi_if_clone, ill,
15881f19738eSmeem ill);
1589bd670b35SErik Nordmark
1590e11c3f44Smeem /*
15911f19738eSmeem * Finally, if there are no longer any active interfaces, then delete
15921f19738eSmeem * any NCECs associated with the group and mark the group link down.
1593e11c3f44Smeem */
1594e11c3f44Smeem if (--grp->gr_nactif == 0) {
15951f19738eSmeem ncec_walk(ipmp_ill, (pfi_t)ncec_delete_per_ill, ipmp_ill, ipst);
1596e11c3f44Smeem mp = grp->gr_linkdownmp;
1597e11c3f44Smeem grp->gr_linkdownmp = NULL;
1598e11c3f44Smeem ASSERT(mp != NULL);
15991f19738eSmeem put(ipmp_ill->ill_rq, mp);
1600e11c3f44Smeem }
1601e11c3f44Smeem }
1602e11c3f44Smeem
1603e11c3f44Smeem /*
1604e11c3f44Smeem * Send the routing socket messages needed to make `ill' "appear" (RTM_ADD)
1605e11c3f44Smeem * or "disappear" (RTM_DELETE) to non-IPMP-aware routing socket listeners.
1606e11c3f44Smeem */
1607e11c3f44Smeem static void
ipmp_ill_rtsaddrmsg(ill_t * ill,int cmd)1608e11c3f44Smeem ipmp_ill_rtsaddrmsg(ill_t *ill, int cmd)
1609e11c3f44Smeem {
1610e11c3f44Smeem ipif_t *ipif;
1611e11c3f44Smeem
1612e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1613e11c3f44Smeem ASSERT(cmd == RTM_ADD || cmd == RTM_DELETE);
1614e11c3f44Smeem
1615e11c3f44Smeem /*
1616e11c3f44Smeem * If `ill' is truly down, there are no messages to generate since:
1617e11c3f44Smeem *
1618e11c3f44Smeem * 1. If cmd == RTM_DELETE, then we're supposed to hide the interface
1619e11c3f44Smeem * and its addresses by bringing them down. But that's already
1620e11c3f44Smeem * true, so there's nothing to hide.
1621e11c3f44Smeem *
1622e11c3f44Smeem * 2. If cmd == RTM_ADD, then we're supposed to generate messages
1623e11c3f44Smeem * indicating that any previously-hidden up addresses are again
1624e11c3f44Smeem * back up (along with the interface). But they aren't, so
1625e11c3f44Smeem * there's nothing to expose.
1626e11c3f44Smeem */
1627e11c3f44Smeem if (ill->ill_ipif_up_count == 0)
1628e11c3f44Smeem return;
1629e11c3f44Smeem
1630e11c3f44Smeem if (cmd == RTM_ADD)
1631e11c3f44Smeem ip_rts_xifmsg(ill->ill_ipif, IPIF_UP, 0, RTSQ_NORMAL);
1632e11c3f44Smeem
1633e11c3f44Smeem for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next)
1634e11c3f44Smeem if (ipif->ipif_flags & IPIF_UP)
1635e11c3f44Smeem ip_rts_newaddrmsg(cmd, 0, ipif, RTSQ_NORMAL);
1636e11c3f44Smeem
1637e11c3f44Smeem if (cmd == RTM_DELETE)
1638e11c3f44Smeem ip_rts_xifmsg(ill->ill_ipif, 0, IPIF_UP, RTSQ_NORMAL);
1639e11c3f44Smeem }
1640e11c3f44Smeem
1641e11c3f44Smeem /*
1642e11c3f44Smeem * Bind the address named by `ipif' to the underlying ill named by `ill'.
1643e11c3f44Smeem * If `act' is Res_act_none, don't notify the resolver. Otherwise, `act'
1644e11c3f44Smeem * will indicate to the resolver whether this is an initial bringup of
1645e11c3f44Smeem * `ipif', or just a rebind to another ill.
1646e11c3f44Smeem */
1647e11c3f44Smeem static void
ipmp_ill_bind_ipif(ill_t * ill,ipif_t * ipif,enum ip_resolver_action act)1648e11c3f44Smeem ipmp_ill_bind_ipif(ill_t *ill, ipif_t *ipif, enum ip_resolver_action act)
1649e11c3f44Smeem {
1650e11c3f44Smeem int err = 0;
1651e11c3f44Smeem ip_stack_t *ipst = ill->ill_ipst;
1652e11c3f44Smeem
1653e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill) && IAM_WRITER_IPIF(ipif));
1654e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill) && IS_IPMP(ipif->ipif_ill));
1655e11c3f44Smeem ASSERT(act == Res_act_none || ipmp_ipif_is_up_dataaddr(ipif));
1656e11c3f44Smeem ASSERT(ipif->ipif_bound_ill == NULL);
1657e11c3f44Smeem ASSERT(ipif->ipif_bound_next == NULL);
1658e11c3f44Smeem
1659e11c3f44Smeem ipif->ipif_bound_next = ill->ill_bound_ipif;
1660e11c3f44Smeem ill->ill_bound_ipif = ipif;
1661e11c3f44Smeem ill->ill_bound_cnt++;
1662e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1663e11c3f44Smeem ipif->ipif_bound_ill = ill;
1664e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1665e11c3f44Smeem
1666e11c3f44Smeem /*
1667e11c3f44Smeem * If necessary, tell ARP/NDP about the new mapping. Note that
1668bd670b35SErik Nordmark * ipif_resolver_up() cannot fail for IPv6 ills.
1669e11c3f44Smeem */
1670e11c3f44Smeem if (act != Res_act_none) {
1671e11c3f44Smeem if (ill->ill_isv6) {
1672e11c3f44Smeem VERIFY(ipif_resolver_up(ipif, act) == 0);
1673e11c3f44Smeem err = ipif_ndp_up(ipif, act == Res_act_initial);
1674e11c3f44Smeem } else {
1675e11c3f44Smeem err = ipif_resolver_up(ipif, act);
1676e11c3f44Smeem }
1677e11c3f44Smeem
1678e11c3f44Smeem /*
1679e11c3f44Smeem * Since ipif_ndp_up() never returns EINPROGRESS and
1680e11c3f44Smeem * ipif_resolver_up() only returns EINPROGRESS when the
1681e11c3f44Smeem * associated ill is not up, we should never be here with
1682e11c3f44Smeem * EINPROGRESS. We rely on this to simplify the design.
1683e11c3f44Smeem */
1684e11c3f44Smeem ASSERT(err != EINPROGRESS);
1685e11c3f44Smeem }
1686e11c3f44Smeem /* TODO: retry binding on failure? when? */
1687e11c3f44Smeem ipif->ipif_bound = (err == 0);
1688e11c3f44Smeem }
1689e11c3f44Smeem
1690e11c3f44Smeem /*
1691e11c3f44Smeem * Unbind the address named by `ipif' from the underlying ill named by `ill'.
1692e11c3f44Smeem * If `ipif' is NULL, then an arbitrary ipif on `ill' is unbound and returned.
1693e11c3f44Smeem * If no ipifs are bound to `ill', NULL is returned. If `notifyres' is
1694e11c3f44Smeem * B_TRUE, notify the resolver about the change.
1695e11c3f44Smeem */
1696e11c3f44Smeem static ipif_t *
ipmp_ill_unbind_ipif(ill_t * ill,ipif_t * ipif,boolean_t notifyres)1697e11c3f44Smeem ipmp_ill_unbind_ipif(ill_t *ill, ipif_t *ipif, boolean_t notifyres)
1698e11c3f44Smeem {
1699e11c3f44Smeem ipif_t *previpif;
1700e11c3f44Smeem ip_stack_t *ipst = ill->ill_ipst;
1701e11c3f44Smeem
1702e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1703e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill));
1704e11c3f44Smeem
1705e11c3f44Smeem /*
1706e11c3f44Smeem * If necessary, find an ipif to unbind.
1707e11c3f44Smeem */
1708e11c3f44Smeem if (ipif == NULL) {
1709e11c3f44Smeem if ((ipif = ill->ill_bound_ipif) == NULL) {
1710e11c3f44Smeem ASSERT(ill->ill_bound_cnt == 0);
1711e11c3f44Smeem return (NULL);
1712e11c3f44Smeem }
1713e11c3f44Smeem }
1714e11c3f44Smeem
1715e11c3f44Smeem ASSERT(IAM_WRITER_IPIF(ipif));
1716e11c3f44Smeem ASSERT(IS_IPMP(ipif->ipif_ill));
1717e11c3f44Smeem ASSERT(ipif->ipif_bound_ill == ill);
1718e11c3f44Smeem ASSERT(ill->ill_bound_cnt > 0);
1719e11c3f44Smeem
1720e11c3f44Smeem /*
1721e11c3f44Smeem * Unbind it.
1722e11c3f44Smeem */
1723e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1724e11c3f44Smeem ipif->ipif_bound_ill = NULL;
1725e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1726e11c3f44Smeem ill->ill_bound_cnt--;
1727e11c3f44Smeem
1728e11c3f44Smeem if (ill->ill_bound_ipif == ipif) {
1729e11c3f44Smeem ill->ill_bound_ipif = ipif->ipif_bound_next;
1730e11c3f44Smeem } else {
1731e11c3f44Smeem previpif = ill->ill_bound_ipif;
1732e11c3f44Smeem while (previpif->ipif_bound_next != ipif)
1733e11c3f44Smeem previpif = previpif->ipif_bound_next;
1734e11c3f44Smeem
1735e11c3f44Smeem previpif->ipif_bound_next = ipif->ipif_bound_next;
1736e11c3f44Smeem }
1737e11c3f44Smeem ipif->ipif_bound_next = NULL;
1738e11c3f44Smeem
1739e11c3f44Smeem /*
1740e11c3f44Smeem * If requested, notify the resolvers (provided we're bound).
1741e11c3f44Smeem */
1742e11c3f44Smeem if (notifyres && ipif->ipif_bound) {
1743bd670b35SErik Nordmark if (ill->ill_isv6)
1744e11c3f44Smeem ipif_ndp_down(ipif);
1745bd670b35SErik Nordmark else
1746bd670b35SErik Nordmark (void) ipif_arp_down(ipif);
1747e11c3f44Smeem }
1748e11c3f44Smeem ipif->ipif_bound = B_FALSE;
1749e11c3f44Smeem
1750e11c3f44Smeem return (ipif);
1751e11c3f44Smeem }
1752e11c3f44Smeem
1753e11c3f44Smeem /*
1754e11c3f44Smeem * Check if `ill' is active. Caller must hold ill_lock and phyint_lock if
1755e11c3f44Smeem * it's not inside the IPSQ. Since ipmp_ill_try_refresh_active() calls this
1756e11c3f44Smeem * to determine whether an ill should be considered active, other consumers
1757e11c3f44Smeem * may race and learn about an ill that should be deactivated/activated before
1758e11c3f44Smeem * IPMP has performed the activation/deactivation. This should be safe though
1759e11c3f44Smeem * since at worst e.g. ire_atomic_start() will prematurely delete an IRE that
1760e11c3f44Smeem * would've been cleaned up by ipmp_ill_deactivate().
1761e11c3f44Smeem */
1762e11c3f44Smeem boolean_t
ipmp_ill_is_active(ill_t * ill)1763e11c3f44Smeem ipmp_ill_is_active(ill_t *ill)
1764e11c3f44Smeem {
1765e11c3f44Smeem phyint_t *phyi = ill->ill_phyint;
1766e11c3f44Smeem
1767e11c3f44Smeem ASSERT(IS_UNDER_IPMP(ill));
1768e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill) ||
1769e11c3f44Smeem (MUTEX_HELD(&ill->ill_lock) && MUTEX_HELD(&phyi->phyint_lock)));
1770e11c3f44Smeem
1771e11c3f44Smeem /*
1772e11c3f44Smeem * Note that PHYI_RUNNING isn't checked since we rely on in.mpathd to
1773e11c3f44Smeem * set PHYI_FAILED whenever PHYI_RUNNING is cleared. This allows the
1774e11c3f44Smeem * link flapping logic to be just in in.mpathd and allows us to ignore
1775e11c3f44Smeem * changes to PHYI_RUNNING.
1776e11c3f44Smeem */
1777e11c3f44Smeem return (!(ill->ill_ipif_up_count == 0 ||
1778e11c3f44Smeem (phyi->phyint_flags & (PHYI_OFFLINE|PHYI_INACTIVE|PHYI_FAILED))));
1779e11c3f44Smeem }
1780e11c3f44Smeem
1781e11c3f44Smeem /*
1782bd670b35SErik Nordmark * IRE walker callback: set ire_testhidden on IRE_HIDDEN_TYPE IREs associated
1783bd670b35SErik Nordmark * with `ill_arg'.
1784e11c3f44Smeem */
1785e11c3f44Smeem static void
ipmp_ill_ire_mark_testhidden(ire_t * ire,char * ill_arg)1786e11c3f44Smeem ipmp_ill_ire_mark_testhidden(ire_t *ire, char *ill_arg)
1787e11c3f44Smeem {
1788e11c3f44Smeem ill_t *ill = (ill_t *)ill_arg;
1789e11c3f44Smeem
1790e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1791e11c3f44Smeem ASSERT(!IS_IPMP(ill));
1792e11c3f44Smeem
1793bd670b35SErik Nordmark if (ire->ire_ill != ill)
1794e11c3f44Smeem return;
1795e11c3f44Smeem
1796bd670b35SErik Nordmark if (IRE_HIDDEN_TYPE(ire->ire_type)) {
1797e11c3f44Smeem DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire);
1798bd670b35SErik Nordmark ire->ire_testhidden = B_TRUE;
1799e11c3f44Smeem }
1800e11c3f44Smeem }
1801e11c3f44Smeem
1802e11c3f44Smeem /*
1803bd670b35SErik Nordmark * IRE walker callback: clear ire_testhidden if the IRE has a source address
1804bd670b35SErik Nordmark * on `ill_arg'.
1805e11c3f44Smeem */
1806e11c3f44Smeem static void
ipmp_ill_ire_clear_testhidden(ire_t * ire,char * ill_arg)1807e11c3f44Smeem ipmp_ill_ire_clear_testhidden(ire_t *ire, char *ill_arg)
1808e11c3f44Smeem {
1809e11c3f44Smeem ill_t *ill = (ill_t *)ill_arg;
1810e11c3f44Smeem
1811e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ill));
1812e11c3f44Smeem ASSERT(!IS_IPMP(ill));
1813e11c3f44Smeem
1814bd670b35SErik Nordmark if (ire->ire_ill == ill) {
1815e11c3f44Smeem DTRACE_PROBE1(ipmp__clear__testhidden, ire_t *, ire);
1816bd670b35SErik Nordmark ire->ire_testhidden = B_FALSE;
1817e11c3f44Smeem }
1818e11c3f44Smeem }
1819e11c3f44Smeem
1820e11c3f44Smeem /*
1821e11c3f44Smeem * Return a held pointer to the IPMP ill for underlying interface `ill', or
1822e11c3f44Smeem * NULL if one doesn't exist. (Unfortunately, this function needs to take an
1823e11c3f44Smeem * underlying ill rather than an ipmp_illgrp_t because an underlying ill's
18240bd79941Smeem * ill_grp pointer may become stale when not inside an IPSQ and not holding
1825e11c3f44Smeem * ipmp_lock.) Caller need not be inside the IPSQ.
1826e11c3f44Smeem */
1827e11c3f44Smeem ill_t *
ipmp_ill_hold_ipmp_ill(ill_t * ill)1828e11c3f44Smeem ipmp_ill_hold_ipmp_ill(ill_t *ill)
1829e11c3f44Smeem {
1830e11c3f44Smeem ip_stack_t *ipst = ill->ill_ipst;
1831e11c3f44Smeem ipmp_illgrp_t *illg;
1832e11c3f44Smeem
1833e11c3f44Smeem ASSERT(!IS_IPMP(ill));
1834e11c3f44Smeem
1835e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_READER);
1836e11c3f44Smeem illg = ill->ill_grp;
1837bd670b35SErik Nordmark if (illg != NULL && ill_check_and_refhold(illg->ig_ipmp_ill)) {
1838e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1839e11c3f44Smeem return (illg->ig_ipmp_ill);
1840e11c3f44Smeem }
1841e11c3f44Smeem /*
1842e11c3f44Smeem * Assume `ill' was removed from the illgrp in the meantime.
1843e11c3f44Smeem */
1844e11c3f44Smeem rw_exit(&ill->ill_ipst->ips_ipmp_lock);
1845e11c3f44Smeem return (NULL);
1846e11c3f44Smeem }
1847e11c3f44Smeem
1848e11c3f44Smeem /*
18491f19738eSmeem * Return a held pointer to the appropriate underlying ill for sending the
18501f19738eSmeem * specified type of packet. (Unfortunately, this function needs to take an
18511f19738eSmeem * underlying ill rather than an ipmp_illgrp_t because an underlying ill's
18521f19738eSmeem * ill_grp pointer may become stale when not inside an IPSQ and not holding
18531f19738eSmeem * ipmp_lock.) Caller need not be inside the IPSQ.
18541f19738eSmeem */
18551f19738eSmeem ill_t *
ipmp_ill_hold_xmit_ill(ill_t * ill,boolean_t is_unicast)18561f19738eSmeem ipmp_ill_hold_xmit_ill(ill_t *ill, boolean_t is_unicast)
18571f19738eSmeem {
18581f19738eSmeem ill_t *xmit_ill;
18591f19738eSmeem ip_stack_t *ipst = ill->ill_ipst;
18601f19738eSmeem
18611f19738eSmeem rw_enter(&ipst->ips_ill_g_lock, RW_READER);
18621f19738eSmeem if (ill->ill_grp == NULL) {
18631f19738eSmeem /*
18641f19738eSmeem * The ill was taken out of the group, so just send on it.
18651f19738eSmeem */
18661f19738eSmeem rw_exit(&ipst->ips_ill_g_lock);
18671f19738eSmeem ill_refhold(ill);
18681f19738eSmeem return (ill);
18691f19738eSmeem }
18701f19738eSmeem if (is_unicast)
18711f19738eSmeem xmit_ill = ipmp_illgrp_hold_next_ill(ill->ill_grp);
18721f19738eSmeem else
18731f19738eSmeem xmit_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
18741f19738eSmeem rw_exit(&ipst->ips_ill_g_lock);
18751f19738eSmeem
18761f19738eSmeem return (xmit_ill);
18771f19738eSmeem }
18781f19738eSmeem
18791f19738eSmeem /*
1880e11c3f44Smeem * Return the interface index for the IPMP ill tied to underlying interface
1881e11c3f44Smeem * `ill', or zero if one doesn't exist. Caller need not be inside the IPSQ.
1882e11c3f44Smeem */
1883e11c3f44Smeem uint_t
ipmp_ill_get_ipmp_ifindex(const ill_t * ill)1884e11c3f44Smeem ipmp_ill_get_ipmp_ifindex(const ill_t *ill)
1885e11c3f44Smeem {
1886e11c3f44Smeem uint_t ifindex = 0;
1887e11c3f44Smeem ip_stack_t *ipst = ill->ill_ipst;
1888e11c3f44Smeem ipmp_grp_t *grp;
1889e11c3f44Smeem
1890e11c3f44Smeem ASSERT(!IS_IPMP(ill));
1891e11c3f44Smeem
1892e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_READER);
1893e11c3f44Smeem if ((grp = ill->ill_phyint->phyint_grp) != NULL)
1894e11c3f44Smeem ifindex = grp->gr_phyint->phyint_ifindex;
1895e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1896e11c3f44Smeem return (ifindex);
1897e11c3f44Smeem }
1898e11c3f44Smeem
1899e11c3f44Smeem /*
1900e11c3f44Smeem * Place phyint `phyi' into IPMP group `grp'.
1901e11c3f44Smeem */
1902e11c3f44Smeem void
ipmp_phyint_join_grp(phyint_t * phyi,ipmp_grp_t * grp)1903e11c3f44Smeem ipmp_phyint_join_grp(phyint_t *phyi, ipmp_grp_t *grp)
1904e11c3f44Smeem {
1905e11c3f44Smeem ill_t *ill;
1906e11c3f44Smeem ipsq_t *ipsq = phyi->phyint_ipsq;
1907e11c3f44Smeem ipsq_t *grp_ipsq = grp->gr_phyint->phyint_ipsq;
1908e11c3f44Smeem ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
1909e11c3f44Smeem
1910e11c3f44Smeem ASSERT(IAM_WRITER_IPSQ(ipsq));
1911e11c3f44Smeem ASSERT(phyi->phyint_illv4 != NULL || phyi->phyint_illv6 != NULL);
1912e11c3f44Smeem
1913e11c3f44Smeem /*
1914e11c3f44Smeem * Send routing socket messages indicating that the phyint's ills
1915e11c3f44Smeem * and ipifs vanished.
1916e11c3f44Smeem */
1917e11c3f44Smeem if (phyi->phyint_illv4 != NULL) {
1918e11c3f44Smeem ill = phyi->phyint_illv4;
1919e11c3f44Smeem ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
1920e11c3f44Smeem }
1921e11c3f44Smeem
1922e11c3f44Smeem if (phyi->phyint_illv6 != NULL) {
1923e11c3f44Smeem ill = phyi->phyint_illv6;
1924e11c3f44Smeem ipmp_ill_rtsaddrmsg(ill, RTM_DELETE);
1925e11c3f44Smeem }
1926e11c3f44Smeem
1927e11c3f44Smeem /*
1928e11c3f44Smeem * Snapshot the phyint's initial kstats as a baseline.
1929e11c3f44Smeem */
1930e11c3f44Smeem ipmp_phyint_get_kstats(phyi, phyi->phyint_kstats0);
1931e11c3f44Smeem
1932e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1933e11c3f44Smeem
1934e11c3f44Smeem phyi->phyint_grp = grp;
1935e11c3f44Smeem if (++grp->gr_nif == 1)
1936e11c3f44Smeem grp->gr_mactype = ill->ill_mactype;
1937e11c3f44Smeem else
1938e11c3f44Smeem ASSERT(grp->gr_mactype == ill->ill_mactype);
1939e11c3f44Smeem
1940e11c3f44Smeem /*
1941e11c3f44Smeem * Now that we're in the group, request a switch to the group's xop
1942e11c3f44Smeem * when we ipsq_exit(). All future operations will be exclusive on
1943e11c3f44Smeem * the group xop until ipmp_phyint_leave_grp() is called.
1944e11c3f44Smeem */
1945e11c3f44Smeem ASSERT(ipsq->ipsq_swxop == NULL);
1946e11c3f44Smeem ASSERT(grp_ipsq->ipsq_xop == &grp_ipsq->ipsq_ownxop);
1947e11c3f44Smeem ipsq->ipsq_swxop = &grp_ipsq->ipsq_ownxop;
1948e11c3f44Smeem
1949e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
1950e11c3f44Smeem }
1951e11c3f44Smeem
1952e11c3f44Smeem /*
1953e11c3f44Smeem * Remove phyint `phyi' from its current IPMP group.
1954e11c3f44Smeem */
1955e11c3f44Smeem void
ipmp_phyint_leave_grp(phyint_t * phyi)1956e11c3f44Smeem ipmp_phyint_leave_grp(phyint_t *phyi)
1957e11c3f44Smeem {
1958e11c3f44Smeem uint_t i;
1959e11c3f44Smeem ipsq_t *ipsq = phyi->phyint_ipsq;
1960e11c3f44Smeem ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
1961e11c3f44Smeem uint64_t phyi_kstats[IPMP_KSTAT_MAX];
1962e11c3f44Smeem
1963e11c3f44Smeem ASSERT(IAM_WRITER_IPSQ(ipsq));
1964e11c3f44Smeem
1965e11c3f44Smeem /*
1966e11c3f44Smeem * If any of the phyint's ills are still in an illgrp, kick 'em out.
1967e11c3f44Smeem */
1968e11c3f44Smeem if (phyi->phyint_illv4 != NULL && IS_UNDER_IPMP(phyi->phyint_illv4))
1969e11c3f44Smeem ipmp_ill_leave_illgrp(phyi->phyint_illv4);
1970e11c3f44Smeem if (phyi->phyint_illv6 != NULL && IS_UNDER_IPMP(phyi->phyint_illv6))
1971e11c3f44Smeem ipmp_ill_leave_illgrp(phyi->phyint_illv6);
1972e11c3f44Smeem
1973e11c3f44Smeem /*
1974e11c3f44Smeem * Send routing socket messages indicating that the phyint's ills
1975e11c3f44Smeem * and ipifs have reappeared.
1976e11c3f44Smeem */
1977e11c3f44Smeem if (phyi->phyint_illv4 != NULL)
1978e11c3f44Smeem ipmp_ill_rtsaddrmsg(phyi->phyint_illv4, RTM_ADD);
1979e11c3f44Smeem if (phyi->phyint_illv6 != NULL)
1980e11c3f44Smeem ipmp_ill_rtsaddrmsg(phyi->phyint_illv6, RTM_ADD);
1981e11c3f44Smeem
1982e11c3f44Smeem /*
1983e11c3f44Smeem * Calculate the phyint's cumulative kstats while it was in the group,
1984e11c3f44Smeem * and add that to the group's baseline.
1985e11c3f44Smeem */
1986e11c3f44Smeem ipmp_phyint_get_kstats(phyi, phyi_kstats);
1987e11c3f44Smeem for (i = 0; i < IPMP_KSTAT_MAX; i++) {
1988e11c3f44Smeem phyi_kstats[i] -= phyi->phyint_kstats0[i];
1989e11c3f44Smeem atomic_add_64(&phyi->phyint_grp->gr_kstats0[i], phyi_kstats[i]);
1990e11c3f44Smeem }
1991e11c3f44Smeem
1992e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_WRITER);
1993e11c3f44Smeem
1994e11c3f44Smeem phyi->phyint_grp->gr_nif--;
1995e11c3f44Smeem phyi->phyint_grp = NULL;
1996e11c3f44Smeem
1997e11c3f44Smeem /*
1998e11c3f44Smeem * As our final act in leaving the group, request a switch back to our
1999e11c3f44Smeem * IPSQ's own xop when we ipsq_exit().
2000e11c3f44Smeem */
2001e11c3f44Smeem ASSERT(ipsq->ipsq_swxop == NULL);
2002e11c3f44Smeem ipsq->ipsq_swxop = &ipsq->ipsq_ownxop;
2003e11c3f44Smeem
2004e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
2005e11c3f44Smeem }
2006e11c3f44Smeem
2007e11c3f44Smeem /*
2008e11c3f44Smeem * Store the IPMP-related kstats for `phyi' into the array named by `kstats'.
2009e11c3f44Smeem * Assumes that `kstats' has at least IPMP_KSTAT_MAX elements.
2010e11c3f44Smeem */
2011e11c3f44Smeem static void
ipmp_phyint_get_kstats(phyint_t * phyi,uint64_t kstats[])2012e11c3f44Smeem ipmp_phyint_get_kstats(phyint_t *phyi, uint64_t kstats[])
2013e11c3f44Smeem {
2014e11c3f44Smeem uint_t i, j;
2015e11c3f44Smeem const char *name;
2016e11c3f44Smeem kstat_t *ksp;
2017e11c3f44Smeem kstat_named_t *kn;
20182b24ab6bSSebastien Roy ip_stack_t *ipst = PHYINT_TO_IPST(phyi);
20192b24ab6bSSebastien Roy zoneid_t zoneid;
2020e11c3f44Smeem
2021e11c3f44Smeem bzero(kstats, sizeof (kstats[0]) * IPMP_KSTAT_MAX);
20222b24ab6bSSebastien Roy zoneid = netstackid_to_zoneid(ipst->ips_netstack->netstack_stackid);
20232b24ab6bSSebastien Roy ksp = kstat_hold_byname("link", 0, phyi->phyint_name, zoneid);
2024e11c3f44Smeem if (ksp == NULL)
2025e11c3f44Smeem return;
2026e11c3f44Smeem
2027e11c3f44Smeem KSTAT_ENTER(ksp);
2028e11c3f44Smeem
2029e11c3f44Smeem if (ksp->ks_data != NULL && ksp->ks_type == KSTAT_TYPE_NAMED) {
2030e11c3f44Smeem /*
2031e11c3f44Smeem * Bring kstats up-to-date before recording.
2032e11c3f44Smeem */
2033e11c3f44Smeem (void) KSTAT_UPDATE(ksp, KSTAT_READ);
2034e11c3f44Smeem
2035e11c3f44Smeem kn = KSTAT_NAMED_PTR(ksp);
2036e11c3f44Smeem for (i = 0; i < IPMP_KSTAT_MAX; i++) {
2037e11c3f44Smeem name = ipmp_kstats[i].name;
2038e11c3f44Smeem kstats[i] = 0;
2039e11c3f44Smeem for (j = 0; j < ksp->ks_ndata; j++) {
2040e11c3f44Smeem if (strcmp(kn[j].name, name) != 0)
2041e11c3f44Smeem continue;
2042e11c3f44Smeem
2043e11c3f44Smeem switch (kn[j].data_type) {
2044e11c3f44Smeem case KSTAT_DATA_INT32:
2045e11c3f44Smeem case KSTAT_DATA_UINT32:
2046e11c3f44Smeem kstats[i] = kn[j].value.ui32;
2047e11c3f44Smeem break;
2048e11c3f44Smeem #ifdef _LP64
2049e11c3f44Smeem case KSTAT_DATA_LONG:
2050e11c3f44Smeem case KSTAT_DATA_ULONG:
2051e11c3f44Smeem kstats[i] = kn[j].value.ul;
2052e11c3f44Smeem break;
2053e11c3f44Smeem #endif
2054e11c3f44Smeem case KSTAT_DATA_INT64:
2055e11c3f44Smeem case KSTAT_DATA_UINT64:
2056e11c3f44Smeem kstats[i] = kn[j].value.ui64;
2057e11c3f44Smeem break;
2058e11c3f44Smeem }
2059e11c3f44Smeem break;
2060e11c3f44Smeem }
2061e11c3f44Smeem }
2062e11c3f44Smeem }
2063e11c3f44Smeem
2064e11c3f44Smeem KSTAT_EXIT(ksp);
2065e11c3f44Smeem kstat_rele(ksp);
2066e11c3f44Smeem }
2067e11c3f44Smeem
2068e11c3f44Smeem /*
2069e11c3f44Smeem * Refresh the active state of all ills on `phyi'.
2070e11c3f44Smeem */
2071e11c3f44Smeem void
ipmp_phyint_refresh_active(phyint_t * phyi)2072e11c3f44Smeem ipmp_phyint_refresh_active(phyint_t *phyi)
2073e11c3f44Smeem {
2074e11c3f44Smeem if (phyi->phyint_illv4 != NULL)
2075e11c3f44Smeem ipmp_ill_refresh_active(phyi->phyint_illv4);
2076e11c3f44Smeem if (phyi->phyint_illv6 != NULL)
2077e11c3f44Smeem ipmp_ill_refresh_active(phyi->phyint_illv6);
2078e11c3f44Smeem }
2079e11c3f44Smeem
2080e11c3f44Smeem /*
2081e11c3f44Smeem * Return a held pointer to the underlying ill bound to `ipif', or NULL if one
2082e11c3f44Smeem * doesn't exist. Caller need not be inside the IPSQ.
2083e11c3f44Smeem */
2084e11c3f44Smeem ill_t *
ipmp_ipif_hold_bound_ill(const ipif_t * ipif)2085e11c3f44Smeem ipmp_ipif_hold_bound_ill(const ipif_t *ipif)
2086e11c3f44Smeem {
2087e11c3f44Smeem ill_t *boundill;
2088e11c3f44Smeem ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
2089e11c3f44Smeem
2090e11c3f44Smeem ASSERT(IS_IPMP(ipif->ipif_ill));
2091e11c3f44Smeem
2092e11c3f44Smeem rw_enter(&ipst->ips_ipmp_lock, RW_READER);
2093e11c3f44Smeem boundill = ipif->ipif_bound_ill;
2094bd670b35SErik Nordmark if (boundill != NULL && ill_check_and_refhold(boundill)) {
2095e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
2096e11c3f44Smeem return (boundill);
2097e11c3f44Smeem }
2098e11c3f44Smeem rw_exit(&ipst->ips_ipmp_lock);
2099e11c3f44Smeem return (NULL);
2100e11c3f44Smeem }
2101e11c3f44Smeem
2102e11c3f44Smeem /*
2103e11c3f44Smeem * Return a pointer to the underlying ill bound to `ipif', or NULL if one
2104e11c3f44Smeem * doesn't exist. Caller must be inside the IPSQ.
2105e11c3f44Smeem */
2106e11c3f44Smeem ill_t *
ipmp_ipif_bound_ill(const ipif_t * ipif)2107e11c3f44Smeem ipmp_ipif_bound_ill(const ipif_t *ipif)
2108e11c3f44Smeem {
2109e11c3f44Smeem ASSERT(IAM_WRITER_ILL(ipif->ipif_ill));
2110e11c3f44Smeem ASSERT(IS_IPMP(ipif->ipif_ill));
2111e11c3f44Smeem
2112e11c3f44Smeem return (ipif->ipif_bound_ill);
2113e11c3f44Smeem }
2114e11c3f44Smeem
2115e11c3f44Smeem /*
2116e11c3f44Smeem * Check if `ipif' is a "stub" (placeholder address not being used).
2117e11c3f44Smeem */
2118e11c3f44Smeem boolean_t
ipmp_ipif_is_stubaddr(const ipif_t * ipif)2119e11c3f44Smeem ipmp_ipif_is_stubaddr(const ipif_t *ipif)
2120e11c3f44Smeem {
2121e11c3f44Smeem if (ipif->ipif_flags & IPIF_UP)
2122e11c3f44Smeem return (B_FALSE);
2123e11c3f44Smeem if (ipif->ipif_ill->ill_isv6)
2124e11c3f44Smeem return (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr));
2125e11c3f44Smeem else
2126e11c3f44Smeem return (ipif->ipif_lcl_addr == INADDR_ANY);
2127e11c3f44Smeem }
2128e11c3f44Smeem
2129e11c3f44Smeem /*
2130e11c3f44Smeem * Check if `ipif' is an IPMP data address.
2131e11c3f44Smeem */
2132e11c3f44Smeem boolean_t
ipmp_ipif_is_dataaddr(const ipif_t * ipif)2133e11c3f44Smeem ipmp_ipif_is_dataaddr(const ipif_t *ipif)
2134e11c3f44Smeem {
2135e11c3f44Smeem if (ipif->ipif_flags & IPIF_NOFAILOVER)
2136e11c3f44Smeem return (B_FALSE);
2137e11c3f44Smeem if (ipif->ipif_ill->ill_isv6)
2138e11c3f44Smeem return (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr));
2139e11c3f44Smeem else
2140e11c3f44Smeem return (ipif->ipif_lcl_addr != INADDR_ANY);
2141e11c3f44Smeem }
2142e11c3f44Smeem
2143e11c3f44Smeem /*
2144e11c3f44Smeem * Check if `ipif' is an IPIF_UP IPMP data address.
2145e11c3f44Smeem */
2146e11c3f44Smeem static boolean_t
ipmp_ipif_is_up_dataaddr(const ipif_t * ipif)2147e11c3f44Smeem ipmp_ipif_is_up_dataaddr(const ipif_t *ipif)
2148e11c3f44Smeem {
2149e11c3f44Smeem return (ipmp_ipif_is_dataaddr(ipif) && (ipif->ipif_flags & IPIF_UP));
2150e11c3f44Smeem }
2151bd670b35SErik Nordmark
2152bd670b35SErik Nordmark /*
21531f19738eSmeem * Check if `mp' contains a probe packet by checking if the IP source address
21541f19738eSmeem * is a test address on underlying interface `ill'. Caller need not be inside
2155bd670b35SErik Nordmark * the IPSQ.
2156bd670b35SErik Nordmark */
2157bd670b35SErik Nordmark boolean_t
ipmp_packet_is_probe(mblk_t * mp,ill_t * ill)2158bd670b35SErik Nordmark ipmp_packet_is_probe(mblk_t *mp, ill_t *ill)
2159bd670b35SErik Nordmark {
2160bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2161bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr;
2162bd670b35SErik Nordmark
2163bd670b35SErik Nordmark ASSERT(DB_TYPE(mp) != M_CTL);
2164bd670b35SErik Nordmark
2165bd670b35SErik Nordmark if (!IS_UNDER_IPMP(ill))
2166bd670b35SErik Nordmark return (B_FALSE);
2167bd670b35SErik Nordmark
2168bd670b35SErik Nordmark if (ill->ill_isv6) {
2169bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src) &&
2170bd670b35SErik Nordmark ipif_lookup_testaddr_v6(ill, &ip6h->ip6_src, NULL))
2171bd670b35SErik Nordmark return (B_TRUE);
2172bd670b35SErik Nordmark } else {
21731f19738eSmeem if (ipha->ipha_src != INADDR_ANY &&
2174bd670b35SErik Nordmark ipif_lookup_testaddr_v4(ill, &ipha->ipha_src, NULL))
2175bd670b35SErik Nordmark return (B_TRUE);
2176bd670b35SErik Nordmark }
2177bd670b35SErik Nordmark return (B_FALSE);
2178bd670b35SErik Nordmark }
2179bd670b35SErik Nordmark
2180bd670b35SErik Nordmark /*
21811f19738eSmeem * NCEC walker callback: delete `ncec' if it is associated with `ill_arg' and
21821f19738eSmeem * is not one of our local addresses. Caller must be inside the IPSQ.
2183bd670b35SErik Nordmark */
21841f19738eSmeem static void
ipmp_ncec_delete_nonlocal(ncec_t * ncec,uchar_t * ill_arg)21851f19738eSmeem ipmp_ncec_delete_nonlocal(ncec_t *ncec, uchar_t *ill_arg)
2186bd670b35SErik Nordmark {
21871f19738eSmeem if (!NCE_MYADDR(ncec) && ncec->ncec_ill == (ill_t *)ill_arg)
21881f19738eSmeem ncec_delete(ncec);
2189bd670b35SErik Nordmark }
2190bd670b35SErik Nordmark
2191bd670b35SErik Nordmark /*
21921f19738eSmeem * Delete any NCEs tied to the illgrp associated with `ncec'. Caller need not
21931f19738eSmeem * be inside the IPSQ.
2194bd670b35SErik Nordmark */
2195bd670b35SErik Nordmark void
ipmp_ncec_delete_nce(ncec_t * ncec)21961f19738eSmeem ipmp_ncec_delete_nce(ncec_t *ncec)
2197bd670b35SErik Nordmark {
21981f19738eSmeem ipmp_illgrp_t *illg = ncec->ncec_ill->ill_grp;
21991f19738eSmeem ip_stack_t *ipst = ncec->ncec_ipst;
2200bd670b35SErik Nordmark ill_t *ill;
2201bd670b35SErik Nordmark nce_t *nce;
22021f19738eSmeem list_t dead;
2203bd670b35SErik Nordmark
22041f19738eSmeem ASSERT(IS_IPMP(ncec->ncec_ill));
2205bd670b35SErik Nordmark
22061f19738eSmeem /*
22071f19738eSmeem * For each underlying interface, delete `ncec' from its ill_nce list
22081f19738eSmeem * via nce_fastpath_list_delete(). Defer the actual nce_refrele()
22091f19738eSmeem * until we've dropped ill_g_lock.
22101f19738eSmeem */
2211bd670b35SErik Nordmark list_create(&dead, sizeof (nce_t), offsetof(nce_t, nce_node));
2212bd670b35SErik Nordmark
2213bd670b35SErik Nordmark rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2214bd670b35SErik Nordmark ill = list_head(&illg->ig_if);
22151f19738eSmeem for (; ill != NULL; ill = list_next(&illg->ig_if, ill))
2216bd670b35SErik Nordmark nce_fastpath_list_delete(ill, ncec, &dead);
2217bd670b35SErik Nordmark rw_exit(&ipst->ips_ill_g_lock);
2218bd670b35SErik Nordmark
22191f19738eSmeem while ((nce = list_remove_head(&dead)) != NULL)
2220bd670b35SErik Nordmark nce_refrele(nce);
22211f19738eSmeem
2222bd670b35SErik Nordmark list_destroy(&dead);
2223bd670b35SErik Nordmark }
2224bd670b35SErik Nordmark
2225bd670b35SErik Nordmark /*
22261f19738eSmeem * Refresh any NCE entries tied to the illgrp associated with `ncec' to
22271f19738eSmeem * use the information in `ncec'. Caller need not be inside the IPSQ.
2228bd670b35SErik Nordmark */
2229bd670b35SErik Nordmark void
ipmp_ncec_refresh_nce(ncec_t * ncec)22301f19738eSmeem ipmp_ncec_refresh_nce(ncec_t *ncec)
2231bd670b35SErik Nordmark {
22321f19738eSmeem ipmp_illgrp_t *illg = ncec->ncec_ill->ill_grp;
22331f19738eSmeem ip_stack_t *ipst = ncec->ncec_ipst;
2234bd670b35SErik Nordmark ill_t *ill;
2235bd670b35SErik Nordmark nce_t *nce, *nce_next;
2236bd670b35SErik Nordmark list_t replace;
2237bd670b35SErik Nordmark
22381f19738eSmeem ASSERT(IS_IPMP(ncec->ncec_ill));
2239bd670b35SErik Nordmark
2240bd670b35SErik Nordmark /*
22411f19738eSmeem * If `ncec' is not reachable, there is no use in refreshing NCEs.
2242bd670b35SErik Nordmark */
2243bd670b35SErik Nordmark if (!NCE_ISREACHABLE(ncec))
2244bd670b35SErik Nordmark return;
2245bd670b35SErik Nordmark
22461f19738eSmeem /*
22471f19738eSmeem * Find all the NCEs matching ncec->ncec_addr. We cannot update them
22481f19738eSmeem * in-situ because we're holding ipmp_lock to prevent changes to IPMP
22491f19738eSmeem * group membership and updating indirectly calls nce_fastpath_probe()
22501f19738eSmeem * -> putnext() which cannot hold locks. Thus, move the NCEs to a
22511f19738eSmeem * separate list and process that list after dropping ipmp_lock.
22521f19738eSmeem */
2253bd670b35SErik Nordmark list_create(&replace, sizeof (nce_t), offsetof(nce_t, nce_node));
2254bd670b35SErik Nordmark rw_enter(&ipst->ips_ipmp_lock, RW_READER);
2255bd670b35SErik Nordmark ill = list_head(&illg->ig_actif);
2256bd670b35SErik Nordmark for (; ill != NULL; ill = list_next(&illg->ig_actif, ill)) {
2257bd670b35SErik Nordmark mutex_enter(&ill->ill_lock);
22581f19738eSmeem nce = list_head(&ill->ill_nce);
22591f19738eSmeem for (; nce != NULL; nce = nce_next) {
2260bd670b35SErik Nordmark nce_next = list_next(&ill->ill_nce, nce);
22611f19738eSmeem if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr,
2262bd670b35SErik Nordmark &ncec->ncec_addr)) {
2263bd670b35SErik Nordmark nce_refhold(nce);
2264bd670b35SErik Nordmark nce_delete(nce);
2265bd670b35SErik Nordmark list_insert_tail(&replace, nce);
22661f19738eSmeem }
2267bd670b35SErik Nordmark }
2268bd670b35SErik Nordmark mutex_exit(&ill->ill_lock);
2269bd670b35SErik Nordmark }
2270bd670b35SErik Nordmark rw_exit(&ipst->ips_ipmp_lock);
22711f19738eSmeem
2272bd670b35SErik Nordmark /*
22731f19738eSmeem * Process the list; nce_lookup_then_add_v* ensures that nce->nce_ill
22741f19738eSmeem * is still in the group for ncec->ncec_ill.
2275bd670b35SErik Nordmark */
22761f19738eSmeem while ((nce = list_remove_head(&replace)) != NULL) {
2277bd670b35SErik Nordmark if (ncec->ncec_ill->ill_isv6) {
2278bd670b35SErik Nordmark (void) nce_lookup_then_add_v6(nce->nce_ill,
2279bd670b35SErik Nordmark ncec->ncec_lladdr, ncec->ncec_lladdr_length,
2280bd670b35SErik Nordmark &nce->nce_addr, ncec->ncec_flags, ND_UNCHANGED,
2281bd670b35SErik Nordmark NULL);
2282bd670b35SErik Nordmark } else {
2283bd670b35SErik Nordmark ipaddr_t ipaddr;
2284bd670b35SErik Nordmark
2285bd670b35SErik Nordmark IN6_V4MAPPED_TO_IPADDR(&ncec->ncec_addr, ipaddr);
2286bd670b35SErik Nordmark (void) nce_lookup_then_add_v4(nce->nce_ill,
2287bd670b35SErik Nordmark ncec->ncec_lladdr, ncec->ncec_lladdr_length,
2288bd670b35SErik Nordmark &ipaddr, ncec->ncec_flags, ND_UNCHANGED, NULL);
2289bd670b35SErik Nordmark }
2290bd670b35SErik Nordmark nce_refrele(nce);
2291bd670b35SErik Nordmark }
22921f19738eSmeem
2293bd670b35SErik Nordmark list_destroy(&replace);
2294bd670b35SErik Nordmark }
2295