xref: /titanic_52/usr/src/uts/common/inet/ip/ip_arp.c (revision 67dbe2be0c0f1e2eb428b89088bb5667e8f0b9f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <inet/ip_arp.h>
28 #include <inet/ip_ndp.h>
29 #include <net/if_arp.h>
30 #include <netinet/if_ether.h>
31 #include <sys/strsubr.h>
32 #include <inet/ip6.h>
33 #include <inet/ip.h>
34 #include <inet/ip_ire.h>
35 #include <inet/ip_if.h>
36 #include <sys/dlpi.h>
37 #include <sys/sunddi.h>
38 #include <sys/strsun.h>
39 #include <sys/sdt.h>
40 #include <inet/mi.h>
41 #include <inet/arp.h>
42 #include <inet/ipdrop.h>
43 #include <sys/sockio.h>
44 #include <inet/ip_impl.h>
45 #include <sys/policy.h>
46 
/*
 * Offset derived from the arl's SAP length: sizeof (dl_unitdata_req_t)
 * when arl_sap_length is negative, otherwise sizeof (dl_unitdata_req_t)
 * plus the absolute value of arl_sap_length.
 * NOTE(review): presumably the position of the link-layer address inside a
 * DL_UNITDATA_REQ built for this arl -- confirm against the users of this
 * macro.
 */
#define	ARL_LL_ADDR_OFFSET(arl)	(((arl)->arl_sap_length) < 0 ? \
	(sizeof (dl_unitdata_req_t)) : \
	((sizeof (dl_unitdata_req_t)) + (ABS((arl)->arl_sap_length))))
50 
/*
 * MAC-specific intelligence.  Shouldn't be needed, but the DL_INFO_ACK
 * doesn't quite do it for us.
 *
 * One arp_m_t describes a single DLPI MAC type; the entries live in
 * arp_m_tbl and are located with arp_m_lookup().
 */
typedef struct arp_m_s {
	t_uscalar_t	arp_mac_type;		/* lookup key (DL_* type) */
	uint32_t	arp_mac_arp_hw_type;	/* ARPHRD_* for this MAC */
	t_scalar_t	arp_mac_sap_length;	/* fallback arl_sap_length */
	uint32_t	arp_mac_hw_addr_length;	/* fallback HW addr length */
} arp_m_t;
61 
/*
 * Forward declarations for the file-local routines used before their
 * definitions.
 */
static int arp_close(queue_t *, int);
static void arp_rput(queue_t *, mblk_t *);
static void arp_wput(queue_t *, mblk_t *);
static arp_m_t	*arp_m_lookup(t_uscalar_t mac_type);
static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *,
	ncec_t *);
static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *,
	const uchar_t *, const uchar_t *, uchar_t *);
static int  arp_modclose(arl_t *);
static void  arp_mod_close_tail(arl_t *);
static mblk_t *arl_unbind(arl_t *);
static void arp_process_packet(ill_t *, mblk_t *);
static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *);
static void arp_drop_packet(const char *str, mblk_t *, ill_t *);
static int arp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ip_sioctl_ifunitsel_arp(queue_t *, int *);
static int ip_sioctl_slifname_arp(queue_t *, void *);
static void arp_dlpi_send(arl_t *, mblk_t *);
static void arl_defaults_common(arl_t *, mblk_t *);
static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *);
static void arp_ifname_notify(arl_t *);
static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *);
static arl_t *ill_to_arl(ill_t *);
85 
/* Fetch the DLPI primitive stored at the start of the first mblk. */
#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
/*
 * True when mp is an M_PROTO message that is at least as long as a
 * dl_unitdata_ind_t and whose primitive is DL_UNITDATA_IND.
 */
#define	IS_DLPI_DATA(mp)						\
	((DB_TYPE(mp) == M_PROTO) &&					\
	MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&			\
	(DL_PRIM(mp) == DL_UNITDATA_IND))

/* Result codes returned by ip_nce_resolve_all(). */
#define	AR_NOTFOUND	1	/* No matching ace found in cache */
#define	AR_MERGED	2	/* Matching ace updated (RFC 826 Merge_flag) */
#define	AR_LOOPBACK	3	/* Our own arp packet was received */
#define	AR_BOGON	4	/* Another host has our IP addr. */
#define	AR_FAILED	5	/* Duplicate Address Detection has failed */
#define	AR_CHANGED	6	/* Address has changed; tell IP (and merged) */

/* NOTE(review): not referenced in the visible part of this file. */
boolean_t arp_no_defense;

/* STREAMS module_info shared by the read and write qinit below. */
struct module_info arp_mod_info = {
	IP_MOD_ID, "arpip", 1, INFPSZ, 65536, 1024
};
/* Read side: messages arriving from below are handled by arp_rput(). */
static struct qinit rinit_arp = {
	(pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info
};
/* Write side: messages arriving from above are handled by arp_wput(). */
static struct qinit winit_arp = {
	(pfi_t)arp_wput, NULL, arp_open, arp_close, NULL,
	&arp_mod_info
};
struct streamtab arpinfo = {
	&rinit_arp, &winit_arp
};
/* Bytes of ARP header that precede the variable-length address fields. */
#define	ARH_FIXED_LEN	8
/* NOTE(review): not referenced in the visible part of this file. */
#define	AR_LL_HDR_SLACK	32
116 
/*
 * pfhooks for ARP.
 *
 * ARP_HOOK_IN: when (_hook).he_interested is set, build a hook_pkt_event_t
 * with _ilp as the inbound interface (hpe_ifp) and no outbound interface,
 * then run the ARP hooks for _event.  If hook_run() returns non-zero the
 * message _fm is freed (when non-NULL) and _fm, _hdr and _m are all set to
 * NULL; otherwise _hdr and _m are reloaded from the event, since a hook may
 * have replaced them.
 *
 * The macro expands to a bare "if" statement and assigns to _fm, _hdr and
 * _m, so those arguments must be modifiable lvalues.
 */
#define	ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst)		\
									\
	if ((_hook).he_interested) {                       		\
		hook_pkt_event_t info;                          	\
									\
		info.hpe_protocol = ipst->ips_arp_net_data;		\
		info.hpe_ifp = _ilp;                       		\
		info.hpe_ofp = 0;                       		\
		info.hpe_hdr = _hdr;                            	\
		info.hpe_mp = &(_fm);                           	\
		info.hpe_mb = _m;                               	\
		if (hook_run(ipst->ips_arp_net_data->netd_hooks,	\
		    _event, (hook_data_t)&info) != 0) {			\
			if (_fm != NULL) {                      	\
				freemsg(_fm);                   	\
				_fm = NULL;                     	\
			}                                       	\
			_hdr = NULL;                            	\
			_m = NULL;                              	\
		} else {                                        	\
			_hdr = info.hpe_hdr;                    	\
			_m = info.hpe_mb;                       	\
		}                                               	\
	}
144 
/*
 * ARP_HOOK_OUT: when (_hook).he_interested is set, build a hook_pkt_event_t
 * with _olp as the outbound interface (hpe_ofp) and no inbound interface,
 * then run the ARP hooks for _event.  If hook_run() returns non-zero the
 * message _fm is freed (when non-NULL) and _fm, _hdr and _m are all set to
 * NULL; otherwise _hdr and _m are reloaded from the event.
 *
 * The macro expands to a bare "if" statement and assigns to _fm, _hdr and
 * _m, so those arguments must be modifiable lvalues.
 */
#define	ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst)		\
									\
	if ((_hook).he_interested) {                       		\
		hook_pkt_event_t info;                          	\
									\
		info.hpe_protocol = ipst->ips_arp_net_data;		\
		info.hpe_ifp = 0;                       		\
		info.hpe_ofp = _olp;                       		\
		info.hpe_hdr = _hdr;                            	\
		info.hpe_mp = &(_fm);                           	\
		info.hpe_mb = _m;                               	\
		if (hook_run(ipst->ips_arp_net_data->netd_hooks,	\
		    _event, (hook_data_t)&info) != 0) {			\
			if (_fm != NULL) {                      	\
				freemsg(_fm);                   	\
				_fm = NULL;                     	\
			}                                       	\
			_hdr = NULL;                            	\
			_m = NULL;                              	\
		} else {                                        	\
			_hdr = info.hpe_hdr;                    	\
			_m = info.hpe_mb;                       	\
		}                                               	\
	}
169 
/*
 * Per-MAC-type parameters, searched linearly by arp_m_lookup().  Columns
 * follow arp_m_t: DLPI MAC type, ARP hardware type, SAP length, hardware
 * address length.  The DL_OTHER row is the fallback used by arp_hw_type()
 * and arp_ll_set_defaults() when a MAC type has no row of its own.
 */
static arp_m_t	arp_m_tbl[] = {
	{ DL_CSMACD,	ARPHRD_ETHER,	-2,	6},	/* 802.3 */
	{ DL_TPB,	ARPHRD_IEEE802,	-2,	6},	/* 802.4 */
	{ DL_TPR,	ARPHRD_IEEE802,	-2,	6},	/* 802.5 */
	{ DL_METRO,	ARPHRD_IEEE802,	-2,	6},	/* 802.6 */
	{ DL_ETHER,	ARPHRD_ETHER,	-2,	6},	/* Ethernet */
	{ DL_FDDI,	ARPHRD_ETHER,	-2,	6},	/* FDDI */
	{ DL_IB,	ARPHRD_IB,	-2,	20},	/* Infiniband */
	{ DL_OTHER,	ARPHRD_ETHER,	-2,	6}	/* unknown */
};
180 
181 static void
182 arl_refhold_locked(arl_t *arl)
183 {
184 	ASSERT(MUTEX_HELD(&arl->arl_lock));
185 	arl->arl_refcnt++;
186 	ASSERT(arl->arl_refcnt != 0);
187 }
188 
189 static void
190 arl_refrele(arl_t *arl)
191 {
192 	mutex_enter(&arl->arl_lock);
193 	ASSERT(arl->arl_refcnt != 0);
194 	arl->arl_refcnt--;
195 	if (arl->arl_refcnt > 1) {
196 		mutex_exit(&arl->arl_lock);
197 		return;
198 	}
199 
200 	/* ill_close or arp_unbind_complete may be waiting */
201 	cv_broadcast(&arl->arl_cv);
202 	mutex_exit(&arl->arl_lock);
203 }
204 
205 /*
206  * wake up any pending ip ioctls.
207  */
208 static void
209 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim)
210 {
211 	if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing)
212 		arp_replumb_done(ill, 0);
213 	else
214 		arp_bringup_done(ill, err);
215 }
216 
/*
 * Apply the sender <hardware address, protocol address> pair of a received
 * ARP packet to the neighbor cache entry for src_paddr, looked up with
 * ncec_lookup_illgrp_v4() on ill.
 *
 * The entry found (or NULL) is stored in *sncec.  Except in the AR_FAILED
 * case, where the entry is deleted and released here and *sncec is reset to
 * NULL, the entry is not released by this function, so the caller is
 * expected to ncec_refrele() a non-NULL *sncec.
 *
 * Returns:
 *	AR_NOTFOUND	no entry exists for src_paddr
 *	AR_LOOPBACK	the entry is one of our own addresses and src_haddr
 *			matches its link-layer address
 *	AR_FAILED	the entry was still NCE_F_UNVERIFIED
 *	AR_BOGON	the entry is NCE_F_AUTHORITY; its unsolicited
 *			announcement count has been reset
 *	AR_MERGED	the entry was updated (or found unchanged)
 *	AR_CHANGED	the entry was updated with a different link-layer
 *			address
 */
static int
ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen,
    const in_addr_t *src_paddr, ncec_t **sncec, int op)
{
	int retv;
	ncec_t *ncec;
	boolean_t ll_changed;
	uchar_t *lladdr = NULL;
	int new_state;

	ASSERT(ill != NULL);

	ncec = ncec_lookup_illgrp_v4(ill, src_paddr);
	*sncec = ncec;

	if (ncec == NULL) {
		retv = AR_NOTFOUND;
		goto done;
	}

	mutex_enter(&ncec->ncec_lock);
	/*
	 * IP addr and hardware address match what we already
	 * have, then this is a broadcast packet emitted by one of our
	 * interfaces, reflected by the switch and received on another
	 * interface.  We return AR_LOOPBACK.
	 */
	lladdr = ncec->ncec_lladdr;
	if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length &&
	    bcmp(lladdr, src_haddr, hlen) == 0) {
		mutex_exit(&ncec->ncec_lock);
		retv = AR_LOOPBACK;
		goto done;
	}
	/*
	 * If the entry is unverified, then we've just verified that
	 * someone else already owns this address, because this is a
	 * message with the same protocol address but different
	 * hardware address.
	 */
	if (ncec->ncec_flags & NCE_F_UNVERIFIED) {
		mutex_exit(&ncec->ncec_lock);
		ncec_delete(ncec);
		ncec_refrele(ncec);
		*sncec = NULL;
		retv = AR_FAILED;
		goto done;
	}

	/*
	 * If the IP address matches ours and we're authoritative for
	 * this entry, then some other node is using our IP addr, so
	 * return AR_BOGON.  Also reset the transmit count to zero so
	 * that, if we're currently in initial announcement mode, we
	 * switch back to the lazier defense mode.  Knowing that
	 * there's at least one duplicate out there, we ought not
	 * blindly announce.
	 *
	 * NCE_F_AUTHORITY is set in one of two ways:
	 * 1. /sbin/arp told us so, via the "permanent" flag.
	 * 2. This is one of my addresses.
	 */
	if (ncec->ncec_flags & NCE_F_AUTHORITY) {
		ncec->ncec_unsolicit_count = 0;
		mutex_exit(&ncec->ncec_lock);
		retv = AR_BOGON;
		goto done;
	}

	/*
	 * No address conflict was detected, and we are getting
	 * ready to update the ncec's hwaddr. The nce MUST NOT be on an
	 * under interface, because all dynamic nce's are created on the
	 * native interface (in the non-IPMP case) or on the IPMP
	 * meta-interface (in the IPMP case)
	 */
	ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill));

	/*
	 * update ncec with src_haddr, hlen.
	 *
	 * We are trying to resolve this ncec_addr/src_paddr and we
	 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr.
	 * So the new_state is at least "STALE". If, in addition,
	 * this is a solicited, unicast ARP_RESPONSE, we can transition
	 * to REACHABLE.
	 */
	new_state = ND_STALE;
	ip1dbg(("got info for ncec %p from addr %x\n",
	    (void *)ncec, *src_paddr));
	retv = AR_MERGED;
	if (ncec->ncec_state == ND_INCOMPLETE ||
	    ncec->ncec_state == ND_INITIAL) {
		/* Nothing resolved yet: any address counts as a change. */
		ll_changed = B_TRUE;
	} else {
		ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen);
		if (!ll_changed)
			new_state = ND_UNCHANGED;
		else
			retv = AR_CHANGED;
	}
	/*
	 * We don't have the equivalent of the IPv6 'S' flag indicating
	 * a solicited response, so we assume that if we are in
	 * INCOMPLETE, or got back an unchanged lladdr in PROBE state,
	 * and this is an ARP_RESPONSE, it must be a
	 * solicited response allowing us to transition to REACHABLE.
	 */
	if (op == ARP_RESPONSE) {
		switch (ncec->ncec_state) {
		case ND_PROBE:
			new_state = (ll_changed ? ND_STALE : ND_REACHABLE);
			break;
		case ND_INCOMPLETE:
			new_state = ND_REACHABLE;
			break;
		}
	}
	/*
	 * Call nce_update() to refresh fastpath information on any
	 * dependent nce_t entries.
	 */
	nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL));
	mutex_exit(&ncec->ncec_lock);
	nce_resolv_ok(ncec);
done:
	return (retv);
}
345 
346 /* Find an entry for a particular MAC type in the arp_m_tbl. */
347 static arp_m_t	*
348 arp_m_lookup(t_uscalar_t mac_type)
349 {
350 	arp_m_t	*arm;
351 
352 	for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) {
353 		if (arm->arp_mac_type == mac_type)
354 			return (arm);
355 	}
356 	return (NULL);
357 }
358 
359 static uint32_t
360 arp_hw_type(t_uscalar_t mactype)
361 {
362 	arp_m_t *arm;
363 
364 	if ((arm = arp_m_lookup(mactype)) == NULL)
365 		arm = arp_m_lookup(DL_OTHER);
366 	return (arm->arp_mac_arp_hw_type);
367 }
368 
369 /*
370  * Called when an DLPI control message has been acked; send down the next
371  * queued message (if any).
372  * The DLPI messages of interest being bind, attach and unbind since
373  * these are the only ones sent by ARP via arp_dlpi_send.
374  */
375 static void
376 arp_dlpi_done(arl_t *arl, ill_t *ill)
377 {
378 	mblk_t *mp;
379 	int err;
380 	t_uscalar_t prim;
381 
382 	mutex_enter(&arl->arl_lock);
383 	prim = arl->arl_dlpi_pending;
384 
385 	if ((mp = arl->arl_dlpi_deferred) == NULL) {
386 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
387 		if (arl->arl_state_flags & ARL_LL_DOWN)
388 			err = ENETDOWN;
389 		else
390 			err = 0;
391 		mutex_exit(&arl->arl_lock);
392 
393 		mutex_enter(&ill->ill_lock);
394 		ill->ill_arl_dlpi_pending = 0;
395 		mutex_exit(&ill->ill_lock);
396 		arp_cmd_done(ill, err, prim);
397 		return;
398 	}
399 
400 	arl->arl_dlpi_deferred = mp->b_next;
401 	mp->b_next = NULL;
402 
403 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
404 
405 	arl->arl_dlpi_pending = DL_PRIM(mp);
406 	mutex_exit(&arl->arl_lock);
407 
408 	mutex_enter(&ill->ill_lock);
409 	ill->ill_arl_dlpi_pending = 1;
410 	mutex_exit(&ill->ill_lock);
411 
412 	putnext(arl->arl_wq, mp);
413 }
414 
415 /*
416  * This routine is called during module initialization when the DL_INFO_ACK
417  * comes back from the device.	We set up defaults for all the device dependent
418  * doo-dads we are going to need.  This will leave us ready to roll if we are
419  * attempting auto-configuration.  Alternatively, these defaults can be
420  * overridden by initialization procedures possessing higher intelligence.
421  *
422  * Caller will free the mp.
423  */
424 static void
425 arp_ll_set_defaults(arl_t *arl, mblk_t *mp)
426 {
427 	arp_m_t		*arm;
428 	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
429 
430 	if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL)
431 		arm = arp_m_lookup(DL_OTHER);
432 	ASSERT(arm != NULL);
433 
434 	/*
435 	 * We initialize based on parameters in the (currently) not too
436 	 * exhaustive arp_m_tbl.
437 	 */
438 	if (dlia->dl_version == DL_VERSION_2) {
439 		arl->arl_sap_length = dlia->dl_sap_length;
440 		arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length;
441 		if (dlia->dl_provider_style == DL_STYLE2)
442 			arl->arl_needs_attach = 1;
443 	} else {
444 		arl->arl_sap_length = arm->arp_mac_sap_length;
445 		arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length;
446 	}
447 	/*
448 	 * Note: the arp_hw_type in the arp header may be derived from
449 	 * the ill_mac_type and arp_m_lookup().
450 	 */
451 	arl->arl_sap = ETHERTYPE_ARP;
452 	arl_defaults_common(arl, mp);
453 }
454 
455 static void
456 arp_wput(queue_t *q, mblk_t *mp)
457 {
458 	int err = EINVAL;
459 	struct iocblk *ioc;
460 	mblk_t *mp1;
461 
462 	switch (DB_TYPE(mp)) {
463 	case M_IOCTL:
464 		ASSERT(q->q_next != NULL);
465 		ioc = (struct iocblk *)mp->b_rptr;
466 		if (ioc->ioc_cmd != SIOCSLIFNAME &&
467 		    ioc->ioc_cmd != IF_UNITSEL) {
468 			DTRACE_PROBE4(arl__dlpi, char *, "arp_wput",
469 			    char *, "<some ioctl>", char *, "-",
470 			    arl_t *, (arl_t *)q->q_ptr);
471 			putnext(q, mp);
472 			return;
473 		}
474 		if ((mp1 = mp->b_cont) == 0)
475 			err = EINVAL;
476 		else if (ioc->ioc_cmd == SIOCSLIFNAME)
477 			err = ip_sioctl_slifname_arp(q, mp1->b_rptr);
478 		else if (ioc->ioc_cmd == IF_UNITSEL)
479 			err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr);
480 		if (err == 0)
481 			miocack(q, mp, 0, 0);
482 		else
483 			miocnak(q, mp, 0, err);
484 		return;
485 	default:
486 		DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default",
487 		    char *, "default mblk", char *, "-",
488 		    arl_t *, (arl_t *)q->q_ptr);
489 		putnext(q, mp);
490 		return;
491 	}
492 }
493 
494 /*
495  * similar to ill_dlpi_pending(): verify that the received DLPI response
496  * matches the one that is pending for the arl.
497  */
498 static boolean_t
499 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim)
500 {
501 	t_uscalar_t pending;
502 
503 	mutex_enter(&arl->arl_lock);
504 	if (arl->arl_dlpi_pending == prim) {
505 		mutex_exit(&arl->arl_lock);
506 		return (B_TRUE);
507 	}
508 
509 	if (arl->arl_state_flags & ARL_CONDEMNED) {
510 		mutex_exit(&arl->arl_lock);
511 		return (B_FALSE);
512 	}
513 	pending = arl->arl_dlpi_pending;
514 	mutex_exit(&arl->arl_lock);
515 
516 	if (pending == DL_PRIM_INVAL) {
517 		ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s",
518 		    dl_primstr(prim), arl->arl_name));
519 	} else {
520 		ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s",
521 		    dl_primstr(prim), arl->arl_name, dl_primstr(pending)));
522 	}
523 	return (B_FALSE);
524 }
525 
526 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */
527 static void
528 arp_rput_dlpi(queue_t *q, mblk_t *mp)
529 {
530 	arl_t		*arl = (arl_t *)q->q_ptr;
531 	union DL_primitives *dlp;
532 	t_uscalar_t	prim;
533 	t_uscalar_t	reqprim = DL_PRIM_INVAL;
534 	ill_t		*ill;
535 
536 	if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) {
537 		putnext(q, mp);
538 		return;
539 	}
540 	dlp = (union DL_primitives *)mp->b_rptr;
541 	prim = dlp->dl_primitive;
542 
543 	/*
544 	 * If we received an ACK but didn't send a request for it, then it
545 	 * can't be part of any pending operation; discard up-front.
546 	 */
547 	switch (prim) {
548 	case DL_ERROR_ACK:
549 		/*
550 		 * ce is confused about how DLPI works, so we have to interpret
551 		 * an "error" on DL_NOTIFY_ACK (which we never could have sent)
552 		 * as really meaning an error on DL_NOTIFY_REQ.
553 		 *
554 		 * Note that supporting DL_NOTIFY_REQ is optional, so printing
555 		 * out an error message on the console isn't warranted except
556 		 * for debug.
557 		 */
558 		if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK ||
559 		    dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) {
560 			reqprim = DL_NOTIFY_REQ;
561 		} else {
562 			reqprim = dlp->error_ack.dl_error_primitive;
563 		}
564 		break;
565 	case DL_INFO_ACK:
566 		reqprim = DL_INFO_REQ;
567 		break;
568 	case DL_OK_ACK:
569 		reqprim = dlp->ok_ack.dl_correct_primitive;
570 		break;
571 	case DL_BIND_ACK:
572 		reqprim = DL_BIND_REQ;
573 		break;
574 	default:
575 		DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
576 		    union DL_primitives *, dlp);
577 		putnext(q, mp);
578 		return;
579 	}
580 	if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) {
581 		freemsg(mp);
582 		return;
583 	}
584 	DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received",
585 	    char *, dl_primstr(prim), char *, dl_primstr(reqprim),
586 	    arl_t *, arl);
587 
588 	ASSERT(prim != DL_NOTIFY_IND);
589 
590 	ill = arl_to_ill(arl);
591 
592 	switch (reqprim) {
593 	case DL_INFO_REQ:
594 		/*
595 		 * ill has not been set up yet for this case. This is the
596 		 * DL_INFO_ACK for the first DL_INFO_REQ sent from
597 		 * arp_modopen(). There should be no other arl_dlpi_deferred
598 		 * messages pending. We initialize the arl here.
599 		 */
600 		ASSERT(!arl->arl_dlpi_style_set);
601 		ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ);
602 		ASSERT(arl->arl_dlpi_deferred == NULL);
603 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
604 		arp_ll_set_defaults(arl, mp);
605 		freemsg(mp);
606 		return;
607 	case DL_UNBIND_REQ:
608 		mutex_enter(&arl->arl_lock);
609 		arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
610 		/*
611 		 * This is not an error, so we don't set ARL_LL_DOWN
612 		 */
613 		arl->arl_state_flags &= ~ARL_LL_UP;
614 		arl->arl_state_flags |= ARL_LL_UNBOUND;
615 		if (arl->arl_state_flags & ARL_CONDEMNED) {
616 			/*
617 			 * if this is part of the unplumb the arl may
618 			 * vaporize any moment after we cv_signal the
619 			 * arl_cv so we reset arl_dlpi_pending here.
620 			 * All other cases (including replumb) will
621 			 * have the arl_dlpi_pending reset in
622 			 * arp_dlpi_done.
623 			 */
624 			arl->arl_dlpi_pending = DL_PRIM_INVAL;
625 		}
626 		cv_signal(&arl->arl_cv);
627 		mutex_exit(&arl->arl_lock);
628 		break;
629 	}
630 	if (ill != NULL) {
631 		/*
632 		 * ill ref obtained by arl_to_ill()  will be released
633 		 * by qwriter_ip()
634 		 */
635 		qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer,
636 		    CUR_OP, B_TRUE);
637 		return;
638 	}
639 	freemsg(mp);
640 }
641 
642 /*
643  * Handling of DLPI messages that require exclusive access to the ipsq.
644  */
645 /* ARGSUSED */
646 static void
647 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
648 {
649 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
650 	ill_t		*ill = (ill_t *)q->q_ptr;
651 	arl_t		*arl = ill_to_arl(ill);
652 
653 	if (arl == NULL) {
654 		/*
655 		 * happens as a result arp_modclose triggering unbind.
656 		 * arp_rput_dlpi will cv_signal the arl_cv and the modclose
657 		 * will complete, but when it does ipsq_exit, the waiting
658 		 * qwriter_ip gets into the ipsq but will find the arl null.
659 		 * There should be no deferred messages in this case, so
660 		 * just complete and exit.
661 		 */
662 		arp_cmd_done(ill, 0, DL_UNBIND_REQ);
663 		freemsg(mp);
664 		return;
665 	}
666 	switch (dlp->dl_primitive) {
667 	case DL_ERROR_ACK:
668 		switch (dlp->error_ack.dl_error_primitive) {
669 		case DL_UNBIND_REQ:
670 			mutex_enter(&arl->arl_lock);
671 			arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
672 			arl->arl_state_flags &= ~ARL_LL_UP;
673 			arl->arl_state_flags |= ARL_LL_UNBOUND;
674 			arl->arl_state_flags |= ARL_LL_DOWN;
675 			cv_signal(&arl->arl_cv);
676 			mutex_exit(&arl->arl_lock);
677 			break;
678 		case DL_BIND_REQ:
679 			mutex_enter(&arl->arl_lock);
680 			arl->arl_state_flags &= ~ARL_LL_UP;
681 			arl->arl_state_flags |= ARL_LL_DOWN;
682 			arl->arl_state_flags |= ARL_LL_UNBOUND;
683 			cv_signal(&arl->arl_cv);
684 			mutex_exit(&arl->arl_lock);
685 			break;
686 		case DL_ATTACH_REQ:
687 			break;
688 		default:
689 			/* If it's anything else, we didn't send it. */
690 			arl_refrele(arl);
691 			putnext(q, mp);
692 			return;
693 		}
694 		break;
695 	case DL_OK_ACK:
696 		DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok",
697 		    char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
698 		    char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
699 		    arl_t *, arl);
700 		mutex_enter(&arl->arl_lock);
701 		switch (dlp->ok_ack.dl_correct_primitive) {
702 		case DL_UNBIND_REQ:
703 		case DL_ATTACH_REQ:
704 			break;
705 		default:
706 			ip0dbg(("Dropping unrecognized DL_OK_ACK for %s",
707 			    dl_primstr(dlp->ok_ack.dl_correct_primitive)));
708 			mutex_exit(&arl->arl_lock);
709 			arl_refrele(arl);
710 			freemsg(mp);
711 			return;
712 		}
713 		mutex_exit(&arl->arl_lock);
714 		break;
715 	case DL_BIND_ACK:
716 		DTRACE_PROBE2(rput_dl_bind, arl_t *, arl,
717 		    dl_bind_ack_t *, &dlp->bind_ack);
718 
719 		mutex_enter(&arl->arl_lock);
720 		ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING);
721 		arl->arl_state_flags &=
722 		    ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND);
723 		arl->arl_state_flags |= ARL_LL_UP;
724 		mutex_exit(&arl->arl_lock);
725 		break;
726 	case DL_UDERROR_IND:
727 		DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl,
728 		    dl_uderror_ind_t *, &dlp->uderror_ind);
729 		arl_refrele(arl);
730 		putnext(q, mp);
731 		return;
732 	default:
733 		DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
734 		    union DL_primitives *, dlp);
735 		arl_refrele(arl);
736 		putnext(q, mp);
737 		return;
738 	}
739 	arp_dlpi_done(arl, ill);
740 	arl_refrele(arl);
741 	freemsg(mp);
742 }
743 
744 void
745 arp_rput(queue_t *q, mblk_t *mp)
746 {
747 	arl_t		*arl = q->q_ptr;
748 	boolean_t	need_refrele = B_FALSE;
749 
750 	mutex_enter(&arl->arl_lock);
751 	if (((arl->arl_state_flags &
752 	    (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) {
753 		/*
754 		 * Only allow high priority DLPI messages during unplumb or
755 		 * replumb, and we don't take an arl_refcnt for that case.
756 		 */
757 		if (DB_TYPE(mp) != M_PCPROTO) {
758 			mutex_exit(&arl->arl_lock);
759 			freemsg(mp);
760 			return;
761 		}
762 	} else {
763 		arl_refhold_locked(arl);
764 		need_refrele = B_TRUE;
765 	}
766 	mutex_exit(&arl->arl_lock);
767 
768 	switch (DB_TYPE(mp)) {
769 	case M_PCPROTO:
770 	case M_PROTO: {
771 		ill_t *ill;
772 
773 		/*
774 		 * could be one of
775 		 * (i)   real message from the wire, (DLPI_DATA)
776 		 * (ii)  DLPI message
777 		 * Take a ref on the ill associated with this arl to
778 		 * prevent the ill from being unplumbed until this thread
779 		 * is done.
780 		 */
781 		if (IS_DLPI_DATA(mp)) {
782 			ill = arl_to_ill(arl);
783 			if (ill == NULL) {
784 				arp_drop_packet("No ill", mp, ill);
785 				break;
786 			}
787 			arp_process_packet(ill, mp);
788 			ill_refrele(ill);
789 			break;
790 		}
791 		/* Miscellaneous DLPI messages get shuffled off. */
792 		arp_rput_dlpi(q, mp);
793 		break;
794 	}
795 	case M_ERROR:
796 	case M_HANGUP:
797 		if (mp->b_rptr < mp->b_wptr)
798 			arl->arl_error = (int)(*mp->b_rptr & 0xFF);
799 		if (arl->arl_error == 0)
800 			arl->arl_error = ENXIO;
801 		freemsg(mp);
802 		break;
803 	default:
804 		ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp)));
805 		putnext(q, mp);
806 		break;
807 	}
808 	if (need_refrele)
809 		arl_refrele(arl);
810 }
811 
812 static void
813 arp_process_packet(ill_t *ill, mblk_t *mp)
814 {
815 	mblk_t 		*mp1;
816 	arh_t		*arh;
817 	in_addr_t	src_paddr, dst_paddr;
818 	uint32_t	hlen, plen;
819 	boolean_t	is_probe;
820 	int		op;
821 	ncec_t		*dst_ncec, *src_ncec = NULL;
822 	uchar_t		*src_haddr, *arhp, *dst_haddr, *dp, *sp;
823 	int		err;
824 	ip_stack_t	*ipst;
825 	boolean_t	need_ill_refrele = B_FALSE;
826 	nce_t		*nce;
827 	uchar_t		*src_lladdr;
828 	dl_unitdata_ind_t *dlui;
829 	ip_recv_attr_t	iras;
830 
831 	ASSERT(ill != NULL);
832 	if (ill->ill_flags & ILLF_NOARP) {
833 		arp_drop_packet("Interface does not support ARP", mp, ill);
834 		return;
835 	}
836 	ipst = ill->ill_ipst;
837 	/*
838 	 * What we should have at this point is a DL_UNITDATA_IND message
839 	 * followed by an ARP packet.  We do some initial checks and then
840 	 * get to work.
841 	 */
842 	dlui = (dl_unitdata_ind_t *)mp->b_rptr;
843 	if (dlui->dl_group_address == 1) {
844 		/*
845 		 * multicast or broadcast  packet. Only accept on the ipmp
846 		 * nominated interface for multicasts ('cast_ill').
847 		 * If we have no cast_ill we are liberal and accept everything.
848 		 */
849 		if (IS_UNDER_IPMP(ill)) {
850 			/* For an under ill_grp can change under lock */
851 			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
852 			if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
853 			    ill->ill_grp->ig_cast_ill != NULL) {
854 				rw_exit(&ipst->ips_ill_g_lock);
855 				arp_drop_packet("Interface is not nominated "
856 				    "for multicast sends and receives",
857 				    mp, ill);
858 				return;
859 			}
860 			rw_exit(&ipst->ips_ill_g_lock);
861 		}
862 	}
863 	mp1 = mp->b_cont;
864 	if (mp1 == NULL) {
865 		arp_drop_packet("Missing ARP packet", mp, ill);
866 		return;
867 	}
868 	if (mp1->b_cont != NULL) {
869 		/* No fooling around with funny messages. */
870 		if (!pullupmsg(mp1, -1)) {
871 			arp_drop_packet("Funny message: pullup failed",
872 			    mp, ill);
873 			return;
874 		}
875 	}
876 	arh = (arh_t *)mp1->b_rptr;
877 	hlen = arh->arh_hlen;
878 	plen = arh->arh_plen;
879 	if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) {
880 		arp_drop_packet("mblk len too small", mp, ill);
881 		return;
882 	}
883 	/*
884 	 * hlen 0 is used for RFC 1868 UnARP.
885 	 *
886 	 * Note that the rest of the code checks that hlen is what we expect
887 	 * for this hardware address type, so might as well discard packets
888 	 * here that don't match.
889 	 */
890 	if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) {
891 		DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1);
892 		arp_drop_packet("Bogus hlen or plen", mp, ill);
893 		return;
894 	}
895 	/*
896 	 * Historically, Solaris has been lenient about hardware type numbers.
897 	 * We should check here, but don't.
898 	 */
899 	DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh,
900 	    mblk_t *, mp);
901 	/*
902 	 * If ill is in an ipmp group, it will be the under ill. If we want
903 	 * to report the packet as coming up the IPMP interface, we should
904 	 * convert it to the ipmp ill.
905 	 */
906 	ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in,
907 	    ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst);
908 	DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp);
909 	if (mp == NULL)
910 		return;
911 	arhp = (uchar_t *)arh + ARH_FIXED_LEN;
912 	src_haddr = arhp;			/* ar$sha */
913 	arhp += hlen;
914 	bcopy(arhp, &src_paddr, IP_ADDR_LEN);	/* ar$spa */
915 	sp = arhp;
916 	arhp += IP_ADDR_LEN;
917 	dst_haddr = arhp;			/* ar$dha */
918 	arhp += hlen;
919 	bcopy(arhp, &dst_paddr, IP_ADDR_LEN);	/* ar$tpa */
920 	dp = arhp;
921 	op = BE16_TO_U16(arh->arh_operation);
922 
923 	DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr,
924 	    (in_addr_t), dst_paddr);
925 
926 	/* Determine if this is just a probe */
927 	is_probe = (src_paddr == INADDR_ANY);
928 
929 	/*
930 	 * ira_ill is the only field used down the arp_notify path.
931 	 */
932 	bzero(&iras, sizeof (iras));
933 	iras.ira_ill = iras.ira_rill = ill;
934 	/*
935 	 * RFC 826: first check if the <protocol, sender protocol address> is
936 	 * in the cache, if there is a sender protocol address.  Note that this
937 	 * step also handles resolutions based on source.
938 	 */
939 	/* Note: after here we need to freeb(mp) and freemsg(mp1) separately */
940 	mp->b_cont = NULL;
941 	if (is_probe) {
942 		err = AR_NOTFOUND;
943 	} else {
944 		if (plen != 4) {
945 			arp_drop_packet("bad protocol len", mp, ill);
946 			return;
947 		}
948 		err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr,
949 		    &src_ncec, op);
950 		switch (err) {
951 		case AR_BOGON:
952 			ASSERT(src_ncec != NULL);
953 			arp_notify(src_paddr, mp1, AR_CN_BOGON,
954 			    &iras, src_ncec);
955 			break;
956 		case AR_FAILED:
957 			arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
958 			    src_ncec);
959 			break;
960 		case AR_LOOPBACK:
961 			DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *,
962 			    arh);
963 			freemsg(mp1);
964 			break;
965 		default:
966 			goto update;
967 		}
968 		freemsg(mp);
969 		if (src_ncec != NULL)
970 			ncec_refrele(src_ncec);
971 		return;
972 	}
973 update:
974 	/*
975 	 * Now look up the destination address.  By RFC 826, we ignore the
976 	 * packet at this step if the target isn't one of our addresses (i.e.,
977 	 * one we have been asked to PUBLISH).  This is true even if the
978 	 * target is something we're trying to resolve and the packet
979 	 * is a response.
980 	 */
981 	dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr);
982 	if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) {
983 		/*
984 		 * Let the client know if the source mapping has changed, even
985 		 * if the destination provides no useful information for the
986 		 * client.
987 		 */
988 		if (err == AR_CHANGED) {
989 			arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
990 			    NULL);
991 			freemsg(mp);
992 		} else {
993 			freemsg(mp);
994 			arp_drop_packet("Target is not interesting", mp1, ill);
995 		}
996 		if (dst_ncec != NULL)
997 			ncec_refrele(dst_ncec);
998 		if (src_ncec != NULL)
999 			ncec_refrele(src_ncec);
1000 		return;
1001 	}
1002 
1003 	if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) {
1004 		/*
1005 		 * Check for a reflection.  Some misbehaving bridges will
1006 		 * reflect our own transmitted packets back to us.
1007 		 */
1008 		ASSERT(NCE_PUBLISH(dst_ncec));
1009 		if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) {
1010 			ncec_refrele(dst_ncec);
1011 			if (src_ncec != NULL)
1012 				ncec_refrele(src_ncec);
1013 			freemsg(mp);
1014 			arp_drop_packet("bad arh_len", mp1, ill);
1015 			return;
1016 		}
1017 		if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) {
1018 			DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill,
1019 			    arh_t *, arh, ncec_t *, dst_ncec);
1020 			ncec_refrele(dst_ncec);
1021 			if (src_ncec != NULL)
1022 				ncec_refrele(src_ncec);
1023 			freemsg(mp);
1024 			arp_drop_packet("Reflected probe", mp1, ill);
1025 			return;
1026 		}
1027 		/*
1028 		 * Responses targeting our HW address that are not responses to
1029 		 * our DAD probe must be ignored as they are related to requests
1030 		 * sent before DAD was restarted.
1031 		 */
1032 		if (op == ARP_RESPONSE &&
1033 		    (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) {
1034 			ncec_refrele(dst_ncec);
1035 			if (src_ncec != NULL)
1036 				ncec_refrele(src_ncec);
1037 			freemsg(mp);
1038 			arp_drop_packet(
1039 			    "Response to request that was sent before DAD",
1040 			    mp1, ill);
1041 			return;
1042 		}
1043 		/*
1044 		 * Responses targeted to HW addresses which are not ours but
1045 		 * sent to our unverified proto address are also conflicts.
1046 		 * These may be reported by a proxy rather than the interface
1047 		 * with the conflicting address, dst_paddr is in conflict
1048 		 * rather than src_paddr. To ensure IP can locate the correct
1049 		 * ipif to take down, it is necessary to copy dst_paddr to
1050 		 * the src_paddr field before sending it to IP. The same is
1051 		 * required for probes, where src_paddr will be INADDR_ANY.
1052 		 */
1053 		if (is_probe || op == ARP_RESPONSE) {
1054 			bcopy(dp, sp, plen);
1055 			arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
1056 			    NULL);
1057 			ncec_delete(dst_ncec);
1058 		} else if (err == AR_CHANGED) {
1059 			arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
1060 			    NULL);
1061 		} else {
1062 			DTRACE_PROBE3(rput_request_unverified,
1063 			    ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec);
1064 			arp_drop_packet("Unverified request", mp1, ill);
1065 		}
1066 		freemsg(mp);
1067 		ncec_refrele(dst_ncec);
1068 		if (src_ncec != NULL)
1069 			ncec_refrele(src_ncec);
1070 		return;
1071 	}
1072 	/*
1073 	 * If it's a request, then we reply to this, and if we think the
1074 	 * sender's unknown, then we create an entry to avoid unnecessary ARPs.
1075 	 * The design assumption is that someone ARPing us is likely to send us
1076 	 * a packet soon, and that we'll want to reply to it.
1077 	 */
1078 	if (op == ARP_REQUEST) {
1079 		const uchar_t *nce_hwaddr;
1080 		struct in_addr nce_paddr;
1081 		clock_t now;
1082 		ill_t *under_ill = ill;
1083 		boolean_t send_unicast = B_TRUE;
1084 
1085 		ASSERT(NCE_PUBLISH(dst_ncec));
1086 
1087 		if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) {
1088 			/*
1089 			 * Ignore senders who are deliberately or accidentally
1090 			 * confused.
1091 			 */
1092 			goto bail;
1093 		}
1094 
1095 		if (!is_probe && err == AR_NOTFOUND) {
1096 			ASSERT(src_ncec == NULL);
1097 
1098 			if (IS_UNDER_IPMP(under_ill)) {
1099 				/*
1100 				 * create the ncec for the sender on ipmp_ill.
1101 				 * We pass in the ipmp_ill itself to avoid
1102 				 * creating an nce_t on the under_ill.
1103 				 */
1104 				ill = ipmp_ill_hold_ipmp_ill(under_ill);
1105 				if (ill == NULL)
1106 					ill = under_ill;
1107 				else
1108 					need_ill_refrele = B_TRUE;
1109 			}
1110 
1111 			err = nce_lookup_then_add_v4(ill, src_haddr, hlen,
1112 			    &src_paddr, 0, ND_STALE, &nce);
1113 
1114 			switch (err) {
1115 			case 0:
1116 			case EEXIST:
1117 				ip1dbg(("added ncec %p in state %d ill %s\n",
1118 				    (void *)src_ncec, src_ncec->ncec_state,
1119 				    ill->ill_name));
1120 				src_ncec = nce->nce_common;
1121 				break;
1122 			default:
1123 				/*
1124 				 * Either no memory, or the outgoing interface
1125 				 * is in the process of down/unplumb. In the
1126 				 * latter case, we will fail the send anyway,
1127 				 * and in the former case, we should try to send
1128 				 * the ARP response.
1129 				 */
1130 				src_lladdr = src_haddr;
1131 				goto send_response;
1132 			}
1133 			ncec_refhold(src_ncec);
1134 			nce_refrele(nce);
1135 			/* set up cleanup interval on ncec */
1136 		}
1137 
1138 		/*
1139 		 * This implements periodic address defense based on a modified
1140 		 * version of the RFC 3927 requirements.  Instead of sending a
1141 		 * broadcasted reply every time, as demanded by the RFC, we
1142 		 * send at most one broadcast reply per arp_broadcast_interval.
1143 		 */
1144 		now = ddi_get_lbolt();
1145 		if ((now - dst_ncec->ncec_last_time_defended) >
1146 		    MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) {
1147 			dst_ncec->ncec_last_time_defended = now;
1148 			/*
1149 			 * If this is one of the long-suffering entries,
1150 			 * pull it out now.  It no longer needs separate
1151 			 * defense, because we're now doing that with this
1152 			 * broadcasted reply.
1153 			 */
1154 			dst_ncec->ncec_flags &= ~NCE_F_DELAYED;
1155 			send_unicast = B_FALSE;
1156 		}
1157 		if (src_ncec != NULL && send_unicast) {
1158 			src_lladdr = src_ncec->ncec_lladdr;
1159 		} else {
1160 			src_lladdr = under_ill->ill_bcast_mp->b_rptr +
1161 			    NCE_LL_ADDR_OFFSET(under_ill);
1162 		}
1163 send_response:
1164 		nce_hwaddr = dst_ncec->ncec_lladdr;
1165 		IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr);
1166 
1167 		(void) arp_output(under_ill, ARP_RESPONSE,
1168 		    nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr,
1169 		    (uchar_t *)&src_paddr, src_lladdr);
1170 	}
1171 bail:
1172 	if (dst_ncec != NULL) {
1173 		ncec_refrele(dst_ncec);
1174 	}
1175 	if (src_ncec != NULL) {
1176 		ncec_refrele(src_ncec);
1177 	}
1178 	if (err == AR_CHANGED) {
1179 		mp->b_cont = NULL;
1180 		arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL);
1181 		mp1 = NULL;
1182 	}
1183 	if (need_ill_refrele)
1184 		ill_refrele(ill);
1185 done:
1186 	freemsg(mp);
1187 	freemsg(mp1);
1188 }
1189 
1190 /*
1191  * Basic initialization of the arl_t and the arl_common structure shared with
1192  * the ill_t that is done after SLIFNAME/IF_UNITSEL.
1193  */
1194 static int
1195 arl_ill_init(arl_t *arl, char *ill_name)
1196 {
1197 	ill_t *ill;
1198 	arl_ill_common_t *ai;
1199 
1200 	ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE,
1201 	    arl->arl_ipst);
1202 
1203 	if (ill == NULL)
1204 		return (ENXIO);
1205 
1206 	/*
1207 	 * By the time we set up the arl, we expect the ETHERTYPE_IP
1208 	 * stream to be fully bound and attached. So we copy/verify
1209 	 * relevant information as possible from/against the ill.
1210 	 *
1211 	 * The following should have been set up in arp_ll_set_defaults()
1212 	 * after the first DL_INFO_ACK was received.
1213 	 */
1214 	ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length);
1215 	ASSERT(arl->arl_sap == ETHERTYPE_ARP);
1216 	ASSERT(arl->arl_mactype == ill->ill_mactype);
1217 	ASSERT(arl->arl_sap_length == ill->ill_sap_length);
1218 
1219 	ai =  kmem_zalloc(sizeof (*ai), KM_SLEEP);
1220 	mutex_enter(&ill->ill_lock);
1221 	/* First ensure that the ill is not CONDEMNED.  */
1222 	if (ill->ill_state_flags & ILL_CONDEMNED) {
1223 		mutex_exit(&ill->ill_lock);
1224 		ill_refrele(ill);
1225 		kmem_free(ai, sizeof (*ai));
1226 		return (ENXIO);
1227 	}
1228 	if (ill->ill_common != NULL || arl->arl_common != NULL) {
1229 		mutex_exit(&ill->ill_lock);
1230 		ip0dbg(("%s: PPA already exists", ill->ill_name));
1231 		ill_refrele(ill);
1232 		kmem_free(ai, sizeof (*ai));
1233 		return (EEXIST);
1234 	}
1235 	mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL);
1236 	ai->ai_arl = arl;
1237 	ai->ai_ill = ill;
1238 	ill->ill_common = ai;
1239 	arl->arl_common = ai;
1240 	mutex_exit(&ill->ill_lock);
1241 	(void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ);
1242 	arl->arl_name_length = ill->ill_name_length;
1243 	ill_refrele(ill);
1244 	arp_ifname_notify(arl);
1245 	return (0);
1246 }
1247 
1248 /* Allocate and do common initializations for DLPI messages. */
1249 static mblk_t *
1250 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size)
1251 {
1252 	mblk_t  *mp;
1253 
1254 	if ((mp = allocb(size, BPRI_HI)) == NULL)
1255 		return (NULL);
1256 
1257 	/*
1258 	 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter
1259 	 * of which we don't seem to use) are sent with M_PCPROTO, and
1260 	 * that other DLPI are M_PROTO.
1261 	 */
1262 	DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO;
1263 
1264 	mp->b_wptr = mp->b_rptr + size;
1265 	bzero(mp->b_rptr, size);
1266 	DL_PRIM(mp) = prim;
1267 	return (mp);
1268 }
1269 
1270 
1271 int
1272 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa)
1273 {
1274 	arl_t *arl;
1275 	char *cp, ill_name[LIFNAMSIZ];
1276 
1277 	if (q->q_next == NULL)
1278 		return (EINVAL);
1279 
1280 	do {
1281 		q = q->q_next;
1282 	} while (q->q_next != NULL);
1283 	cp = q->q_qinfo->qi_minfo->mi_idname;
1284 
1285 	arl = (arl_t *)q->q_ptr;
1286 	(void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa);
1287 	arl->arl_ppa = *ppa;
1288 	return (arl_ill_init(arl, ill_name));
1289 }
1290 
1291 int
1292 ip_sioctl_slifname_arp(queue_t *q, void *lifreq)
1293 {
1294 	arl_t *arl;
1295 	struct lifreq *lifr = lifreq;
1296 
1297 	/* ioctl not valid when IP opened as a device */
1298 	if (q->q_next == NULL)
1299 		return (EINVAL);
1300 
1301 	arl = (arl_t *)q->q_ptr;
1302 	arl->arl_ppa = lifr->lifr_ppa;
1303 	return (arl_ill_init(arl, lifr->lifr_name));
1304 }
1305 
1306 arl_t *
1307 ill_to_arl(ill_t *ill)
1308 {
1309 	arl_ill_common_t *ai = ill->ill_common;
1310 	arl_t *arl = NULL;
1311 
1312 	if (ai == NULL)
1313 		return (NULL);
1314 	/*
1315 	 * Find the arl_t that corresponds to this ill_t from the shared
1316 	 * ill_common structure. We can safely access the ai here as it
1317 	 * will only be freed in arp_modclose() after we have become
1318 	 * single-threaded.
1319 	 */
1320 	mutex_enter(&ai->ai_lock);
1321 	if ((arl = ai->ai_arl) != NULL) {
1322 		mutex_enter(&arl->arl_lock);
1323 		if (!(arl->arl_state_flags & ARL_CONDEMNED)) {
1324 			arl_refhold_locked(arl);
1325 			mutex_exit(&arl->arl_lock);
1326 		} else {
1327 			mutex_exit(&arl->arl_lock);
1328 			arl = NULL;
1329 		}
1330 	}
1331 	mutex_exit(&ai->ai_lock);
1332 	return (arl);
1333 }
1334 
1335 ill_t *
1336 arl_to_ill(arl_t *arl)
1337 {
1338 	arl_ill_common_t *ai = arl->arl_common;
1339 	ill_t *ill = NULL;
1340 
1341 	if (ai == NULL) {
1342 		/*
1343 		 * happens when the arp stream is just being opened, and
1344 		 * arl_ill_init has not been executed yet.
1345 		 */
1346 		return (NULL);
1347 	}
1348 	/*
1349 	 * Find the ill_t that corresponds to this arl_t from the shared
1350 	 * arl_common structure. We can safely access the ai here as it
1351 	 * will only be freed in arp_modclose() after we have become
1352 	 * single-threaded.
1353 	 */
1354 	mutex_enter(&ai->ai_lock);
1355 	if ((ill = ai->ai_ill) != NULL) {
1356 		mutex_enter(&ill->ill_lock);
1357 		if (!ILL_IS_CONDEMNED(ill)) {
1358 			ill_refhold_locked(ill);
1359 			mutex_exit(&ill->ill_lock);
1360 		} else {
1361 			mutex_exit(&ill->ill_lock);
1362 			ill = NULL;
1363 		}
1364 	}
1365 	mutex_exit(&ai->ai_lock);
1366 	return (ill);
1367 }
1368 
1369 int
1370 arp_ll_up(ill_t *ill)
1371 {
1372 	mblk_t	*attach_mp = NULL;
1373 	mblk_t	*bind_mp = NULL;
1374 	mblk_t	*unbind_mp = NULL;
1375 	arl_t 	*arl;
1376 
1377 	ASSERT(IAM_WRITER_ILL(ill));
1378 	arl = ill_to_arl(ill);
1379 
1380 	DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill);
1381 	if (arl == NULL)
1382 		return (ENXIO);
1383 	DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl);
1384 	if ((arl->arl_state_flags & ARL_LL_UP) != 0) {
1385 		arl_refrele(arl);
1386 		return (0);
1387 	}
1388 	if (arl->arl_needs_attach) { /* DL_STYLE2 */
1389 		attach_mp =
1390 		    ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t));
1391 		if (attach_mp == NULL)
1392 			goto bad;
1393 		((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa;
1394 	}
1395 
1396 	/* Allocate and initialize a bind message. */
1397 	bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t));
1398 	if (bind_mp == NULL)
1399 		goto bad;
1400 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP;
1401 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS;
1402 
1403 	unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t));
1404 	if (unbind_mp == NULL)
1405 		goto bad;
1406 	if (arl->arl_needs_attach) {
1407 		arp_dlpi_send(arl, attach_mp);
1408 	}
1409 	arl->arl_unbind_mp = unbind_mp;
1410 
1411 	arl->arl_state_flags |= ARL_LL_BIND_PENDING;
1412 	arp_dlpi_send(arl, bind_mp);
1413 	arl_refrele(arl);
1414 	return (EINPROGRESS);
1415 
1416 bad:
1417 	freemsg(attach_mp);
1418 	freemsg(bind_mp);
1419 	freemsg(unbind_mp);
1420 	arl_refrele(arl);
1421 	return (ENOMEM);
1422 }
1423 
1424 /*
1425  * consumes/frees mp
1426  */
1427 static void
1428 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code,
1429     ip_recv_attr_t *ira, ncec_t *ncec)
1430 {
1431 	char		hbuf[MAC_STR_LEN];
1432 	char		sbuf[INET_ADDRSTRLEN];
1433 	ill_t		*ill = ira->ira_ill;
1434 	ip_stack_t	*ipst = ill->ill_ipst;
1435 	arh_t		*arh = (arh_t *)mp->b_rptr;
1436 
1437 	switch (arcn_code) {
1438 	case AR_CN_BOGON:
1439 		/*
1440 		 * Someone is sending ARP packets with a source protocol
1441 		 * address that we have published and for which we believe our
1442 		 * entry is authoritative and verified to be unique on
1443 		 * the network.
1444 		 *
1445 		 * arp_process_packet() sends AR_CN_FAILED for the case when
1446 		 * a DAD probe is received and the hardware address of a
1447 		 * non-authoritative entry has changed. Thus, AR_CN_BOGON
1448 		 * indicates a real conflict, and we have to do resolution.
1449 		 *
1450 		 * We back away quickly from the address if it's from DHCP or
1451 		 * otherwise temporary and hasn't been used recently (or at
1452 		 * all).  We'd like to include "deprecated" addresses here as
1453 		 * well (as there's no real reason to defend something we're
1454 		 * discarding), but IPMP "reuses" this flag to mean something
1455 		 * other than the standard meaning.
1456 		 */
1457 		if (ip_nce_conflict(mp, ira, ncec)) {
1458 			(void) mac_colon_addr((uint8_t *)(arh + 1),
1459 			    arh->arh_hlen, hbuf, sizeof (hbuf));
1460 			(void) ip_dot_addr(src, sbuf);
1461 			cmn_err(CE_WARN,
1462 			    "proxy ARP problem?  Node '%s' is using %s on %s",
1463 			    hbuf, sbuf, ill->ill_name);
1464 			if (!arp_no_defense)
1465 				(void) arp_announce(ncec);
1466 			/*
1467 			 * ncec_last_time_defended has been adjusted in
1468 			 * ip_nce_conflict.
1469 			 */
1470 		} else {
1471 			ncec_delete(ncec);
1472 		}
1473 		freemsg(mp);
1474 		break;
1475 	case AR_CN_ANNOUNCE: {
1476 		nce_hw_map_t hwm;
1477 		/*
1478 		 * ARP gives us a copy of any packet where it thinks
1479 		 * the address has changed, so that we can update our
1480 		 * caches.  We're responsible for caching known answers
1481 		 * in the current design.  We check whether the
1482 		 * hardware address really has changed in all of our
1483 		 * entries that have cached this mapping, and if so, we
1484 		 * blow them away.  This way we will immediately pick
1485 		 * up the rare case of a host changing hardware
1486 		 * address.
1487 		 */
1488 		if (src == 0) {
1489 			freemsg(mp);
1490 			break;
1491 		}
1492 		hwm.hwm_addr = src;
1493 		hwm.hwm_hwlen = arh->arh_hlen;
1494 		hwm.hwm_hwaddr = (uchar_t *)(arh + 1);
1495 		hwm.hwm_flags = 0;
1496 		ncec_walk_common(ipst->ips_ndp4, NULL,
1497 		    (pfi_t)nce_update_hw_changed, &hwm, B_TRUE);
1498 		freemsg(mp);
1499 		break;
1500 	}
1501 	case AR_CN_FAILED:
1502 		if (arp_no_defense) {
1503 			(void) mac_colon_addr((uint8_t *)(arh + 1),
1504 			    arh->arh_hlen, hbuf, sizeof (hbuf));
1505 			(void) ip_dot_addr(src, sbuf);
1506 
1507 			cmn_err(CE_WARN,
1508 			    "node %s is using our IP address %s on %s",
1509 			    hbuf, sbuf, ill->ill_name);
1510 			freemsg(mp);
1511 			break;
1512 		}
1513 		/*
1514 		 * mp will be freed by arp_excl.
1515 		 */
1516 		ill_refhold(ill);
1517 		qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
1518 		return;
1519 	default:
1520 		ASSERT(0);
1521 		freemsg(mp);
1522 		break;
1523 	}
1524 }
1525 
1526 /*
1527  * arp_output is called to transmit an ARP Request or Response. The mapping
1528  * to RFC 826 variables is:
1529  *   haddr1 == ar$sha
1530  *   paddr1 == ar$spa
1531  *   haddr2 == ar$tha
1532  *   paddr2 == ar$tpa
1533  * The ARP frame is sent to the ether_dst in dst_lladdr.
1534  */
1535 static int
1536 arp_output(ill_t *ill, uint32_t operation,
1537     const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2,
1538     const uchar_t *paddr2, uchar_t *dst_lladdr)
1539 {
1540 	arh_t	*arh;
1541 	uint8_t	*cp;
1542 	uint_t	hlen;
1543 	uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */
1544 	uint32_t proto = IP_ARP_PROTO_TYPE;
1545 	mblk_t *mp;
1546 	arl_t *arl;
1547 
1548 	ASSERT(dst_lladdr != NULL);
1549 	hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */
1550 	mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length);
1551 
1552 	if (mp == NULL)
1553 		return (ENOMEM);
1554 
1555 	/* IFF_NOARP flag is set or link down: do not send arp messages */
1556 	if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) {
1557 		freemsg(mp);
1558 		return (ENXIO);
1559 	}
1560 
1561 	mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) +
1562 	    plen + plen, BPRI_MED);
1563 	if (mp->b_cont == NULL) {
1564 		freeb(mp);
1565 		return (ENOMEM);
1566 	}
1567 
1568 	/* Fill in the ARP header. */
1569 	cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
1570 	mp->b_cont->b_rptr = cp;
1571 	arh = (arh_t *)cp;
1572 	U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware);
1573 	U16_TO_BE16(proto, arh->arh_proto);
1574 	arh->arh_hlen = (uint8_t)hlen;
1575 	arh->arh_plen = (uint8_t)plen;
1576 	U16_TO_BE16(operation, arh->arh_operation);
1577 	cp += ARH_FIXED_LEN;
1578 	bcopy(haddr1, cp, hlen);
1579 	cp += hlen;
1580 	if (paddr1 == NULL)
1581 		bzero(cp, plen);
1582 	else
1583 		bcopy(paddr1, cp, plen);
1584 	cp += plen;
1585 	if (haddr2 == NULL)
1586 		bzero(cp, hlen);
1587 	else
1588 		bcopy(haddr2, cp, hlen);
1589 	cp += hlen;
1590 	bcopy(paddr2, cp, plen);
1591 	cp += plen;
1592 	mp->b_cont->b_wptr = cp;
1593 
1594 	DTRACE_PROBE3(arp__physical__out__start,
1595 	    ill_t *, ill, arh_t *, arh, mblk_t *, mp);
1596 	ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event,
1597 	    ill->ill_ipst->ips_arp_physical_out,
1598 	    ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont,
1599 	    ill->ill_ipst);
1600 	DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp);
1601 	if (mp == NULL)
1602 		return (0);
1603 
1604 	/* Ship it out. */
1605 	arl = ill_to_arl(ill);
1606 	if (arl == NULL) {
1607 		freemsg(mp);
1608 		return (0);
1609 	}
1610 	if (canputnext(arl->arl_wq))
1611 		putnext(arl->arl_wq, mp);
1612 	else
1613 		freemsg(mp);
1614 	arl_refrele(arl);
1615 	return (0);
1616 }
1617 
1618 /*
1619  * Process resolve requests.
1620  * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise
1621  * we leave it alone (the caller will check and manage ncec_pcnt in those
1622  * cases.)
1623  */
1624 int
1625 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill)
1626 {
1627 	int err;
1628 	const uchar_t *target_hwaddr;
1629 	struct in_addr nce_paddr;
1630 	uchar_t *dst_lladdr;
1631 	boolean_t use_rcnt = !NCE_ISREACHABLE(ncec);
1632 
1633 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
1634 	ASSERT(!IS_IPMP(ill));
1635 
1636 	if (use_rcnt && ncec->ncec_rcnt == 0) {
1637 		/* not allowed any more retransmits. */
1638 		return (0);
1639 	}
1640 
1641 	if ((ill->ill_flags & ILLF_NOARP) != 0)
1642 		return (0);
1643 
1644 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr);
1645 
1646 	target_hwaddr =
1647 	    ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1648 
1649 	if (NCE_ISREACHABLE(ncec)) {
1650 		dst_lladdr =  ncec->ncec_lladdr;
1651 	} else {
1652 		dst_lladdr =  ill->ill_bcast_mp->b_rptr +
1653 		    NCE_LL_ADDR_OFFSET(ill);
1654 	}
1655 
1656 	mutex_exit(&ncec->ncec_lock);
1657 	err = arp_output(ill, ARP_REQUEST,
1658 	    ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr,
1659 	    (uchar_t *)&nce_paddr, dst_lladdr);
1660 	mutex_enter(&ncec->ncec_lock);
1661 
1662 	if (err != 0) {
1663 		/*
1664 		 * Some transient error such as ENOMEM or a down link was
1665 		 * encountered. If the link has been taken down permanently,
1666 		 * the ncec will eventually be cleaned up (ipif_down_tail()
1667 		 * will call ipif_nce_down() and flush the ncec), to terminate
1668 		 * recurring attempts to send ARP requests. In all other cases,
1669 		 * allow the caller another chance at success next time.
1670 		 */
1671 		return (ncec->ncec_ill->ill_reachable_retrans_time);
1672 	}
1673 
1674 	if (use_rcnt)
1675 		ncec->ncec_rcnt--;
1676 
1677 	return (ncec->ncec_ill->ill_reachable_retrans_time);
1678 }
1679 
1680 /* return B_TRUE if dropped */
1681 boolean_t
1682 arp_announce(ncec_t *ncec)
1683 {
1684 	ill_t *ill;
1685 	int err;
1686 	uchar_t *sphys_addr, *bcast_addr;
1687 	struct in_addr ncec_addr;
1688 	boolean_t need_refrele = B_FALSE;
1689 
1690 	ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0);
1691 	ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0);
1692 
1693 	if (IS_IPMP(ncec->ncec_ill)) {
1694 		/* sent on the cast_ill */
1695 		ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE);
1696 		if (ill == NULL)
1697 			return (B_TRUE);
1698 		need_refrele = B_TRUE;
1699 	} else {
1700 		ill = ncec->ncec_ill;
1701 	}
1702 
1703 	/*
1704 	 * broadcast an announce to ill_bcast address.
1705 	 */
1706 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
1707 
1708 	sphys_addr = ncec->ncec_lladdr;
1709 	bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1710 
1711 	err = arp_output(ill, ARP_REQUEST,
1712 	    sphys_addr, (uchar_t *)&ncec_addr, bcast_addr,
1713 	    (uchar_t *)&ncec_addr, bcast_addr);
1714 
1715 	if (need_refrele)
1716 		ill_refrele(ill);
1717 	return (err != 0);
1718 }
1719 
1720 /* return B_TRUE if dropped */
1721 boolean_t
1722 arp_probe(ncec_t *ncec)
1723 {
1724 	ill_t *ill;
1725 	int err;
1726 	struct in_addr ncec_addr;
1727 	uchar_t *sphys_addr, *dst_lladdr;
1728 
1729 	if (IS_IPMP(ncec->ncec_ill)) {
1730 		ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE);
1731 		if (ill == NULL)
1732 			return (B_TRUE);
1733 	} else {
1734 		ill = ncec->ncec_ill;
1735 	}
1736 
1737 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
1738 
1739 	sphys_addr = ncec->ncec_lladdr;
1740 	dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1741 	err = arp_output(ill, ARP_REQUEST,
1742 	    sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr);
1743 
1744 	if (IS_IPMP(ncec->ncec_ill))
1745 		ill_refrele(ill);
1746 	return (err != 0);
1747 }
1748 
1749 static mblk_t *
1750 arl_unbind(arl_t *arl)
1751 {
1752 	mblk_t *mp;
1753 
1754 	if ((mp = arl->arl_unbind_mp) != NULL) {
1755 		arl->arl_unbind_mp = NULL;
1756 		arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS;
1757 	}
1758 	return (mp);
1759 }
1760 
1761 int
1762 arp_ll_down(ill_t *ill)
1763 {
1764 	arl_t 	*arl;
1765 	mblk_t *unbind_mp;
1766 	int err = 0;
1767 	boolean_t replumb = (ill->ill_replumbing == 1);
1768 
1769 	DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill);
1770 	if ((arl = ill_to_arl(ill)) == NULL)
1771 		return (ENXIO);
1772 	DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl);
1773 	mutex_enter(&arl->arl_lock);
1774 	unbind_mp = arl_unbind(arl);
1775 	if (unbind_mp != NULL) {
1776 		ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS);
1777 		DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp,
1778 		    arl_t *, arl);
1779 		err = EINPROGRESS;
1780 		if (replumb)
1781 			arl->arl_state_flags |= ARL_LL_REPLUMBING;
1782 	}
1783 	mutex_exit(&arl->arl_lock);
1784 	if (unbind_mp != NULL)
1785 		arp_dlpi_send(arl, unbind_mp);
1786 	arl_refrele(arl);
1787 	return (err);
1788 }
1789 
1790 /* ARGSUSED */
1791 int
1792 arp_close(queue_t *q, int flags)
1793 {
1794 	if (WR(q)->q_next != NULL) {
1795 		/* This is a module close */
1796 		return (arp_modclose(q->q_ptr));
1797 	}
1798 	qprocsoff(q);
1799 	q->q_ptr = WR(q)->q_ptr = NULL;
1800 	return (0);
1801 }
1802 
1803 static int
1804 arp_modclose(arl_t *arl)
1805 {
1806 	arl_ill_common_t *ai = arl->arl_common;
1807 	ill_t		*ill;
1808 	queue_t		*q = arl->arl_rq;
1809 	mblk_t		*mp, *nextmp;
1810 	ipsq_t		*ipsq = NULL;
1811 
1812 	ill = arl_to_ill(arl);
1813 	if (ill != NULL) {
1814 		if (!ill_waiter_inc(ill)) {
1815 			ill_refrele(ill);
1816 		} else {
1817 			ill_refrele(ill);
1818 			if (ipsq_enter(ill, B_FALSE, NEW_OP))
1819 				ipsq = ill->ill_phyint->phyint_ipsq;
1820 			ill_waiter_dcr(ill);
1821 		}
1822 		if (ipsq == NULL) {
1823 			/*
1824 			 * could not enter the ipsq because ill is already
1825 			 * marked CONDEMNED.
1826 			 */
1827 			ill = NULL;
1828 		}
1829 	}
1830 	if (ai != NULL && ipsq == NULL) {
1831 		/*
1832 		 * Either we did not get an ill because it was marked CONDEMNED
1833 		 * or we could not enter the ipsq because it was unplumbing.
1834 		 * In both cases, wait for the ill to complete ip_modclose().
1835 		 *
1836 		 * If the arp_modclose happened even before SLIFNAME, the ai
1837 		 * itself would be NULL, in which case we can complete the close
1838 		 * without waiting.
1839 		 */
1840 		mutex_enter(&ai->ai_lock);
1841 		while (ai->ai_ill != NULL)
1842 			cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock);
1843 		mutex_exit(&ai->ai_lock);
1844 	}
1845 	ASSERT(ill == NULL || IAM_WRITER_ILL(ill));
1846 
1847 	mutex_enter(&arl->arl_lock);
1848 	/*
1849 	 * If the ill had completed unplumbing before arp_modclose(), there
1850 	 * would be no ill (and therefore, no ipsq) to serialize arp_modclose()
1851 	 * so that we need to explicitly check for ARL_CONDEMNED and back off
1852 	 * if it is set.
1853 	 */
1854 	if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) {
1855 		mutex_exit(&arl->arl_lock);
1856 		ASSERT(ipsq == NULL);
1857 		return (0);
1858 	}
1859 	arl->arl_state_flags |= ARL_CONDEMNED;
1860 
1861 	/*
1862 	 * send out all pending dlpi messages, don't wait for the ack (which
1863 	 * will be ignored in arp_rput when CONDEMNED is set)
1864 	 *
1865 	 * We have to check for pending DL_UNBIND_REQ because, in the case
1866 	 * that ip_modclose() executed before arp_modclose(), the call to
1867 	 * ill_delete_tail->ipif_arp_down() would have triggered a
1868 	 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail
1869 	 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not
1870 	 * have been processed yet. In this scenario, we cannot reset
1871 	 * arl_dlpi_pending, because the setting/clearing of arl_state_flags
1872 	 * related to unbind, and the associated cv_waits must be allowed to
1873 	 * continue.
1874 	 */
1875 	if (arl->arl_dlpi_pending != DL_UNBIND_REQ)
1876 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
1877 	mp = arl->arl_dlpi_deferred;
1878 	arl->arl_dlpi_deferred = NULL;
1879 	mutex_exit(&arl->arl_lock);
1880 
1881 	for (; mp != NULL; mp = nextmp) {
1882 		nextmp = mp->b_next;
1883 		mp->b_next = NULL;
1884 		putnext(arl->arl_wq, mp);
1885 	}
1886 
1887 	/* Wait for data paths to quiesce */
1888 	mutex_enter(&arl->arl_lock);
1889 	while (arl->arl_refcnt != 0)
1890 		cv_wait(&arl->arl_cv, &arl->arl_lock);
1891 
1892 	/*
1893 	 * unbind, so that nothing else can come up from driver.
1894 	 */
1895 	mp = arl_unbind(arl);
1896 	mutex_exit(&arl->arl_lock);
1897 	if (mp != NULL)
1898 		arp_dlpi_send(arl, mp);
1899 	mutex_enter(&arl->arl_lock);
1900 
1901 	/* wait for unbind ack  */
1902 	while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
1903 		cv_wait(&arl->arl_cv, &arl->arl_lock);
1904 	mutex_exit(&arl->arl_lock);
1905 
1906 	qprocsoff(q);
1907 
1908 	if (ill != NULL) {
1909 		mutex_enter(&ill->ill_lock);
1910 		ill->ill_arl_dlpi_pending = 0;
1911 		mutex_exit(&ill->ill_lock);
1912 	}
1913 
1914 	if (ai != NULL) {
1915 		mutex_enter(&ai->ai_lock);
1916 		ai->ai_arl = NULL;
1917 		if (ai->ai_ill == NULL) {
1918 			mutex_destroy(&ai->ai_lock);
1919 			kmem_free(ai, sizeof (*ai));
1920 		} else {
1921 			mutex_exit(&ai->ai_lock);
1922 		}
1923 	}
1924 
1925 	/* free up the rest */
1926 	arp_mod_close_tail(arl);
1927 
1928 	q->q_ptr = WR(q)->q_ptr = NULL;
1929 
1930 	if (ipsq != NULL)
1931 		ipsq_exit(ipsq);
1932 
1933 	return (0);
1934 }
1935 
1936 static void
1937 arp_mod_close_tail(arl_t *arl)
1938 {
1939 	ip_stack_t	*ipst = arl->arl_ipst;
1940 	mblk_t		**mpp;
1941 
1942 	netstack_hold(ipst->ips_netstack);
1943 
1944 	mutex_enter(&ipst->ips_ip_mi_lock);
1945 	mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl);
1946 	mutex_exit(&ipst->ips_ip_mi_lock);
1947 
1948 	/*
1949 	 * credp could be null if the open didn't succeed and ip_modopen
1950 	 * itself calls ip_close.
1951 	 */
1952 	if (arl->arl_credp != NULL)
1953 		crfree(arl->arl_credp);
1954 
1955 	/* Free all retained control messages. */
1956 	mpp = &arl->arl_first_mp_to_free;
1957 	do {
1958 		while (mpp[0]) {
1959 			mblk_t  *mp;
1960 			mblk_t  *mp1;
1961 
1962 			mp = mpp[0];
1963 			mpp[0] = mp->b_next;
1964 			for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) {
1965 				mp1->b_next = NULL;
1966 				mp1->b_prev = NULL;
1967 			}
1968 			freemsg(mp);
1969 		}
1970 	} while (mpp++ != &arl->arl_last_mp_to_free);
1971 
1972 	netstack_rele(ipst->ips_netstack);
1973 	mi_free(arl->arl_name);
1974 	mi_close_free((IDP)arl);
1975 }
1976 
1977 /*
1978  * DAD failed. Tear down ipifs with the specified srce address. Note that
1979  * tearing down the ipif also meas deleting the ncec through ipif_down,
1980  * so it is not possible to use nce_timer for recovery. Instead we start
1981  * a timer on the ipif. Caller has to free the mp.
1982  */
1983 void
1984 arp_failure(mblk_t *mp, ip_recv_attr_t *ira)
1985 {
1986 	ill_t *ill = ira->ira_ill;
1987 
1988 	if ((mp = copymsg(mp)) != NULL) {
1989 		ill_refhold(ill);
1990 		qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
1991 	}
1992 }
1993 
1994 /*
1995  * This is for exclusive changes due to ARP.  Tear down an interface due
1996  * to AR_CN_FAILED and AR_CN_BOGON.
1997  */
1998 /* ARGSUSED */
1999 static void
2000 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
2001 {
2002 	ill_t	*ill = rq->q_ptr;
2003 	arh_t *arh;
2004 	ipaddr_t src;
2005 	ipif_t	*ipif;
2006 	ip_stack_t *ipst = ill->ill_ipst;
2007 	uchar_t	*haddr;
2008 	uint_t	haddrlen;
2009 
2010 	/* first try src = ar$spa */
2011 	arh = (arh_t *)mp->b_rptr;
2012 	bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN);
2013 
2014 	haddrlen = arh->arh_hlen;
2015 	haddr = (uint8_t *)(arh + 1);
2016 
2017 	if (haddrlen == ill->ill_phys_addr_length) {
2018 		/*
2019 		 * Ignore conflicts generated by misbehaving switches that
2020 		 * just reflect our own messages back to us.  For IPMP, we may
2021 		 * see reflections across any ill in the illgrp.
2022 		 */
2023 		/* For an under ill_grp can change under lock */
2024 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2025 		if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
2026 		    IS_UNDER_IPMP(ill) && ill->ill_grp != NULL &&
2027 		    ipmp_illgrp_find_ill(ill->ill_grp, haddr,
2028 		    haddrlen) != NULL) {
2029 			rw_exit(&ipst->ips_ill_g_lock);
2030 			goto ignore_conflict;
2031 		}
2032 		rw_exit(&ipst->ips_ill_g_lock);
2033 	}
2034 
2035 	/*
2036 	 * Look up the appropriate ipif.
2037 	 */
2038 	ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst);
2039 	if (ipif == NULL)
2040 		goto ignore_conflict;
2041 
2042 	/* Reload the ill to match the ipif */
2043 	ill = ipif->ipif_ill;
2044 
2045 	/* If it's already duplicate or ineligible, then don't do anything. */
2046 	if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
2047 		ipif_refrele(ipif);
2048 		goto ignore_conflict;
2049 	}
2050 
2051 	/*
2052 	 * If we failed on a recovery probe, then restart the timer to
2053 	 * try again later.
2054 	 */
2055 	if (!ipif->ipif_was_dup) {
2056 		char hbuf[MAC_STR_LEN];
2057 		char sbuf[INET_ADDRSTRLEN];
2058 		char ibuf[LIFNAMSIZ];
2059 
2060 		(void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf));
2061 		(void) ip_dot_addr(src, sbuf);
2062 		ipif_get_name(ipif, ibuf, sizeof (ibuf));
2063 
2064 		cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
2065 		    " disabled", ibuf, sbuf, hbuf);
2066 	}
2067 	mutex_enter(&ill->ill_lock);
2068 	ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
2069 	ipif->ipif_flags |= IPIF_DUPLICATE;
2070 	ill->ill_ipif_dup_count++;
2071 	mutex_exit(&ill->ill_lock);
2072 	(void) ipif_down(ipif, NULL, NULL);
2073 	(void) ipif_down_tail(ipif);
2074 	mutex_enter(&ill->ill_lock);
2075 	if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
2076 	    ill->ill_net_type == IRE_IF_RESOLVER &&
2077 	    !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
2078 	    ipst->ips_ip_dup_recovery > 0) {
2079 		ASSERT(ipif->ipif_recovery_id == 0);
2080 		ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
2081 		    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
2082 	}
2083 	mutex_exit(&ill->ill_lock);
2084 	ipif_refrele(ipif);
2085 
2086 ignore_conflict:
2087 	freemsg(mp);
2088 }
2089 
2090 /*
2091  * This is a place for a dtrace hook.
2092  * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload,
2093  * or just the ARP packet payload as an M_DATA.
2094  */
2095 /* ARGSUSED */
2096 static void
2097 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill)
2098 {
2099 	freemsg(mp);
2100 }
2101 
2102 static boolean_t
2103 arp_over_driver(queue_t *q)
2104 {
2105 	queue_t *qnext = STREAM(q)->sd_wrq->q_next;
2106 
2107 	/*
2108 	 * check if first module below stream head is IP or UDP.
2109 	 */
2110 	ASSERT(qnext != NULL);
2111 	if (strcmp(Q2NAME(qnext), "ip") != 0 &&
2112 	    strcmp(Q2NAME(qnext), "udp") != 0) {
2113 		/*
2114 		 * module below is not ip or udp, so arp has been pushed
2115 		 * on the driver.
2116 		 */
2117 		return (B_TRUE);
2118 	}
2119 	return (B_FALSE);
2120 }
2121 
2122 static int
2123 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2124 {
2125 	int err;
2126 
2127 	ASSERT(sflag & MODOPEN);
2128 	if (!arp_over_driver(q)) {
2129 		q->q_qinfo = dummymodinfo.st_rdinit;
2130 		WR(q)->q_qinfo = dummymodinfo.st_wrinit;
2131 		return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag,
2132 		    sflag, credp));
2133 	}
2134 	err = arp_modopen(q, devp, flag, sflag, credp);
2135 	return (err);
2136 }
2137 
2138 /*
2139  * In most cases we must be a writer on the IP stream before coming to
2140  * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions
2141  * when we are not a writer are very early duing initialization (in
2142  * arl_init, before the arl has done a SLIFNAME, so that we don't yet know
2143  * the associated ill) or during arp_mod_close, when we could not enter the
2144  * ipsq because the ill has already unplumbed.
2145  */
2146 static void
2147 arp_dlpi_send(arl_t *arl, mblk_t *mp)
2148 {
2149 	mblk_t **mpp;
2150 	t_uscalar_t prim;
2151 	arl_ill_common_t *ai;
2152 
2153 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
2154 
2155 #ifdef DEBUG
2156 	ai = arl->arl_common;
2157 	if (ai != NULL) {
2158 		mutex_enter(&ai->ai_lock);
2159 		if (ai->ai_ill != NULL)
2160 			ASSERT(IAM_WRITER_ILL(ai->ai_ill));
2161 		mutex_exit(&ai->ai_lock);
2162 	}
2163 #endif /* DEBUG */
2164 
2165 	mutex_enter(&arl->arl_lock);
2166 	if (arl->arl_dlpi_pending != DL_PRIM_INVAL) {
2167 		/* Must queue message. Tail insertion */
2168 		mpp = &arl->arl_dlpi_deferred;
2169 		while (*mpp != NULL)
2170 			mpp = &((*mpp)->b_next);
2171 
2172 		*mpp = mp;
2173 		mutex_exit(&arl->arl_lock);
2174 		return;
2175 	}
2176 	mutex_exit(&arl->arl_lock);
2177 	if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive)
2178 	    == DL_BIND_REQ) {
2179 		ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0);
2180 	}
2181 	/*
2182 	 * No need to take the arl_lock to examine ARL_CONDEMNED at this point
2183 	 * because the only thread that can see ARL_CONDEMNED here is the
2184 	 * closing arp_modclose() thread which sets the flag after becoming a
2185 	 * writer on the ipsq. Threads from IP must have finished and
2186 	 * cannot be active now.
2187 	 */
2188 	if (!(arl->arl_state_flags & ARL_CONDEMNED) ||
2189 	    (prim == DL_UNBIND_REQ)) {
2190 		if (prim != DL_NOTIFY_CONF) {
2191 			ill_t *ill = arl_to_ill(arl);
2192 
2193 			arl->arl_dlpi_pending = prim;
2194 			if (ill != NULL) {
2195 				mutex_enter(&ill->ill_lock);
2196 				ill->ill_arl_dlpi_pending = 1;
2197 				mutex_exit(&ill->ill_lock);
2198 				ill_refrele(ill);
2199 			}
2200 		}
2201 	}
2202 	DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send",
2203 	    char *, dl_primstr(prim), char *, "-",  arl_t *, arl);
2204 	putnext(arl->arl_wq, mp);
2205 }
2206 
2207 static void
2208 arl_defaults_common(arl_t *arl, mblk_t *mp)
2209 {
2210 	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
2211 	/*
2212 	 * Till the ill is fully up  the ill is not globally visible.
2213 	 * So no need for a lock.
2214 	 */
2215 	arl->arl_mactype = dlia->dl_mac_type;
2216 	arl->arl_sap_length = dlia->dl_sap_length;
2217 
2218 	if (!arl->arl_dlpi_style_set) {
2219 		if (dlia->dl_provider_style == DL_STYLE2)
2220 			arl->arl_needs_attach = 1;
2221 		mutex_enter(&arl->arl_lock);
2222 		ASSERT(arl->arl_dlpi_style_set == 0);
2223 		arl->arl_dlpi_style_set = 1;
2224 		arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING;
2225 		cv_broadcast(&arl->arl_cv);
2226 		mutex_exit(&arl->arl_lock);
2227 	}
2228 }
2229 
2230 int
2231 arl_init(queue_t *q, arl_t *arl)
2232 {
2233 	mblk_t *info_mp;
2234 	dl_info_req_t   *dlir;
2235 
2236 	/* subset of ill_init */
2237 	mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0);
2238 
2239 	arl->arl_rq = q;
2240 	arl->arl_wq = WR(q);
2241 
2242 	info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
2243 	    BPRI_HI);
2244 	if (info_mp == NULL)
2245 		return (ENOMEM);
2246 	/*
2247 	 * allocate sufficient space to contain device name.
2248 	 */
2249 	arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ));
2250 	arl->arl_ppa = UINT_MAX;
2251 	arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND);
2252 
2253 	/* Send down the Info Request to the driver. */
2254 	info_mp->b_datap->db_type = M_PCPROTO;
2255 	dlir = (dl_info_req_t *)info_mp->b_rptr;
2256 	info_mp->b_wptr = (uchar_t *)&dlir[1];
2257 	dlir->dl_primitive = DL_INFO_REQ;
2258 	arl->arl_dlpi_pending = DL_PRIM_INVAL;
2259 	qprocson(q);
2260 
2261 	arp_dlpi_send(arl, info_mp);
2262 	return (0);
2263 }
2264 
2265 int
2266 arl_wait_for_info_ack(arl_t *arl)
2267 {
2268 	int err;
2269 
2270 	mutex_enter(&arl->arl_lock);
2271 	while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) {
2272 		/*
2273 		 * Return value of 0 indicates a pending signal.
2274 		 */
2275 		err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock);
2276 		if (err == 0) {
2277 			mutex_exit(&arl->arl_lock);
2278 			return (EINTR);
2279 		}
2280 	}
2281 	mutex_exit(&arl->arl_lock);
2282 	/*
2283 	 * ip_rput_other could have set an error  in ill_error on
2284 	 * receipt of M_ERROR.
2285 	 */
2286 	return (arl->arl_error);
2287 }
2288 
2289 void
2290 arl_set_muxid(ill_t *ill, int muxid)
2291 {
2292 	arl_t *arl;
2293 
2294 	arl = ill_to_arl(ill);
2295 	if (arl != NULL) {
2296 		arl->arl_muxid = muxid;
2297 		arl_refrele(arl);
2298 	}
2299 }
2300 
2301 int
2302 arl_get_muxid(ill_t *ill)
2303 {
2304 	arl_t *arl;
2305 	int muxid = 0;
2306 
2307 	arl = ill_to_arl(ill);
2308 	if (arl != NULL) {
2309 		muxid = arl->arl_muxid;
2310 		arl_refrele(arl);
2311 	}
2312 	return (muxid);
2313 }
2314 
2315 static int
2316 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2317 {
2318 	int	err;
2319 	zoneid_t zoneid;
2320 	netstack_t *ns;
2321 	ip_stack_t *ipst;
2322 	arl_t	*arl = NULL;
2323 
2324 	/*
2325 	 * Prevent unprivileged processes from pushing IP so that
2326 	 * they can't send raw IP.
2327 	 */
2328 	if (secpolicy_net_rawaccess(credp) != 0)
2329 		return (EPERM);
2330 
2331 	ns = netstack_find_by_cred(credp);
2332 	ASSERT(ns != NULL);
2333 	ipst = ns->netstack_ip;
2334 	ASSERT(ipst != NULL);
2335 
2336 	/*
2337 	 * For exclusive stacks we set the zoneid to zero
2338 	 * to make IP operate as if in the global zone.
2339 	 */
2340 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
2341 		zoneid = GLOBAL_ZONEID;
2342 	else
2343 		zoneid = crgetzoneid(credp);
2344 
2345 	arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t));
2346 	q->q_ptr = WR(q)->q_ptr = arl;
2347 	arl->arl_ipst = ipst;
2348 	arl->arl_zoneid = zoneid;
2349 	err = arl_init(q, arl);
2350 
2351 	if (err != 0) {
2352 		mi_free(arl->arl_name);
2353 		mi_free(arl);
2354 		netstack_rele(ipst->ips_netstack);
2355 		q->q_ptr = NULL;
2356 		WR(q)->q_ptr = NULL;
2357 		return (err);
2358 	}
2359 
2360 	/*
2361 	 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent.
2362 	 */
2363 	err = arl_wait_for_info_ack(arl);
2364 	if (err == 0)
2365 		arl->arl_credp = credp;
2366 	else
2367 		goto fail;
2368 
2369 	crhold(credp);
2370 
2371 	mutex_enter(&ipst->ips_ip_mi_lock);
2372 	err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag,
2373 	    sflag, credp);
2374 	mutex_exit(&ipst->ips_ip_mi_lock);
2375 fail:
2376 	if (err) {
2377 		(void) arp_close(q, 0);
2378 		return (err);
2379 	}
2380 	return (0);
2381 }
2382 
2383 /*
2384  * Notify any downstream modules (esp softmac and hitbox) of the name
2385  * of this interface using an M_CTL.
2386  */
2387 static void
2388 arp_ifname_notify(arl_t *arl)
2389 {
2390 	mblk_t *mp1, *mp2;
2391 	struct iocblk *iocp;
2392 	struct lifreq *lifr;
2393 
2394 	if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL)
2395 		return;
2396 	if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) {
2397 		freemsg(mp1);
2398 		return;
2399 	}
2400 
2401 	lifr = (struct lifreq *)mp2->b_rptr;
2402 	mp2->b_wptr += sizeof (struct lifreq);
2403 	bzero(lifr, sizeof (struct lifreq));
2404 
2405 	(void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ);
2406 	lifr->lifr_ppa = arl->arl_ppa;
2407 	lifr->lifr_flags = ILLF_IPV4;
2408 
2409 	/* Use M_CTL to avoid confusing anyone else who might be listening. */
2410 	DB_TYPE(mp1) = M_CTL;
2411 	mp1->b_cont = mp2;
2412 	iocp = (struct iocblk *)mp1->b_rptr;
2413 	iocp->ioc_count = msgsize(mp1->b_cont);
2414 	DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify",
2415 	    char *, "SIOCSLIFNAME", char *, "-",  arl_t *, arl);
2416 	putnext(arl->arl_wq, mp1);
2417 }
2418 
2419 void
2420 arp_send_replumb_conf(ill_t *ill)
2421 {
2422 	mblk_t *mp;
2423 	arl_t *arl = ill_to_arl(ill);
2424 
2425 	if (arl == NULL)
2426 		return;
2427 	/*
2428 	 * arl_got_replumb and arl_got_unbind to be cleared after we complete
2429 	 * arp_cmd_done.
2430 	 */
2431 	mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO,
2432 	    DL_NOTIFY_CONF);
2433 	((dl_notify_conf_t *)(mp->b_rptr))->dl_notification =
2434 	    DL_NOTE_REPLUMB_DONE;
2435 	arp_dlpi_send(arl, mp);
2436 	mutex_enter(&arl->arl_lock);
2437 	arl->arl_state_flags &= ~ARL_LL_REPLUMBING;
2438 	mutex_exit(&arl->arl_lock);
2439 	arl_refrele(arl);
2440 }
2441 
2442 /*
2443  * The unplumb code paths call arp_unbind_complete() to make sure that it is
2444  * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also
2445  * for the arl_refcnt to fall to one so that, when we return from
2446  * arp_unbind_complete(), we know for certain that there are no threads in
2447  * arp_rput() that might access the arl_ill.
2448  */
2449 void
2450 arp_unbind_complete(ill_t *ill)
2451 {
2452 	arl_t *arl = ill_to_arl(ill);
2453 
2454 	if (arl == NULL)
2455 		return;
2456 	mutex_enter(&arl->arl_lock);
2457 	/*
2458 	 * wait for unbind ack and arl_refcnt to drop to 1. Note that the
2459 	 * quiescent arl_refcnt for this function is 1 (and not 0) because
2460 	 * ill_to_arl() will itself return after taking a ref on the arl_t.
2461 	 */
2462 	while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
2463 		cv_wait(&arl->arl_cv, &arl->arl_lock);
2464 	while (arl->arl_refcnt != 1)
2465 		cv_wait(&arl->arl_cv, &arl->arl_lock);
2466 	mutex_exit(&arl->arl_lock);
2467 	arl_refrele(arl);
2468 }
2469