xref: /titanic_50/usr/src/uts/common/inet/ip/ip_arp.c (revision cc22b130832529204c03214239a57aaadd05101f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <inet/ip_arp.h>
27 #include <inet/ip_ndp.h>
28 #include <net/if_arp.h>
29 #include <netinet/if_ether.h>
30 #include <sys/strsubr.h>
31 #include <inet/ip6.h>
32 #include <inet/ip.h>
33 #include <inet/ip_ire.h>
34 #include <inet/ip_if.h>
35 #include <sys/dlpi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsun.h>
38 #include <sys/sdt.h>
39 #include <inet/mi.h>
40 #include <inet/arp.h>
41 #include <inet/ipdrop.h>
42 #include <sys/sockio.h>
43 #include <inet/ip_impl.h>
44 #include <sys/policy.h>
45 
/*
 * Byte offset of the link-layer address inside a DL_UNITDATA_REQ built for
 * this arl.  A negative arl_sap_length yields an offset immediately after
 * the fixed dl_unitdata_req_t; otherwise the offset is advanced by
 * arl_sap_length bytes.
 *
 * Note: the argument is evaluated more than once.
 */
#define	ARL_LL_ADDR_OFFSET(arl)					\
	((((arl)->arl_sap_length) < 0) ?			\
	(sizeof (dl_unitdata_req_t)) :				\
	((sizeof (dl_unitdata_req_t)) + ((arl)->arl_sap_length)))
49 
/*
 * MAC-specific intelligence.  Shouldn't be needed, but the DL_INFO_ACK
 * doesn't quite do it for us.
 *
 * One arp_m_t describes the ARP-related defaults for a single DLPI MAC
 * type; the instances live in arp_m_tbl[] and are found via arp_m_lookup().
 */
typedef struct arp_m_s {
	/* DLPI MAC type; the key matched by arp_m_lookup() */
	t_uscalar_t	arp_mac_type;
	/* ARP hardware type (ARPHRD_*) reported by arp_hw_type() */
	uint32_t	arp_mac_arp_hw_type;
	/* default arl_sap_length when the provider is not DL_VERSION_2 */
	t_scalar_t	arp_mac_sap_length;
	/* default arl_phys_addr_length when the provider is not DL_VERSION_2 */
	uint32_t	arp_mac_hw_addr_length;
} arp_m_t;
60 
/*
 * Forward declarations for routines private to this file.
 */
static int arp_close(queue_t *, int);
static void arp_rput(queue_t *, mblk_t *);
static void arp_wput(queue_t *, mblk_t *);
static arp_m_t	*arp_m_lookup(t_uscalar_t mac_type);
static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *,
	ncec_t *);
static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *,
	const uchar_t *, const uchar_t *, uchar_t *);
static int  arp_modclose(arl_t *);
static void  arp_mod_close_tail(arl_t *);
static mblk_t *arl_unbind(arl_t *);
static void arp_process_packet(ill_t *, mblk_t *);
static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *);
static void arp_drop_packet(const char *str, mblk_t *, ill_t *);
static int arp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ip_sioctl_ifunitsel_arp(queue_t *, int *);
static int ip_sioctl_slifname_arp(queue_t *, void *);
static void arp_dlpi_send(arl_t *, mblk_t *);
static void arl_defaults_common(arl_t *, mblk_t *);
static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *);
static void arp_ifname_notify(arl_t *);
static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *);
static arl_t *ill_to_arl(ill_t *);
84 
/*
 * DL_PRIM(mp) reads the DLPI primitive number from the start of the data
 * in mp.  The caller must already know that at least a dl_primitive field
 * is present.
 *
 * IS_DLPI_DATA(mp) is true when mp is an M_PROTO message that is at least
 * as long as a dl_unitdata_ind_t and whose primitive is DL_UNITDATA_IND.
 * The length test precedes the DL_PRIM() dereference.
 *
 * Both macros evaluate mp more than once.
 */
#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
#define	IS_DLPI_DATA(mp)						\
	((DB_TYPE(mp) == M_PROTO) &&					\
	MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&			\
	(DL_PRIM(mp) == DL_UNITDATA_IND))
90 
/*
 * Result codes returned by ip_nce_resolve_all() and acted upon by
 * arp_process_packet().
 */
#define	AR_NOTFOUND	1	/* No matching ace found in cache */
#define	AR_MERGED	2	/* Matching ace updated (RFC 826 Merge_flag) */
#define	AR_LOOPBACK	3	/* Our own arp packet was received */
#define	AR_BOGON	4	/* Another host has our IP addr. */
#define	AR_FAILED	5	/* Duplicate Address Detection has failed */
#define	AR_CHANGED	6	/* Address has changed; tell IP (and merged) */

/*
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably a tunable that disables address defense -- confirm against
 * its users before relying on this description.
 */
boolean_t arp_no_defense;
99 
/*
 * STREAMS registration for the "arp" module.  The same open/close routines
 * (arp_open/arp_close) are installed on both sides; the read side uses
 * arp_rput and the write side uses arp_wput as put procedures.  Neither
 * side has a service procedure (second initializer is NULL).
 *
 * The module_info initializers are positional; see module_info(9S) for the
 * meaning of the packet-size and water-mark values.
 */
struct module_info arp_mod_info = {
	IP_MOD_ID, "arp", 1, INFPSZ, 65536, 1024
};
static struct qinit rinit_arp = {
	(pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info
};
static struct qinit winit_arp = {
	(pfi_t)arp_wput, NULL, arp_open, arp_close, NULL,
	&arp_mod_info
};
struct streamtab arpinfo = {
	&rinit_arp, &winit_arp
};
/*
 * Size in bytes of the fixed part of an ARP header, i.e. everything that
 * precedes the sender hardware address (see arp_process_packet()).
 */
#define	ARH_FIXED_LEN	8
/* NOTE(review): user not visible here; presumably link-header headroom. */
#define	AR_LL_HDR_SLACK	32
115 
/*
 * pfhooks for ARP.
 *
 * ARP_HOOK_IN runs the packet hooks registered for an inbound ARP event,
 * but only if somebody is interested in _hook.
 *
 *	_hook	hook event descriptor (tested via .he_interested)
 *	_event	event token handed to hook_run()
 *	_ilp	value stored as the inbound interface (hpe_ifp)
 *	_hdr	lvalue: pointer to the ARP header; updated from the hook
 *	_fm	lvalue: first mblk of the message; its address is given to
 *		the hook, and the message is freed and NULLed on rejection
 *	_m	lvalue: mblk holding the header; updated from the hook
 *	ipst	IP stack instance owning ips_arp_net_data
 *
 * If hook_run() returns non-zero the message is freed (when still non-NULL)
 * and _fm, _hdr and _m are all set to NULL; callers must test for that.
 * Otherwise _hdr and _m are reloaded from the hook's (possibly changed)
 * values.
 *
 * This expands to a bare `if' statement that declares a local named
 * `info', so invoke it as a statement of its own and do not pass
 * arguments that mention an identifier called `info'.  All parameters are
 * parenthesized so that arbitrary expressions may be passed safely.
 */
#define	ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst)		\
	if ((_hook).he_interested) {					\
		hook_pkt_event_t info;					\
									\
		info.hpe_protocol = (ipst)->ips_arp_net_data;		\
		info.hpe_ifp = (_ilp);					\
		info.hpe_ofp = 0;					\
		info.hpe_hdr = (_hdr);					\
		info.hpe_mp = &(_fm);					\
		info.hpe_mb = (_m);					\
		if (hook_run((ipst)->ips_arp_net_data->netd_hooks,	\
		    (_event), (hook_data_t)&info) != 0) {		\
			if ((_fm) != NULL) {				\
				freemsg(_fm);				\
				(_fm) = NULL;				\
			}						\
			(_hdr) = NULL;					\
			(_m) = NULL;					\
		} else {						\
			(_hdr) = info.hpe_hdr;				\
			(_m) = info.hpe_mb;				\
		}							\
	}
143 
/*
 * ARP_HOOK_OUT is the outbound counterpart of ARP_HOOK_IN: it runs the
 * packet hooks for an outbound ARP event if anybody is interested in
 * _hook.  _olp is stored as the outbound interface (hpe_ofp) and the
 * inbound interface is left as 0.
 *
 * _hdr, _fm and _m must be lvalues.  If hook_run() returns non-zero the
 * message is freed (when still non-NULL) and all three are set to NULL;
 * otherwise _hdr and _m are reloaded from the hook's values.
 *
 * This expands to a bare `if' statement that declares a local named
 * `info', so invoke it as a statement of its own.  All parameters are
 * parenthesized so that arbitrary expressions may be passed safely.
 */
#define	ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst)		\
	if ((_hook).he_interested) {					\
		hook_pkt_event_t info;					\
									\
		info.hpe_protocol = (ipst)->ips_arp_net_data;		\
		info.hpe_ifp = 0;					\
		info.hpe_ofp = (_olp);					\
		info.hpe_hdr = (_hdr);					\
		info.hpe_mp = &(_fm);					\
		info.hpe_mb = (_m);					\
		if (hook_run((ipst)->ips_arp_net_data->netd_hooks,	\
		    (_event), (hook_data_t)&info) != 0) {		\
			if ((_fm) != NULL) {				\
				freemsg(_fm);				\
				(_fm) = NULL;				\
			}						\
			(_hdr) = NULL;					\
			(_m) = NULL;					\
		} else {						\
			(_hdr) = info.hpe_hdr;				\
			(_m) = info.hpe_mb;				\
		}							\
	}
168 
/*
 * Per-MAC-type defaults, searched linearly by arp_m_lookup().
 * Columns: DLPI MAC type, ARP hardware type, SAP length, hardware
 * address length.
 *
 * The DL_OTHER row is the fallback used by arp_hw_type() and
 * arp_ll_set_defaults() when the MAC type has no row of its own, so it
 * must always be present.
 */
static arp_m_t	arp_m_tbl[] = {
	{ DL_CSMACD,	ARPHRD_ETHER,	-2,	6},	/* 802.3 */
	{ DL_TPB,	ARPHRD_IEEE802,	-2,	6},	/* 802.4 */
	{ DL_TPR,	ARPHRD_IEEE802,	-2,	6},	/* 802.5 */
	{ DL_METRO,	ARPHRD_IEEE802,	-2,	6},	/* 802.6 */
	{ DL_ETHER,	ARPHRD_ETHER,	-2,	6},	/* Ethernet */
	{ DL_FDDI,	ARPHRD_ETHER,	-2,	6},	/* FDDI */
	{ DL_IB,	ARPHRD_IB,	-2,	20},	/* Infiniband */
	{ DL_OTHER,	ARPHRD_ETHER,	-2,	6}	/* unknown */
};
179 
180 static void
181 arl_refhold_locked(arl_t *arl)
182 {
183 	ASSERT(MUTEX_HELD(&arl->arl_lock));
184 	arl->arl_refcnt++;
185 	ASSERT(arl->arl_refcnt != 0);
186 }
187 
188 static void
189 arl_refrele(arl_t *arl)
190 {
191 	mutex_enter(&arl->arl_lock);
192 	ASSERT(arl->arl_refcnt != 0);
193 	arl->arl_refcnt--;
194 	if (arl->arl_refcnt > 1) {
195 		mutex_exit(&arl->arl_lock);
196 		return;
197 	}
198 
199 	/* ill_close or arp_unbind_complete may be waiting */
200 	cv_broadcast(&arl->arl_cv);
201 	mutex_exit(&arl->arl_lock);
202 }
203 
204 /*
205  * wake up any pending ip ioctls.
206  */
207 static void
208 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim)
209 {
210 	if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing)
211 		arp_replumb_done(ill, 0);
212 	else
213 		arp_bringup_done(ill, err);
214 }
215 
216 static int
217 ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen,
218     const in_addr_t *src_paddr, ncec_t **sncec, int op)
219 {
220 	int retv;
221 	ncec_t *ncec;
222 	boolean_t ll_changed;
223 	uchar_t *lladdr = NULL;
224 	int new_state;
225 
226 	ASSERT(ill != NULL);
227 
228 	ncec = ncec_lookup_illgrp_v4(ill, src_paddr);
229 	*sncec = ncec;
230 
231 	if (ncec == NULL) {
232 		retv = AR_NOTFOUND;
233 		goto done;
234 	}
235 
236 	mutex_enter(&ncec->ncec_lock);
237 	/*
238 	 * IP addr and hardware address match what we already
239 	 * have, then this is a broadcast packet emitted by one of our
240 	 * interfaces, reflected by the switch and received on another
241 	 * interface.  We return AR_LOOPBACK.
242 	 */
243 	lladdr = ncec->ncec_lladdr;
244 	if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length &&
245 	    bcmp(lladdr, src_haddr, hlen) == 0) {
246 		mutex_exit(&ncec->ncec_lock);
247 		retv = AR_LOOPBACK;
248 		goto done;
249 	}
250 	/*
251 	 * If the entry is unverified, then we've just verified that
252 	 * someone else already owns this address, because this is a
253 	 * message with the same protocol address but different
254 	 * hardware address.
255 	 */
256 	if (ncec->ncec_flags & NCE_F_UNVERIFIED) {
257 		mutex_exit(&ncec->ncec_lock);
258 		ncec_delete(ncec);
259 		ncec_refrele(ncec);
260 		*sncec = NULL;
261 		retv = AR_FAILED;
262 		goto done;
263 	}
264 
265 	/*
266 	 * If the IP address matches ours and we're authoritative for
267 	 * this entry, then some other node is using our IP addr, so
268 	 * return AR_BOGON.  Also reset the transmit count to zero so
269 	 * that, if we're currently in initial announcement mode, we
270 	 * switch back to the lazier defense mode.  Knowing that
271 	 * there's at least one duplicate out there, we ought not
272 	 * blindly announce.
273 	 *
274 	 * NCE_F_AUTHORITY is set in one of two ways:
275 	 * 1. /sbin/arp told us so, via the "permanent" flag.
276 	 * 2. This is one of my addresses.
277 	 */
278 	if (ncec->ncec_flags & NCE_F_AUTHORITY) {
279 		ncec->ncec_unsolicit_count = 0;
280 		mutex_exit(&ncec->ncec_lock);
281 		retv = AR_BOGON;
282 		goto done;
283 	}
284 
285 	/*
286 	 * No address conflict was detected, and we are getting
287 	 * ready to update the ncec's hwaddr. The nce MUST NOT be on an
288 	 * under interface, because all dynamic nce's are created on the
289 	 * native interface (in the non-IPMP case) or on the IPMP
290 	 * meta-interface (in the IPMP case)
291 	 */
292 	ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill));
293 
294 	/*
295 	 * update ncec with src_haddr, hlen.
296 	 *
297 	 * We are trying to resolve this ncec_addr/src_paddr and we
298 	 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr.
299 	 * So the new_state is at least "STALE". If, in addition,
300 	 * this a solicited, unicast ARP_RESPONSE, we can transition
301 	 * to REACHABLE.
302 	 */
303 	new_state = ND_STALE;
304 	ip1dbg(("got info for ncec %p from addr %x\n",
305 	    (void *)ncec, *src_paddr));
306 	retv = AR_MERGED;
307 	if (ncec->ncec_state == ND_INCOMPLETE ||
308 	    ncec->ncec_state == ND_INITIAL) {
309 		ll_changed = B_TRUE;
310 	} else {
311 		ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen);
312 		if (!ll_changed)
313 			new_state = ND_UNCHANGED;
314 		else
315 			retv = AR_CHANGED;
316 	}
317 	/*
318 	 * We don't have the equivalent of the IPv6 'S' flag indicating
319 	 * a solicited response, so we assume that if we are in
320 	 * INCOMPLETE, or got back an unchanged lladdr in PROBE state,
321 	 * and this is an ARP_RESPONSE, it must be a
322 	 * solicited response allowing us to transtion to REACHABLE.
323 	 */
324 	if (op == ARP_RESPONSE) {
325 		switch (ncec->ncec_state) {
326 		case ND_PROBE:
327 			new_state = (ll_changed ? ND_STALE : ND_REACHABLE);
328 			break;
329 		case ND_INCOMPLETE:
330 			new_state = ND_REACHABLE;
331 			break;
332 		}
333 	}
334 	/*
335 	 * Call nce_update() to refresh fastpath information on any
336 	 * dependent nce_t entries.
337 	 */
338 	nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL));
339 	mutex_exit(&ncec->ncec_lock);
340 	nce_resolv_ok(ncec);
341 done:
342 	return (retv);
343 }
344 
345 /* Find an entry for a particular MAC type in the arp_m_tbl. */
346 static arp_m_t	*
347 arp_m_lookup(t_uscalar_t mac_type)
348 {
349 	arp_m_t	*arm;
350 
351 	for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) {
352 		if (arm->arp_mac_type == mac_type)
353 			return (arm);
354 	}
355 	return (NULL);
356 }
357 
358 static uint32_t
359 arp_hw_type(t_uscalar_t mactype)
360 {
361 	arp_m_t *arm;
362 
363 	if ((arm = arp_m_lookup(mactype)) == NULL)
364 		arm = arp_m_lookup(DL_OTHER);
365 	return (arm->arp_mac_arp_hw_type);
366 }
367 
368 /*
369  * Called when an DLPI control message has been acked; send down the next
370  * queued message (if any).
371  * The DLPI messages of interest being bind, attach and unbind since
372  * these are the only ones sent by ARP via arp_dlpi_send.
373  */
374 static void
375 arp_dlpi_done(arl_t *arl, ill_t *ill)
376 {
377 	mblk_t *mp;
378 	int err;
379 	t_uscalar_t prim;
380 
381 	mutex_enter(&arl->arl_lock);
382 	prim = arl->arl_dlpi_pending;
383 
384 	if ((mp = arl->arl_dlpi_deferred) == NULL) {
385 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
386 		if (arl->arl_state_flags & ARL_LL_DOWN)
387 			err = ENETDOWN;
388 		else
389 			err = 0;
390 		mutex_exit(&arl->arl_lock);
391 
392 		mutex_enter(&ill->ill_lock);
393 		ill->ill_arl_dlpi_pending = 0;
394 		mutex_exit(&ill->ill_lock);
395 		arp_cmd_done(ill, err, prim);
396 		return;
397 	}
398 
399 	arl->arl_dlpi_deferred = mp->b_next;
400 	mp->b_next = NULL;
401 
402 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
403 
404 	arl->arl_dlpi_pending = DL_PRIM(mp);
405 	mutex_exit(&arl->arl_lock);
406 
407 	mutex_enter(&ill->ill_lock);
408 	ill->ill_arl_dlpi_pending = 1;
409 	mutex_exit(&ill->ill_lock);
410 
411 	putnext(arl->arl_wq, mp);
412 }
413 
414 /*
415  * This routine is called during module initialization when the DL_INFO_ACK
416  * comes back from the device.	We set up defaults for all the device dependent
417  * doo-dads we are going to need.  This will leave us ready to roll if we are
418  * attempting auto-configuration.  Alternatively, these defaults can be
419  * overridden by initialization procedures possessing higher intelligence.
420  *
421  * Caller will free the mp.
422  */
423 static void
424 arp_ll_set_defaults(arl_t *arl, mblk_t *mp)
425 {
426 	arp_m_t		*arm;
427 	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
428 
429 	if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL)
430 		arm = arp_m_lookup(DL_OTHER);
431 	ASSERT(arm != NULL);
432 
433 	/*
434 	 * We initialize based on parameters in the (currently) not too
435 	 * exhaustive arp_m_tbl.
436 	 */
437 	if (dlia->dl_version == DL_VERSION_2) {
438 		arl->arl_sap_length = dlia->dl_sap_length;
439 		arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length;
440 		if (dlia->dl_provider_style == DL_STYLE2)
441 			arl->arl_needs_attach = 1;
442 	} else {
443 		arl->arl_sap_length = arm->arp_mac_sap_length;
444 		arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length;
445 	}
446 	/*
447 	 * Note: the arp_hw_type in the arp header may be derived from
448 	 * the ill_mac_type and arp_m_lookup().
449 	 */
450 	arl->arl_sap = ETHERTYPE_ARP;
451 	arl_defaults_common(arl, mp);
452 }
453 
454 static void
455 arp_wput(queue_t *q, mblk_t *mp)
456 {
457 	int err = EINVAL;
458 	struct iocblk *ioc;
459 	mblk_t *mp1;
460 
461 	switch (DB_TYPE(mp)) {
462 	case M_IOCTL:
463 		ASSERT(q->q_next != NULL);
464 		ioc = (struct iocblk *)mp->b_rptr;
465 		if (ioc->ioc_cmd != SIOCSLIFNAME &&
466 		    ioc->ioc_cmd != IF_UNITSEL) {
467 			DTRACE_PROBE4(arl__dlpi, char *, "arp_wput",
468 			    char *, "<some ioctl>", char *, "-",
469 			    arl_t *, (arl_t *)q->q_ptr);
470 			putnext(q, mp);
471 			return;
472 		}
473 		if ((mp1 = mp->b_cont) == 0)
474 			err = EINVAL;
475 		else if (ioc->ioc_cmd == SIOCSLIFNAME)
476 			err = ip_sioctl_slifname_arp(q, mp1->b_rptr);
477 		else if (ioc->ioc_cmd == IF_UNITSEL)
478 			err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr);
479 		if (err == 0)
480 			miocack(q, mp, 0, 0);
481 		else
482 			miocnak(q, mp, 0, err);
483 		return;
484 	default:
485 		DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default",
486 		    char *, "default mblk", char *, "-",
487 		    arl_t *, (arl_t *)q->q_ptr);
488 		putnext(q, mp);
489 		return;
490 	}
491 }
492 
493 /*
494  * similar to ill_dlpi_pending(): verify that the received DLPI response
495  * matches the one that is pending for the arl.
496  */
497 static boolean_t
498 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim)
499 {
500 	t_uscalar_t pending;
501 
502 	mutex_enter(&arl->arl_lock);
503 	if (arl->arl_dlpi_pending == prim) {
504 		mutex_exit(&arl->arl_lock);
505 		return (B_TRUE);
506 	}
507 
508 	if (arl->arl_state_flags & ARL_CONDEMNED) {
509 		mutex_exit(&arl->arl_lock);
510 		return (B_FALSE);
511 	}
512 	pending = arl->arl_dlpi_pending;
513 	mutex_exit(&arl->arl_lock);
514 
515 	if (pending == DL_PRIM_INVAL) {
516 		ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s",
517 		    dl_primstr(prim), arl->arl_name));
518 	} else {
519 		ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s",
520 		    dl_primstr(prim), arl->arl_name, dl_primstr(pending)));
521 	}
522 	return (B_FALSE);
523 }
524 
525 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */
526 static void
527 arp_rput_dlpi(queue_t *q, mblk_t *mp)
528 {
529 	arl_t		*arl = (arl_t *)q->q_ptr;
530 	union DL_primitives *dlp;
531 	t_uscalar_t	prim;
532 	t_uscalar_t	reqprim = DL_PRIM_INVAL;
533 	ill_t		*ill;
534 
535 	if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) {
536 		putnext(q, mp);
537 		return;
538 	}
539 	dlp = (union DL_primitives *)mp->b_rptr;
540 	prim = dlp->dl_primitive;
541 
542 	/*
543 	 * If we received an ACK but didn't send a request for it, then it
544 	 * can't be part of any pending operation; discard up-front.
545 	 */
546 	switch (prim) {
547 	case DL_ERROR_ACK:
548 		/*
549 		 * ce is confused about how DLPI works, so we have to interpret
550 		 * an "error" on DL_NOTIFY_ACK (which we never could have sent)
551 		 * as really meaning an error on DL_NOTIFY_REQ.
552 		 *
553 		 * Note that supporting DL_NOTIFY_REQ is optional, so printing
554 		 * out an error message on the console isn't warranted except
555 		 * for debug.
556 		 */
557 		if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK ||
558 		    dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) {
559 			reqprim = DL_NOTIFY_REQ;
560 		} else {
561 			reqprim = dlp->error_ack.dl_error_primitive;
562 		}
563 		break;
564 	case DL_INFO_ACK:
565 		reqprim = DL_INFO_REQ;
566 		break;
567 	case DL_OK_ACK:
568 		reqprim = dlp->ok_ack.dl_correct_primitive;
569 		break;
570 	case DL_BIND_ACK:
571 		reqprim = DL_BIND_REQ;
572 		break;
573 	default:
574 		DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
575 		    union DL_primitives *, dlp);
576 		putnext(q, mp);
577 		return;
578 	}
579 	if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) {
580 		freemsg(mp);
581 		return;
582 	}
583 	DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received",
584 	    char *, dl_primstr(prim), char *, dl_primstr(reqprim),
585 	    arl_t *, arl);
586 
587 	ASSERT(prim != DL_NOTIFY_IND);
588 
589 	ill = arl_to_ill(arl);
590 
591 	switch (reqprim) {
592 	case DL_INFO_REQ:
593 		/*
594 		 * ill has not been set up yet for this case. This is the
595 		 * DL_INFO_ACK for the first DL_INFO_REQ sent from
596 		 * arp_modopen(). There should be no other arl_dlpi_deferred
597 		 * messages pending. We initialize the arl here.
598 		 */
599 		ASSERT(!arl->arl_dlpi_style_set);
600 		ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ);
601 		ASSERT(arl->arl_dlpi_deferred == NULL);
602 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
603 		arp_ll_set_defaults(arl, mp);
604 		freemsg(mp);
605 		return;
606 	case DL_UNBIND_REQ:
607 		mutex_enter(&arl->arl_lock);
608 		arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
609 		/*
610 		 * This is not an error, so we don't set ARL_LL_DOWN
611 		 */
612 		arl->arl_state_flags &= ~ARL_LL_UP;
613 		arl->arl_state_flags |= ARL_LL_UNBOUND;
614 		if (arl->arl_state_flags & ARL_CONDEMNED) {
615 			/*
616 			 * if this is part of the unplumb the arl may
617 			 * vaporize any moment after we cv_signal the
618 			 * arl_cv so we reset arl_dlpi_pending here.
619 			 * All other cases (including replumb) will
620 			 * have the arl_dlpi_pending reset in
621 			 * arp_dlpi_done.
622 			 */
623 			arl->arl_dlpi_pending = DL_PRIM_INVAL;
624 		}
625 		cv_signal(&arl->arl_cv);
626 		mutex_exit(&arl->arl_lock);
627 		break;
628 	}
629 	if (ill != NULL) {
630 		/*
631 		 * ill ref obtained by arl_to_ill()  will be released
632 		 * by qwriter_ip()
633 		 */
634 		qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer,
635 		    CUR_OP, B_TRUE);
636 		return;
637 	}
638 	freemsg(mp);
639 }
640 
641 /*
642  * Handling of DLPI messages that require exclusive access to the ipsq.
643  */
644 /* ARGSUSED */
645 static void
646 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg)
647 {
648 	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
649 	ill_t		*ill = (ill_t *)q->q_ptr;
650 	arl_t		*arl = ill_to_arl(ill);
651 
652 	if (arl == NULL) {
653 		/*
654 		 * happens as a result arp_modclose triggering unbind.
655 		 * arp_rput_dlpi will cv_signal the arl_cv and the modclose
656 		 * will complete, but when it does ipsq_exit, the waiting
657 		 * qwriter_ip gets into the ipsq but will find the arl null.
658 		 * There should be no deferred messages in this case, so
659 		 * just complete and exit.
660 		 */
661 		arp_cmd_done(ill, 0, DL_UNBIND_REQ);
662 		freemsg(mp);
663 		return;
664 	}
665 	switch (dlp->dl_primitive) {
666 	case DL_ERROR_ACK:
667 		switch (dlp->error_ack.dl_error_primitive) {
668 		case DL_UNBIND_REQ:
669 			mutex_enter(&arl->arl_lock);
670 			arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS;
671 			arl->arl_state_flags &= ~ARL_LL_UP;
672 			arl->arl_state_flags |= ARL_LL_UNBOUND;
673 			arl->arl_state_flags |= ARL_LL_DOWN;
674 			cv_signal(&arl->arl_cv);
675 			mutex_exit(&arl->arl_lock);
676 			break;
677 		case DL_BIND_REQ:
678 			mutex_enter(&arl->arl_lock);
679 			arl->arl_state_flags &= ~ARL_LL_UP;
680 			arl->arl_state_flags |= ARL_LL_DOWN;
681 			arl->arl_state_flags |= ARL_LL_UNBOUND;
682 			cv_signal(&arl->arl_cv);
683 			mutex_exit(&arl->arl_lock);
684 			break;
685 		case DL_ATTACH_REQ:
686 			break;
687 		default:
688 			/* If it's anything else, we didn't send it. */
689 			arl_refrele(arl);
690 			putnext(q, mp);
691 			return;
692 		}
693 		break;
694 	case DL_OK_ACK:
695 		DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok",
696 		    char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
697 		    char *, dl_primstr(dlp->ok_ack.dl_correct_primitive),
698 		    arl_t *, arl);
699 		mutex_enter(&arl->arl_lock);
700 		switch (dlp->ok_ack.dl_correct_primitive) {
701 		case DL_UNBIND_REQ:
702 		case DL_ATTACH_REQ:
703 			break;
704 		default:
705 			ip0dbg(("Dropping unrecognized DL_OK_ACK for %s",
706 			    dl_primstr(dlp->ok_ack.dl_correct_primitive)));
707 			mutex_exit(&arl->arl_lock);
708 			arl_refrele(arl);
709 			freemsg(mp);
710 			return;
711 		}
712 		mutex_exit(&arl->arl_lock);
713 		break;
714 	case DL_BIND_ACK:
715 		DTRACE_PROBE2(rput_dl_bind, arl_t *, arl,
716 		    dl_bind_ack_t *, &dlp->bind_ack);
717 
718 		mutex_enter(&arl->arl_lock);
719 		ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING);
720 		arl->arl_state_flags &=
721 		    ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND);
722 		arl->arl_state_flags |= ARL_LL_UP;
723 		mutex_exit(&arl->arl_lock);
724 		break;
725 	case DL_UDERROR_IND:
726 		DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl,
727 		    dl_uderror_ind_t *, &dlp->uderror_ind);
728 		arl_refrele(arl);
729 		putnext(q, mp);
730 		return;
731 	default:
732 		DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl,
733 		    union DL_primitives *, dlp);
734 		arl_refrele(arl);
735 		putnext(q, mp);
736 		return;
737 	}
738 	arp_dlpi_done(arl, ill);
739 	arl_refrele(arl);
740 	freemsg(mp);
741 }
742 
743 void
744 arp_rput(queue_t *q, mblk_t *mp)
745 {
746 	arl_t		*arl = q->q_ptr;
747 	boolean_t	need_refrele = B_FALSE;
748 
749 	mutex_enter(&arl->arl_lock);
750 	if (((arl->arl_state_flags &
751 	    (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) {
752 		/*
753 		 * Only allow high priority DLPI messages during unplumb or
754 		 * replumb, and we don't take an arl_refcnt for that case.
755 		 */
756 		if (DB_TYPE(mp) != M_PCPROTO) {
757 			mutex_exit(&arl->arl_lock);
758 			freemsg(mp);
759 			return;
760 		}
761 	} else {
762 		arl_refhold_locked(arl);
763 		need_refrele = B_TRUE;
764 	}
765 	mutex_exit(&arl->arl_lock);
766 
767 	switch (DB_TYPE(mp)) {
768 	case M_PCPROTO:
769 	case M_PROTO: {
770 		ill_t *ill;
771 
772 		/*
773 		 * could be one of
774 		 * (i)   real message from the wire, (DLPI_DATA)
775 		 * (ii)  DLPI message
776 		 * Take a ref on the ill associated with this arl to
777 		 * prevent the ill from being unplumbed until this thread
778 		 * is done.
779 		 */
780 		if (IS_DLPI_DATA(mp)) {
781 			ill = arl_to_ill(arl);
782 			if (ill == NULL) {
783 				arp_drop_packet("No ill", mp, ill);
784 				break;
785 			}
786 			arp_process_packet(ill, mp);
787 			ill_refrele(ill);
788 			break;
789 		}
790 		/* Miscellaneous DLPI messages get shuffled off. */
791 		arp_rput_dlpi(q, mp);
792 		break;
793 	}
794 	case M_ERROR:
795 	case M_HANGUP:
796 		if (mp->b_rptr < mp->b_wptr)
797 			arl->arl_error = (int)(*mp->b_rptr & 0xFF);
798 		if (arl->arl_error == 0)
799 			arl->arl_error = ENXIO;
800 		freemsg(mp);
801 		break;
802 	default:
803 		ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp)));
804 		putnext(q, mp);
805 		break;
806 	}
807 	if (need_refrele)
808 		arl_refrele(arl);
809 }
810 
811 static void
812 arp_process_packet(ill_t *ill, mblk_t *mp)
813 {
814 	mblk_t 		*mp1;
815 	arh_t		*arh;
816 	in_addr_t	src_paddr, dst_paddr;
817 	uint32_t	hlen, plen;
818 	boolean_t	is_probe;
819 	int		op;
820 	ncec_t		*dst_ncec, *src_ncec = NULL;
821 	uchar_t		*src_haddr, *arhp, *dst_haddr, *dp, *sp;
822 	int		err;
823 	ip_stack_t	*ipst;
824 	boolean_t	need_ill_refrele = B_FALSE;
825 	nce_t		*nce;
826 	uchar_t		*src_lladdr;
827 	dl_unitdata_ind_t *dlui;
828 	ip_recv_attr_t	iras;
829 
830 	ASSERT(ill != NULL);
831 	if (ill->ill_flags & ILLF_NOARP) {
832 		arp_drop_packet("Interface does not support ARP", mp, ill);
833 		return;
834 	}
835 	ipst = ill->ill_ipst;
836 	/*
837 	 * What we should have at this point is a DL_UNITDATA_IND message
838 	 * followed by an ARP packet.  We do some initial checks and then
839 	 * get to work.
840 	 */
841 	dlui = (dl_unitdata_ind_t *)mp->b_rptr;
842 	if (dlui->dl_group_address == 1) {
843 		/*
844 		 * multicast or broadcast  packet. Only accept on the ipmp
845 		 * nominated interface for multicasts ('cast_ill').
846 		 * If we have no cast_ill we are liberal and accept everything.
847 		 */
848 		if (IS_UNDER_IPMP(ill)) {
849 			/* For an under ill_grp can change under lock */
850 			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
851 			if (!ill->ill_nom_cast && ill->ill_grp != NULL &&
852 			    ill->ill_grp->ig_cast_ill != NULL) {
853 				rw_exit(&ipst->ips_ill_g_lock);
854 				arp_drop_packet("Interface is not nominated "
855 				    "for multicast sends and receives",
856 				    mp, ill);
857 				return;
858 			}
859 			rw_exit(&ipst->ips_ill_g_lock);
860 		}
861 	}
862 	mp1 = mp->b_cont;
863 	if (mp1 == NULL) {
864 		arp_drop_packet("Missing ARP packet", mp, ill);
865 		return;
866 	}
867 	if (mp1->b_cont != NULL) {
868 		/* No fooling around with funny messages. */
869 		if (!pullupmsg(mp1, -1)) {
870 			arp_drop_packet("Funny message: pullup failed",
871 			    mp, ill);
872 			return;
873 		}
874 	}
875 	arh = (arh_t *)mp1->b_rptr;
876 	hlen = arh->arh_hlen;
877 	plen = arh->arh_plen;
878 	if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) {
879 		arp_drop_packet("mblk len too small", mp, ill);
880 		return;
881 	}
882 	/*
883 	 * hlen 0 is used for RFC 1868 UnARP.
884 	 *
885 	 * Note that the rest of the code checks that hlen is what we expect
886 	 * for this hardware address type, so might as well discard packets
887 	 * here that don't match.
888 	 */
889 	if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) {
890 		DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1);
891 		arp_drop_packet("Bogus hlen or plen", mp, ill);
892 		return;
893 	}
894 	/*
895 	 * Historically, Solaris has been lenient about hardware type numbers.
896 	 * We should check here, but don't.
897 	 */
898 	DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh,
899 	    mblk_t *, mp);
900 	/*
901 	 * If ill is in an ipmp group, it will be the under ill. If we want
902 	 * to report the packet as coming up the IPMP interface, we should
903 	 * convert it to the ipmp ill.
904 	 */
905 	ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in,
906 	    ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst);
907 	DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp);
908 	if (mp == NULL)
909 		return;
910 	arhp = (uchar_t *)arh + ARH_FIXED_LEN;
911 	src_haddr = arhp;			/* ar$sha */
912 	arhp += hlen;
913 	bcopy(arhp, &src_paddr, IP_ADDR_LEN);	/* ar$spa */
914 	sp = arhp;
915 	arhp += IP_ADDR_LEN;
916 	dst_haddr = arhp;			/* ar$dha */
917 	arhp += hlen;
918 	bcopy(arhp, &dst_paddr, IP_ADDR_LEN);	/* ar$tpa */
919 	dp = arhp;
920 	op = BE16_TO_U16(arh->arh_operation);
921 
922 	DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr,
923 	    (in_addr_t), dst_paddr);
924 
925 	/* Determine if this is just a probe */
926 	is_probe = (src_paddr == INADDR_ANY);
927 
928 	/*
929 	 * The following test for loopback is faster than
930 	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
931 	 * operations.
932 	 * Note that these addresses are always in network byte order
933 	 */
934 	if ((*(uint8_t *)&src_paddr) == IN_LOOPBACKNET ||
935 	    (*(uint8_t *)&dst_paddr) == IN_LOOPBACKNET ||
936 	    CLASSD(src_paddr) || CLASSD(dst_paddr)) {
937 		arp_drop_packet("Martian IP addr", mp, ill);
938 		return;
939 	}
940 
941 	/*
942 	 * ira_ill is the only field used down the arp_notify path.
943 	 */
944 	bzero(&iras, sizeof (iras));
945 	iras.ira_ill = iras.ira_rill = ill;
946 	/*
947 	 * RFC 826: first check if the <protocol, sender protocol address> is
948 	 * in the cache, if there is a sender protocol address.  Note that this
949 	 * step also handles resolutions based on source.
950 	 */
951 	/* Note: after here we need to freeb(mp) and freemsg(mp1) separately */
952 	mp->b_cont = NULL;
953 	if (is_probe) {
954 		err = AR_NOTFOUND;
955 	} else {
956 		if (plen != 4) {
957 			arp_drop_packet("bad protocol len", mp, ill);
958 			return;
959 		}
960 		err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr,
961 		    &src_ncec, op);
962 		switch (err) {
963 		case AR_BOGON:
964 			ASSERT(src_ncec != NULL);
965 			arp_notify(src_paddr, mp1, AR_CN_BOGON,
966 			    &iras, src_ncec);
967 			break;
968 		case AR_FAILED:
969 			arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
970 			    src_ncec);
971 			break;
972 		case AR_LOOPBACK:
973 			DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *,
974 			    arh);
975 			freemsg(mp1);
976 			break;
977 		default:
978 			goto update;
979 		}
980 		freemsg(mp);
981 		if (src_ncec != NULL)
982 			ncec_refrele(src_ncec);
983 		return;
984 	}
985 update:
986 	/*
987 	 * Now look up the destination address.  By RFC 826, we ignore the
988 	 * packet at this step if the target isn't one of our addresses (i.e.,
989 	 * one we have been asked to PUBLISH).  This is true even if the
990 	 * target is something we're trying to resolve and the packet
991 	 * is a response.
992 	 */
993 	dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr);
994 	if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) {
995 		/*
996 		 * Let the client know if the source mapping has changed, even
997 		 * if the destination provides no useful information for the
998 		 * client.
999 		 */
1000 		if (err == AR_CHANGED) {
1001 			arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
1002 			    NULL);
1003 			freemsg(mp);
1004 		} else {
1005 			freemsg(mp);
1006 			arp_drop_packet("Target is not interesting", mp1, ill);
1007 		}
1008 		if (dst_ncec != NULL)
1009 			ncec_refrele(dst_ncec);
1010 		if (src_ncec != NULL)
1011 			ncec_refrele(src_ncec);
1012 		return;
1013 	}
1014 
1015 	if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) {
1016 		/*
1017 		 * Check for a reflection.  Some misbehaving bridges will
1018 		 * reflect our own transmitted packets back to us.
1019 		 */
1020 		ASSERT(NCE_PUBLISH(dst_ncec));
1021 		if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) {
1022 			ncec_refrele(dst_ncec);
1023 			if (src_ncec != NULL)
1024 				ncec_refrele(src_ncec);
1025 			freemsg(mp);
1026 			arp_drop_packet("bad arh_len", mp1, ill);
1027 			return;
1028 		}
1029 		if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) {
1030 			DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill,
1031 			    arh_t *, arh, ncec_t *, dst_ncec);
1032 			ncec_refrele(dst_ncec);
1033 			if (src_ncec != NULL)
1034 				ncec_refrele(src_ncec);
1035 			freemsg(mp);
1036 			arp_drop_packet("Reflected probe", mp1, ill);
1037 			return;
1038 		}
1039 		/*
1040 		 * Responses targeting our HW address that are not responses to
1041 		 * our DAD probe must be ignored as they are related to requests
1042 		 * sent before DAD was restarted.
1043 		 */
1044 		if (op == ARP_RESPONSE &&
1045 		    (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) {
1046 			ncec_refrele(dst_ncec);
1047 			if (src_ncec != NULL)
1048 				ncec_refrele(src_ncec);
1049 			freemsg(mp);
1050 			arp_drop_packet(
1051 			    "Response to request that was sent before DAD",
1052 			    mp1, ill);
1053 			return;
1054 		}
1055 		/*
1056 		 * Responses targeted to HW addresses which are not ours but
1057 		 * sent to our unverified proto address are also conflicts.
1058 		 * These may be reported by a proxy rather than the interface
1059 		 * with the conflicting address, dst_paddr is in conflict
1060 		 * rather than src_paddr. To ensure IP can locate the correct
1061 		 * ipif to take down, it is necessary to copy dst_paddr to
1062 		 * the src_paddr field before sending it to IP. The same is
1063 		 * required for probes, where src_paddr will be INADDR_ANY.
1064 		 */
1065 		if (is_probe || op == ARP_RESPONSE) {
1066 			bcopy(dp, sp, plen);
1067 			arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras,
1068 			    NULL);
1069 			ncec_delete(dst_ncec);
1070 		} else if (err == AR_CHANGED) {
1071 			arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras,
1072 			    NULL);
1073 		} else {
1074 			DTRACE_PROBE3(rput_request_unverified,
1075 			    ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec);
1076 			arp_drop_packet("Unverified request", mp1, ill);
1077 		}
1078 		freemsg(mp);
1079 		ncec_refrele(dst_ncec);
1080 		if (src_ncec != NULL)
1081 			ncec_refrele(src_ncec);
1082 		return;
1083 	}
1084 	/*
1085 	 * If it's a request, then we reply to this, and if we think the
1086 	 * sender's unknown, then we create an entry to avoid unnecessary ARPs.
1087 	 * The design assumption is that someone ARPing us is likely to send us
1088 	 * a packet soon, and that we'll want to reply to it.
1089 	 */
1090 	if (op == ARP_REQUEST) {
1091 		const uchar_t *nce_hwaddr;
1092 		struct in_addr nce_paddr;
1093 		clock_t now;
1094 		ill_t *under_ill = ill;
1095 		boolean_t send_unicast = B_TRUE;
1096 
1097 		ASSERT(NCE_PUBLISH(dst_ncec));
1098 
1099 		if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) {
1100 			/*
1101 			 * Ignore senders who are deliberately or accidentally
1102 			 * confused.
1103 			 */
1104 			goto bail;
1105 		}
1106 
1107 		if (!is_probe && err == AR_NOTFOUND) {
1108 			ASSERT(src_ncec == NULL);
1109 
1110 			if (IS_UNDER_IPMP(under_ill)) {
1111 				/*
1112 				 * create the ncec for the sender on ipmp_ill.
1113 				 * We pass in the ipmp_ill itself to avoid
1114 				 * creating an nce_t on the under_ill.
1115 				 */
1116 				ill = ipmp_ill_hold_ipmp_ill(under_ill);
1117 				if (ill == NULL)
1118 					ill = under_ill;
1119 				else
1120 					need_ill_refrele = B_TRUE;
1121 			}
1122 
1123 			err = nce_lookup_then_add_v4(ill, src_haddr, hlen,
1124 			    &src_paddr, 0, ND_STALE, &nce);
1125 
1126 			switch (err) {
1127 			case 0:
1128 			case EEXIST:
1129 				ip1dbg(("added ncec %p in state %d ill %s\n",
1130 				    (void *)src_ncec, src_ncec->ncec_state,
1131 				    ill->ill_name));
1132 				src_ncec = nce->nce_common;
1133 				break;
1134 			default:
1135 				/*
1136 				 * Either no memory, or the outgoing interface
1137 				 * is in the process of down/unplumb. In the
1138 				 * latter case, we will fail the send anyway,
1139 				 * and in the former case, we should try to send
1140 				 * the ARP response.
1141 				 */
1142 				src_lladdr = src_haddr;
1143 				goto send_response;
1144 			}
1145 			ncec_refhold(src_ncec);
1146 			nce_refrele(nce);
1147 			/* set up cleanup interval on ncec */
1148 		}
1149 
1150 		/*
1151 		 * This implements periodic address defense based on a modified
1152 		 * version of the RFC 3927 requirements.  Instead of sending a
1153 		 * broadcasted reply every time, as demanded by the RFC, we
1154 		 * send at most one broadcast reply per arp_broadcast_interval.
1155 		 */
1156 		now = ddi_get_lbolt();
1157 		if ((now - dst_ncec->ncec_last_time_defended) >
1158 		    MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) {
1159 			dst_ncec->ncec_last_time_defended = now;
1160 			/*
1161 			 * If this is one of the long-suffering entries,
1162 			 * pull it out now.  It no longer needs separate
1163 			 * defense, because we're now doing that with this
1164 			 * broadcasted reply.
1165 			 */
1166 			dst_ncec->ncec_flags &= ~NCE_F_DELAYED;
1167 			send_unicast = B_FALSE;
1168 		}
1169 		if (src_ncec != NULL && send_unicast) {
1170 			src_lladdr = src_ncec->ncec_lladdr;
1171 		} else {
1172 			src_lladdr = under_ill->ill_bcast_mp->b_rptr +
1173 			    NCE_LL_ADDR_OFFSET(under_ill);
1174 		}
1175 send_response:
1176 		nce_hwaddr = dst_ncec->ncec_lladdr;
1177 		IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr);
1178 
1179 		(void) arp_output(under_ill, ARP_RESPONSE,
1180 		    nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr,
1181 		    (uchar_t *)&src_paddr, src_lladdr);
1182 	}
1183 bail:
1184 	if (dst_ncec != NULL) {
1185 		ncec_refrele(dst_ncec);
1186 	}
1187 	if (src_ncec != NULL) {
1188 		ncec_refrele(src_ncec);
1189 	}
1190 	if (err == AR_CHANGED) {
1191 		mp->b_cont = NULL;
1192 		arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL);
1193 		mp1 = NULL;
1194 	}
1195 	if (need_ill_refrele)
1196 		ill_refrele(ill);
1197 done:
1198 	freemsg(mp);
1199 	freemsg(mp1);
1200 }
1201 
1202 /*
1203  * Basic initialization of the arl_t and the arl_common structure shared with
1204  * the ill_t that is done after SLIFNAME/IF_UNITSEL.
1205  */
1206 static int
1207 arl_ill_init(arl_t *arl, char *ill_name)
1208 {
1209 	ill_t *ill;
1210 	arl_ill_common_t *ai;
1211 
1212 	ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE,
1213 	    arl->arl_ipst);
1214 
1215 	if (ill == NULL)
1216 		return (ENXIO);
1217 
1218 	/*
1219 	 * By the time we set up the arl, we expect the ETHERTYPE_IP
1220 	 * stream to be fully bound and attached. So we copy/verify
1221 	 * relevant information as possible from/against the ill.
1222 	 *
1223 	 * The following should have been set up in arp_ll_set_defaults()
1224 	 * after the first DL_INFO_ACK was received.
1225 	 */
1226 	ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length);
1227 	ASSERT(arl->arl_sap == ETHERTYPE_ARP);
1228 	ASSERT(arl->arl_mactype == ill->ill_mactype);
1229 	ASSERT(arl->arl_sap_length == ill->ill_sap_length);
1230 
1231 	ai =  kmem_zalloc(sizeof (*ai), KM_SLEEP);
1232 	mutex_enter(&ill->ill_lock);
1233 	/* First ensure that the ill is not CONDEMNED.  */
1234 	if (ill->ill_state_flags & ILL_CONDEMNED) {
1235 		mutex_exit(&ill->ill_lock);
1236 		ill_refrele(ill);
1237 		kmem_free(ai, sizeof (*ai));
1238 		return (ENXIO);
1239 	}
1240 	if (ill->ill_common != NULL || arl->arl_common != NULL) {
1241 		mutex_exit(&ill->ill_lock);
1242 		ip0dbg(("%s: PPA already exists", ill->ill_name));
1243 		ill_refrele(ill);
1244 		kmem_free(ai, sizeof (*ai));
1245 		return (EEXIST);
1246 	}
1247 	mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL);
1248 	ai->ai_arl = arl;
1249 	ai->ai_ill = ill;
1250 	ill->ill_common = ai;
1251 	arl->arl_common = ai;
1252 	mutex_exit(&ill->ill_lock);
1253 	(void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ);
1254 	arl->arl_name_length = ill->ill_name_length;
1255 	ill_refrele(ill);
1256 	arp_ifname_notify(arl);
1257 	return (0);
1258 }
1259 
1260 /* Allocate and do common initializations for DLPI messages. */
1261 static mblk_t *
1262 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size)
1263 {
1264 	mblk_t  *mp;
1265 
1266 	if ((mp = allocb(size, BPRI_HI)) == NULL)
1267 		return (NULL);
1268 
1269 	/*
1270 	 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter
1271 	 * of which we don't seem to use) are sent with M_PCPROTO, and
1272 	 * that other DLPI are M_PROTO.
1273 	 */
1274 	DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO;
1275 
1276 	mp->b_wptr = mp->b_rptr + size;
1277 	bzero(mp->b_rptr, size);
1278 	DL_PRIM(mp) = prim;
1279 	return (mp);
1280 }
1281 
1282 
1283 int
1284 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa)
1285 {
1286 	arl_t *arl;
1287 	char *cp, ill_name[LIFNAMSIZ];
1288 
1289 	if (q->q_next == NULL)
1290 		return (EINVAL);
1291 
1292 	do {
1293 		q = q->q_next;
1294 	} while (q->q_next != NULL);
1295 	cp = q->q_qinfo->qi_minfo->mi_idname;
1296 
1297 	arl = (arl_t *)q->q_ptr;
1298 	(void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa);
1299 	arl->arl_ppa = *ppa;
1300 	return (arl_ill_init(arl, ill_name));
1301 }
1302 
1303 int
1304 ip_sioctl_slifname_arp(queue_t *q, void *lifreq)
1305 {
1306 	arl_t *arl;
1307 	struct lifreq *lifr = lifreq;
1308 
1309 	/* ioctl not valid when IP opened as a device */
1310 	if (q->q_next == NULL)
1311 		return (EINVAL);
1312 
1313 	arl = (arl_t *)q->q_ptr;
1314 	arl->arl_ppa = lifr->lifr_ppa;
1315 	return (arl_ill_init(arl, lifr->lifr_name));
1316 }
1317 
1318 arl_t *
1319 ill_to_arl(ill_t *ill)
1320 {
1321 	arl_ill_common_t *ai = ill->ill_common;
1322 	arl_t *arl = NULL;
1323 
1324 	if (ai == NULL)
1325 		return (NULL);
1326 	/*
1327 	 * Find the arl_t that corresponds to this ill_t from the shared
1328 	 * ill_common structure. We can safely access the ai here as it
1329 	 * will only be freed in arp_modclose() after we have become
1330 	 * single-threaded.
1331 	 */
1332 	mutex_enter(&ai->ai_lock);
1333 	if ((arl = ai->ai_arl) != NULL) {
1334 		mutex_enter(&arl->arl_lock);
1335 		if (!(arl->arl_state_flags & ARL_CONDEMNED)) {
1336 			arl_refhold_locked(arl);
1337 			mutex_exit(&arl->arl_lock);
1338 		} else {
1339 			mutex_exit(&arl->arl_lock);
1340 			arl = NULL;
1341 		}
1342 	}
1343 	mutex_exit(&ai->ai_lock);
1344 	return (arl);
1345 }
1346 
1347 ill_t *
1348 arl_to_ill(arl_t *arl)
1349 {
1350 	arl_ill_common_t *ai = arl->arl_common;
1351 	ill_t *ill = NULL;
1352 
1353 	if (ai == NULL) {
1354 		/*
1355 		 * happens when the arp stream is just being opened, and
1356 		 * arl_ill_init has not been executed yet.
1357 		 */
1358 		return (NULL);
1359 	}
1360 	/*
1361 	 * Find the ill_t that corresponds to this arl_t from the shared
1362 	 * arl_common structure. We can safely access the ai here as it
1363 	 * will only be freed in arp_modclose() after we have become
1364 	 * single-threaded.
1365 	 */
1366 	mutex_enter(&ai->ai_lock);
1367 	if ((ill = ai->ai_ill) != NULL) {
1368 		mutex_enter(&ill->ill_lock);
1369 		if (!ILL_IS_CONDEMNED(ill)) {
1370 			ill_refhold_locked(ill);
1371 			mutex_exit(&ill->ill_lock);
1372 		} else {
1373 			mutex_exit(&ill->ill_lock);
1374 			ill = NULL;
1375 		}
1376 	}
1377 	mutex_exit(&ai->ai_lock);
1378 	return (ill);
1379 }
1380 
1381 int
1382 arp_ll_up(ill_t *ill)
1383 {
1384 	mblk_t	*attach_mp = NULL;
1385 	mblk_t	*bind_mp = NULL;
1386 	mblk_t	*unbind_mp = NULL;
1387 	arl_t 	*arl;
1388 
1389 	ASSERT(IAM_WRITER_ILL(ill));
1390 	arl = ill_to_arl(ill);
1391 
1392 	DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill);
1393 	if (arl == NULL)
1394 		return (ENXIO);
1395 	DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl);
1396 	if ((arl->arl_state_flags & ARL_LL_UP) != 0) {
1397 		arl_refrele(arl);
1398 		return (0);
1399 	}
1400 	if (arl->arl_needs_attach) { /* DL_STYLE2 */
1401 		attach_mp =
1402 		    ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t));
1403 		if (attach_mp == NULL)
1404 			goto bad;
1405 		((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa;
1406 	}
1407 
1408 	/* Allocate and initialize a bind message. */
1409 	bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t));
1410 	if (bind_mp == NULL)
1411 		goto bad;
1412 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP;
1413 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS;
1414 
1415 	unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t));
1416 	if (unbind_mp == NULL)
1417 		goto bad;
1418 	if (arl->arl_needs_attach) {
1419 		arp_dlpi_send(arl, attach_mp);
1420 	}
1421 	arl->arl_unbind_mp = unbind_mp;
1422 
1423 	arl->arl_state_flags |= ARL_LL_BIND_PENDING;
1424 	arp_dlpi_send(arl, bind_mp);
1425 	arl_refrele(arl);
1426 	return (EINPROGRESS);
1427 
1428 bad:
1429 	freemsg(attach_mp);
1430 	freemsg(bind_mp);
1431 	freemsg(unbind_mp);
1432 	arl_refrele(arl);
1433 	return (ENOMEM);
1434 }
1435 
1436 /*
1437  * consumes/frees mp
1438  */
1439 static void
1440 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code,
1441     ip_recv_attr_t *ira, ncec_t *ncec)
1442 {
1443 	char		hbuf[MAC_STR_LEN];
1444 	char		sbuf[INET_ADDRSTRLEN];
1445 	ill_t		*ill = ira->ira_ill;
1446 	ip_stack_t	*ipst = ill->ill_ipst;
1447 	arh_t		*arh = (arh_t *)mp->b_rptr;
1448 
1449 	switch (arcn_code) {
1450 	case AR_CN_BOGON:
1451 		/*
1452 		 * Someone is sending ARP packets with a source protocol
1453 		 * address that we have published and for which we believe our
1454 		 * entry is authoritative and verified to be unique on
1455 		 * the network.
1456 		 *
1457 		 * arp_process_packet() sends AR_CN_FAILED for the case when
1458 		 * a DAD probe is received and the hardware address of a
1459 		 * non-authoritative entry has changed. Thus, AR_CN_BOGON
1460 		 * indicates a real conflict, and we have to do resolution.
1461 		 *
1462 		 * We back away quickly from the address if it's from DHCP or
1463 		 * otherwise temporary and hasn't been used recently (or at
1464 		 * all).  We'd like to include "deprecated" addresses here as
1465 		 * well (as there's no real reason to defend something we're
1466 		 * discarding), but IPMP "reuses" this flag to mean something
1467 		 * other than the standard meaning.
1468 		 */
1469 		if (ip_nce_conflict(mp, ira, ncec)) {
1470 			(void) mac_colon_addr((uint8_t *)(arh + 1),
1471 			    arh->arh_hlen, hbuf, sizeof (hbuf));
1472 			(void) ip_dot_addr(src, sbuf);
1473 			cmn_err(CE_WARN,
1474 			    "proxy ARP problem?  Node '%s' is using %s on %s",
1475 			    hbuf, sbuf, ill->ill_name);
1476 			if (!arp_no_defense)
1477 				(void) arp_announce(ncec);
1478 			/*
1479 			 * ncec_last_time_defended has been adjusted in
1480 			 * ip_nce_conflict.
1481 			 */
1482 		} else {
1483 			ncec_delete(ncec);
1484 		}
1485 		freemsg(mp);
1486 		break;
1487 	case AR_CN_ANNOUNCE: {
1488 		nce_hw_map_t hwm;
1489 		/*
1490 		 * ARP gives us a copy of any packet where it thinks
1491 		 * the address has changed, so that we can update our
1492 		 * caches.  We're responsible for caching known answers
1493 		 * in the current design.  We check whether the
1494 		 * hardware address really has changed in all of our
1495 		 * entries that have cached this mapping, and if so, we
1496 		 * blow them away.  This way we will immediately pick
1497 		 * up the rare case of a host changing hardware
1498 		 * address.
1499 		 */
1500 		if (src == 0) {
1501 			freemsg(mp);
1502 			break;
1503 		}
1504 		hwm.hwm_addr = src;
1505 		hwm.hwm_hwlen = arh->arh_hlen;
1506 		hwm.hwm_hwaddr = (uchar_t *)(arh + 1);
1507 		hwm.hwm_flags = 0;
1508 		ncec_walk_common(ipst->ips_ndp4, NULL,
1509 		    (pfi_t)nce_update_hw_changed, &hwm, B_TRUE);
1510 		freemsg(mp);
1511 		break;
1512 	}
1513 	case AR_CN_FAILED:
1514 		if (arp_no_defense) {
1515 			(void) mac_colon_addr((uint8_t *)(arh + 1),
1516 			    arh->arh_hlen, hbuf, sizeof (hbuf));
1517 			(void) ip_dot_addr(src, sbuf);
1518 
1519 			cmn_err(CE_WARN,
1520 			    "node %s is using our IP address %s on %s",
1521 			    hbuf, sbuf, ill->ill_name);
1522 			freemsg(mp);
1523 			break;
1524 		}
1525 		/*
1526 		 * mp will be freed by arp_excl.
1527 		 */
1528 		ill_refhold(ill);
1529 		qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
1530 		return;
1531 	default:
1532 		ASSERT(0);
1533 		freemsg(mp);
1534 		break;
1535 	}
1536 }
1537 
1538 /*
1539  * arp_output is called to transmit an ARP Request or Response. The mapping
1540  * to RFC 826 variables is:
1541  *   haddr1 == ar$sha
1542  *   paddr1 == ar$spa
1543  *   haddr2 == ar$tha
1544  *   paddr2 == ar$tpa
1545  * The ARP frame is sent to the ether_dst in dst_lladdr.
1546  */
1547 static int
1548 arp_output(ill_t *ill, uint32_t operation,
1549     const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2,
1550     const uchar_t *paddr2, uchar_t *dst_lladdr)
1551 {
1552 	arh_t	*arh;
1553 	uint8_t	*cp;
1554 	uint_t	hlen;
1555 	uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */
1556 	uint32_t proto = IP_ARP_PROTO_TYPE;
1557 	mblk_t *mp;
1558 	arl_t *arl;
1559 
1560 	ASSERT(dst_lladdr != NULL);
1561 	hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */
1562 	mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length);
1563 
1564 	if (mp == NULL)
1565 		return (ENOMEM);
1566 
1567 	/* IFF_NOARP flag is set or link down: do not send arp messages */
1568 	if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) {
1569 		freemsg(mp);
1570 		return (ENXIO);
1571 	}
1572 
1573 	mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) +
1574 	    plen + plen, BPRI_MED);
1575 	if (mp->b_cont == NULL) {
1576 		freeb(mp);
1577 		return (ENOMEM);
1578 	}
1579 
1580 	/* Fill in the ARP header. */
1581 	cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen);
1582 	mp->b_cont->b_rptr = cp;
1583 	arh = (arh_t *)cp;
1584 	U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware);
1585 	U16_TO_BE16(proto, arh->arh_proto);
1586 	arh->arh_hlen = (uint8_t)hlen;
1587 	arh->arh_plen = (uint8_t)plen;
1588 	U16_TO_BE16(operation, arh->arh_operation);
1589 	cp += ARH_FIXED_LEN;
1590 	bcopy(haddr1, cp, hlen);
1591 	cp += hlen;
1592 	if (paddr1 == NULL)
1593 		bzero(cp, plen);
1594 	else
1595 		bcopy(paddr1, cp, plen);
1596 	cp += plen;
1597 	if (haddr2 == NULL)
1598 		bzero(cp, hlen);
1599 	else
1600 		bcopy(haddr2, cp, hlen);
1601 	cp += hlen;
1602 	bcopy(paddr2, cp, plen);
1603 	cp += plen;
1604 	mp->b_cont->b_wptr = cp;
1605 
1606 	DTRACE_PROBE3(arp__physical__out__start,
1607 	    ill_t *, ill, arh_t *, arh, mblk_t *, mp);
1608 	ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event,
1609 	    ill->ill_ipst->ips_arp_physical_out,
1610 	    ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont,
1611 	    ill->ill_ipst);
1612 	DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp);
1613 	if (mp == NULL)
1614 		return (0);
1615 
1616 	/* Ship it out. */
1617 	arl = ill_to_arl(ill);
1618 	if (arl == NULL) {
1619 		freemsg(mp);
1620 		return (0);
1621 	}
1622 	if (canputnext(arl->arl_wq))
1623 		putnext(arl->arl_wq, mp);
1624 	else
1625 		freemsg(mp);
1626 	arl_refrele(arl);
1627 	return (0);
1628 }
1629 
1630 /*
1631  * Process resolve requests.
1632  * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise
1633  * we leave it alone (the caller will check and manage ncec_pcnt in those
1634  * cases.)
1635  */
1636 int
1637 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill)
1638 {
1639 	int err;
1640 	const uchar_t *target_hwaddr;
1641 	struct in_addr nce_paddr;
1642 	uchar_t *dst_lladdr;
1643 	boolean_t use_rcnt = !NCE_ISREACHABLE(ncec);
1644 
1645 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
1646 	ASSERT(!IS_IPMP(ill));
1647 
1648 	if (use_rcnt && ncec->ncec_rcnt == 0) {
1649 		/* not allowed any more retransmits. */
1650 		return (0);
1651 	}
1652 
1653 	if ((ill->ill_flags & ILLF_NOARP) != 0)
1654 		return (0);
1655 
1656 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr);
1657 
1658 	target_hwaddr =
1659 	    ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1660 
1661 	if (NCE_ISREACHABLE(ncec)) {
1662 		dst_lladdr =  ncec->ncec_lladdr;
1663 	} else {
1664 		dst_lladdr =  ill->ill_bcast_mp->b_rptr +
1665 		    NCE_LL_ADDR_OFFSET(ill);
1666 	}
1667 
1668 	mutex_exit(&ncec->ncec_lock);
1669 	err = arp_output(ill, ARP_REQUEST,
1670 	    ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr,
1671 	    (uchar_t *)&nce_paddr, dst_lladdr);
1672 	mutex_enter(&ncec->ncec_lock);
1673 
1674 	if (err != 0) {
1675 		/*
1676 		 * Some transient error such as ENOMEM or a down link was
1677 		 * encountered. If the link has been taken down permanently,
1678 		 * the ncec will eventually be cleaned up (ipif_down_tail()
1679 		 * will call ipif_nce_down() and flush the ncec), to terminate
1680 		 * recurring attempts to send ARP requests. In all other cases,
1681 		 * allow the caller another chance at success next time.
1682 		 */
1683 		return (ncec->ncec_ill->ill_reachable_retrans_time);
1684 	}
1685 
1686 	if (use_rcnt)
1687 		ncec->ncec_rcnt--;
1688 
1689 	return (ncec->ncec_ill->ill_reachable_retrans_time);
1690 }
1691 
1692 /* return B_TRUE if dropped */
1693 boolean_t
1694 arp_announce(ncec_t *ncec)
1695 {
1696 	ill_t *ill;
1697 	int err;
1698 	uchar_t *sphys_addr, *bcast_addr;
1699 	struct in_addr ncec_addr;
1700 	boolean_t need_refrele = B_FALSE;
1701 
1702 	ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0);
1703 	ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0);
1704 
1705 	if (IS_IPMP(ncec->ncec_ill)) {
1706 		/* sent on the cast_ill */
1707 		ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE);
1708 		if (ill == NULL)
1709 			return (B_TRUE);
1710 		need_refrele = B_TRUE;
1711 	} else {
1712 		ill = ncec->ncec_ill;
1713 	}
1714 
1715 	/*
1716 	 * broadcast an announce to ill_bcast address.
1717 	 */
1718 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
1719 
1720 	sphys_addr = ncec->ncec_lladdr;
1721 	bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1722 
1723 	err = arp_output(ill, ARP_REQUEST,
1724 	    sphys_addr, (uchar_t *)&ncec_addr, bcast_addr,
1725 	    (uchar_t *)&ncec_addr, bcast_addr);
1726 
1727 	if (need_refrele)
1728 		ill_refrele(ill);
1729 	return (err != 0);
1730 }
1731 
1732 /* return B_TRUE if dropped */
1733 boolean_t
1734 arp_probe(ncec_t *ncec)
1735 {
1736 	ill_t *ill;
1737 	int err;
1738 	struct in_addr ncec_addr;
1739 	uchar_t *sphys_addr, *dst_lladdr;
1740 
1741 	if (IS_IPMP(ncec->ncec_ill)) {
1742 		ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE);
1743 		if (ill == NULL)
1744 			return (B_TRUE);
1745 	} else {
1746 		ill = ncec->ncec_ill;
1747 	}
1748 
1749 	IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr);
1750 
1751 	sphys_addr = ncec->ncec_lladdr;
1752 	dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
1753 	err = arp_output(ill, ARP_REQUEST,
1754 	    sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr);
1755 
1756 	if (IS_IPMP(ncec->ncec_ill))
1757 		ill_refrele(ill);
1758 	return (err != 0);
1759 }
1760 
1761 static mblk_t *
1762 arl_unbind(arl_t *arl)
1763 {
1764 	mblk_t *mp;
1765 
1766 	if ((mp = arl->arl_unbind_mp) != NULL) {
1767 		arl->arl_unbind_mp = NULL;
1768 		arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS;
1769 	}
1770 	return (mp);
1771 }
1772 
1773 int
1774 arp_ll_down(ill_t *ill)
1775 {
1776 	arl_t 	*arl;
1777 	mblk_t *unbind_mp;
1778 	int err = 0;
1779 	boolean_t replumb = (ill->ill_replumbing == 1);
1780 
1781 	DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill);
1782 	if ((arl = ill_to_arl(ill)) == NULL)
1783 		return (ENXIO);
1784 	DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl);
1785 	mutex_enter(&arl->arl_lock);
1786 	unbind_mp = arl_unbind(arl);
1787 	if (unbind_mp != NULL) {
1788 		ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS);
1789 		DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp,
1790 		    arl_t *, arl);
1791 		err = EINPROGRESS;
1792 		if (replumb)
1793 			arl->arl_state_flags |= ARL_LL_REPLUMBING;
1794 	}
1795 	mutex_exit(&arl->arl_lock);
1796 	if (unbind_mp != NULL)
1797 		arp_dlpi_send(arl, unbind_mp);
1798 	arl_refrele(arl);
1799 	return (err);
1800 }
1801 
1802 /* ARGSUSED */
1803 int
1804 arp_close(queue_t *q, int flags)
1805 {
1806 	if (WR(q)->q_next != NULL) {
1807 		/* This is a module close */
1808 		return (arp_modclose(q->q_ptr));
1809 	}
1810 	qprocsoff(q);
1811 	q->q_ptr = WR(q)->q_ptr = NULL;
1812 	return (0);
1813 }
1814 
1815 static int
1816 arp_modclose(arl_t *arl)
1817 {
1818 	arl_ill_common_t *ai = arl->arl_common;
1819 	ill_t		*ill;
1820 	queue_t		*q = arl->arl_rq;
1821 	mblk_t		*mp, *nextmp;
1822 	ipsq_t		*ipsq = NULL;
1823 
1824 	ill = arl_to_ill(arl);
1825 	if (ill != NULL) {
1826 		if (!ill_waiter_inc(ill)) {
1827 			ill_refrele(ill);
1828 		} else {
1829 			ill_refrele(ill);
1830 			if (ipsq_enter(ill, B_FALSE, NEW_OP))
1831 				ipsq = ill->ill_phyint->phyint_ipsq;
1832 			ill_waiter_dcr(ill);
1833 		}
1834 		if (ipsq == NULL) {
1835 			/*
1836 			 * could not enter the ipsq because ill is already
1837 			 * marked CONDEMNED.
1838 			 */
1839 			ill = NULL;
1840 		}
1841 	}
1842 	if (ai != NULL && ipsq == NULL) {
1843 		/*
1844 		 * Either we did not get an ill because it was marked CONDEMNED
1845 		 * or we could not enter the ipsq because it was unplumbing.
1846 		 * In both cases, wait for the ill to complete ip_modclose().
1847 		 *
1848 		 * If the arp_modclose happened even before SLIFNAME, the ai
1849 		 * itself would be NULL, in which case we can complete the close
1850 		 * without waiting.
1851 		 */
1852 		mutex_enter(&ai->ai_lock);
1853 		while (ai->ai_ill != NULL)
1854 			cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock);
1855 		mutex_exit(&ai->ai_lock);
1856 	}
1857 	ASSERT(ill == NULL || IAM_WRITER_ILL(ill));
1858 
1859 	mutex_enter(&arl->arl_lock);
1860 	/*
1861 	 * If the ill had completed unplumbing before arp_modclose(), there
1862 	 * would be no ill (and therefore, no ipsq) to serialize arp_modclose()
1863 	 * so that we need to explicitly check for ARL_CONDEMNED and back off
1864 	 * if it is set.
1865 	 */
1866 	if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) {
1867 		mutex_exit(&arl->arl_lock);
1868 		ASSERT(ipsq == NULL);
1869 		return (0);
1870 	}
1871 	arl->arl_state_flags |= ARL_CONDEMNED;
1872 
1873 	/*
1874 	 * send out all pending dlpi messages, don't wait for the ack (which
1875 	 * will be ignored in arp_rput when CONDEMNED is set)
1876 	 *
1877 	 * We have to check for pending DL_UNBIND_REQ because, in the case
1878 	 * that ip_modclose() executed before arp_modclose(), the call to
1879 	 * ill_delete_tail->ipif_arp_down() would have triggered a
1880 	 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail
1881 	 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not
1882 	 * have been processed yet. In this scenario, we cannot reset
1883 	 * arl_dlpi_pending, because the setting/clearing of arl_state_flags
1884 	 * related to unbind, and the associated cv_waits must be allowed to
1885 	 * continue.
1886 	 */
1887 	if (arl->arl_dlpi_pending != DL_UNBIND_REQ)
1888 		arl->arl_dlpi_pending = DL_PRIM_INVAL;
1889 	mp = arl->arl_dlpi_deferred;
1890 	arl->arl_dlpi_deferred = NULL;
1891 	mutex_exit(&arl->arl_lock);
1892 
1893 	for (; mp != NULL; mp = nextmp) {
1894 		nextmp = mp->b_next;
1895 		mp->b_next = NULL;
1896 		putnext(arl->arl_wq, mp);
1897 	}
1898 
1899 	/* Wait for data paths to quiesce */
1900 	mutex_enter(&arl->arl_lock);
1901 	while (arl->arl_refcnt != 0)
1902 		cv_wait(&arl->arl_cv, &arl->arl_lock);
1903 
1904 	/*
1905 	 * unbind, so that nothing else can come up from driver.
1906 	 */
1907 	mp = arl_unbind(arl);
1908 	mutex_exit(&arl->arl_lock);
1909 	if (mp != NULL)
1910 		arp_dlpi_send(arl, mp);
1911 	mutex_enter(&arl->arl_lock);
1912 
1913 	/* wait for unbind ack  */
1914 	while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
1915 		cv_wait(&arl->arl_cv, &arl->arl_lock);
1916 	mutex_exit(&arl->arl_lock);
1917 
1918 	qprocsoff(q);
1919 
1920 	if (ill != NULL) {
1921 		mutex_enter(&ill->ill_lock);
1922 		ill->ill_arl_dlpi_pending = 0;
1923 		mutex_exit(&ill->ill_lock);
1924 	}
1925 
1926 	if (ai != NULL) {
1927 		mutex_enter(&ai->ai_lock);
1928 		ai->ai_arl = NULL;
1929 		if (ai->ai_ill == NULL) {
1930 			mutex_destroy(&ai->ai_lock);
1931 			kmem_free(ai, sizeof (*ai));
1932 		} else {
1933 			mutex_exit(&ai->ai_lock);
1934 		}
1935 	}
1936 
1937 	/* free up the rest */
1938 	arp_mod_close_tail(arl);
1939 
1940 	q->q_ptr = WR(q)->q_ptr = NULL;
1941 
1942 	if (ipsq != NULL)
1943 		ipsq_exit(ipsq);
1944 
1945 	return (0);
1946 }
1947 
1948 static void
1949 arp_mod_close_tail(arl_t *arl)
1950 {
1951 	ip_stack_t	*ipst = arl->arl_ipst;
1952 	mblk_t		**mpp;
1953 
1954 	mutex_enter(&ipst->ips_ip_mi_lock);
1955 	mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl);
1956 	mutex_exit(&ipst->ips_ip_mi_lock);
1957 
1958 	/*
1959 	 * credp could be null if the open didn't succeed and ip_modopen
1960 	 * itself calls ip_close.
1961 	 */
1962 	if (arl->arl_credp != NULL)
1963 		crfree(arl->arl_credp);
1964 
1965 	/* Free all retained control messages. */
1966 	mpp = &arl->arl_first_mp_to_free;
1967 	do {
1968 		while (mpp[0]) {
1969 			mblk_t  *mp;
1970 			mblk_t  *mp1;
1971 
1972 			mp = mpp[0];
1973 			mpp[0] = mp->b_next;
1974 			for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) {
1975 				mp1->b_next = NULL;
1976 				mp1->b_prev = NULL;
1977 			}
1978 			freemsg(mp);
1979 		}
1980 	} while (mpp++ != &arl->arl_last_mp_to_free);
1981 
1982 	netstack_rele(ipst->ips_netstack);
1983 	mi_free(arl->arl_name);
1984 	mi_close_free((IDP)arl);
1985 }
1986 
1987 /*
1988  * DAD failed. Tear down ipifs with the specified srce address. Note that
1989  * tearing down the ipif also meas deleting the ncec through ipif_down,
1990  * so it is not possible to use nce_timer for recovery. Instead we start
1991  * a timer on the ipif. Caller has to free the mp.
1992  */
1993 void
1994 arp_failure(mblk_t *mp, ip_recv_attr_t *ira)
1995 {
1996 	ill_t *ill = ira->ira_ill;
1997 
1998 	if ((mp = copymsg(mp)) != NULL) {
1999 		ill_refhold(ill);
2000 		qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE);
2001 	}
2002 }
2003 
2004 /*
2005  * This is for exclusive changes due to ARP.  Tear down an interface due
2006  * to AR_CN_FAILED and AR_CN_BOGON.
2007  */
2008 /* ARGSUSED */
2009 static void
2010 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
2011 {
2012 	ill_t	*ill = rq->q_ptr;
2013 	arh_t *arh;
2014 	ipaddr_t src;
2015 	ipif_t	*ipif;
2016 	ip_stack_t *ipst = ill->ill_ipst;
2017 	uchar_t	*haddr;
2018 	uint_t	haddrlen;
2019 
2020 	/* first try src = ar$spa */
2021 	arh = (arh_t *)mp->b_rptr;
2022 	bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN);
2023 
2024 	haddrlen = arh->arh_hlen;
2025 	haddr = (uint8_t *)(arh + 1);
2026 
2027 	if (haddrlen == ill->ill_phys_addr_length) {
2028 		/*
2029 		 * Ignore conflicts generated by misbehaving switches that
2030 		 * just reflect our own messages back to us.  For IPMP, we may
2031 		 * see reflections across any ill in the illgrp.
2032 		 */
2033 		/* For an under ill_grp can change under lock */
2034 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2035 		if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
2036 		    IS_UNDER_IPMP(ill) && ill->ill_grp != NULL &&
2037 		    ipmp_illgrp_find_ill(ill->ill_grp, haddr,
2038 		    haddrlen) != NULL) {
2039 			rw_exit(&ipst->ips_ill_g_lock);
2040 			goto ignore_conflict;
2041 		}
2042 		rw_exit(&ipst->ips_ill_g_lock);
2043 	}
2044 
2045 	/*
2046 	 * Look up the appropriate ipif.
2047 	 */
2048 	ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst);
2049 	if (ipif == NULL)
2050 		goto ignore_conflict;
2051 
2052 	/* Reload the ill to match the ipif */
2053 	ill = ipif->ipif_ill;
2054 
2055 	/* If it's already duplicate or ineligible, then don't do anything. */
2056 	if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
2057 		ipif_refrele(ipif);
2058 		goto ignore_conflict;
2059 	}
2060 
2061 	/*
2062 	 * If we failed on a recovery probe, then restart the timer to
2063 	 * try again later.
2064 	 */
2065 	if (!ipif->ipif_was_dup) {
2066 		char hbuf[MAC_STR_LEN];
2067 		char sbuf[INET_ADDRSTRLEN];
2068 		char ibuf[LIFNAMSIZ];
2069 
2070 		(void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf));
2071 		(void) ip_dot_addr(src, sbuf);
2072 		ipif_get_name(ipif, ibuf, sizeof (ibuf));
2073 
2074 		cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
2075 		    " disabled", ibuf, sbuf, hbuf);
2076 	}
2077 	mutex_enter(&ill->ill_lock);
2078 	ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
2079 	ipif->ipif_flags |= IPIF_DUPLICATE;
2080 	ill->ill_ipif_dup_count++;
2081 	mutex_exit(&ill->ill_lock);
2082 	(void) ipif_down(ipif, NULL, NULL);
2083 	(void) ipif_down_tail(ipif);
2084 	mutex_enter(&ill->ill_lock);
2085 	if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
2086 	    ill->ill_net_type == IRE_IF_RESOLVER &&
2087 	    !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
2088 	    ipst->ips_ip_dup_recovery > 0) {
2089 		ASSERT(ipif->ipif_recovery_id == 0);
2090 		ipif->ipif_recovery_id = timeout(ipif_dup_recovery,
2091 		    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
2092 	}
2093 	mutex_exit(&ill->ill_lock);
2094 	ipif_refrele(ipif);
2095 
2096 ignore_conflict:
2097 	freemsg(mp);
2098 }
2099 
/*
 * Drop an ARP packet.  This is a place for a dtrace hook: the body only
 * frees the message, and the str and ill arguments are not used by the
 * code (hence ARGSUSED).
 *
 * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload,
 * or just the ARP packet payload as an M_DATA; freemsg() releases the
 * whole message in either case.
 */
/* ARGSUSED */
static void
arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill)
{
	freemsg(mp);
}
2111 
2112 static boolean_t
2113 arp_over_driver(queue_t *q)
2114 {
2115 	queue_t *qnext = STREAM(q)->sd_wrq->q_next;
2116 
2117 	/*
2118 	 * check if first module below stream head is IP or UDP.
2119 	 */
2120 	ASSERT(qnext != NULL);
2121 	if (strcmp(Q2NAME(qnext), "ip") != 0 &&
2122 	    strcmp(Q2NAME(qnext), "udp") != 0) {
2123 		/*
2124 		 * module below is not ip or udp, so arp has been pushed
2125 		 * on the driver.
2126 		 */
2127 		return (B_TRUE);
2128 	}
2129 	return (B_FALSE);
2130 }
2131 
2132 static int
2133 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2134 {
2135 	int err;
2136 
2137 	ASSERT(sflag & MODOPEN);
2138 	if (!arp_over_driver(q)) {
2139 		q->q_qinfo = dummymodinfo.st_rdinit;
2140 		WR(q)->q_qinfo = dummymodinfo.st_wrinit;
2141 		return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag,
2142 		    sflag, credp));
2143 	}
2144 	err = arp_modopen(q, devp, flag, sflag, credp);
2145 	return (err);
2146 }
2147 
2148 /*
2149  * In most cases we must be a writer on the IP stream before coming to
2150  * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions
2151  * when we are not a writer are very early duing initialization (in
2152  * arl_init, before the arl has done a SLIFNAME, so that we don't yet know
2153  * the associated ill) or during arp_mod_close, when we could not enter the
2154  * ipsq because the ill has already unplumbed.
2155  */
2156 static void
2157 arp_dlpi_send(arl_t *arl, mblk_t *mp)
2158 {
2159 	mblk_t **mpp;
2160 	t_uscalar_t prim;
2161 	arl_ill_common_t *ai;
2162 
2163 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
2164 
2165 #ifdef DEBUG
2166 	ai = arl->arl_common;
2167 	if (ai != NULL) {
2168 		mutex_enter(&ai->ai_lock);
2169 		if (ai->ai_ill != NULL)
2170 			ASSERT(IAM_WRITER_ILL(ai->ai_ill));
2171 		mutex_exit(&ai->ai_lock);
2172 	}
2173 #endif /* DEBUG */
2174 
2175 	mutex_enter(&arl->arl_lock);
2176 	if (arl->arl_dlpi_pending != DL_PRIM_INVAL) {
2177 		/* Must queue message. Tail insertion */
2178 		mpp = &arl->arl_dlpi_deferred;
2179 		while (*mpp != NULL)
2180 			mpp = &((*mpp)->b_next);
2181 
2182 		*mpp = mp;
2183 		mutex_exit(&arl->arl_lock);
2184 		return;
2185 	}
2186 	mutex_exit(&arl->arl_lock);
2187 	if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive)
2188 	    == DL_BIND_REQ) {
2189 		ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0);
2190 	}
2191 	/*
2192 	 * No need to take the arl_lock to examine ARL_CONDEMNED at this point
2193 	 * because the only thread that can see ARL_CONDEMNED here is the
2194 	 * closing arp_modclose() thread which sets the flag after becoming a
2195 	 * writer on the ipsq. Threads from IP must have finished and
2196 	 * cannot be active now.
2197 	 */
2198 	if (!(arl->arl_state_flags & ARL_CONDEMNED) ||
2199 	    (prim == DL_UNBIND_REQ)) {
2200 		if (prim != DL_NOTIFY_CONF) {
2201 			ill_t *ill = arl_to_ill(arl);
2202 
2203 			arl->arl_dlpi_pending = prim;
2204 			if (ill != NULL) {
2205 				mutex_enter(&ill->ill_lock);
2206 				ill->ill_arl_dlpi_pending = 1;
2207 				mutex_exit(&ill->ill_lock);
2208 				ill_refrele(ill);
2209 			}
2210 		}
2211 	}
2212 	DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send",
2213 	    char *, dl_primstr(prim), char *, "-",  arl_t *, arl);
2214 	putnext(arl->arl_wq, mp);
2215 }
2216 
2217 static void
2218 arl_defaults_common(arl_t *arl, mblk_t *mp)
2219 {
2220 	dl_info_ack_t	*dlia = (dl_info_ack_t *)mp->b_rptr;
2221 	/*
2222 	 * Till the ill is fully up  the ill is not globally visible.
2223 	 * So no need for a lock.
2224 	 */
2225 	arl->arl_mactype = dlia->dl_mac_type;
2226 	arl->arl_sap_length = dlia->dl_sap_length;
2227 
2228 	if (!arl->arl_dlpi_style_set) {
2229 		if (dlia->dl_provider_style == DL_STYLE2)
2230 			arl->arl_needs_attach = 1;
2231 		mutex_enter(&arl->arl_lock);
2232 		ASSERT(arl->arl_dlpi_style_set == 0);
2233 		arl->arl_dlpi_style_set = 1;
2234 		arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING;
2235 		cv_broadcast(&arl->arl_cv);
2236 		mutex_exit(&arl->arl_lock);
2237 	}
2238 }
2239 
2240 int
2241 arl_init(queue_t *q, arl_t *arl)
2242 {
2243 	mblk_t *info_mp;
2244 	dl_info_req_t   *dlir;
2245 
2246 	/* subset of ill_init */
2247 	mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0);
2248 
2249 	arl->arl_rq = q;
2250 	arl->arl_wq = WR(q);
2251 
2252 	info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
2253 	    BPRI_HI);
2254 	if (info_mp == NULL)
2255 		return (ENOMEM);
2256 	/*
2257 	 * allocate sufficient space to contain device name.
2258 	 */
2259 	arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ));
2260 	arl->arl_ppa = UINT_MAX;
2261 	arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND);
2262 
2263 	/* Send down the Info Request to the driver. */
2264 	info_mp->b_datap->db_type = M_PCPROTO;
2265 	dlir = (dl_info_req_t *)info_mp->b_rptr;
2266 	info_mp->b_wptr = (uchar_t *)&dlir[1];
2267 	dlir->dl_primitive = DL_INFO_REQ;
2268 	arl->arl_dlpi_pending = DL_PRIM_INVAL;
2269 	qprocson(q);
2270 
2271 	arp_dlpi_send(arl, info_mp);
2272 	return (0);
2273 }
2274 
2275 int
2276 arl_wait_for_info_ack(arl_t *arl)
2277 {
2278 	int err;
2279 
2280 	mutex_enter(&arl->arl_lock);
2281 	while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) {
2282 		/*
2283 		 * Return value of 0 indicates a pending signal.
2284 		 */
2285 		err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock);
2286 		if (err == 0) {
2287 			mutex_exit(&arl->arl_lock);
2288 			return (EINTR);
2289 		}
2290 	}
2291 	mutex_exit(&arl->arl_lock);
2292 	/*
2293 	 * ip_rput_other could have set an error  in ill_error on
2294 	 * receipt of M_ERROR.
2295 	 */
2296 	return (arl->arl_error);
2297 }
2298 
2299 void
2300 arl_set_muxid(ill_t *ill, int muxid)
2301 {
2302 	arl_t *arl;
2303 
2304 	arl = ill_to_arl(ill);
2305 	if (arl != NULL) {
2306 		arl->arl_muxid = muxid;
2307 		arl_refrele(arl);
2308 	}
2309 }
2310 
2311 int
2312 arl_get_muxid(ill_t *ill)
2313 {
2314 	arl_t *arl;
2315 	int muxid = 0;
2316 
2317 	arl = ill_to_arl(ill);
2318 	if (arl != NULL) {
2319 		muxid = arl->arl_muxid;
2320 		arl_refrele(arl);
2321 	}
2322 	return (muxid);
2323 }
2324 
2325 static int
2326 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2327 {
2328 	int	err;
2329 	zoneid_t zoneid;
2330 	netstack_t *ns;
2331 	ip_stack_t *ipst;
2332 	arl_t	*arl = NULL;
2333 
2334 	/*
2335 	 * Prevent unprivileged processes from pushing IP so that
2336 	 * they can't send raw IP.
2337 	 */
2338 	if (secpolicy_net_rawaccess(credp) != 0)
2339 		return (EPERM);
2340 
2341 	ns = netstack_find_by_cred(credp);
2342 	ASSERT(ns != NULL);
2343 	ipst = ns->netstack_ip;
2344 	ASSERT(ipst != NULL);
2345 
2346 	/*
2347 	 * For exclusive stacks we set the zoneid to zero
2348 	 * to make IP operate as if in the global zone.
2349 	 */
2350 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
2351 		zoneid = GLOBAL_ZONEID;
2352 	else
2353 		zoneid = crgetzoneid(credp);
2354 
2355 	arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t));
2356 	q->q_ptr = WR(q)->q_ptr = arl;
2357 	arl->arl_ipst = ipst;
2358 	arl->arl_zoneid = zoneid;
2359 	err = arl_init(q, arl);
2360 
2361 	if (err != 0) {
2362 		mi_free(arl->arl_name);
2363 		mi_free(arl);
2364 		netstack_rele(ipst->ips_netstack);
2365 		q->q_ptr = NULL;
2366 		WR(q)->q_ptr = NULL;
2367 		return (err);
2368 	}
2369 
2370 	/*
2371 	 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent.
2372 	 */
2373 	err = arl_wait_for_info_ack(arl);
2374 	if (err == 0)
2375 		arl->arl_credp = credp;
2376 	else
2377 		goto fail;
2378 
2379 	crhold(credp);
2380 
2381 	mutex_enter(&ipst->ips_ip_mi_lock);
2382 	err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag,
2383 	    sflag, credp);
2384 	mutex_exit(&ipst->ips_ip_mi_lock);
2385 fail:
2386 	if (err) {
2387 		(void) arp_close(q, 0);
2388 		return (err);
2389 	}
2390 	return (0);
2391 }
2392 
2393 /*
2394  * Notify any downstream modules (esp softmac and hitbox) of the name
2395  * of this interface using an M_CTL.
2396  */
2397 static void
2398 arp_ifname_notify(arl_t *arl)
2399 {
2400 	mblk_t *mp1, *mp2;
2401 	struct iocblk *iocp;
2402 	struct lifreq *lifr;
2403 
2404 	if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL)
2405 		return;
2406 	if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) {
2407 		freemsg(mp1);
2408 		return;
2409 	}
2410 
2411 	lifr = (struct lifreq *)mp2->b_rptr;
2412 	mp2->b_wptr += sizeof (struct lifreq);
2413 	bzero(lifr, sizeof (struct lifreq));
2414 
2415 	(void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ);
2416 	lifr->lifr_ppa = arl->arl_ppa;
2417 	lifr->lifr_flags = ILLF_IPV4;
2418 
2419 	/* Use M_CTL to avoid confusing anyone else who might be listening. */
2420 	DB_TYPE(mp1) = M_CTL;
2421 	mp1->b_cont = mp2;
2422 	iocp = (struct iocblk *)mp1->b_rptr;
2423 	iocp->ioc_count = msgsize(mp1->b_cont);
2424 	DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify",
2425 	    char *, "SIOCSLIFNAME", char *, "-",  arl_t *, arl);
2426 	putnext(arl->arl_wq, mp1);
2427 }
2428 
2429 void
2430 arp_send_replumb_conf(ill_t *ill)
2431 {
2432 	mblk_t *mp;
2433 	arl_t *arl = ill_to_arl(ill);
2434 
2435 	if (arl == NULL)
2436 		return;
2437 	/*
2438 	 * arl_got_replumb and arl_got_unbind to be cleared after we complete
2439 	 * arp_cmd_done.
2440 	 */
2441 	mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO,
2442 	    DL_NOTIFY_CONF);
2443 	((dl_notify_conf_t *)(mp->b_rptr))->dl_notification =
2444 	    DL_NOTE_REPLUMB_DONE;
2445 	arp_dlpi_send(arl, mp);
2446 	mutex_enter(&arl->arl_lock);
2447 	arl->arl_state_flags &= ~ARL_LL_REPLUMBING;
2448 	mutex_exit(&arl->arl_lock);
2449 	arl_refrele(arl);
2450 }
2451 
2452 /*
2453  * The unplumb code paths call arp_unbind_complete() to make sure that it is
2454  * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also
2455  * for the arl_refcnt to fall to one so that, when we return from
2456  * arp_unbind_complete(), we know for certain that there are no threads in
2457  * arp_rput() that might access the arl_ill.
2458  */
2459 void
2460 arp_unbind_complete(ill_t *ill)
2461 {
2462 	arl_t *arl = ill_to_arl(ill);
2463 
2464 	if (arl == NULL)
2465 		return;
2466 	mutex_enter(&arl->arl_lock);
2467 	/*
2468 	 * wait for unbind ack and arl_refcnt to drop to 1. Note that the
2469 	 * quiescent arl_refcnt for this function is 1 (and not 0) because
2470 	 * ill_to_arl() will itself return after taking a ref on the arl_t.
2471 	 */
2472 	while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS)
2473 		cv_wait(&arl->arl_cv, &arl->arl_lock);
2474 	while (arl->arl_refcnt != 1)
2475 		cv_wait(&arl->arl_cv, &arl->arl_lock);
2476 	mutex_exit(&arl->arl_lock);
2477 	arl_refrele(arl);
2478 }
2479