xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_ndp.c (revision 1a7b528f36ec61147fc237b7b9a23642fab7ad13)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/sysmacros.h>
33 #include <sys/errno.h>
34 #include <sys/dlpi.h>
35 #include <sys/socket.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/vtrace.h>
41 #include <sys/kmem.h>
42 #include <sys/zone.h>
43 #include <sys/ethernet.h>
44 #include <sys/sdt.h>
45 
46 #include <net/if.h>
47 #include <net/if_types.h>
48 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <netinet/in.h>
51 #include <netinet/ip6.h>
52 #include <netinet/icmp6.h>
53 
54 #include <inet/common.h>
55 #include <inet/mi.h>
56 #include <inet/mib2.h>
57 #include <inet/nd.h>
58 #include <inet/ip.h>
59 #include <inet/ip_impl.h>
60 #include <inet/ipclassifier.h>
61 #include <inet/ip_if.h>
62 #include <inet/ip_ire.h>
63 #include <inet/ip_rts.h>
64 #include <inet/ip6.h>
65 #include <inet/ip_ndp.h>
66 #include <inet/ipsec_impl.h>
67 #include <inet/ipsec_info.h>
68 #include <inet/sctp_ip.h>
69 
70 /*
 * Function names with an nce_ prefix are static, while function
 * names with an ndp_ prefix are used by the rest of IP.
73  *
74  * Lock ordering:
75  *
76  *	ndp_g_lock -> ill_lock -> nce_lock
77  *
78  * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
79  * nce_next.  Nce_lock protects the contents of the NCE (particularly
80  * nce_refcnt).
81  */
82 
/*
 * Forward declarations for the file-local nce_ helpers (and the static
 * IPv4 add routine ndp_add_v4) that are referenced before their definitions.
 * th_trace_rrecord() is declared extern here; its definition is not in this
 * part of the file.
 */
static	boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr,
    uint32_t ll_addr_len);
static	void	nce_ire_delete(nce_t *nce);
static	void	nce_ire_delete1(ire_t *ire, char *nce_arg);
static	void 	nce_set_ll(nce_t *nce, uchar_t *ll_addr);
static	nce_t	*nce_lookup_addr(ill_t *, const in6_addr_t *, nce_t *);
static	nce_t	*nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr);
static	void	nce_make_mapping(nce_t *nce, uchar_t *addrpos,
    uchar_t *addr);
static	int	nce_set_multicast(ill_t *ill, const in6_addr_t *addr);
static	void	nce_queue_mp(nce_t *nce, mblk_t *mp);
static	void	nce_report1(nce_t *nce, uchar_t *mp_arg);
static	mblk_t	*nce_udreq_alloc(ill_t *ill);
static	void	nce_update(nce_t *nce, uint16_t new_state,
    uchar_t *new_ll_addr);
static	uint32_t	nce_solicit(nce_t *nce, mblk_t *mp);
static	boolean_t	nce_xmit(ill_t *ill, uint32_t operation,
    ill_t *hwaddr_ill, boolean_t use_lla_addr, const in6_addr_t *sender,
    const in6_addr_t *target, int flag);
extern void	th_trace_rrecord(th_trace_t *);
static int	ndp_add_v4(ill_t *, const in_addr_t *, uint16_t,
    nce_t **, nce_t *);
105 
/*
 * We track the time of creation of the nce in the nce_init_time field
 * of IPv4 nce_t entries. If an nce is stuck in the ND_INITIAL state for
 * more than NCE_STUCK_TIMEOUT milliseconds, trigger the nce-stuck dtrace
 * probe to assist in debugging. This probe will be fired from
 * nce_thread_exit() for debug kernels, and from nce_report1() when
 * 'ndd -get /dev/ip ip_ndp_cache_report' is invoked on both debug and
 * non-debug kernels.
 */
#define	NCE_STUCK_TIMEOUT	120000

/* Reference-trace teardown hook; only declared when NCE_DEBUG is defined. */
#ifdef NCE_DEBUG
void	nce_trace_inactive(nce_t *);
#endif

/*
 * Address of the hash bucket head for `addr' in the IPv4 NCE table of the
 * given IP stack instance.  The bucket index is IRE_ADDR_HASH() of the
 * address over NCE_TABLE_SIZE.
 */
#define	NCE_HASH_PTR_V4(ipst, addr)					\
	(&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)]))

/*
 * Address of the hash bucket head for `addr' in the IPv6 NCE table of the
 * given IP stack instance.  The bucket index is NCE_ADDR_HASH_V6() of the
 * address over NCE_TABLE_SIZE.
 */
#define	NCE_HASH_PTR_V6(ipst, addr)				 \
	(&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \
		NCE_TABLE_SIZE)]))
127 
128 /*
129  * Compute default flags to use for an advertisement of this nce's address.
130  */
131 static int
132 nce_advert_flags(const nce_t *nce)
133 {
134 	int flag = 0;
135 
136 	if (nce->nce_flags & NCE_F_ISROUTER)
137 		flag |= NDP_ISROUTER;
138 	return (flag);
139 }
140 
/*
 * Non-tunable probe interval, based on link capabilities: 150 when the ill
 * has ill_note_link set, 1500 otherwise.  The value is handed to
 * NDP_RESTART_TIMER() by the DAD probe paths (presumably milliseconds --
 * TODO confirm against the NDP_RESTART_TIMER definition).
 */
#define	ILL_PROBE_INTERVAL(ill)	((ill)->ill_note_link ? 150 : 1500)
143 
144 /*
145  * NDP Cache Entry creation routine.
146  * Mapped entries will never do NUD .
147  * This routine must always be called with ndp6->ndp_g_lock held.
148  * Prior to return, nce_refcnt is incremented.
149  */
150 int
151 ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
152     const in6_addr_t *mask, const in6_addr_t *extract_mask,
153     uint32_t hw_extract_start, uint16_t flags, uint16_t state,
154     nce_t **newnce)
155 {
156 	static	nce_t		nce_nil;
157 	nce_t		*nce;
158 	mblk_t		*mp;
159 	mblk_t		*template;
160 	nce_t		**ncep;
161 	int		err;
162 	boolean_t	dropped = B_FALSE;
163 	ip_stack_t	*ipst = ill->ill_ipst;
164 
165 	ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
166 	ASSERT(ill != NULL && ill->ill_isv6);
167 	if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
168 		ip0dbg(("ndp_add_v6: no addr\n"));
169 		return (EINVAL);
170 	}
171 	if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
172 		ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags));
173 		return (EINVAL);
174 	}
175 	if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
176 	    (flags & NCE_F_MAPPING)) {
177 		ip0dbg(("ndp_add_v6: extract mask zero for mapping"));
178 		return (EINVAL);
179 	}
180 	/*
181 	 * Allocate the mblk to hold the nce.
182 	 *
183 	 * XXX This can come out of a separate cache - nce_cache.
184 	 * We don't need the mp anymore as there are no more
185 	 * "qwriter"s
186 	 */
187 	mp = allocb(sizeof (nce_t), BPRI_MED);
188 	if (mp == NULL)
189 		return (ENOMEM);
190 
191 	nce = (nce_t *)mp->b_rptr;
192 	mp->b_wptr = (uchar_t *)&nce[1];
193 	*nce = nce_nil;
194 
195 	/*
196 	 * This one holds link layer address
197 	 */
198 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
199 		template = nce_udreq_alloc(ill);
200 	} else {
201 		if (ill->ill_resolver_mp == NULL) {
202 			freeb(mp);
203 			return (EINVAL);
204 		}
205 		ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER));
206 		template = copyb(ill->ill_resolver_mp);
207 	}
208 	if (template == NULL) {
209 		freeb(mp);
210 		return (ENOMEM);
211 	}
212 	nce->nce_ill = ill;
213 	nce->nce_ipversion = IPV6_VERSION;
214 	nce->nce_flags = flags;
215 	nce->nce_state = state;
216 	nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
217 	nce->nce_rcnt = ill->ill_xmit_count;
218 	nce->nce_addr = *addr;
219 	nce->nce_mask = *mask;
220 	nce->nce_extract_mask = *extract_mask;
221 	nce->nce_ll_extract_start = hw_extract_start;
222 	nce->nce_fp_mp = NULL;
223 	nce->nce_res_mp = template;
224 	if (state == ND_REACHABLE)
225 		nce->nce_last = TICK_TO_MSEC(lbolt64);
226 	else
227 		nce->nce_last = 0;
228 	nce->nce_qd_mp = NULL;
229 	nce->nce_mp = mp;
230 	if (hw_addr != NULL)
231 		nce_set_ll(nce, hw_addr);
232 	/* This one is for nce getting created */
233 	nce->nce_refcnt = 1;
234 	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
235 	if (nce->nce_flags & NCE_F_MAPPING) {
236 		ASSERT(IN6_IS_ADDR_MULTICAST(addr));
237 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask));
238 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
239 		ncep = &ipst->ips_ndp6->nce_mask_entries;
240 	} else {
241 		ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
242 	}
243 
244 #ifdef NCE_DEBUG
245 	bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
246 #endif
247 	/*
248 	 * Atomically ensure that the ill is not CONDEMNED, before
249 	 * adding the NCE.
250 	 */
251 	mutex_enter(&ill->ill_lock);
252 	if (ill->ill_state_flags & ILL_CONDEMNED) {
253 		mutex_exit(&ill->ill_lock);
254 		freeb(mp);
255 		freeb(template);
256 		return (EINVAL);
257 	}
258 	if ((nce->nce_next = *ncep) != NULL)
259 		nce->nce_next->nce_ptpn = &nce->nce_next;
260 	*ncep = nce;
261 	nce->nce_ptpn = ncep;
262 	*newnce = nce;
263 	/* This one is for nce being used by an active thread */
264 	NCE_REFHOLD(*newnce);
265 
266 	/* Bump up the number of nce's referencing this ill */
267 	ill->ill_nce_cnt++;
268 	mutex_exit(&ill->ill_lock);
269 
270 	err = 0;
271 	if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) {
272 		mutex_enter(&nce->nce_lock);
273 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
274 		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
275 		mutex_exit(&nce->nce_lock);
276 		dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE,
277 		    &ipv6_all_zeros, addr, NDP_PROBE);
278 		if (dropped) {
279 			mutex_enter(&nce->nce_lock);
280 			nce->nce_pcnt++;
281 			mutex_exit(&nce->nce_lock);
282 		}
283 		NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
284 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
285 		err = EINPROGRESS;
286 	} else if (flags & NCE_F_UNSOL_ADV) {
287 		/*
288 		 * We account for the transmit below by assigning one
289 		 * less than the ndd variable. Subsequent decrements
290 		 * are done in ndp_timer.
291 		 */
292 		mutex_enter(&nce->nce_lock);
293 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
294 		nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1;
295 		mutex_exit(&nce->nce_lock);
296 		dropped = nce_xmit(ill,
297 		    ND_NEIGHBOR_ADVERT,
298 		    ill,	/* ill to be used for extracting ill_nd_lla */
299 		    B_TRUE,	/* use ill_nd_lla */
300 		    addr,	/* Source and target of the advertisement pkt */
301 		    &ipv6_all_hosts_mcast, /* Destination of the packet */
302 		    nce_advert_flags(nce));
303 		mutex_enter(&nce->nce_lock);
304 		if (dropped)
305 			nce->nce_unsolicit_count++;
306 		if (nce->nce_unsolicit_count != 0) {
307 			nce->nce_timeout_id = timeout(ndp_timer, nce,
308 			    MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval));
309 		}
310 		mutex_exit(&nce->nce_lock);
311 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
312 	}
313 	/*
314 	 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
315 	 * we call nce_fastpath as soon as the nce is resolved in ndp_process.
316 	 * We call nce_fastpath from nce_update if the link layer address of
317 	 * the peer changes from nce_update
318 	 */
319 	if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER)
320 		nce_fastpath(nce);
321 	return (err);
322 }
323 
324 int
325 ndp_lookup_then_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
326     const in6_addr_t *mask, const in6_addr_t *extract_mask,
327     uint32_t hw_extract_start, uint16_t flags, uint16_t state,
328     nce_t **newnce)
329 {
330 	int	err = 0;
331 	nce_t	*nce;
332 	ip_stack_t	*ipst = ill->ill_ipst;
333 
334 	ASSERT(ill->ill_isv6);
335 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
336 
337 	/* Get head of v6 hash table */
338 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
339 	nce = nce_lookup_addr(ill, addr, nce);
340 	if (nce == NULL) {
341 		err = ndp_add_v6(ill,
342 		    hw_addr,
343 		    addr,
344 		    mask,
345 		    extract_mask,
346 		    hw_extract_start,
347 		    flags,
348 		    state,
349 		    newnce);
350 	} else {
351 		*newnce = nce;
352 		err = EEXIST;
353 	}
354 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
355 	return (err);
356 }
357 
358 /*
359  * Remove all the CONDEMNED nces from the appropriate hash table.
360  * We create a private list of NCEs, these may have ires pointing
361  * to them, so the list will be passed through to clean up dependent
362  * ires and only then we can do NCE_REFRELE which can make NCE inactive.
363  */
364 static void
365 nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list)
366 {
367 	nce_t *nce1;
368 	nce_t **ptpn;
369 
370 	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
371 	ASSERT(ndp->ndp_g_walker == 0);
372 	for (; nce; nce = nce1) {
373 		nce1 = nce->nce_next;
374 		mutex_enter(&nce->nce_lock);
375 		if (nce->nce_flags & NCE_F_CONDEMNED) {
376 			ptpn = nce->nce_ptpn;
377 			nce1 = nce->nce_next;
378 			if (nce1 != NULL)
379 				nce1->nce_ptpn = ptpn;
380 			*ptpn = nce1;
381 			nce->nce_ptpn = NULL;
382 			nce->nce_next = NULL;
383 			nce->nce_next = *free_nce_list;
384 			*free_nce_list = nce;
385 		}
386 		mutex_exit(&nce->nce_lock);
387 	}
388 }
389 
390 /*
391  * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
392  *    will return this NCE. Also no new IREs will be created that
393  *    point to this NCE (See ire_add_v6).  Also no new timeouts will
394  *    be started (See NDP_RESTART_TIMER).
395  * 2. Cancel any currently running timeouts.
396  * 3. If there is an ndp walker, return. The walker will do the cleanup.
397  *    This ensures that walkers see a consistent list of NCEs while walking.
398  * 4. Otherwise remove the NCE from the list of NCEs
399  * 5. Delete all IREs pointing to this NCE.
400  */
401 void
402 ndp_delete(nce_t *nce)
403 {
404 	nce_t	**ptpn;
405 	nce_t	*nce1;
406 	int	ipversion = nce->nce_ipversion;
407 	ndp_g_t *ndp;
408 	ip_stack_t	*ipst = nce->nce_ill->ill_ipst;
409 
410 	if (ipversion == IPV4_VERSION)
411 		ndp = ipst->ips_ndp4;
412 	else
413 		ndp = ipst->ips_ndp6;
414 
415 	/* Serialize deletes */
416 	mutex_enter(&nce->nce_lock);
417 	if (nce->nce_flags & NCE_F_CONDEMNED) {
418 		/* Some other thread is doing the delete */
419 		mutex_exit(&nce->nce_lock);
420 		return;
421 	}
422 	/*
423 	 * Caller has a refhold. Also 1 ref for being in the list. Thus
424 	 * refcnt has to be >= 2
425 	 */
426 	ASSERT(nce->nce_refcnt >= 2);
427 	nce->nce_flags |= NCE_F_CONDEMNED;
428 	mutex_exit(&nce->nce_lock);
429 
430 	nce_fastpath_list_delete(nce);
431 
432 	/*
433 	 * Cancel any running timer. Timeout can't be restarted
434 	 * since CONDEMNED is set. Can't hold nce_lock across untimeout.
435 	 * Passing invalid timeout id is fine.
436 	 */
437 	if (nce->nce_timeout_id != 0) {
438 		(void) untimeout(nce->nce_timeout_id);
439 		nce->nce_timeout_id = 0;
440 	}
441 
442 	mutex_enter(&ndp->ndp_g_lock);
443 	if (nce->nce_ptpn == NULL) {
444 		/*
445 		 * The last ndp walker has already removed this nce from
446 		 * the list after we marked the nce CONDEMNED and before
447 		 * we grabbed the global lock.
448 		 */
449 		mutex_exit(&ndp->ndp_g_lock);
450 		return;
451 	}
452 	if (ndp->ndp_g_walker > 0) {
453 		/*
454 		 * Can't unlink. The walker will clean up
455 		 */
456 		ndp->ndp_g_walker_cleanup = B_TRUE;
457 		mutex_exit(&ndp->ndp_g_lock);
458 		return;
459 	}
460 
461 	/*
462 	 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart
463 	 * the timer since it is marked CONDEMNED.
464 	 */
465 	ptpn = nce->nce_ptpn;
466 	nce1 = nce->nce_next;
467 	if (nce1 != NULL)
468 		nce1->nce_ptpn = ptpn;
469 	*ptpn = nce1;
470 	nce->nce_ptpn = NULL;
471 	nce->nce_next = NULL;
472 	mutex_exit(&ndp->ndp_g_lock);
473 
474 	nce_ire_delete(nce);
475 }
476 
477 void
478 ndp_inactive(nce_t *nce)
479 {
480 	mblk_t		**mpp;
481 	ill_t		*ill;
482 
483 	ASSERT(nce->nce_refcnt == 0);
484 	ASSERT(MUTEX_HELD(&nce->nce_lock));
485 	ASSERT(nce->nce_fastpath == NULL);
486 
487 	/* Free all nce allocated messages */
488 	mpp = &nce->nce_first_mp_to_free;
489 	do {
490 		while (*mpp != NULL) {
491 			mblk_t  *mp;
492 
493 			mp = *mpp;
494 			*mpp = mp->b_next;
495 
496 			inet_freemsg(mp);
497 		}
498 	} while (mpp++ != &nce->nce_last_mp_to_free);
499 
500 #ifdef NCE_DEBUG
501 	nce_trace_inactive(nce);
502 #endif
503 
504 	ill = nce->nce_ill;
505 	mutex_enter(&ill->ill_lock);
506 	ill->ill_nce_cnt--;
507 	/*
508 	 * If the number of nce's associated with this ill have dropped
509 	 * to zero, check whether we need to restart any operation that
510 	 * is waiting for this to happen.
511 	 */
512 	if (ill->ill_nce_cnt == 0) {
513 		/* ipif_ill_refrele_tail drops the ill_lock */
514 		ipif_ill_refrele_tail(ill);
515 	} else {
516 		mutex_exit(&ill->ill_lock);
517 	}
518 	mutex_destroy(&nce->nce_lock);
519 	if (nce->nce_mp != NULL)
520 		inet_freemsg(nce->nce_mp);
521 }
522 
523 /*
524  * ndp_walk routine.  Delete the nce if it is associated with the ill
525  * that is going away.  Always called as a writer.
526  */
527 void
528 ndp_delete_per_ill(nce_t *nce, uchar_t *arg)
529 {
530 	if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) {
531 		ndp_delete(nce);
532 	}
533 }
534 
535 /*
536  * Walk a list of to be inactive NCEs and blow away all the ires.
537  */
538 static void
539 nce_ire_delete_list(nce_t *nce)
540 {
541 	nce_t *nce_next;
542 
543 	ASSERT(nce != NULL);
544 	while (nce != NULL) {
545 		nce_next = nce->nce_next;
546 		nce->nce_next = NULL;
547 
548 		/*
549 		 * It is possible for the last ndp walker (this thread)
550 		 * to come here after ndp_delete has marked the nce CONDEMNED
551 		 * and before it has removed the nce from the fastpath list
552 		 * or called untimeout. So we need to do it here. It is safe
553 		 * for both ndp_delete and this thread to do it twice or
554 		 * even simultaneously since each of the threads has a
555 		 * reference on the nce.
556 		 */
557 		nce_fastpath_list_delete(nce);
558 		/*
559 		 * Cancel any running timer. Timeout can't be restarted
560 		 * since CONDEMNED is set. Can't hold nce_lock across untimeout.
561 		 * Passing invalid timeout id is fine.
562 		 */
563 		if (nce->nce_timeout_id != 0) {
564 			(void) untimeout(nce->nce_timeout_id);
565 			nce->nce_timeout_id = 0;
566 		}
567 		/*
568 		 * We might hit this func thus in the v4 case:
569 		 * ipif_down->ipif_ndp_down->ndp_walk
570 		 */
571 
572 		if (nce->nce_ipversion == IPV4_VERSION) {
573 			ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE,
574 			    IRE_CACHE, nce_ire_delete1,
575 			    (char *)nce, nce->nce_ill);
576 		} else {
577 			ASSERT(nce->nce_ipversion == IPV6_VERSION);
578 			ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE,
579 			    IRE_CACHE, nce_ire_delete1,
580 			    (char *)nce, nce->nce_ill);
581 		}
582 		NCE_REFRELE_NOTR(nce);
583 		nce = nce_next;
584 	}
585 }
586 
587 /*
588  * Delete an ire when the nce goes away.
589  */
590 /* ARGSUSED */
591 static void
592 nce_ire_delete(nce_t *nce)
593 {
594 	if (nce->nce_ipversion == IPV6_VERSION) {
595 		ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
596 		    nce_ire_delete1, (char *)nce, nce->nce_ill);
597 		NCE_REFRELE_NOTR(nce);
598 	} else {
599 		ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
600 		    nce_ire_delete1, (char *)nce, nce->nce_ill);
601 		NCE_REFRELE_NOTR(nce);
602 	}
603 }
604 
605 /*
606  * ire_walk routine used to delete every IRE that shares this nce
607  */
608 static void
609 nce_ire_delete1(ire_t *ire, char *nce_arg)
610 {
611 	nce_t	*nce = (nce_t *)nce_arg;
612 
613 	ASSERT(ire->ire_type == IRE_CACHE);
614 
615 	if (ire->ire_nce == nce) {
616 		ASSERT(ire->ire_ipversion == nce->nce_ipversion);
617 		ire_delete(ire);
618 	}
619 }
620 
621 /*
622  * Restart DAD on given NCE.  Returns B_TRUE if DAD has been restarted.
623  */
624 boolean_t
625 ndp_restart_dad(nce_t *nce)
626 {
627 	boolean_t started;
628 	boolean_t dropped;
629 
630 	if (nce == NULL)
631 		return (B_FALSE);
632 	mutex_enter(&nce->nce_lock);
633 	if (nce->nce_state == ND_PROBE) {
634 		mutex_exit(&nce->nce_lock);
635 		started = B_TRUE;
636 	} else if (nce->nce_state == ND_REACHABLE) {
637 		nce->nce_state = ND_PROBE;
638 		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1;
639 		mutex_exit(&nce->nce_lock);
640 		dropped = nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, NULL,
641 		    B_FALSE, &ipv6_all_zeros, &nce->nce_addr, NDP_PROBE);
642 		if (dropped) {
643 			mutex_enter(&nce->nce_lock);
644 			nce->nce_pcnt++;
645 			mutex_exit(&nce->nce_lock);
646 		}
647 		NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill));
648 		started = B_TRUE;
649 	} else {
650 		mutex_exit(&nce->nce_lock);
651 		started = B_FALSE;
652 	}
653 	return (started);
654 }
655 
656 /*
657  * IPv6 Cache entry lookup.  Try to find an nce matching the parameters passed.
658  * If one is found, the refcnt on the nce will be incremented.
659  */
660 nce_t *
661 ndp_lookup_v6(ill_t *ill, const in6_addr_t *addr, boolean_t caller_holds_lock)
662 {
663 	nce_t	*nce;
664 	ip_stack_t	*ipst;
665 
666 	ASSERT(ill != NULL);
667 	ipst = ill->ill_ipst;
668 
669 	ASSERT(ill != NULL && ill->ill_isv6);
670 	if (!caller_holds_lock) {
671 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
672 	}
673 
674 	/* Get head of v6 hash table */
675 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
676 	nce = nce_lookup_addr(ill, addr, nce);
677 	if (nce == NULL)
678 		nce = nce_lookup_mapping(ill, addr);
679 	if (!caller_holds_lock)
680 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
681 	return (nce);
682 }
683 /*
684  * IPv4 Cache entry lookup.  Try to find an nce matching the parameters passed.
685  * If one is found, the refcnt on the nce will be incremented.
686  * Since multicast mappings are handled in arp, there are no nce_mcast_entries
687  * so we skip the nce_lookup_mapping call.
688  * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL
689  */
690 nce_t *
691 ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock)
692 {
693 	nce_t	*nce;
694 	in6_addr_t addr6;
695 	ip_stack_t *ipst = ill->ill_ipst;
696 
697 	if (!caller_holds_lock) {
698 		mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
699 	}
700 
701 	/* Get head of v4 hash table */
702 	nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
703 	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
704 	nce = nce_lookup_addr(ill, &addr6, nce);
705 	if (!caller_holds_lock)
706 		mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
707 	return (nce);
708 }
709 
710 /*
711  * Cache entry lookup.  Try to find an nce matching the parameters passed.
712  * Look only for exact entries (no mappings).  If an nce is found, increment
713  * the hold count on that nce. The caller passes in the start of the
714  * appropriate hash table, and must be holding the appropriate global
715  * lock (ndp_g_lock).
716  */
717 static nce_t *
718 nce_lookup_addr(ill_t *ill, const in6_addr_t *addr, nce_t *nce)
719 {
720 	ndp_g_t		*ndp;
721 	ip_stack_t	*ipst = ill->ill_ipst;
722 
723 	if (ill->ill_isv6)
724 		ndp = ipst->ips_ndp6;
725 	else
726 		ndp = ipst->ips_ndp4;
727 
728 	ASSERT(ill != NULL);
729 	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
730 	if (IN6_IS_ADDR_UNSPECIFIED(addr))
731 		return (NULL);
732 	for (; nce != NULL; nce = nce->nce_next) {
733 		if (nce->nce_ill == ill) {
734 			if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) &&
735 			    IN6_ARE_ADDR_EQUAL(&nce->nce_mask,
736 			    &ipv6_all_ones)) {
737 				mutex_enter(&nce->nce_lock);
738 				if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
739 					NCE_REFHOLD_LOCKED(nce);
740 					mutex_exit(&nce->nce_lock);
741 					break;
742 				}
743 				mutex_exit(&nce->nce_lock);
744 			}
745 		}
746 	}
747 	return (nce);
748 }
749 
750 /*
751  * Cache entry lookup.  Try to find an nce matching the parameters passed.
752  * Look only for mappings.
753  */
754 static nce_t *
755 nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr)
756 {
757 	nce_t	*nce;
758 	ip_stack_t	*ipst = ill->ill_ipst;
759 
760 	ASSERT(ill != NULL && ill->ill_isv6);
761 	ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
762 	if (!IN6_IS_ADDR_MULTICAST(addr))
763 		return (NULL);
764 	nce = ipst->ips_ndp6->nce_mask_entries;
765 	for (; nce != NULL; nce = nce->nce_next)
766 		if (nce->nce_ill == ill &&
767 		    (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) {
768 			mutex_enter(&nce->nce_lock);
769 			if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
770 				NCE_REFHOLD_LOCKED(nce);
771 				mutex_exit(&nce->nce_lock);
772 				break;
773 			}
774 			mutex_exit(&nce->nce_lock);
775 		}
776 	return (nce);
777 }
778 
779 /*
780  * Process passed in parameters either from an incoming packet or via
781  * user ioctl.
782  */
783 void
784 ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
785 {
786 	ill_t	*ill = nce->nce_ill;
787 	uint32_t hw_addr_len = ill->ill_nd_lla_len;
788 	mblk_t	*mp;
789 	boolean_t ll_updated = B_FALSE;
790 	boolean_t ll_changed;
791 	ip_stack_t	*ipst = ill->ill_ipst;
792 
793 	ASSERT(nce->nce_ipversion == IPV6_VERSION);
794 	/*
795 	 * No updates of link layer address or the neighbor state is
796 	 * allowed, when the cache is in NONUD state.  This still
797 	 * allows for responding to reachability solicitation.
798 	 */
799 	mutex_enter(&nce->nce_lock);
800 	if (nce->nce_state == ND_INCOMPLETE) {
801 		if (hw_addr == NULL) {
802 			mutex_exit(&nce->nce_lock);
803 			return;
804 		}
805 		nce_set_ll(nce, hw_addr);
806 		/*
807 		 * Update nce state and send the queued packets
808 		 * back to ip this time ire will be added.
809 		 */
810 		if (flag & ND_NA_FLAG_SOLICITED) {
811 			nce_update(nce, ND_REACHABLE, NULL);
812 		} else {
813 			nce_update(nce, ND_STALE, NULL);
814 		}
815 		mutex_exit(&nce->nce_lock);
816 		nce_fastpath(nce);
817 		mutex_enter(&nce->nce_lock);
818 		mp = nce->nce_qd_mp;
819 		nce->nce_qd_mp = NULL;
820 		mutex_exit(&nce->nce_lock);
821 		while (mp != NULL) {
822 			mblk_t *nxt_mp, *data_mp;
823 
824 			nxt_mp = mp->b_next;
825 			mp->b_next = NULL;
826 
827 			if (mp->b_datap->db_type == M_CTL)
828 				data_mp = mp->b_cont;
829 			else
830 				data_mp = mp;
831 			if (data_mp->b_prev != NULL) {
832 				ill_t   *inbound_ill;
833 				queue_t *fwdq = NULL;
834 				uint_t ifindex;
835 
836 				ifindex = (uint_t)(uintptr_t)data_mp->b_prev;
837 				inbound_ill = ill_lookup_on_ifindex(ifindex,
838 				    B_TRUE, NULL, NULL, NULL, NULL, ipst);
839 				if (inbound_ill == NULL) {
840 					data_mp->b_prev = NULL;
841 					freemsg(mp);
842 					return;
843 				} else {
844 					fwdq = inbound_ill->ill_rq;
845 				}
846 				data_mp->b_prev = NULL;
847 				/*
848 				 * Send a forwarded packet back into ip_rput_v6
849 				 * just as in ire_send_v6().
850 				 * Extract the queue from b_prev (set in
851 				 * ip_rput_data_v6).
852 				 */
853 				if (fwdq != NULL) {
854 					/*
855 					 * Forwarded packets hop count will
856 					 * get decremented in ip_rput_data_v6
857 					 */
858 					if (data_mp != mp)
859 						freeb(mp);
860 					put(fwdq, data_mp);
861 				} else {
862 					/*
863 					 * Send locally originated packets back
864 					 * into * ip_wput_v6.
865 					 */
866 					put(ill->ill_wq, mp);
867 				}
868 				ill_refrele(inbound_ill);
869 			} else {
870 				put(ill->ill_wq, mp);
871 			}
872 			mp = nxt_mp;
873 		}
874 		return;
875 	}
876 	ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len);
877 	if (!is_adv) {
878 		/* If this is a SOLICITATION request only */
879 		if (ll_changed)
880 			nce_update(nce, ND_STALE, hw_addr);
881 		mutex_exit(&nce->nce_lock);
882 		return;
883 	}
884 	if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
885 		/* If in any other state than REACHABLE, ignore */
886 		if (nce->nce_state == ND_REACHABLE) {
887 			nce_update(nce, ND_STALE, NULL);
888 		}
889 		mutex_exit(&nce->nce_lock);
890 		return;
891 	} else {
892 		if (ll_changed) {
893 			nce_update(nce, ND_UNCHANGED, hw_addr);
894 			ll_updated = B_TRUE;
895 		}
896 		if (flag & ND_NA_FLAG_SOLICITED) {
897 			nce_update(nce, ND_REACHABLE, NULL);
898 		} else {
899 			if (ll_updated) {
900 				nce_update(nce, ND_STALE, NULL);
901 			}
902 		}
903 		mutex_exit(&nce->nce_lock);
904 		if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags &
905 		    NCE_F_ISROUTER)) {
906 			ire_t *ire;
907 
908 			/*
909 			 * Router turned to host.  We need to remove the
910 			 * entry as well as any default route that may be
911 			 * using this as a next hop.  This is required by
912 			 * section 7.2.5 of RFC 2461.
913 			 */
914 			ire = ire_ftable_lookup_v6(&ipv6_all_zeros,
915 			    &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT,
916 			    nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL,
917 			    MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW |
918 			    MATCH_IRE_DEFAULT, ipst);
919 			if (ire != NULL) {
920 				ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
921 				ire_delete(ire);
922 				ire_refrele(ire);
923 			}
924 			ndp_delete(nce);
925 		}
926 	}
927 }
928 
/*
 * Pass arg1 to the pfi supplied, along with each nce in existence in the
 * given table (restricted to entries on `ill' when ill is non-NULL).
 * The walk covers every hash bucket and then the mapping entries.
 * ndp_g_lock is not held while walking; instead the walker count is
 * raised so that ndp_delete() will not unlink entries, and a REFHOLD is
 * placed on each nce around the callback.  `trace' selects the traced
 * (NCE_REFHOLD/NCE_REFRELE) or untraced (_NOTR) hold macros.
 *
 * When the last walker finishes and a delete was deferred in the
 * meantime (ndp_g_walker_cleanup), the CONDEMNED entries are unlinked
 * here and their dependent ires cleaned up.
 */
void
ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
    boolean_t trace)
{

	nce_t	*nce;
	nce_t	*nce1;
	nce_t	**ncep;
	nce_t	*free_nce_list = NULL;

	mutex_enter(&ndp->ndp_g_lock);
	/* Prevent ndp_delete from unlink and free of NCE */
	ndp->ndp_g_walker++;
	mutex_exit(&ndp->ndp_g_lock);
	/* Pass 1: every bucket of the hash table */
	for (ncep = ndp->nce_hash_tbl;
	    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
		for (nce = *ncep; nce != NULL; nce = nce1) {
			/* Fetch the successor before running the callback */
			nce1 = nce->nce_next;
			if (ill == NULL || nce->nce_ill == ill) {
				if (trace) {
					NCE_REFHOLD(nce);
					(*pfi)(nce, arg1);
					NCE_REFRELE(nce);
				} else {
					NCE_REFHOLD_NOTR(nce);
					(*pfi)(nce, arg1);
					NCE_REFRELE_NOTR(nce);
				}
			}
		}
	}
	/* Pass 2: the mapping entries */
	for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) {
		nce1 = nce->nce_next;
		if (ill == NULL || nce->nce_ill == ill) {
			if (trace) {
				NCE_REFHOLD(nce);
				(*pfi)(nce, arg1);
				NCE_REFRELE(nce);
			} else {
				NCE_REFHOLD_NOTR(nce);
				(*pfi)(nce, arg1);
				NCE_REFRELE_NOTR(nce);
			}
		}
	}
	mutex_enter(&ndp->ndp_g_lock);
	ndp->ndp_g_walker--;
	/*
	 * While NCE's are removed from global list they are placed
	 * in a private list, to be passed to nce_ire_delete_list().
	 * The reason is, there may be ires pointing to this nce
	 * which needs to cleaned up.
	 */
	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
		/* Time to delete condemned entries */
		for (ncep = ndp->nce_hash_tbl;
		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
			nce = *ncep;
			if (nce != NULL) {
				nce_remove(ndp, nce, &free_nce_list);
			}
		}
		nce = ndp->nce_mask_entries;
		if (nce != NULL) {
			nce_remove(ndp, nce, &free_nce_list);
		}
		ndp->ndp_g_walker_cleanup = B_FALSE;
	}

	mutex_exit(&ndp->ndp_g_lock);

	/* The ire cleanup is done after ndp_g_lock has been dropped */
	if (free_nce_list != NULL) {
		nce_ire_delete_list(free_nce_list);
	}
}
1009 
/*
 * Walk everything: the IPv4 table first, then the IPv6 table, of the given
 * IP stack instance, using traced reference holds.  pfi is called with
 * each matching nce and arg1; a NULL ill matches every entry.
 * Note that ill can be NULL hence can't derive the ipst from it.
 */
void
ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
{
	ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
	ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
}
1020 
1021 /*
1022  * Process resolve requests.  Handles both mapped entries
1023  * as well as cases that needs to be send out on the wire.
1024  * Lookup a NCE for a given IRE.  Regardless of whether one exists
1025  * or one is created, we defer making ire point to nce until the
1026  * ire is actually added at which point the nce_refcnt on the nce is
1027  * incremented.  This is done primarily to have symmetry between ire_add()
1028  * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
1029  */
1030 int
1031 ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
1032 {
1033 	nce_t		*nce;
1034 	int		err = 0;
1035 	uint32_t	ms;
1036 	mblk_t		*mp_nce = NULL;
1037 	ip_stack_t	*ipst = ill->ill_ipst;
1038 
1039 	ASSERT(ill->ill_isv6);
1040 	if (IN6_IS_ADDR_MULTICAST(dst)) {
1041 		err = nce_set_multicast(ill, dst);
1042 		return (err);
1043 	}
1044 	err = ndp_lookup_then_add_v6(ill,
1045 	    NULL,	/* No hardware address */
1046 	    dst,
1047 	    &ipv6_all_ones,
1048 	    &ipv6_all_zeros,
1049 	    0,
1050 	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
1051 	    ND_INCOMPLETE,
1052 	    &nce);
1053 
1054 	switch (err) {
1055 	case 0:
1056 		/*
1057 		 * New cache entry was created. Make sure that the state
1058 		 * is not ND_INCOMPLETE. It can be in some other state
1059 		 * even before we send out the solicitation as we could
1060 		 * get un-solicited advertisements.
1061 		 *
1062 		 * If this is an XRESOLV interface, simply return 0,
1063 		 * since we don't want to solicit just yet.
1064 		 */
1065 		if (ill->ill_flags & ILLF_XRESOLV) {
1066 			NCE_REFRELE(nce);
1067 			return (0);
1068 		}
1069 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1070 		mutex_enter(&nce->nce_lock);
1071 		if (nce->nce_state != ND_INCOMPLETE) {
1072 			mutex_exit(&nce->nce_lock);
1073 			rw_exit(&ipst->ips_ill_g_lock);
1074 			NCE_REFRELE(nce);
1075 			return (0);
1076 		}
1077 		mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
1078 		if (mp_nce == NULL) {
1079 			/* The caller will free mp */
1080 			mutex_exit(&nce->nce_lock);
1081 			rw_exit(&ipst->ips_ill_g_lock);
1082 			ndp_delete(nce);
1083 			NCE_REFRELE(nce);
1084 			return (ENOMEM);
1085 		}
1086 		ms = nce_solicit(nce, mp_nce);
1087 		rw_exit(&ipst->ips_ill_g_lock);
1088 		if (ms == 0) {
1089 			/* The caller will free mp */
1090 			if (mp_nce != mp)
1091 				freeb(mp_nce);
1092 			mutex_exit(&nce->nce_lock);
1093 			ndp_delete(nce);
1094 			NCE_REFRELE(nce);
1095 			return (EBUSY);
1096 		}
1097 		mutex_exit(&nce->nce_lock);
1098 		NDP_RESTART_TIMER(nce, (clock_t)ms);
1099 		NCE_REFRELE(nce);
1100 		return (EINPROGRESS);
1101 	case EEXIST:
1102 		/* Resolution in progress just queue the packet */
1103 		mutex_enter(&nce->nce_lock);
1104 		if (nce->nce_state == ND_INCOMPLETE) {
1105 			mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
1106 			if (mp_nce == NULL) {
1107 				err = ENOMEM;
1108 			} else {
1109 				nce_queue_mp(nce, mp_nce);
1110 				err = EINPROGRESS;
1111 			}
1112 		} else {
1113 			/*
1114 			 * Any other state implies we have
1115 			 * a nce but IRE needs to be added ...
1116 			 * ire_add_v6() will take care of the
1117 			 * the case when the nce becomes CONDEMNED
1118 			 * before the ire is added to the table.
1119 			 */
1120 			err = 0;
1121 		}
1122 		mutex_exit(&nce->nce_lock);
1123 		NCE_REFRELE(nce);
1124 		break;
1125 	default:
1126 		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
1127 		break;
1128 	}
1129 	return (err);
1130 }
1131 
1132 /*
1133  * When there is no resolver, the link layer template is passed in
1134  * the IRE.
1135  * Lookup a NCE for a given IRE.  Regardless of whether one exists
1136  * or one is created, we defer making ire point to nce until the
1137  * ire is actually added at which point the nce_refcnt on the nce is
1138  * incremented.  This is done primarily to have symmetry between ire_add()
1139  * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
1140  */
1141 int
1142 ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
1143 {
1144 	nce_t		*nce;
1145 	int		err = 0;
1146 
1147 	ASSERT(ill != NULL);
1148 	ASSERT(ill->ill_isv6);
1149 	if (IN6_IS_ADDR_MULTICAST(dst)) {
1150 		err = nce_set_multicast(ill, dst);
1151 		return (err);
1152 	}
1153 
1154 	err = ndp_lookup_then_add_v6(ill,
1155 	    NULL,	/* hardware address */
1156 	    dst,
1157 	    &ipv6_all_ones,
1158 	    &ipv6_all_zeros,
1159 	    0,
1160 	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
1161 	    ND_REACHABLE,
1162 	    &nce);
1163 
1164 	switch (err) {
1165 	case 0:
1166 		/*
1167 		 * Cache entry with a proper resolver cookie was
1168 		 * created.
1169 		 */
1170 		NCE_REFRELE(nce);
1171 		break;
1172 	case EEXIST:
1173 		err = 0;
1174 		NCE_REFRELE(nce);
1175 		break;
1176 	default:
1177 		ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
1178 		break;
1179 	}
1180 	return (err);
1181 }
1182 
1183 /*
1184  * For each interface an entry is added for the unspecified multicast group.
1185  * Here that mapping is used to form the multicast cache entry for a particular
1186  * multicast destination.
1187  */
1188 static int
1189 nce_set_multicast(ill_t *ill, const in6_addr_t *dst)
1190 {
1191 	nce_t		*mnce;	/* Multicast mapping entry */
1192 	nce_t		*nce;
1193 	uchar_t		*hw_addr = NULL;
1194 	int		err = 0;
1195 	ip_stack_t	*ipst = ill->ill_ipst;
1196 
1197 	ASSERT(ill != NULL);
1198 	ASSERT(ill->ill_isv6);
1199 	ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst)));
1200 
1201 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
1202 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst));
1203 	nce = nce_lookup_addr(ill, dst, nce);
1204 	if (nce != NULL) {
1205 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1206 		NCE_REFRELE(nce);
1207 		return (0);
1208 	}
1209 	/* No entry, now lookup for a mapping this should never fail */
1210 	mnce = nce_lookup_mapping(ill, dst);
1211 	if (mnce == NULL) {
1212 		/* Something broken for the interface. */
1213 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1214 		return (ESRCH);
1215 	}
1216 	ASSERT(mnce->nce_flags & NCE_F_MAPPING);
1217 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
1218 		/*
1219 		 * For IRE_IF_RESOLVER a hardware mapping can be
1220 		 * generated, for IRE_IF_NORESOLVER, resolution cookie
1221 		 * in the ill is copied in ndp_add_v6().
1222 		 */
1223 		hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
1224 		if (hw_addr == NULL) {
1225 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1226 			NCE_REFRELE(mnce);
1227 			return (ENOMEM);
1228 		}
1229 		nce_make_mapping(mnce, hw_addr, (uchar_t *)dst);
1230 	}
1231 	NCE_REFRELE(mnce);
1232 	/*
1233 	 * IRE_IF_NORESOLVER type simply copies the resolution
1234 	 * cookie passed in.  So no hw_addr is needed.
1235 	 */
1236 	err = ndp_add_v6(ill,
1237 	    hw_addr,
1238 	    dst,
1239 	    &ipv6_all_ones,
1240 	    &ipv6_all_zeros,
1241 	    0,
1242 	    NCE_F_NONUD,
1243 	    ND_REACHABLE,
1244 	    &nce);
1245 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1246 	if (hw_addr != NULL)
1247 		kmem_free(hw_addr, ill->ill_nd_lla_len);
1248 	if (err != 0) {
1249 		ip1dbg(("nce_set_multicast: create failed" "%d\n", err));
1250 		return (err);
1251 	}
1252 	NCE_REFRELE(nce);
1253 	return (0);
1254 }
1255 
1256 /*
1257  * Return the link layer address, and any flags of a nce.
1258  */
1259 int
1260 ndp_query(ill_t *ill, struct lif_nd_req *lnr)
1261 {
1262 	nce_t		*nce;
1263 	in6_addr_t	*addr;
1264 	sin6_t		*sin6;
1265 	dl_unitdata_req_t	*dl;
1266 
1267 	ASSERT(ill != NULL && ill->ill_isv6);
1268 	sin6 = (sin6_t *)&lnr->lnr_addr;
1269 	addr =  &sin6->sin6_addr;
1270 
1271 	nce = ndp_lookup_v6(ill, addr, B_FALSE);
1272 	if (nce == NULL)
1273 		return (ESRCH);
1274 	/* If in INCOMPLETE state, no link layer address is available yet */
1275 	if (nce->nce_state == ND_INCOMPLETE)
1276 		goto done;
1277 	dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
1278 	if (ill->ill_flags & ILLF_XRESOLV)
1279 		lnr->lnr_hdw_len = dl->dl_dest_addr_length;
1280 	else
1281 		lnr->lnr_hdw_len = ill->ill_nd_lla_len;
1282 	ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <=
1283 	    sizeof (lnr->lnr_hdw_addr));
1284 	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
1285 	    (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len);
1286 	if (nce->nce_flags & NCE_F_ISROUTER)
1287 		lnr->lnr_flags = NDF_ISROUTER_ON;
1288 	if (nce->nce_flags & NCE_F_ANYCAST)
1289 		lnr->lnr_flags |= NDF_ANYCAST_ON;
1290 done:
1291 	NCE_REFRELE(nce);
1292 	return (0);
1293 }
1294 
1295 /*
1296  * Send Enable/Disable multicast reqs to driver.
1297  */
1298 int
1299 ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len,
1300     uint32_t hw_addr_offset, mblk_t *mp)
1301 {
1302 	nce_t		*nce;
1303 	uchar_t		*hw_addr;
1304 	ip_stack_t	*ipst = ill->ill_ipst;
1305 
1306 	ASSERT(ill != NULL && ill->ill_isv6);
1307 	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
1308 	hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
1309 	if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) {
1310 		freemsg(mp);
1311 		return (EINVAL);
1312 	}
1313 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
1314 	nce = nce_lookup_mapping(ill, addr);
1315 	if (nce == NULL) {
1316 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1317 		freemsg(mp);
1318 		return (ESRCH);
1319 	}
1320 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1321 	/*
1322 	 * Update dl_addr_length and dl_addr_offset for primitives that
1323 	 * have physical addresses as opposed to full saps
1324 	 */
1325 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1326 	case DL_ENABMULTI_REQ:
1327 		/* Track the state if this is the first enabmulti */
1328 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1329 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1330 		ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
1331 		break;
1332 	case DL_DISABMULTI_REQ:
1333 		ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
1334 		break;
1335 	default:
1336 		NCE_REFRELE(nce);
1337 		ip1dbg(("ndp_mcastreq: default\n"));
1338 		return (EINVAL);
1339 	}
1340 	nce_make_mapping(nce, hw_addr, (uchar_t *)addr);
1341 	NCE_REFRELE(nce);
1342 	ill_dlpi_send(ill, mp);
1343 	return (0);
1344 }
1345 
/*
 * Send a neighbor solicitation.
 * Returns number of milliseconds after which we should either rexmit or abort.
 * Return of zero means we should abort.
 * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt, and also
 * holds ips_ill_g_lock as reader (both are asserted below).
 *
 * If mp is non-NULL it is queued on the nce via nce_queue_mp(); if it is
 * NULL the packet already sitting in nce_qd_mp is examined instead.  The
 * packet is only inspected to pick a source address for the solicitation.
 *
 * NOTE: This routine drops nce_lock and ips_ill_g_lock (and later reacquires
 * both) when sending the packet.
 * NOTE: This routine does not consume mp.
 */
uint32_t
nce_solicit(nce_t *nce, mblk_t *mp)
{
	ill_t		*ill;
	ill_t		*src_ill;
	ip6_t		*ip6h;
	in6_addr_t	src;
	in6_addr_t	dst;
	ipif_t		*ipif;
	ip6i_t		*ip6i;
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = nce->nce_ill->ill_ipst;

	ASSERT(RW_READ_HELD(&ipst->ips_ill_g_lock));
	ASSERT(MUTEX_HELD(&nce->nce_lock));
	ill = nce->nce_ill;
	ASSERT(ill != NULL);

	/* Retransmit budget exhausted: tell the caller to abort. */
	if (nce->nce_rcnt == 0) {
		return (0);
	}

	if (mp == NULL) {
		ASSERT(nce->nce_qd_mp != NULL);
		mp = nce->nce_qd_mp;
	} else {
		nce_queue_mp(nce, mp);
	}

	/* Handle ip_newroute_v6 giving us IPSEC packets */
	if (mp->b_datap->db_type == M_CTL)
		mp = mp->b_cont;

	ip6h = (ip6_t *)mp->b_rptr;
	if (ip6h->ip6_nxt == IPPROTO_RAW) {
		/*
		 * This message should have been pulled up already in
		 * ip_wput_v6. We can't do pullups here because the message
		 * could be from the nce_qd_mp which could have b_next/b_prev
		 * non-NULL.
		 */
		ip6i = (ip6i_t *)ip6h;
		ASSERT((mp->b_wptr - (uchar_t *)ip6i) >=
		    sizeof (ip6i_t) + IPV6_HDR_LEN);
		/* The real IPv6 header follows the ip6i_t. */
		ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
	}
	src = ip6h->ip6_src;
	/*
	 * If the src of outgoing packet is one of the assigned interface
	 * addresses use it, otherwise we will pick the source address below.
	 */
	src_ill = ill;
	if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
		/* When the ill is in a group, search every ill of the group. */
		if (ill->ill_group != NULL)
			src_ill = ill->ill_group->illgrp_ill;
		for (; src_ill != NULL; src_ill = src_ill->ill_group_next) {
			for (ipif = src_ill->ill_ipif; ipif != NULL;
			    ipif = ipif->ipif_next) {
				if (IN6_ARE_ADDR_EQUAL(&src,
				    &ipif->ipif_v6lcl_addr)) {
					break;
				}
			}
			/* Non-NULL ipif here means the inner loop matched. */
			if (ipif != NULL)
				break;
		}
		/*
		 * If no relevant ipif can be found, then it's not one of our
		 * addresses.  Reset to :: and let nce_xmit pick the source.
		 * If an ipif can be found, but it's not yet done with DAD
		 * verification, then just postpone this transmission until
		 * later (nce_rcnt is left untouched in that case).
		 */
		if (src_ill == NULL)
			src = ipv6_all_zeros;
		else if (!ipif->ipif_addr_ready)
			return (ill->ill_reachable_retrans_time);
	}
	dst = nce->nce_addr;
	/*
	 * If source address is unspecified, nce_xmit will choose
	 * one for us and initialize the hardware address also
	 * appropriately.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&src))
		src_ill = NULL;
	/* Charge this attempt against the retransmit budget up front. */
	nce->nce_rcnt--;
	mutex_exit(&nce->nce_lock);
	rw_exit(&ipst->ips_ill_g_lock);
	dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, src_ill, B_TRUE, &src,
	    &dst, 0);
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	mutex_enter(&nce->nce_lock);
	/* Nothing went out, so give the attempt back. */
	if (dropped)
		nce->nce_rcnt++;
	return (ill->ill_reachable_retrans_time);
}
1452 
1453 /*
1454  * Attempt to recover an address on an interface that's been marked as a
1455  * duplicate.  Because NCEs are destroyed when the interface goes down, there's
1456  * no easy way to just probe the address and have the right thing happen if
1457  * it's no longer in use.  Instead, we just bring it up normally and allow the
1458  * regular interface start-up logic to probe for a remaining duplicate and take
1459  * us back down if necessary.
1460  * Neither DHCP nor temporary addresses arrive here; they're excluded by
1461  * ip_ndp_excl.
1462  */
1463 /* ARGSUSED */
1464 static void
1465 ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
1466 {
1467 	ill_t	*ill = rq->q_ptr;
1468 	ipif_t	*ipif;
1469 	in6_addr_t *addr = (in6_addr_t *)mp->b_rptr;
1470 
1471 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
1472 		/*
1473 		 * We do not support recovery of proxy ARP'd interfaces,
1474 		 * because the system lacks a complete proxy ARP mechanism.
1475 		 */
1476 		if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
1477 		    !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) {
1478 			continue;
1479 		}
1480 
1481 		/*
1482 		 * If we have already recovered or if the interface is going
1483 		 * away, then ignore.
1484 		 */
1485 		mutex_enter(&ill->ill_lock);
1486 		if (!(ipif->ipif_flags & IPIF_DUPLICATE) ||
1487 		    (ipif->ipif_flags & (IPIF_MOVING | IPIF_CONDEMNED))) {
1488 			mutex_exit(&ill->ill_lock);
1489 			continue;
1490 		}
1491 
1492 		ipif->ipif_flags &= ~IPIF_DUPLICATE;
1493 		ill->ill_ipif_dup_count--;
1494 		mutex_exit(&ill->ill_lock);
1495 		ipif->ipif_was_dup = B_TRUE;
1496 
1497 		if (ipif_ndp_up(ipif) != EINPROGRESS)
1498 			(void) ipif_up_done_v6(ipif);
1499 	}
1500 	freeb(mp);
1501 }
1502 
1503 /*
1504  * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
1505  * As long as someone else holds the address, the interface will stay down.
1506  * When that conflict goes away, the interface is brought back up.  This is
1507  * done so that accidental shutdowns of addresses aren't made permanent.  Your
1508  * server will recover from a failure.
1509  *
1510  * For DHCP and temporary addresses, recovery is not done in the kernel.
1511  * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
1512  *
1513  * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
1514  */
1515 static void
1516 ipif6_dup_recovery(void *arg)
1517 {
1518 	ipif_t *ipif = arg;
1519 
1520 	ipif->ipif_recovery_id = 0;
1521 	if (!(ipif->ipif_flags & IPIF_DUPLICATE))
1522 		return;
1523 
1524 	/*
1525 	 * No lock, because this is just an optimization.
1526 	 */
1527 	if (ipif->ipif_state_flags & (IPIF_MOVING | IPIF_CONDEMNED))
1528 		return;
1529 
1530 	/* If the link is down, we'll retry this later */
1531 	if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
1532 		return;
1533 
1534 	ndp_do_recovery(ipif);
1535 }
1536 
1537 /*
1538  * Perform interface recovery by forcing the duplicate interfaces up and
1539  * allowing the system to determine which ones should stay up.
1540  *
1541  * Called both by recovery timer expiry and link-up notification.
1542  */
1543 void
1544 ndp_do_recovery(ipif_t *ipif)
1545 {
1546 	ill_t *ill = ipif->ipif_ill;
1547 	mblk_t *mp;
1548 	ip_stack_t *ipst = ill->ill_ipst;
1549 
1550 	mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED);
1551 	if (mp == NULL) {
1552 		mutex_enter(&ill->ill_lock);
1553 		if (ipif->ipif_recovery_id == 0 &&
1554 		    !(ipif->ipif_state_flags & (IPIF_MOVING |
1555 		    IPIF_CONDEMNED))) {
1556 			ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
1557 			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
1558 		}
1559 		mutex_exit(&ill->ill_lock);
1560 	} else {
1561 		bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
1562 		    sizeof (ipif->ipif_v6lcl_addr));
1563 		ill_refhold(ill);
1564 		qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP,
1565 		    B_FALSE);
1566 	}
1567 }
1568 
1569 /*
1570  * Find the solicitation in the given message, and extract printable details
1571  * (MAC and IP addresses) from it.
1572  */
1573 static nd_neighbor_solicit_t *
1574 ip_ndp_find_solicitation(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, char *hbuf,
1575     size_t hlen, char *sbuf, size_t slen, uchar_t **haddr)
1576 {
1577 	nd_neighbor_solicit_t *ns;
1578 	ip6_t *ip6h;
1579 	uchar_t *addr;
1580 	int alen;
1581 
1582 	alen = 0;
1583 	ip6h = (ip6_t *)mp->b_rptr;
1584 	if (dl_mp == NULL) {
1585 		nd_opt_hdr_t *opt;
1586 		int nslen;
1587 
1588 		/*
1589 		 * If it's from the fast-path, then it can't be a probe
1590 		 * message, and thus must include the source linkaddr option.
1591 		 * Extract that here.
1592 		 */
1593 		ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN);
1594 		nslen = mp->b_wptr - (uchar_t *)ns;
1595 		if ((nslen -= sizeof (*ns)) > 0) {
1596 			opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), nslen,
1597 			    ND_OPT_SOURCE_LINKADDR);
1598 			if (opt != NULL &&
1599 			    opt->nd_opt_len * 8 - sizeof (*opt) >=
1600 			    ill->ill_nd_lla_len) {
1601 				addr = (uchar_t *)(opt + 1);
1602 				alen = ill->ill_nd_lla_len;
1603 			}
1604 		}
1605 		/*
1606 		 * We cheat a bit here for the sake of printing usable log
1607 		 * messages in the rare case where the reply we got was unicast
1608 		 * without a source linkaddr option, and the interface is in
1609 		 * fastpath mode.  (Sigh.)
1610 		 */
1611 		if (alen == 0 && ill->ill_type == IFT_ETHER &&
1612 		    MBLKHEAD(mp) >= sizeof (struct ether_header)) {
1613 			struct ether_header *pether;
1614 
1615 			pether = (struct ether_header *)((char *)ip6h -
1616 			    sizeof (*pether));
1617 			addr = pether->ether_shost.ether_addr_octet;
1618 			alen = ETHERADDRL;
1619 		}
1620 	} else {
1621 		dl_unitdata_ind_t *dlu;
1622 
1623 		dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr;
1624 		alen = dlu->dl_src_addr_length;
1625 		if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) &&
1626 		    dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) {
1627 			addr = dl_mp->b_rptr + dlu->dl_src_addr_offset;
1628 			if (ill->ill_sap_length < 0) {
1629 				alen += ill->ill_sap_length;
1630 			} else {
1631 				addr += ill->ill_sap_length;
1632 				alen -= ill->ill_sap_length;
1633 			}
1634 		}
1635 	}
1636 	if (alen > 0) {
1637 		*haddr = addr;
1638 		(void) mac_colon_addr(addr, alen, hbuf, hlen);
1639 	} else {
1640 		*haddr = NULL;
1641 		(void) strcpy(hbuf, "?");
1642 	}
1643 	ns = (nd_neighbor_solicit_t *)((char *)ip6h + IPV6_HDR_LEN);
1644 	(void) inet_ntop(AF_INET6, &ns->nd_ns_target, sbuf, slen);
1645 	return (ns);
1646 }
1647 
1648 /*
1649  * This is for exclusive changes due to NDP duplicate address detection
1650  * failure.
1651  */
1652 /* ARGSUSED */
1653 static void
1654 ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
1655 {
1656 	ill_t	*ill = rq->q_ptr;
1657 	ipif_t	*ipif;
1658 	char ibuf[LIFNAMSIZ + 10];	/* 10 digits for logical i/f number */
1659 	char hbuf[MAC_STR_LEN];
1660 	char sbuf[INET6_ADDRSTRLEN];
1661 	nd_neighbor_solicit_t *ns;
1662 	mblk_t *dl_mp = NULL;
1663 	uchar_t *haddr;
1664 	ip_stack_t *ipst = ill->ill_ipst;
1665 
1666 	if (DB_TYPE(mp) != M_DATA) {
1667 		dl_mp = mp;
1668 		mp = mp->b_cont;
1669 	}
1670 	ns = ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf, sizeof (hbuf), sbuf,
1671 	    sizeof (sbuf), &haddr);
1672 	if (haddr != NULL &&
1673 	    bcmp(haddr, ill->ill_phys_addr, ill->ill_phys_addr_length) == 0) {
1674 		/*
1675 		 * Ignore conflicts generated by misbehaving switches that just
1676 		 * reflect our own messages back to us.
1677 		 */
1678 		goto ignore_conflict;
1679 	}
1680 
1681 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
1682 
1683 		if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
1684 		    !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
1685 		    &ns->nd_ns_target)) {
1686 			continue;
1687 		}
1688 
1689 		/* If it's already marked, then don't do anything. */
1690 		if (ipif->ipif_flags & IPIF_DUPLICATE)
1691 			continue;
1692 
1693 		/*
1694 		 * If this is a failure during duplicate recovery, then don't
1695 		 * complain.  It may take a long time to recover.
1696 		 */
1697 		if (!ipif->ipif_was_dup) {
1698 			ipif_get_name(ipif, ibuf, sizeof (ibuf));
1699 			cmn_err(CE_WARN, "%s has duplicate address %s (in "
1700 			    "use by %s); disabled", ibuf, sbuf, hbuf);
1701 		}
1702 		mutex_enter(&ill->ill_lock);
1703 		ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
1704 		ipif->ipif_flags |= IPIF_DUPLICATE;
1705 		ill->ill_ipif_dup_count++;
1706 		mutex_exit(&ill->ill_lock);
1707 		(void) ipif_down(ipif, NULL, NULL);
1708 		ipif_down_tail(ipif);
1709 		mutex_enter(&ill->ill_lock);
1710 		if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
1711 		    ill->ill_net_type == IRE_IF_RESOLVER &&
1712 		    !(ipif->ipif_state_flags & (IPIF_MOVING |
1713 		    IPIF_CONDEMNED)) &&
1714 		    ipst->ips_ip_dup_recovery > 0) {
1715 			ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
1716 			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
1717 		}
1718 		mutex_exit(&ill->ill_lock);
1719 	}
1720 ignore_conflict:
1721 	if (dl_mp != NULL)
1722 		freeb(dl_mp);
1723 	freemsg(mp);
1724 }
1725 
1726 /*
1727  * Handle failure by tearing down the ipifs with the specified address.  Note
1728  * that tearing down the ipif also means deleting the nce through ipif_down, so
1729  * it's not possible to do recovery by just restarting the nce timer.  Instead,
1730  * we start a timer on the ipif.
1731  */
1732 static void
1733 ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
1734 {
1735 	if ((mp = copymsg(mp)) != NULL) {
1736 		if (dl_mp == NULL)
1737 			dl_mp = mp;
1738 		else if ((dl_mp = copyb(dl_mp)) != NULL)
1739 			dl_mp->b_cont = mp;
1740 		if (dl_mp == NULL) {
1741 			freemsg(mp);
1742 		} else {
1743 			ill_refhold(ill);
1744 			qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
1745 			    B_FALSE);
1746 		}
1747 	}
1748 	ndp_delete(nce);
1749 }
1750 
1751 /*
1752  * Handle a discovered conflict: some other system is advertising that it owns
1753  * one of our IP addresses.  We need to defend ourselves, or just shut down the
1754  * interface.
1755  */
1756 static void
1757 ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
1758 {
1759 	ipif_t *ipif;
1760 	uint32_t now;
1761 	uint_t maxdefense;
1762 	uint_t defs;
1763 	ip_stack_t *ipst = ill->ill_ipst;
1764 
1765 	ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
1766 	    NULL, NULL, ipst);
1767 	if (ipif == NULL)
1768 		return;
1769 	/*
1770 	 * First, figure out if this address is disposable.
1771 	 */
1772 	if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
1773 		maxdefense = ipst->ips_ip_max_temp_defend;
1774 	else
1775 		maxdefense = ipst->ips_ip_max_defend;
1776 
1777 	/*
1778 	 * Now figure out how many times we've defended ourselves.  Ignore
1779 	 * defenses that happened long in the past.
1780 	 */
1781 	now = gethrestime_sec();
1782 	mutex_enter(&nce->nce_lock);
1783 	if ((defs = nce->nce_defense_count) > 0 &&
1784 	    now - nce->nce_defense_time > ipst->ips_ip_defend_interval) {
1785 		nce->nce_defense_count = defs = 0;
1786 	}
1787 	nce->nce_defense_count++;
1788 	nce->nce_defense_time = now;
1789 	mutex_exit(&nce->nce_lock);
1790 	ipif_refrele(ipif);
1791 
1792 	/*
1793 	 * If we've defended ourselves too many times already, then give up and
1794 	 * tear down the interface(s) using this address.  Otherwise, defend by
1795 	 * sending out an unsolicited Neighbor Advertisement.
1796 	 */
1797 	if (defs >= maxdefense) {
1798 		ip_ndp_failure(ill, mp, dl_mp, nce);
1799 	} else {
1800 		char hbuf[MAC_STR_LEN];
1801 		char sbuf[INET6_ADDRSTRLEN];
1802 		uchar_t *haddr;
1803 
1804 		(void) ip_ndp_find_solicitation(mp, dl_mp, ill, hbuf,
1805 		    sizeof (hbuf), sbuf, sizeof (sbuf), &haddr);
1806 		cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
1807 		    hbuf, sbuf, ill->ill_name);
1808 		(void) nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill, B_FALSE,
1809 		    &nce->nce_addr, &ipv6_all_hosts_mcast,
1810 		    nce_advert_flags(nce));
1811 	}
1812 }
1813 
1814 static void
1815 ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
1816 {
1817 	nd_neighbor_solicit_t *ns;
1818 	uint32_t	hlen = ill->ill_nd_lla_len;
1819 	uchar_t		*haddr = NULL;
1820 	icmp6_t		*icmp_nd;
1821 	ip6_t		*ip6h;
1822 	nce_t		*our_nce = NULL;
1823 	in6_addr_t	target;
1824 	in6_addr_t	src;
1825 	int		len;
1826 	int		flag = 0;
1827 	nd_opt_hdr_t	*opt = NULL;
1828 	boolean_t	bad_solicit = B_FALSE;
1829 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
1830 
1831 	ip6h = (ip6_t *)mp->b_rptr;
1832 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
1833 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
1834 	src = ip6h->ip6_src;
1835 	ns = (nd_neighbor_solicit_t *)icmp_nd;
1836 	target = ns->nd_ns_target;
1837 	if (IN6_IS_ADDR_MULTICAST(&target)) {
1838 		if (ip_debug > 2) {
1839 			/* ip1dbg */
1840 			pr_addr_dbg("ndp_input_solicit: Target is"
1841 			    " multicast! %s\n", AF_INET6, &target);
1842 		}
1843 		bad_solicit = B_TRUE;
1844 		goto done;
1845 	}
1846 	if (len > sizeof (nd_neighbor_solicit_t)) {
1847 		/* Options present */
1848 		opt = (nd_opt_hdr_t *)&ns[1];
1849 		len -= sizeof (nd_neighbor_solicit_t);
1850 		if (!ndp_verify_optlen(opt, len)) {
1851 			ip1dbg(("ndp_input_solicit: Bad opt len\n"));
1852 			bad_solicit = B_TRUE;
1853 			goto done;
1854 		}
1855 	}
1856 	if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1857 		/* Check to see if this is a valid DAD solicitation */
1858 		if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
1859 			if (ip_debug > 2) {
1860 				/* ip1dbg */
1861 				pr_addr_dbg("ndp_input_solicit: IPv6 "
1862 				    "Destination is not solicited node "
1863 				    "multicast %s\n", AF_INET6,
1864 				    &ip6h->ip6_dst);
1865 			}
1866 			bad_solicit = B_TRUE;
1867 			goto done;
1868 		}
1869 	}
1870 
1871 	our_nce = ndp_lookup_v6(ill, &target, B_FALSE);
1872 	/*
1873 	 * If this is a valid Solicitation, a permanent
1874 	 * entry should exist in the cache
1875 	 */
1876 	if (our_nce == NULL ||
1877 	    !(our_nce->nce_flags & NCE_F_PERMANENT)) {
1878 		ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
1879 		    "ifname=%s ", ill->ill_name));
1880 		if (ip_debug > 2) {
1881 			/* ip1dbg */
1882 			pr_addr_dbg(" dst %s\n", AF_INET6, &target);
1883 		}
1884 		bad_solicit = B_TRUE;
1885 		goto done;
1886 	}
1887 
1888 	/* At this point we should have a verified NS per spec */
1889 	if (opt != NULL) {
1890 		opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
1891 		if (opt != NULL) {
1892 			haddr = (uchar_t *)&opt[1];
1893 			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
1894 			    hlen == 0) {
1895 				ip1dbg(("ndp_input_advert: bad SLLA\n"));
1896 				bad_solicit = B_TRUE;
1897 				goto done;
1898 			}
1899 		}
1900 	}
1901 
1902 	/* If sending directly to peer, set the unicast flag */
1903 	if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))
1904 		flag |= NDP_UNICAST;
1905 
1906 	/*
1907 	 * Create/update the entry for the soliciting node.
1908 	 * or respond to outstanding queries, don't if
1909 	 * the source is unspecified address.
1910 	 */
1911 	if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
1912 		int	err;
1913 		nce_t	*nnce;
1914 
1915 		ASSERT(ill->ill_isv6);
1916 		/*
1917 		 * Regular solicitations *must* include the Source Link-Layer
1918 		 * Address option.  Ignore messages that do not.
1919 		 */
1920 		if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
1921 			ip1dbg(("ndp_input_solicit: source link-layer address "
1922 			    "option missing with a specified source.\n"));
1923 			bad_solicit = B_TRUE;
1924 			goto done;
1925 		}
1926 
1927 		/*
1928 		 * This is a regular solicitation.  If we're still in the
1929 		 * process of verifying the address, then don't respond at all
1930 		 * and don't keep track of the sender.
1931 		 */
1932 		if (our_nce->nce_state == ND_PROBE)
1933 			goto done;
1934 
1935 		/*
1936 		 * If the solicitation doesn't have sender hardware address
1937 		 * (legal for unicast solicitation), then process without
1938 		 * installing the return NCE.  Either we already know it, or
1939 		 * we'll be forced to look it up when (and if) we reply to the
1940 		 * packet.
1941 		 */
1942 		if (haddr == NULL)
1943 			goto no_source;
1944 
1945 		err = ndp_lookup_then_add_v6(ill,
1946 		    haddr,
1947 		    &src,	/* Soliciting nodes address */
1948 		    &ipv6_all_ones,
1949 		    &ipv6_all_zeros,
1950 		    0,
1951 		    0,
1952 		    ND_STALE,
1953 		    &nnce);
1954 		switch (err) {
1955 		case 0:
1956 			/* done with this entry */
1957 			NCE_REFRELE(nnce);
1958 			break;
1959 		case EEXIST:
1960 			/*
1961 			 * B_FALSE indicates this is not an
1962 			 * an advertisement.
1963 			 */
1964 			ndp_process(nnce, haddr, 0, B_FALSE);
1965 			NCE_REFRELE(nnce);
1966 			break;
1967 		default:
1968 			ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
1969 			    err));
1970 			goto done;
1971 		}
1972 no_source:
1973 		flag |= NDP_SOLICITED;
1974 	} else {
1975 		/*
1976 		 * No source link layer address option should be present in a
1977 		 * valid DAD request.
1978 		 */
1979 		if (haddr != NULL) {
1980 			ip1dbg(("ndp_input_solicit: source link-layer address "
1981 			    "option present with an unspecified source.\n"));
1982 			bad_solicit = B_TRUE;
1983 			goto done;
1984 		}
1985 		if (our_nce->nce_state == ND_PROBE) {
1986 			/*
1987 			 * Internally looped-back probes won't have DLPI
1988 			 * attached to them.  External ones (which are sent by
1989 			 * multicast) always will.  Just ignore our own
1990 			 * transmissions.
1991 			 */
1992 			if (dl_mp != NULL) {
1993 				/*
1994 				 * If someone else is probing our address, then
1995 				 * we've crossed wires.  Declare failure.
1996 				 */
1997 				ip_ndp_failure(ill, mp, dl_mp, our_nce);
1998 			}
1999 			goto done;
2000 		}
2001 		/*
2002 		 * This is a DAD probe.  Multicast the advertisement to the
2003 		 * all-nodes address.
2004 		 */
2005 		src = ipv6_all_hosts_mcast;
2006 	}
2007 	flag |= nce_advert_flags(our_nce);
2008 	/* Response to a solicitation */
2009 	(void) nce_xmit(ill,
2010 	    ND_NEIGHBOR_ADVERT,
2011 	    ill,	/* ill to be used for extracting ill_nd_lla */
2012 	    B_TRUE,	/* use ill_nd_lla */
2013 	    &target,	/* Source and target of the advertisement pkt */
2014 	    &src,	/* IP Destination (source of original pkt) */
2015 	    flag);
2016 done:
2017 	if (bad_solicit)
2018 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
2019 	if (our_nce != NULL)
2020 		NCE_REFRELE(our_nce);
2021 }
2022 
2023 void
2024 ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
2025 {
2026 	nd_neighbor_advert_t *na;
2027 	uint32_t	hlen = ill->ill_nd_lla_len;
2028 	uchar_t		*haddr = NULL;
2029 	icmp6_t		*icmp_nd;
2030 	ip6_t		*ip6h;
2031 	nce_t		*dst_nce = NULL;
2032 	in6_addr_t	target;
2033 	nd_opt_hdr_t	*opt = NULL;
2034 	int		len;
2035 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
2036 	ip_stack_t	*ipst = ill->ill_ipst;
2037 
2038 	ip6h = (ip6_t *)mp->b_rptr;
2039 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
2040 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
2041 	na = (nd_neighbor_advert_t *)icmp_nd;
2042 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
2043 	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
2044 		ip1dbg(("ndp_input_advert: Target is multicast but the "
2045 		    "solicited flag is not zero\n"));
2046 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
2047 		return;
2048 	}
2049 	target = na->nd_na_target;
2050 	if (IN6_IS_ADDR_MULTICAST(&target)) {
2051 		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
2052 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
2053 		return;
2054 	}
2055 	if (len > sizeof (nd_neighbor_advert_t)) {
2056 		opt = (nd_opt_hdr_t *)&na[1];
2057 		if (!ndp_verify_optlen(opt,
2058 		    len - sizeof (nd_neighbor_advert_t))) {
2059 			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
2060 			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
2061 			return;
2062 		}
2063 		/* At this point we have a verified NA per spec */
2064 		len -= sizeof (nd_neighbor_advert_t);
2065 		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
2066 		if (opt != NULL) {
2067 			haddr = (uchar_t *)&opt[1];
2068 			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
2069 			    hlen == 0) {
2070 				ip1dbg(("ndp_input_advert: bad SLLA\n"));
2071 				BUMP_MIB(mib,
2072 				    ipv6IfIcmpInBadNeighborAdvertisements);
2073 				return;
2074 			}
2075 		}
2076 	}
2077 
2078 	/*
2079 	 * If this interface is part of the group look at all the
2080 	 * ills in the group.
2081 	 */
2082 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2083 	if (ill->ill_group != NULL)
2084 		ill = ill->ill_group->illgrp_ill;
2085 
2086 	for (; ill != NULL; ill = ill->ill_group_next) {
2087 		mutex_enter(&ill->ill_lock);
2088 		if (!ILL_CAN_LOOKUP(ill)) {
2089 			mutex_exit(&ill->ill_lock);
2090 			continue;
2091 		}
2092 		ill_refhold_locked(ill);
2093 		mutex_exit(&ill->ill_lock);
2094 		dst_nce = ndp_lookup_v6(ill, &target, B_FALSE);
2095 		/* We have to drop the lock since ndp_process calls put* */
2096 		rw_exit(&ipst->ips_ill_g_lock);
2097 		if (dst_nce != NULL) {
2098 			if ((dst_nce->nce_flags & NCE_F_PERMANENT) &&
2099 			    dst_nce->nce_state == ND_PROBE) {
2100 				/*
2101 				 * Someone else sent an advertisement for an
2102 				 * address that we're trying to configure.
2103 				 * Tear it down.  Note that dl_mp might be NULL
2104 				 * if we're getting a unicast reply.  This
2105 				 * isn't typically done (multicast is the norm
2106 				 * in response to a probe), but ip_ndp_failure
2107 				 * will handle the dl_mp == NULL case as well.
2108 				 */
2109 				ip_ndp_failure(ill, mp, dl_mp, dst_nce);
2110 			} else if (dst_nce->nce_flags & NCE_F_PERMANENT) {
2111 				/*
2112 				 * Someone just announced one of our local
2113 				 * addresses.  If it wasn't us, then this is a
2114 				 * conflict.  Defend the address or shut it
2115 				 * down.
2116 				 */
2117 				if (dl_mp != NULL &&
2118 				    (haddr == NULL ||
2119 				    nce_cmp_ll_addr(dst_nce, haddr,
2120 				    ill->ill_nd_lla_len))) {
2121 					ip_ndp_conflict(ill, mp, dl_mp,
2122 					    dst_nce);
2123 				}
2124 			} else {
2125 				if (na->nd_na_flags_reserved &
2126 				    ND_NA_FLAG_ROUTER) {
2127 					dst_nce->nce_flags |= NCE_F_ISROUTER;
2128 				}
2129 				/* B_TRUE indicates this an advertisement */
2130 				ndp_process(dst_nce, haddr,
2131 				    na->nd_na_flags_reserved, B_TRUE);
2132 			}
2133 			NCE_REFRELE(dst_nce);
2134 		}
2135 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2136 		ill_refrele(ill);
2137 	}
2138 	rw_exit(&ipst->ips_ill_g_lock);
2139 }
2140 
2141 /*
2142  * Process NDP neighbor solicitation/advertisement messages.
2143  * The checksum has already checked o.k before reaching here.
2144  */
2145 void
2146 ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
2147 {
2148 	icmp6_t		*icmp_nd;
2149 	ip6_t		*ip6h;
2150 	int		len;
2151 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
2152 
2153 
2154 	if (!pullupmsg(mp, -1)) {
2155 		ip1dbg(("ndp_input: pullupmsg failed\n"));
2156 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2157 		goto done;
2158 	}
2159 	ip6h = (ip6_t *)mp->b_rptr;
2160 	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
2161 		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
2162 		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
2163 		goto done;
2164 	}
2165 	/*
2166 	 * NDP does not accept any extension headers between the
2167 	 * IP header and the ICMP header since e.g. a routing
2168 	 * header could be dangerous.
2169 	 * This assumes that any AH or ESP headers are removed
2170 	 * by ip prior to passing the packet to ndp_input.
2171 	 */
2172 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
2173 		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
2174 		    ip6h->ip6_nxt));
2175 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
2176 		goto done;
2177 	}
2178 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
2179 	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
2180 	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
2181 	if (icmp_nd->icmp6_code != 0) {
2182 		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
2183 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
2184 		goto done;
2185 	}
2186 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
2187 	/*
2188 	 * Make sure packet length is large enough for either
2189 	 * a NS or a NA icmp packet.
2190 	 */
2191 	if (len <  sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
2192 		ip1dbg(("ndp_input: packet too short\n"));
2193 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
2194 		goto done;
2195 	}
2196 	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
2197 		ndp_input_solicit(ill, mp, dl_mp);
2198 	} else {
2199 		ndp_input_advert(ill, mp, dl_mp);
2200 	}
2201 done:
2202 	freemsg(mp);
2203 }
2204 
/*
 * nce_xmit is called to form and transmit a ND solicitation or
 * advertisement ICMP packet.
 *
 * If the source address is unspecified and this isn't a probe (used for
 * duplicate address detection), an appropriate source address and link layer
 * address will be chosen here.  The link layer address option is included if
 * the source is specified (i.e., all non-probe packets), and omitted (per the
 * specification) otherwise.
 *
 * Arguments:
 *   ill	- outgoing interface; supplies the ifindex placed in the
 *		  ip6i_t and the write queue handed to ip_output_v6().
 *   operation	- ND_NEIGHBOR_SOLICIT builds a solicitation; any other value
 *		  takes the advertisement path.
 *   hwaddr_ill	- ill whose link-layer address goes into the option.  May be
 *		  NULL when the sender is unspecified; it is replaced by the
 *		  ill of the source ipif when a source is selected here.
 *   use_nd_lla	- B_TRUE to use ill_nd_lla, B_FALSE to use ill_phys_addr.
 *   sender	- IPv6 source address; for an advertisement it is also the
 *		  advertised target.
 *   target	- IPv6 destination; for a solicitation it is also the
 *		  solicited target, and unless NDP_UNICAST is set the
 *		  destination becomes the target's solicited-node multicast
 *		  address.
 *   flag	- NDP_PROBE, NDP_UNICAST, NDP_ISROUTER, NDP_SOLICITED and
 *		  NDP_ORIDE are examined here.
 *
 * It returns B_FALSE only if it does a successful put() to the
 * corresponding ill's ill_wq otherwise returns B_TRUE.
 * (B_TRUE is returned when no source ipif can be found or when the
 * mblk allocation fails; nothing is sent in either case.)
 */
static boolean_t
nce_xmit(ill_t *ill, uint32_t operation, ill_t *hwaddr_ill,
    boolean_t use_nd_lla, const in6_addr_t *sender, const in6_addr_t *target,
    int flag)
{
	uint32_t	len;
	icmp6_t 	*icmp6;
	mblk_t		*mp;
	ip6_t		*ip6h;
	nd_opt_hdr_t	*opt;
	uint_t		plen;
	ip6i_t		*ip6i;
	ipif_t		*src_ipif = NULL;
	uint8_t		*hw_addr;
	zoneid_t	zoneid = GLOBAL_ZONEID;

	/*
	 * If we have a unspecified source(sender) address, select a
	 * proper source address for the solicitation here itself so
	 * that we can initialize the h/w address correctly. This is
	 * needed for interface groups as source address can come from
	 * the whole group and the h/w address initialized from ill will
	 * be wrong if the source address comes from a different ill.
	 *
	 * If the sender is specified then we use this address in order
	 * to lookup the zoneid before calling ip_output_v6(). This is to
	 * enable unicast ND_NEIGHBOR_ADVERT packets to be routed correctly
	 * by IP (we cannot guarantee that the global zone has an interface
	 * route to the destination).
	 *
	 * Note that the NA never comes here with the unspecified source
	 * address. The following asserts that whenever the source
	 * address is specified, the haddr also should be specified.
	 */
	ASSERT(IN6_IS_ADDR_UNSPECIFIED(sender) || (hwaddr_ill != NULL));

	if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
		ASSERT(operation != ND_NEIGHBOR_ADVERT);
		/*
		 * Pick a source address for this solicitation, but
		 * restrict the selection to addresses assigned to the
		 * output interface (or interface group).  We do this
		 * because the destination will create a neighbor cache
		 * entry for the source address of this packet, so the
		 * source address had better be a valid neighbor.
		 */
		src_ipif = ipif_select_source_v6(ill, target, RESTRICT_TO_ILL,
		    IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
		if (src_ipif == NULL) {
			char buf[INET6_ADDRSTRLEN];

			ip1dbg(("nce_xmit: No source ipif for dst %s\n",
			    inet_ntop(AF_INET6, (char *)target, buf,
			    sizeof (buf))));
			return (B_TRUE);
		}
		/*
		 * sender now points into src_ipif, so the reference on
		 * src_ipif is kept until the address has been copied into
		 * the packet below.
		 */
		sender = &src_ipif->ipif_v6src_addr;
		hwaddr_ill = src_ipif->ipif_ill;
	} else if (!(IN6_IS_ADDR_UNSPECIFIED(sender))) {
		zoneid = ipif_lookup_addr_zoneid_v6(sender, ill, ill->ill_ipst);
		/*
		 * It's possible for ipif_lookup_addr_zoneid_v6() to return
		 * ALL_ZONES if it cannot find a matching ipif for the address
		 * we are trying to use. In this case we err on the side of
		 * trying to send the packet by defaulting to the GLOBAL_ZONEID.
		 */
		if (zoneid == ALL_ZONES)
			zoneid = GLOBAL_ZONEID;
	}

	/*
	 * Always make sure that the NS/NA packets don't get load
	 * spread. This is needed so that the probe packets sent
	 * by the in.mpathd daemon can really go out on the desired
	 * interface. Probe packets are made to go out on a desired
	 * interface by including a ip6i with ATTACH_IF flag. As these
	 * packets indirectly end up sending/receiving NS/NA packets
	 * (neighbor doing NUD), we have to make sure that NA
	 * also go out on the same interface.
	 */
	/*
	 * plen is the link-layer address option length in units of 8 bytes
	 * (option header plus address, rounded up).  The buffer is sized for
	 * the larger of the two message layouts used below (the
	 * advertisement structure) plus the option; the option is trimmed
	 * off again further down if it turns out not to be needed.
	 *
	 * NOTE(review): plen is derived from ill->ill_nd_lla_len while the
	 * bcopy() below copies hwaddr_ill->ill_nd_lla_len bytes.  Presumably
	 * the two ills always have the same link-layer address length --
	 * confirm.
	 */
	plen = (sizeof (nd_opt_hdr_t) + ill->ill_nd_lla_len + 7) / 8;
	len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
	    plen * 8;
	mp = allocb(len,  BPRI_LO);
	if (mp == NULL) {
		if (src_ipif != NULL)
			ipif_refrele(src_ipif);
		return (B_TRUE);
	}
	bzero((char *)mp->b_rptr, len);
	mp->b_wptr = mp->b_rptr + len;

	/* The ip6i_t pseudo-header comes first, ahead of the real header. */
	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT;
	if (flag & NDP_PROBE)
		ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
	ip6h->ip6_nxt = IPPROTO_ICMPV6;
	ip6h->ip6_hops = IPV6_MAX_HOPS;
	ip6h->ip6_dst = *target;
	icmp6 = (icmp6_t *)&ip6h[1];

	/* The option is placed after the advertisement-sized ND header. */
	opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
	    sizeof (nd_neighbor_advert_t));

	if (operation == ND_NEIGHBOR_SOLICIT) {
		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;

		if (!(flag & NDP_PROBE))
			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
		ip6h->ip6_src = *sender;
		ns->nd_ns_target = *target;
		if (!(flag & NDP_UNICAST)) {
			/* Form multicast address of the target */
			ip6h->ip6_dst = ipv6_solicited_node_mcast;
			ip6h->ip6_dst.s6_addr32[3] |=
			    ns->nd_ns_target.s6_addr32[3];
		}
	} else {
		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;

		ASSERT(!(flag & NDP_PROBE));
		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		ip6h->ip6_src = *sender;
		/* We advertise our own address: target == source. */
		na->nd_na_target = *sender;
		if (flag & NDP_ISROUTER)
			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
		if (flag & NDP_SOLICITED)
			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
		if (flag & NDP_ORIDE)
			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
	}

	/* hw_addr stays NULL for probes, so the option is stripped below. */
	hw_addr = NULL;
	if (!(flag & NDP_PROBE)) {
		hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
		    hwaddr_ill->ill_phys_addr;
		if (hw_addr != NULL) {
			/* Fill in link layer address and option len */
			opt->nd_opt_len = (uint8_t)plen;
			bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
		}
	}
	if (hw_addr == NULL) {
		/* If there's no link layer address option, then strip it. */
		len -= plen * 8;
		mp->b_wptr = mp->b_rptr + len;
		ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
	}

	icmp6->icmp6_type = (uint8_t)operation;
	icmp6->icmp6_code = 0;
	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	icmp6->icmp6_cksum = ip6h->ip6_plen;

	/* The source address has been copied; the ipif hold can go. */
	if (src_ipif != NULL)
		ipif_refrele(src_ipif);

	ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
	return (B_FALSE);
}
2388 
2389 /*
2390  * Make a link layer address (does not include the SAP) from an nce.
2391  * To form the link layer address, use the last four bytes of ipv6
2392  * address passed in and the fixed offset stored in nce.
2393  */
2394 static void
2395 nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
2396 {
2397 	uchar_t *mask, *to;
2398 	ill_t	*ill = nce->nce_ill;
2399 	int 	len;
2400 
2401 	if (ill->ill_net_type == IRE_IF_NORESOLVER)
2402 		return;
2403 	ASSERT(nce->nce_res_mp != NULL);
2404 	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
2405 	ASSERT(nce->nce_flags & NCE_F_MAPPING);
2406 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
2407 	ASSERT(addr != NULL);
2408 	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
2409 	    addrpos, ill->ill_nd_lla_len);
2410 	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
2411 	    IPV6_ADDR_LEN);
2412 	mask = (uchar_t *)&nce->nce_extract_mask;
2413 	mask += (IPV6_ADDR_LEN - len);
2414 	addr += (IPV6_ADDR_LEN - len);
2415 	to = addrpos + nce->nce_ll_extract_start;
2416 	while (len-- > 0)
2417 		*to++ |= *mask++ & *addr++;
2418 }
2419 
2420 /*
2421  * Pass a cache report back out via NDD.
2422  */
2423 /* ARGSUSED */
2424 int
2425 ndp_report(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr)
2426 {
2427 	ip_stack_t	*ipst;
2428 
2429 	if (CONN_Q(q))
2430 		ipst = CONNQ_TO_IPST(q);
2431 	else
2432 		ipst = ILLQ_TO_IPST(q);
2433 
2434 	(void) mi_mpprintf(mp, "ifname      hardware addr    flags"
2435 	    "     proto addr/mask");
2436 	ndp_walk(NULL, (pfi_t)nce_report1, (uchar_t *)mp, ipst);
2437 	return (0);
2438 }
2439 
2440 /*
2441  * Add a single line to the NDP Cache Entry Report.
2442  */
2443 static void
2444 nce_report1(nce_t *nce, uchar_t *mp_arg)
2445 {
2446 	ill_t		*ill = nce->nce_ill;
2447 	char		local_buf[INET6_ADDRSTRLEN];
2448 	uchar_t		flags_buf[10];
2449 	uint32_t	flags = nce->nce_flags;
2450 	mblk_t		*mp = (mblk_t *)mp_arg;
2451 	uchar_t		*h;
2452 	uchar_t		*m = flags_buf;
2453 	in6_addr_t	v6addr;
2454 	uint64_t	now;
2455 
2456 	/*
2457 	 * Lock the nce to protect nce_res_mp from being changed
2458 	 * if an external resolver address resolution completes
2459 	 * while nce_res_mp is being accessed here.
2460 	 *
2461 	 * Deal with all address formats, not just Ethernet-specific
2462 	 * In addition, make sure that the mblk has enough space
2463 	 * before writing to it. If is doesn't, allocate a new one.
2464 	 */
2465 	if (nce->nce_ipversion == IPV4_VERSION) {
2466 		/*
2467 		 * Don't include v4 NCEs in NDP cache entry report.
2468 		 * But sanity check for lingering ND_INITIAL entries
2469 		 * when we do 'ndd -get /dev/ip ip_ndp_cache_report'
2470 		 */
2471 		if (nce->nce_state == ND_INITIAL) {
2472 
2473 			now = TICK_TO_MSEC(lbolt64);
2474 			if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) {
2475 				DTRACE_PROBE1(nce__stuck, nce_t *, nce);
2476 			}
2477 		}
2478 		return;
2479 	}
2480 
2481 	ASSERT(ill != NULL);
2482 	v6addr = nce->nce_mask;
2483 	if (flags & NCE_F_PERMANENT)
2484 		*m++ = 'P';
2485 	if (flags & NCE_F_ISROUTER)
2486 		*m++ = 'R';
2487 	if (flags & NCE_F_MAPPING)
2488 		*m++ = 'M';
2489 	*m = '\0';
2490 
2491 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
2492 		size_t		addrlen;
2493 		char		*addr_buf;
2494 		dl_unitdata_req_t	*dl;
2495 
2496 		mutex_enter(&nce->nce_lock);
2497 		h = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
2498 		dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
2499 		if (ill->ill_flags & ILLF_XRESOLV)
2500 			addrlen = (3 * (dl->dl_dest_addr_length));
2501 		else
2502 			addrlen = (3 * (ill->ill_nd_lla_len));
2503 		if (addrlen <= 0) {
2504 			mutex_exit(&nce->nce_lock);
2505 			(void) mi_mpprintf(mp,
2506 			    "%8s %9s %5s %s/%d",
2507 			    ill->ill_name,
2508 			    "None",
2509 			    (uchar_t *)&flags_buf,
2510 			    inet_ntop(AF_INET6, (char *)&nce->nce_addr,
2511 			    (char *)local_buf, sizeof (local_buf)),
2512 			    ip_mask_to_plen_v6(&v6addr));
2513 		} else {
2514 			/*
2515 			 * Convert the hardware/lla address to ascii
2516 			 */
2517 			addr_buf = kmem_zalloc(addrlen, KM_NOSLEEP);
2518 			if (addr_buf == NULL) {
2519 				mutex_exit(&nce->nce_lock);
2520 				return;
2521 			}
2522 			(void) mac_colon_addr((uint8_t *)h,
2523 			    (ill->ill_flags & ILLF_XRESOLV) ?
2524 			    dl->dl_dest_addr_length : ill->ill_nd_lla_len,
2525 			    addr_buf, addrlen);
2526 			mutex_exit(&nce->nce_lock);
2527 			(void) mi_mpprintf(mp, "%8s %17s %5s %s/%d",
2528 			    ill->ill_name, addr_buf, (uchar_t *)&flags_buf,
2529 			    inet_ntop(AF_INET6, (char *)&nce->nce_addr,
2530 			    (char *)local_buf, sizeof (local_buf)),
2531 			    ip_mask_to_plen_v6(&v6addr));
2532 			kmem_free(addr_buf, addrlen);
2533 		}
2534 	} else {
2535 		(void) mi_mpprintf(mp,
2536 		    "%8s %9s %5s %s/%d",
2537 		    ill->ill_name,
2538 		    "None",
2539 		    (uchar_t *)&flags_buf,
2540 		    inet_ntop(AF_INET6, (char *)&nce->nce_addr,
2541 		    (char *)local_buf, sizeof (local_buf)),
2542 		    ip_mask_to_plen_v6(&v6addr));
2543 	}
2544 }
2545 
2546 mblk_t *
2547 nce_udreq_alloc(ill_t *ill)
2548 {
2549 	mblk_t	*template_mp = NULL;
2550 	dl_unitdata_req_t *dlur;
2551 	int	sap_length;
2552 
2553 	ASSERT(ill->ill_isv6);
2554 
2555 	sap_length = ill->ill_sap_length;
2556 	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
2557 	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
2558 	if (template_mp == NULL)
2559 		return (NULL);
2560 
2561 	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
2562 	dlur->dl_priority.dl_min = 0;
2563 	dlur->dl_priority.dl_max = 0;
2564 	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
2565 	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);
2566 
2567 	/* Copy in the SAP value. */
2568 	NCE_LL_SAP_COPY(ill, template_mp);
2569 
2570 	return (template_mp);
2571 }
2572 
/*
 * NDP retransmit timer.
 * This timer goes off when:
 * a. It is time to retransmit NS for resolver.
 * b. It is time to send reachability probes.
 *
 * arg is the nce_t the timer belongs to.  What happens depends on the
 * state of the entry when the timer fires:
 *
 *   ND_DELAY	   - move to ND_PROBE, send a unicast solicitation and
 *		     restart the timer with ill_reachable_retrans_time.
 *   ND_PROBE	   - one probe is accounted for (nce_pcnt is decremented).
 *		     While probes remain, another solicitation is sent.
 *		     When the count goes negative the entry is marked
 *		     ND_UNREACHABLE and deleted.  At exactly zero a normal
 *		     entry waits one more interval, while a permanent
 *		     (local) entry is declared ND_REACHABLE and, if the
 *		     interface is running, announced with an advertisement.
 *   ND_INCOMPLETE - flag queued multipathing probe packets as delayed and
 *		     retransmit the solicitation through nce_solicit(); if
 *		     that returns 0 and the entry is still not reachable,
 *		     resolution has failed and the entry is deleted.
 *   ND_REACHABLE  - send unsolicited advertisements that are still owed
 *		     and/or keep defending a permanent address.
 *   anything else - nothing to do.
 *
 * Every path releases ips_ill_g_lock and nce_lock and drops the reference
 * taken on entry.
 */
void
ndp_timer(void *arg)
{
	nce_t		*nce = arg;
	ill_t		*ill = nce->nce_ill;
	uint32_t	ms;
	char		addrbuf[INET6_ADDRSTRLEN];
	mblk_t		*mp;
	boolean_t	dropped = B_FALSE;
	ip_stack_t	*ipst = ill->ill_ipst;

	/*
	 * The timer has to be cancelled by ndp_delete before doing the final
	 * refrele. So the NCE is guaranteed to exist when the timer runs
	 * until it clears the timeout_id. Before clearing the timeout_id
	 * bump up the refcnt so that we can continue to use the nce
	 */
	ASSERT(nce != NULL);

	/*
	 * Grab the ill_g_lock now itself to avoid lock order problems.
	 * nce_solicit needs ill_g_lock to be able to traverse ills
	 */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	mutex_enter(&nce->nce_lock);
	NCE_REFHOLD_LOCKED(nce);
	nce->nce_timeout_id = 0;

	/*
	 * Check the reachability state first.
	 */
	switch (nce->nce_state) {
	case ND_DELAY:
		/* Only the ND_INCOMPLETE case needs ill_g_lock. */
		rw_exit(&ipst->ips_ill_g_lock);
		nce->nce_state = ND_PROBE;
		mutex_exit(&nce->nce_lock);
		(void) nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL, B_FALSE,
		    &ipv6_all_zeros, &nce->nce_addr, NDP_UNICAST);
		if (ip_debug > 3) {
			/* ip2dbg */
			pr_addr_dbg("ndp_timer: state for %s changed "
			    "to PROBE\n", AF_INET6, &nce->nce_addr);
		}
		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		NCE_REFRELE(nce);
		return;
	case ND_PROBE:
		/* must be retransmit timer */
		rw_exit(&ipst->ips_ill_g_lock);
		nce->nce_pcnt--;
		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
		    nce->nce_pcnt >= -1);
		if (nce->nce_pcnt > 0) {
			/*
			 * As per RFC2461, the nce gets deleted after
			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
			 * Note that the first unicast solicitation is sent
			 * during the DELAY state.
			 */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
			    addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/*
			 * A permanent entry is one of our own addresses, so
			 * it is probed (NDP_PROBE); any other neighbor gets
			 * a unicast solicitation.
			 */
			dropped = nce_xmit(ill, ND_NEIGHBOR_SOLICIT, NULL,
			    B_FALSE, &ipv6_all_zeros, &nce->nce_addr,
			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
			    NDP_UNICAST);
			if (dropped) {
				/* Nothing was sent; give the probe back. */
				mutex_enter(&nce->nce_lock);
				nce->nce_pcnt++;
				mutex_exit(&nce->nce_lock);
			}
			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
		} else if (nce->nce_pcnt < 0) {
			/* No hope, delete the nce */
			nce->nce_state = ND_UNREACHABLE;
			mutex_exit(&nce->nce_lock);
			if (ip_debug > 2) {
				/* ip1dbg */
				pr_addr_dbg("ndp_timer: Delete IRE for"
				    " dst %s\n", AF_INET6, &nce->nce_addr);
			}
			ndp_delete(nce);
		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
			/* Wait RetransTimer, before deleting the entry */
			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
			    nce->nce_pcnt, inet_ntop(AF_INET6,
			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
			mutex_exit(&nce->nce_lock);
			/* Wait one interval before killing */
			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
			ipif_t *ipif;

			/*
			 * We're done probing, and we can now declare this
			 * address to be usable.  Let IP know that it's ok to
			 * use.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
			ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill,
			    ALL_ZONES, NULL, NULL, NULL, NULL, ipst);
			if (ipif != NULL) {
				if (ipif->ipif_was_dup) {
					char ibuf[LIFNAMSIZ + 10];
					char sbuf[INET6_ADDRSTRLEN];

					ipif->ipif_was_dup = B_FALSE;
					(void) inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    sbuf, sizeof (sbuf));
					ipif_get_name(ipif, ibuf,
					    sizeof (ibuf));
					cmn_err(CE_NOTE, "recovered address "
					    "%s on %s", sbuf, ibuf);
				}
				/*
				 * Notify routing sockets and SCTP only on
				 * the transition to "address ready".
				 */
				if ((ipif->ipif_flags & IPIF_UP) &&
				    !ipif->ipif_addr_ready) {
					ip_rts_ifmsg(ipif);
					ip_rts_newaddrmsg(RTM_ADD, 0, ipif);
					sctp_update_ipif(ipif, SCTP_IPIF_UP);
				}
				ipif->ipif_addr_ready = 1;
				ipif_refrele(ipif);
			}
			/* Begin defending our new address */
			nce->nce_unsolicit_count = 0;
			dropped = nce_xmit(ill, ND_NEIGHBOR_ADVERT, ill,
			    B_FALSE, &nce->nce_addr, &ipv6_all_hosts_mcast,
			    nce_advert_flags(nce));
			if (dropped) {
				/* Retry the announcement on the next tick. */
				nce->nce_unsolicit_count = 1;
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			/*
			 * This is an address we're probing to be our own, but
			 * the ill is down.  Wait until it comes back before
			 * doing anything, but switch to reachable state so
			 * that the restart will work.
			 */
			nce->nce_state = ND_REACHABLE;
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		return;
	case ND_INCOMPLETE:
		/*
		 * Must be resolvers retransmit timer.
		 */
		for (mp = nce->nce_qd_mp; mp != NULL; mp = mp->b_next) {
			ip6i_t	*ip6i;
			ip6_t	*ip6h;
			mblk_t *data_mp;

			/*
			 * Walk the list of packets queued, and see if there
			 * are any multipathing probe packets. Such packets
			 * are always queued at the head. Since this is a
			 * retransmit timer firing, mark such packets as
			 * delayed in ND resolution. This info will be used
			 * in ip_wput_v6(). Multipathing probe packets will
			 * always have an ip6i_t. Once we hit a packet without
			 * it, we can break out of this loop.
			 */
			if (mp->b_datap->db_type == M_CTL)
				data_mp = mp->b_cont;
			else
				data_mp = mp;

			/* A next-header of IPPROTO_RAW marks an ip6i_t. */
			ip6h = (ip6_t *)data_mp->b_rptr;
			if (ip6h->ip6_nxt != IPPROTO_RAW)
				break;

			/*
			 * This message should have been pulled up already in
			 * ip_wput_v6. We can't do pullups here because the
			 * b_next/b_prev is non-NULL.
			 */
			ip6i = (ip6i_t *)ip6h;
			ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >=
			    sizeof (ip6i_t) + IPV6_HDR_LEN);

			/* Mark this packet as delayed due to ND resolution */
			if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED)
				ip6i->ip6i_flags |= IP6I_ND_DELAYED;
		}
		if (nce->nce_qd_mp != NULL) {
			/* ms is the delay before the next retransmission. */
			ms = nce_solicit(nce, NULL);
			rw_exit(&ipst->ips_ill_g_lock);
			if (ms == 0) {
				if (nce->nce_state != ND_REACHABLE) {
					mutex_exit(&nce->nce_lock);
					nce_resolv_failed(nce);
					ndp_delete(nce);
				} else {
					mutex_exit(&nce->nce_lock);
				}
			} else {
				mutex_exit(&nce->nce_lock);
				NDP_RESTART_TIMER(nce, (clock_t)ms);
			}
			NCE_REFRELE(nce);
			return;
		}
		/* Nothing is waiting on this entry; let the timer lapse. */
		mutex_exit(&nce->nce_lock);
		rw_exit(&ipst->ips_ill_g_lock);
		NCE_REFRELE(nce);
		break;
	case ND_REACHABLE :
		rw_exit(&ipst->ips_ill_g_lock);
		/*
		 * Advertise if unsolicited advertisements are still owed,
		 * or if this is a permanent address and periodic defense
		 * is enabled.
		 */
		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
		    nce->nce_unsolicit_count != 0) ||
		    ((nce->nce_flags & NCE_F_PERMANENT) &&
		    ipst->ips_ip_ndp_defense_interval != 0)) {
			if (nce->nce_unsolicit_count > 0)
				nce->nce_unsolicit_count--;
			mutex_exit(&nce->nce_lock);
			dropped = nce_xmit(ill,
			    ND_NEIGHBOR_ADVERT,
			    ill,	/* ill to be used for hw addr */
			    B_FALSE,	/* use ill_phys_addr */
			    &nce->nce_addr,
			    &ipv6_all_hosts_mcast,
			    nce_advert_flags(nce));
			if (dropped) {
				/* Not sent; it is still owed. */
				mutex_enter(&nce->nce_lock);
				nce->nce_unsolicit_count++;
				mutex_exit(&nce->nce_lock);
			}
			if (nce->nce_unsolicit_count != 0) {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_unsolicit_interval);
			} else {
				NDP_RESTART_TIMER(nce,
				    ipst->ips_ip_ndp_defense_interval);
			}
		} else {
			mutex_exit(&nce->nce_lock);
		}
		NCE_REFRELE(nce);
		break;
	default:
		rw_exit(&ipst->ips_ill_g_lock);
		mutex_exit(&nce->nce_lock);
		NCE_REFRELE(nce);
		break;
	}
}
2833 
2834 /*
2835  * Set a link layer address from the ll_addr passed in.
2836  * Copy SAP from ill.
2837  */
2838 static void
2839 nce_set_ll(nce_t *nce, uchar_t *ll_addr)
2840 {
2841 	ill_t	*ill = nce->nce_ill;
2842 	uchar_t	*woffset;
2843 
2844 	ASSERT(ll_addr != NULL);
2845 	/* Always called before fast_path_probe */
2846 	ASSERT(nce->nce_fp_mp == NULL);
2847 	if (ill->ill_sap_length != 0) {
2848 		/*
2849 		 * Copy the SAP type specified in the
2850 		 * request into the xmit template.
2851 		 */
2852 		NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
2853 	}
2854 	if (ill->ill_phys_addr_length > 0) {
2855 		/*
2856 		 * The bcopy() below used to be called for the physical address
2857 		 * length rather than the link layer address length. For
2858 		 * ethernet and many other media, the phys_addr and lla are
2859 		 * identical.
2860 		 * However, with xresolv interfaces being introduced, the
2861 		 * phys_addr and lla are no longer the same, and the physical
2862 		 * address may not have any useful meaning, so we use the lla
2863 		 * for IPv6 address resolution and destination addressing.
2864 		 *
2865 		 * For PPP or other interfaces with a zero length
2866 		 * physical address, don't do anything here.
2867 		 * The bcopy() with a zero phys_addr length was previously
2868 		 * a no-op for interfaces with a zero-length physical address.
2869 		 * Using the lla for them would change the way they operate.
2870 		 * Doing nothing in such cases preserves expected behavior.
2871 		 */
2872 		woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
2873 		bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
2874 	}
2875 }
2876 
2877 static boolean_t
2878 nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
2879 {
2880 	ill_t	*ill = nce->nce_ill;
2881 	uchar_t	*ll_offset;
2882 
2883 	ASSERT(nce->nce_res_mp != NULL);
2884 	if (ll_addr == NULL)
2885 		return (B_FALSE);
2886 	ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
2887 	if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
2888 		return (B_TRUE);
2889 	return (B_FALSE);
2890 }
2891 
2892 /*
2893  * Updates the link layer address or the reachability state of
2894  * a cache entry.  Reset probe counter if needed.
2895  */
2896 static void
2897 nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
2898 {
2899 	ill_t	*ill = nce->nce_ill;
2900 	boolean_t need_stop_timer = B_FALSE;
2901 	boolean_t need_fastpath_update = B_FALSE;
2902 
2903 	ASSERT(MUTEX_HELD(&nce->nce_lock));
2904 	ASSERT(nce->nce_ipversion == IPV6_VERSION);
2905 	/*
2906 	 * If this interface does not do NUD, there is no point
2907 	 * in allowing an update to the cache entry.  Although
2908 	 * we will respond to NS.
2909 	 * The only time we accept an update for a resolver when
2910 	 * NUD is turned off is when it has just been created.
2911 	 * Non-Resolvers will always be created as REACHABLE.
2912 	 */
2913 	if (new_state != ND_UNCHANGED) {
2914 		if ((nce->nce_flags & NCE_F_NONUD) &&
2915 		    (nce->nce_state != ND_INCOMPLETE))
2916 			return;
2917 		ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
2918 		ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
2919 		need_stop_timer = B_TRUE;
2920 		if (new_state == ND_REACHABLE)
2921 			nce->nce_last = TICK_TO_MSEC(lbolt64);
2922 		else {
2923 			/* We force NUD in this case */
2924 			nce->nce_last = 0;
2925 		}
2926 		nce->nce_state = new_state;
2927 		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
2928 	}
2929 	/*
2930 	 * In case of fast path we need to free the the fastpath
2931 	 * M_DATA and do another probe.  Otherwise we can just
2932 	 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
2933 	 * whatever packets that happens to be transmitting at the time.
2934 	 */
2935 	if (new_ll_addr != NULL) {
2936 		ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
2937 		    ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
2938 		bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
2939 		    NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
2940 		if (nce->nce_fp_mp != NULL) {
2941 			freemsg(nce->nce_fp_mp);
2942 			nce->nce_fp_mp = NULL;
2943 		}
2944 		need_fastpath_update = B_TRUE;
2945 	}
2946 	mutex_exit(&nce->nce_lock);
2947 	if (need_stop_timer) {
2948 		(void) untimeout(nce->nce_timeout_id);
2949 		nce->nce_timeout_id = 0;
2950 	}
2951 	if (need_fastpath_update)
2952 		nce_fastpath(nce);
2953 	mutex_enter(&nce->nce_lock);
2954 }
2955 
2956 void
2957 nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
2958 {
2959 	uint_t	count = 0;
2960 	mblk_t  **mpp;
2961 
2962 	ASSERT(MUTEX_HELD(&nce->nce_lock));
2963 
2964 	for (mpp = &nce->nce_qd_mp; *mpp != NULL;
2965 	    mpp = &(*mpp)->b_next) {
2966 		if (++count >
2967 		    nce->nce_ill->ill_max_buf) {
2968 			mblk_t *tmp = nce->nce_qd_mp->b_next;
2969 
2970 			nce->nce_qd_mp->b_next = NULL;
2971 			nce->nce_qd_mp->b_prev = NULL;
2972 			freemsg(nce->nce_qd_mp);
2973 			nce->nce_qd_mp = tmp;
2974 		}
2975 	}
2976 	/* put this on the list */
2977 	if (head_insert) {
2978 		mp->b_next = nce->nce_qd_mp;
2979 		nce->nce_qd_mp = mp;
2980 	} else {
2981 		*mpp = mp;
2982 	}
2983 }
2984 
2985 static void
2986 nce_queue_mp(nce_t *nce, mblk_t *mp)
2987 {
2988 	boolean_t head_insert = B_FALSE;
2989 	ip6_t	*ip6h;
2990 	ip6i_t	*ip6i;
2991 	mblk_t *data_mp;
2992 
2993 	ASSERT(MUTEX_HELD(&nce->nce_lock));
2994 
2995 	if (mp->b_datap->db_type == M_CTL)
2996 		data_mp = mp->b_cont;
2997 	else
2998 		data_mp = mp;
2999 	ip6h = (ip6_t *)data_mp->b_rptr;
3000 	if (ip6h->ip6_nxt == IPPROTO_RAW) {
3001 		/*
3002 		 * This message should have been pulled up already in
3003 		 * ip_wput_v6. We can't do pullups here because the message
3004 		 * could be from the nce_qd_mp which could have b_next/b_prev
3005 		 * non-NULL.
3006 		 */
3007 		ip6i = (ip6i_t *)ip6h;
3008 		ASSERT((data_mp->b_wptr - (uchar_t *)ip6i) >=
3009 		    sizeof (ip6i_t) + IPV6_HDR_LEN);
3010 		/*
3011 		 * Multipathing probe packets have IP6I_DROP_IFDELAYED set.
3012 		 * This has 2 aspects mentioned below.
3013 		 * 1. Perform head insertion in the nce_qd_mp for these packets.
3014 		 * This ensures that next retransmit of ND solicitation
3015 		 * will use the interface specified by the probe packet,
3016 		 * for both NS and NA. This corresponds to the src address
3017 		 * in the IPv6 packet. If we insert at tail, we will be
3018 		 * depending on the packet at the head for successful
3019 		 * ND resolution. This is not reliable, because the interface
3020 		 * on which the NA arrives could be different from the interface
3021 		 * on which the NS was sent, and if the receiving interface is
3022 		 * failed, it will appear that the sending interface is also
3023 		 * failed, causing in.mpathd to misdiagnose this as link
3024 		 * failure.
3025 		 * 2. Drop the original packet, if the ND resolution did not
3026 		 * succeed in the first attempt. However we will create the
3027 		 * nce and the ire, as soon as the ND resolution succeeds.
3028 		 * We don't gain anything by queueing multiple probe packets
3029 		 * and sending them back-to-back once resolution succeeds.
3030 		 * It is sufficient to send just 1 packet after ND resolution
3031 		 * succeeds. Since mpathd is sending down probe packets at a
3032 		 * constant rate, we don't need to send the queued packet. We
3033 		 * need to queue it only for NDP resolution. The benefit of
3034 		 * dropping the probe packets that were delayed in ND
3035 		 * resolution, is that in.mpathd will not see inflated
3036 		 * RTT. If the ND resolution does not succeed within
3037 		 * in.mpathd's failure detection time, mpathd may detect
3038 		 * a failure, and it does not matter whether the packet
3039 		 * was queued or dropped.
3040 		 */
3041 		if (ip6i->ip6i_flags & IP6I_DROP_IFDELAYED)
3042 			head_insert = B_TRUE;
3043 	}
3044 
3045 	nce_queue_mp_common(nce, mp, head_insert);
3046 }
3047 
3048 /*
3049  * Called when address resolution failed due to a timeout.
3050  * Send an ICMP unreachable in response to all queued packets.
3051  */
3052 void
3053 nce_resolv_failed(nce_t *nce)
3054 {
3055 	mblk_t	*mp, *nxt_mp, *first_mp;
3056 	char	buf[INET6_ADDRSTRLEN];
3057 	ip6_t *ip6h;
3058 	zoneid_t zoneid = GLOBAL_ZONEID;
3059 	ip_stack_t	*ipst = nce->nce_ill->ill_ipst;
3060 
3061 	ip1dbg(("nce_resolv_failed: dst %s\n",
3062 	    inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf))));
3063 	mutex_enter(&nce->nce_lock);
3064 	mp = nce->nce_qd_mp;
3065 	nce->nce_qd_mp = NULL;
3066 	mutex_exit(&nce->nce_lock);
3067 	while (mp != NULL) {
3068 		nxt_mp = mp->b_next;
3069 		mp->b_next = NULL;
3070 		mp->b_prev = NULL;
3071 
3072 		first_mp = mp;
3073 		if (mp->b_datap->db_type == M_CTL) {
3074 			ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
3075 			ASSERT(io->ipsec_out_type == IPSEC_OUT);
3076 			zoneid = io->ipsec_out_zoneid;
3077 			ASSERT(zoneid != ALL_ZONES);
3078 			mp = mp->b_cont;
3079 		}
3080 
3081 		ip6h = (ip6_t *)mp->b_rptr;
3082 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
3083 			ip6i_t *ip6i;
3084 			/*
3085 			 * This message should have been pulled up already
3086 			 * in ip_wput_v6. ip_hdr_complete_v6 assumes that
3087 			 * the header is pulled up.
3088 			 */
3089 			ip6i = (ip6i_t *)ip6h;
3090 			ASSERT((mp->b_wptr - (uchar_t *)ip6i) >=
3091 			    sizeof (ip6i_t) + IPV6_HDR_LEN);
3092 			mp->b_rptr += sizeof (ip6i_t);
3093 		}
3094 		/*
3095 		 * Ignore failure since icmp_unreachable_v6 will silently
3096 		 * drop packets with an unspecified source address.
3097 		 */
3098 		(void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst);
3099 		icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp,
3100 		    ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst);
3101 		mp = nxt_mp;
3102 	}
3103 }
3104 
3105 /*
3106  * Called by SIOCSNDP* ioctl to add/change an nce entry
3107  * and the corresponding attributes.
3108  * Disallow states other than ND_REACHABLE or ND_STALE.
3109  */
3110 int
3111 ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
3112 {
3113 	sin6_t		*sin6;
3114 	in6_addr_t	*addr;
3115 	nce_t		*nce;
3116 	int		err;
3117 	uint16_t	new_flags = 0;
3118 	uint16_t	old_flags = 0;
3119 	int		inflags = lnr->lnr_flags;
3120 	ip_stack_t	*ipst = ill->ill_ipst;
3121 
3122 	ASSERT(ill->ill_isv6);
3123 	if ((lnr->lnr_state_create != ND_REACHABLE) &&
3124 	    (lnr->lnr_state_create != ND_STALE))
3125 		return (EINVAL);
3126 
3127 	sin6 = (sin6_t *)&lnr->lnr_addr;
3128 	addr = &sin6->sin6_addr;
3129 
3130 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
3131 	/* We know it can not be mapping so just look in the hash table */
3132 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
3133 	nce = nce_lookup_addr(ill, addr, nce);
3134 	if (nce != NULL)
3135 		new_flags = nce->nce_flags;
3136 
3137 	switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) {
3138 	case NDF_ISROUTER_ON:
3139 		new_flags |= NCE_F_ISROUTER;
3140 		break;
3141 	case NDF_ISROUTER_OFF:
3142 		new_flags &= ~NCE_F_ISROUTER;
3143 		break;
3144 	case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
3145 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
3146 		if (nce != NULL)
3147 			NCE_REFRELE(nce);
3148 		return (EINVAL);
3149 	}
3150 
3151 	switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) {
3152 	case NDF_ANYCAST_ON:
3153 		new_flags |= NCE_F_ANYCAST;
3154 		break;
3155 	case NDF_ANYCAST_OFF:
3156 		new_flags &= ~NCE_F_ANYCAST;
3157 		break;
3158 	case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
3159 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
3160 		if (nce != NULL)
3161 			NCE_REFRELE(nce);
3162 		return (EINVAL);
3163 	}
3164 
3165 	if (nce == NULL) {
3166 		err = ndp_add_v6(ill,
3167 		    (uchar_t *)lnr->lnr_hdw_addr,
3168 		    addr,
3169 		    &ipv6_all_ones,
3170 		    &ipv6_all_zeros,
3171 		    0,
3172 		    new_flags,
3173 		    lnr->lnr_state_create,
3174 		    &nce);
3175 		if (err != 0) {
3176 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
3177 			ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err));
3178 			return (err);
3179 		}
3180 	}
3181 	old_flags = nce->nce_flags;
3182 	if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) {
3183 		/*
3184 		 * Router turned to host, delete all ires.
3185 		 * XXX Just delete the entry, but we need to add too.
3186 		 */
3187 		nce->nce_flags &= ~NCE_F_ISROUTER;
3188 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
3189 		ndp_delete(nce);
3190 		NCE_REFRELE(nce);
3191 		return (0);
3192 	}
3193 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
3194 
3195 	mutex_enter(&nce->nce_lock);
3196 	nce->nce_flags = new_flags;
3197 	mutex_exit(&nce->nce_lock);
3198 	/*
3199 	 * Note that we ignore the state at this point, which
3200 	 * should be either STALE or REACHABLE.  Instead we let
3201 	 * the link layer address passed in to determine the state
3202 	 * much like incoming packets.
3203 	 */
3204 	ndp_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE);
3205 	NCE_REFRELE(nce);
3206 	return (0);
3207 }
3208 
3209 /*
3210  * If the device driver supports it, we make nce_fp_mp to have
3211  * an M_DATA prepend.  Otherwise nce_fp_mp will be null.
3212  * The caller ensures there is hold on nce for this function.
3213  * Note that since ill_fastpath_probe() copies the mblk there is
3214  * no need for the hold beyond this function.
3215  */
3216 void
3217 nce_fastpath(nce_t *nce)
3218 {
3219 	ill_t	*ill = nce->nce_ill;
3220 	int res;
3221 
3222 	ASSERT(ill != NULL);
3223 	ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE);
3224 
3225 	if (nce->nce_fp_mp != NULL) {
3226 		/* Already contains fastpath info */
3227 		return;
3228 	}
3229 	if (nce->nce_res_mp != NULL) {
3230 		nce_fastpath_list_add(nce);
3231 		res = ill_fastpath_probe(ill, nce->nce_res_mp);
3232 		/*
3233 		 * EAGAIN is an indication of a transient error
3234 		 * i.e. allocation failure etc. leave the nce in the list it
3235 		 * will be updated when another probe happens for another ire
3236 		 * if not it will be taken out of the list when the ire is
3237 		 * deleted.
3238 		 */
3239 
3240 		if (res != 0 && res != EAGAIN)
3241 			nce_fastpath_list_delete(nce);
3242 	}
3243 }
3244 
3245 /*
3246  * Drain the list of nce's waiting for fastpath response.
3247  */
3248 void
3249 nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void  *),
3250     void *arg)
3251 {
3252 
3253 	nce_t *next_nce;
3254 	nce_t *current_nce;
3255 	nce_t *first_nce;
3256 	nce_t *prev_nce = NULL;
3257 
3258 	mutex_enter(&ill->ill_lock);
3259 	first_nce = current_nce = (nce_t *)ill->ill_fastpath_list;
3260 	while (current_nce != (nce_t *)&ill->ill_fastpath_list) {
3261 		next_nce = current_nce->nce_fastpath;
3262 		/*
3263 		 * Take it off the list if we're flushing, or if the callback
3264 		 * routine tells us to do so.  Otherwise, leave the nce in the
3265 		 * fastpath list to handle any pending response from the lower
3266 		 * layer.  We can't drain the list when the callback routine
3267 		 * comparison failed, because the response is asynchronous in
3268 		 * nature, and may not arrive in the same order as the list
3269 		 * insertion.
3270 		 */
3271 		if (func == NULL || func(current_nce, arg)) {
3272 			current_nce->nce_fastpath = NULL;
3273 			if (current_nce == first_nce)
3274 				ill->ill_fastpath_list = first_nce = next_nce;
3275 			else
3276 				prev_nce->nce_fastpath = next_nce;
3277 		} else {
3278 			/* previous element that is still in the list */
3279 			prev_nce = current_nce;
3280 		}
3281 		current_nce = next_nce;
3282 	}
3283 	mutex_exit(&ill->ill_lock);
3284 }
3285 
3286 /*
3287  * Add nce to the nce fastpath list.
3288  */
3289 void
3290 nce_fastpath_list_add(nce_t *nce)
3291 {
3292 	ill_t *ill;
3293 
3294 	ill = nce->nce_ill;
3295 
3296 	mutex_enter(&ill->ill_lock);
3297 	mutex_enter(&nce->nce_lock);
3298 
3299 	/*
3300 	 * if nce has not been deleted and
3301 	 * is not already in the list add it.
3302 	 */
3303 	if (!(nce->nce_flags & NCE_F_CONDEMNED) &&
3304 	    (nce->nce_fastpath == NULL)) {
3305 		nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list;
3306 		ill->ill_fastpath_list = nce;
3307 	}
3308 
3309 	mutex_exit(&nce->nce_lock);
3310 	mutex_exit(&ill->ill_lock);
3311 }
3312 
3313 /*
3314  * remove nce from the nce fastpath list.
3315  */
3316 void
3317 nce_fastpath_list_delete(nce_t *nce)
3318 {
3319 	nce_t *nce_ptr;
3320 
3321 	ill_t *ill;
3322 
3323 	ill = nce->nce_ill;
3324 	ASSERT(ill != NULL);
3325 
3326 	mutex_enter(&ill->ill_lock);
3327 	if (nce->nce_fastpath == NULL)
3328 		goto done;
3329 
3330 	ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list);
3331 
3332 	if (ill->ill_fastpath_list == nce) {
3333 		ill->ill_fastpath_list = nce->nce_fastpath;
3334 	} else {
3335 		nce_ptr = ill->ill_fastpath_list;
3336 		while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) {
3337 			if (nce_ptr->nce_fastpath == nce) {
3338 				nce_ptr->nce_fastpath = nce->nce_fastpath;
3339 				break;
3340 			}
3341 			nce_ptr = nce_ptr->nce_fastpath;
3342 		}
3343 	}
3344 
3345 	nce->nce_fastpath = NULL;
3346 done:
3347 	mutex_exit(&ill->ill_lock);
3348 }
3349 
3350 /*
3351  * Update all NCE's that are not in fastpath mode and
3352  * have an nce_fp_mp that matches mp. mp->b_cont contains
3353  * the fastpath header.
3354  *
3355  * Returns TRUE if entry should be dequeued, or FALSE otherwise.
3356  */
3357 boolean_t
3358 ndp_fastpath_update(nce_t *nce, void *arg)
3359 {
3360 	mblk_t 	*mp, *fp_mp;
3361 	uchar_t	*mp_rptr, *ud_mp_rptr;
3362 	mblk_t	*ud_mp = nce->nce_res_mp;
3363 	ptrdiff_t	cmplen;
3364 
3365 	if (nce->nce_flags & NCE_F_MAPPING)
3366 		return (B_TRUE);
3367 	if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL))
3368 		return (B_TRUE);
3369 
3370 	ip2dbg(("ndp_fastpath_update: trying\n"));
3371 	mp = (mblk_t *)arg;
3372 	mp_rptr = mp->b_rptr;
3373 	cmplen = mp->b_wptr - mp_rptr;
3374 	ASSERT(cmplen >= 0);
3375 	ud_mp_rptr = ud_mp->b_rptr;
3376 	/*
3377 	 * The nce is locked here to prevent any other threads
3378 	 * from accessing and changing nce_res_mp when the IPv6 address
3379 	 * becomes resolved to an lla while we're in the middle
3380 	 * of looking at and comparing the hardware address (lla).
3381 	 * It is also locked to prevent multiple threads in nce_fastpath_update
3382 	 * from examining nce_res_mp atthe same time.
3383 	 */
3384 	mutex_enter(&nce->nce_lock);
3385 	if (ud_mp->b_wptr - ud_mp_rptr != cmplen ||
3386 	    bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) {
3387 		mutex_exit(&nce->nce_lock);
3388 		/*
3389 		 * Don't take the ire off the fastpath list yet,
3390 		 * since the response may come later.
3391 		 */
3392 		return (B_FALSE);
3393 	}
3394 	/* Matched - install mp as the fastpath mp */
3395 	ip1dbg(("ndp_fastpath_update: match\n"));
3396 	fp_mp = dupb(mp->b_cont);
3397 	if (fp_mp != NULL) {
3398 		nce->nce_fp_mp = fp_mp;
3399 	}
3400 	mutex_exit(&nce->nce_lock);
3401 	return (B_TRUE);
3402 }
3403 
3404 /*
3405  * This function handles the DL_NOTE_FASTPATH_FLUSH notification from
3406  * driver.  Note that it assumes IP is exclusive...
3407  */
3408 /* ARGSUSED */
3409 void
3410 ndp_fastpath_flush(nce_t *nce, char *arg)
3411 {
3412 	if (nce->nce_flags & NCE_F_MAPPING)
3413 		return;
3414 	/* No fastpath info? */
3415 	if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL)
3416 		return;
3417 
3418 	if (nce->nce_ipversion == IPV4_VERSION &&
3419 	    nce->nce_flags & NCE_F_BCAST) {
3420 		/*
3421 		 * IPv4 BROADCAST entries:
3422 		 * We can't delete the nce since it is difficult to
3423 		 * recreate these without going through the
3424 		 * ipif down/up dance.
3425 		 *
3426 		 * All access to nce->nce_fp_mp in the case of these
3427 		 * is protected by nce_lock.
3428 		 */
3429 		mutex_enter(&nce->nce_lock);
3430 		if (nce->nce_fp_mp != NULL) {
3431 			freeb(nce->nce_fp_mp);
3432 			nce->nce_fp_mp = NULL;
3433 			mutex_exit(&nce->nce_lock);
3434 			nce_fastpath(nce);
3435 		} else {
3436 			mutex_exit(&nce->nce_lock);
3437 		}
3438 	} else {
3439 		/* Just delete the NCE... */
3440 		ndp_delete(nce);
3441 	}
3442 }
3443 
3444 /*
3445  * Return a pointer to a given option in the packet.
3446  * Assumes that option part of the packet have already been validated.
3447  */
3448 nd_opt_hdr_t *
3449 ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type)
3450 {
3451 	while (optlen > 0) {
3452 		if (opt->nd_opt_type == opt_type)
3453 			return (opt);
3454 		optlen -= 8 * opt->nd_opt_len;
3455 		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
3456 	}
3457 	return (NULL);
3458 }
3459 
3460 /*
3461  * Verify all option lengths present are > 0, also check to see
3462  * if the option lengths and packet length are consistent.
3463  */
3464 boolean_t
3465 ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen)
3466 {
3467 	ASSERT(opt != NULL);
3468 	while (optlen > 0) {
3469 		if (opt->nd_opt_len == 0)
3470 			return (B_FALSE);
3471 		optlen -= 8 * opt->nd_opt_len;
3472 		if (optlen < 0)
3473 			return (B_FALSE);
3474 		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
3475 	}
3476 	return (B_TRUE);
3477 }
3478 
3479 /*
3480  * ndp_walk function.
3481  * Free a fraction of the NCE cache entries.
3482  * A fraction of zero means to not free any in that category.
3483  */
3484 void
3485 ndp_cache_reclaim(nce_t *nce, char *arg)
3486 {
3487 	nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg;
3488 	uint_t	rand;
3489 
3490 	if (nce->nce_flags & NCE_F_PERMANENT)
3491 		return;
3492 
3493 	rand = (uint_t)lbolt +
3494 	    NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE);
3495 	if (ncr->ncr_host != 0 &&
3496 	    (rand/ncr->ncr_host)*ncr->ncr_host == rand) {
3497 		ndp_delete(nce);
3498 		return;
3499 	}
3500 }
3501 
3502 /*
3503  * ndp_walk function.
3504  * Count the number of NCEs that can be deleted.
3505  * These would be hosts but not routers.
3506  */
3507 void
3508 ndp_cache_count(nce_t *nce, char *arg)
3509 {
3510 	ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg;
3511 
3512 	if (nce->nce_flags & NCE_F_PERMANENT)
3513 		return;
3514 
3515 	ncc->ncc_total++;
3516 	if (!(nce->nce_flags & NCE_F_ISROUTER))
3517 		ncc->ncc_host++;
3518 }
3519 
3520 #ifdef NCE_DEBUG
3521 th_trace_t *
3522 th_trace_nce_lookup(nce_t *nce)
3523 {
3524 	int bucket_id;
3525 	th_trace_t *th_trace;
3526 
3527 	ASSERT(MUTEX_HELD(&nce->nce_lock));
3528 
3529 	bucket_id = IP_TR_HASH(curthread);
3530 	ASSERT(bucket_id < IP_TR_HASH_MAX);
3531 
3532 	for (th_trace = nce->nce_trace[bucket_id]; th_trace != NULL;
3533 	    th_trace = th_trace->th_next) {
3534 		if (th_trace->th_id == curthread)
3535 			return (th_trace);
3536 	}
3537 	return (NULL);
3538 }
3539 
3540 void
3541 nce_trace_ref(nce_t *nce)
3542 {
3543 	int bucket_id;
3544 	th_trace_t *th_trace;
3545 
3546 	/*
3547 	 * Attempt to locate the trace buffer for the curthread.
3548 	 * If it does not exist, then allocate a new trace buffer
3549 	 * and link it in list of trace bufs for this ipif, at the head
3550 	 */
3551 	ASSERT(MUTEX_HELD(&nce->nce_lock));
3552 
3553 	if (nce->nce_trace_disable == B_TRUE)
3554 		return;
3555 
3556 	th_trace = th_trace_nce_lookup(nce);
3557 	if (th_trace == NULL) {
3558 		bucket_id = IP_TR_HASH(curthread);
3559 		th_trace = (th_trace_t *)kmem_zalloc(sizeof (th_trace_t),
3560 		    KM_NOSLEEP);
3561 		if (th_trace == NULL) {
3562 			nce->nce_trace_disable = B_TRUE;
3563 			nce_trace_inactive(nce);
3564 			return;
3565 		}
3566 		th_trace->th_id = curthread;
3567 		th_trace->th_next = nce->nce_trace[bucket_id];
3568 		th_trace->th_prev = &nce->nce_trace[bucket_id];
3569 		if (th_trace->th_next != NULL)
3570 			th_trace->th_next->th_prev = &th_trace->th_next;
3571 		nce->nce_trace[bucket_id] = th_trace;
3572 	}
3573 	ASSERT(th_trace->th_refcnt < TR_BUF_MAX - 1);
3574 	th_trace->th_refcnt++;
3575 	th_trace_rrecord(th_trace);
3576 }
3577 
3578 void
3579 nce_untrace_ref(nce_t *nce)
3580 {
3581 	th_trace_t *th_trace;
3582 
3583 	ASSERT(MUTEX_HELD(&nce->nce_lock));
3584 
3585 	if (nce->nce_trace_disable == B_TRUE)
3586 		return;
3587 
3588 	th_trace = th_trace_nce_lookup(nce);
3589 	ASSERT(th_trace != NULL && th_trace->th_refcnt > 0);
3590 
3591 	th_trace_rrecord(th_trace);
3592 	th_trace->th_refcnt--;
3593 }
3594 
3595 void
3596 nce_trace_inactive(nce_t *nce)
3597 {
3598 	th_trace_t *th_trace;
3599 	int i;
3600 
3601 	ASSERT(MUTEX_HELD(&nce->nce_lock));
3602 
3603 	for (i = 0; i < IP_TR_HASH_MAX; i++) {
3604 		while (nce->nce_trace[i] != NULL) {
3605 			th_trace = nce->nce_trace[i];
3606 
3607 			/* unlink th_trace and free it */
3608 			nce->nce_trace[i] = th_trace->th_next;
3609 			if (th_trace->th_next != NULL)
3610 				th_trace->th_next->th_prev =
3611 				    &nce->nce_trace[i];
3612 
3613 			th_trace->th_next = NULL;
3614 			th_trace->th_prev = NULL;
3615 			kmem_free(th_trace, sizeof (th_trace_t));
3616 		}
3617 	}
3618 
3619 }
3620 
3621 /* ARGSUSED */
3622 int
3623 nce_thread_exit(nce_t *nce, caddr_t arg)
3624 {
3625 	th_trace_t	*th_trace;
3626 	uint64_t	now;
3627 
3628 	mutex_enter(&nce->nce_lock);
3629 	if (nce->nce_state == ND_INITIAL) {
3630 
3631 		now = TICK_TO_MSEC(lbolt64);
3632 		if (now - nce->nce_init_time > NCE_STUCK_TIMEOUT) {
3633 			DTRACE_PROBE1(nce__stuck, nce_t *, nce);
3634 		}
3635 	}
3636 	th_trace = th_trace_nce_lookup(nce);
3637 
3638 	if (th_trace == NULL) {
3639 		mutex_exit(&nce->nce_lock);
3640 		return (0);
3641 	}
3642 
3643 	ASSERT(th_trace->th_refcnt == 0);
3644 
3645 	/* unlink th_trace and free it */
3646 	*th_trace->th_prev = th_trace->th_next;
3647 	if (th_trace->th_next != NULL)
3648 		th_trace->th_next->th_prev = th_trace->th_prev;
3649 	th_trace->th_next = NULL;
3650 	th_trace->th_prev = NULL;
3651 	kmem_free(th_trace, sizeof (th_trace_t));
3652 	mutex_exit(&nce->nce_lock);
3653 	return (0);
3654 }
3655 #endif
3656 
3657 /*
3658  * Called when address resolution fails due to a timeout.
3659  * Send an ICMP unreachable in response to all queued packets.
3660  */
3661 void
3662 arp_resolv_failed(nce_t *nce)
3663 {
3664 	mblk_t	*mp, *nxt_mp, *first_mp;
3665 	char	buf[INET6_ADDRSTRLEN];
3666 	zoneid_t zoneid = GLOBAL_ZONEID;
3667 	struct in_addr ipv4addr;
3668 	ip_stack_t *ipst = nce->nce_ill->ill_ipst;
3669 
3670 	IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr);
3671 	ip3dbg(("arp_resolv_failed: dst %s\n",
3672 	    inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf))));
3673 	mutex_enter(&nce->nce_lock);
3674 	mp = nce->nce_qd_mp;
3675 	nce->nce_qd_mp = NULL;
3676 	mutex_exit(&nce->nce_lock);
3677 
3678 	while (mp != NULL) {
3679 		nxt_mp = mp->b_next;
3680 		mp->b_next = NULL;
3681 		mp->b_prev = NULL;
3682 
3683 		first_mp = mp;
3684 		/*
3685 		 * Send icmp unreachable messages
3686 		 * to the hosts.
3687 		 */
3688 		(void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst);
3689 		ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n"));
3690 		icmp_unreachable(nce->nce_ill->ill_wq, first_mp,
3691 		    ICMP_HOST_UNREACHABLE, zoneid, ipst);
3692 		mp = nxt_mp;
3693 	}
3694 }
3695 
3696 int
3697 ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
3698     nce_t **newnce, nce_t *src_nce)
3699 {
3700 	int	err;
3701 	nce_t	*nce;
3702 	in6_addr_t addr6;
3703 	ip_stack_t *ipst = ill->ill_ipst;
3704 
3705 	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
3706 	nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
3707 	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
3708 	nce = nce_lookup_addr(ill, &addr6, nce);
3709 	if (nce == NULL) {
3710 		err = ndp_add_v4(ill, addr, flags, newnce, src_nce);
3711 	} else {
3712 		*newnce = nce;
3713 		err = EEXIST;
3714 	}
3715 	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
3716 	return (err);
3717 }
3718 
/*
 * NDP Cache Entry creation routine for IPv4.
 * Mapped entries are handled in arp.
 * This routine must always be called with ndp4->ndp_g_lock held.
 * Prior to return, nce_refcnt is incremented.
 *
 * The link-layer template (nce_res_mp) and initial state come from one of:
 *   - src_nce, when supplied: a copy of its nce_res_mp and its state;
 *   - the ill's broadcast template, for NCE_F_BCAST (ND_REACHABLE);
 *   - a generated or copied resolver template, for IRE_IF_NORESOLVER
 *     ills (ND_REACHABLE);
 *   - otherwise no template at all, with the entry left in ND_INITIAL.
 *
 * Returns 0 on success with *newnce set to the new, held entry.
 * Returns EINVAL if the ill has no ill_resolver_mp, if src_nce is
 * condemned or a hardware-change flush is in progress, or if the ill is
 * condemned.  Returns ENOMEM if the nce mblk or the template cannot be
 * allocated.
 */
static int
ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
    nce_t **newnce, nce_t *src_nce)
{
	static	nce_t		nce_nil;
	nce_t		*nce;
	mblk_t		*mp;
	mblk_t		*template = NULL;
	nce_t		**ncep;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint16_t	state = ND_INITIAL;
	int		err;

	ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock));
	ASSERT(!ill->ill_isv6);
	ASSERT((flags & NCE_F_MAPPING) == 0);

	if (ill->ill_resolver_mp == NULL)
		return (EINVAL);
	/*
	 * Allocate the mblk to hold the nce.
	 */
	mp = allocb(sizeof (nce_t), BPRI_MED);
	if (mp == NULL)
		return (ENOMEM);

	nce = (nce_t *)mp->b_rptr;
	mp->b_wptr = (uchar_t *)&nce[1];
	/* Start from an all-zero entry; nce_nil is a zeroed static. */
	*nce = nce_nil;
	nce->nce_ill = ill;
	nce->nce_ipversion = IPV4_VERSION;
	nce->nce_flags = flags;
	nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
	nce->nce_rcnt = ill->ill_xmit_count;
	/* IPv4 entries are stored under their v4-mapped IPv6 address. */
	IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr);
	nce->nce_mask = ipv6_all_ones;
	nce->nce_extract_mask = ipv6_all_zeros;
	nce->nce_ll_extract_start = 0;
	nce->nce_qd_mp = NULL;
	nce->nce_mp = mp;
	/* This one is for nce getting created */
	nce->nce_refcnt = 1;
	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
	/* Hash bucket the new entry will be linked into. */
	ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));

#ifdef NCE_DEBUG
	bzero(nce->nce_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
#endif
	if (src_nce != NULL) {
		/*
		 * src_nce has been provided by the caller. The only
		 * caller who provides a non-null, non-broadcast
		 * src_nce is from ip_newroute() which must pass in
		 * a ND_REACHABLE src_nce (this condition is verified
		 * via an ASSERT for the save_ire->ire_nce in ip_newroute())
		 */
		mutex_enter(&src_nce->nce_lock);
		state = src_nce->nce_state;
		if ((src_nce->nce_flags & NCE_F_CONDEMNED) ||
		    (ipst->ips_ndp4->ndp_g_hw_change > 0)) {
			/*
			 * src_nce has been deleted, or
			 * ip_arp_news is in the middle of
			 * flushing entries in the nce.
			 * Fail the add, since we don't know
			 * if it is safe to copy the contents of
			 * src_nce
			 */
			DTRACE_PROBE2(nce__bad__src__nce,
			    nce_t *, src_nce, ill_t *, ill);
			mutex_exit(&src_nce->nce_lock);
			err = EINVAL;
			goto err_ret;
		}
		template = copyb(src_nce->nce_res_mp);
		mutex_exit(&src_nce->nce_lock);
		if (template == NULL) {
			err = ENOMEM;
			goto err_ret;
		}
	} else if (flags & NCE_F_BCAST) {
		/*
		 * broadcast nce.
		 */
		template = copyb(ill->ill_bcast_mp);
		if (template == NULL) {
			err = ENOMEM;
			goto err_ret;
		}
		state = ND_REACHABLE;
	} else if (ill->ill_net_type == IRE_IF_NORESOLVER) {
		/*
		 * NORESOLVER entries are always created in the REACHABLE
		 * state. We create a nce_res_mp with the IP nexthop address
		 * in the destination address in the DLPI hdr if the
		 * physical length is exactly 4 bytes.
		 *
		 * XXX not clear which drivers set ill_phys_addr_length to
		 * IP_ADDR_LEN.
		 */
		if (ill->ill_phys_addr_length == IP_ADDR_LEN) {
			template = ill_dlur_gen((uchar_t *)addr,
			    ill->ill_phys_addr_length,
			    ill->ill_sap, ill->ill_sap_length);
		} else {
			template = copyb(ill->ill_resolver_mp);
		}
		if (template == NULL) {
			err = ENOMEM;
			goto err_ret;
		}
		state = ND_REACHABLE;
	}
	nce->nce_fp_mp = NULL;
	nce->nce_res_mp = template;
	nce->nce_state = state;
	if (state == ND_REACHABLE) {
		nce->nce_last = TICK_TO_MSEC(lbolt64);
		nce->nce_init_time = TICK_TO_MSEC(lbolt64);
	} else {
		nce->nce_last = 0;
		if (state == ND_INITIAL)
			nce->nce_init_time = TICK_TO_MSEC(lbolt64);
	}

	/* Either unresolved with no template, or resolved with one. */
	ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) ||
	    (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE));
	/*
	 * Atomically ensure that the ill is not CONDEMNED, before
	 * adding the NCE.
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		mutex_exit(&ill->ill_lock);
		err = EINVAL;
		goto err_ret;
	}
	/* Link the new entry in at the head of its hash bucket. */
	if ((nce->nce_next = *ncep) != NULL)
		nce->nce_next->nce_ptpn = &nce->nce_next;
	*ncep = nce;
	nce->nce_ptpn = ncep;
	*newnce = nce;
	/* This one is for nce being used by an active thread */
	NCE_REFHOLD(*newnce);

	/* Bump up the number of nce's referencing this ill */
	ill->ill_nce_cnt++;
	mutex_exit(&ill->ill_lock);
	DTRACE_PROBE1(ndp__add__v4, nce_t *, nce);
	return (0);
err_ret:
	/*
	 * Every failure path gets here before the nce is linked into the
	 * hash table, so freeing the nce mblk and any template suffices.
	 */
	freeb(mp);
	freemsg(template);
	return (err);
}
3880 
3881 void
3882 ndp_flush_qd_mp(nce_t *nce)
3883 {
3884 	mblk_t *qd_mp, *qd_next;
3885 
3886 	ASSERT(MUTEX_HELD(&nce->nce_lock));
3887 	qd_mp = nce->nce_qd_mp;
3888 	nce->nce_qd_mp = NULL;
3889 	while (qd_mp != NULL) {
3890 		qd_next = qd_mp->b_next;
3891 		qd_mp->b_next = NULL;
3892 		qd_mp->b_prev = NULL;
3893 		freemsg(qd_mp);
3894 		qd_mp = qd_next;
3895 	}
3896 }
3897 
3898 
3899 /*
3900  * ndp_walk routine to delete all entries that have a given destination or
3901  * gateway address and cached link layer (MAC) address.  This is used when ARP
3902  * informs us that a network-to-link-layer mapping may have changed.
3903  */
3904 void
3905 nce_delete_hw_changed(nce_t *nce, void *arg)
3906 {
3907 	nce_hw_map_t *hwm = arg;
3908 	mblk_t *mp;
3909 	dl_unitdata_req_t *dlu;
3910 	uchar_t *macaddr;
3911 	ill_t *ill;
3912 	int saplen;
3913 	ipaddr_t nce_addr;
3914 
3915 	if (nce->nce_state != ND_REACHABLE)
3916 		return;
3917 
3918 	IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
3919 	if (nce_addr != hwm->hwm_addr)
3920 		return;
3921 
3922 	mutex_enter(&nce->nce_lock);
3923 	if ((mp = nce->nce_res_mp) == NULL) {
3924 		mutex_exit(&nce->nce_lock);
3925 		return;
3926 	}
3927 	dlu = (dl_unitdata_req_t *)mp->b_rptr;
3928 	macaddr = (uchar_t *)(dlu + 1);
3929 	ill = nce->nce_ill;
3930 	if ((saplen = ill->ill_sap_length) > 0)
3931 		macaddr += saplen;
3932 	else
3933 		saplen = -saplen;
3934 
3935 	/*
3936 	 * If the hardware address is unchanged, then leave this one alone.
3937 	 * Note that saplen == abs(saplen) now.
3938 	 */
3939 	if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
3940 	    bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) {
3941 		mutex_exit(&nce->nce_lock);
3942 		return;
3943 	}
3944 	mutex_exit(&nce->nce_lock);
3945 
3946 	DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
3947 	ndp_delete(nce);
3948 }
3949 
3950 /*
3951  * This function verifies whether a given IPv4 address is potentially known to
3952  * the NCE subsystem.  If so, then ARP must not delete the corresponding ace_t,
3953  * so that it can continue to look for hardware changes on that address.
3954  */
3955 boolean_t
3956 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns)
3957 {
3958 	nce_t		*nce;
3959 	struct in_addr	nceaddr;
3960 	ip_stack_t	*ipst = ns->netstack_ip;
3961 
3962 	if (addr == INADDR_ANY)
3963 		return (B_FALSE);
3964 
3965 	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
3966 	nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr);
3967 	for (; nce != NULL; nce = nce->nce_next) {
3968 		/* Note that only v4 mapped entries are in the table. */
3969 		IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
3970 		if (addr == nceaddr.s_addr &&
3971 		    IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
3972 			/* Single flag check; no lock needed */
3973 			if (!(nce->nce_flags & NCE_F_CONDEMNED))
3974 				break;
3975 		}
3976 	}
3977 	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
3978 	return (nce != NULL);
3979 }
3980