xref: /titanic_52/usr/src/uts/common/inet/ip_ndp.h (revision 17f1e64a433a4ca00ffed7539e10c297580a7002)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef	_INET_IP_NDP_H
27 #define	_INET_IP_NDP_H
28 
29 #include <sys/mutex.h>
30 #include <sys/stream.h>
31 #include <netinet/in.h>
32 #include <netinet/icmp6.h>
33 #include <inet/ip.h>
34 #include <inet/ip2mac.h>
35 
36 /*
37  * Internal definitions for the kernel implementation of the IPv6
38  * Neighbor Discovery Protocol (NDP).
39  */
40 
41 #ifdef	__cplusplus
42 extern "C" {
43 #endif
44 
45 #ifdef _KERNEL
/* Number of buckets in the neighbor cache hash table (nce_hash_tbl). */
#define	NCE_TABLE_SIZE	256
47 /*
48  * callbacks set up with ip2mac interface, waiting for result
49  * of neighbor resolution.
50  */
51 typedef struct nce_cb_s {
52 	list_node_t		nce_cb_node;
53 	void			*nce_cb_id;
54 	uint32_t		nce_cb_flags;
55 	ip2mac_callback_t	*nce_cb_func;
56 	void			*nce_cb_arg;
57 } nce_cb_t;
58 
/* Callback flag bit (NOTE(review): presumably stored in nce_cb_flags). */
#define	NCE_CB_DISPATCHED	0x00000001
60 
61 /*
62  * NDP Cache Entry
63  */
64 typedef struct nce_s {
65 	struct	nce_s	*nce_next;	/* Hash chain next pointer */
66 	struct	nce_s	**nce_ptpn;	/* Pointer to previous next */
67 	struct 	ill_s	*nce_ill;	/* Associated ill */
68 	uint16_t	nce_flags;	/* See below */
69 	uint16_t	nce_state;	/* See reachability states in if.h */
70 	int16_t		nce_pcnt;	/* Probe counter */
71 	uint16_t	nce_rcnt;	/* Retransmit counter */
72 	in6_addr_t	nce_addr;	/* address of the nighbor */
73 	in6_addr_t	nce_mask;	/* If not all ones, mask allows an */
74 	    /* entry  to respond to requests for a group of addresses, for */
75 	    /* instantance multicast addresses				   */
76 	in6_addr_t	nce_extract_mask; /* For mappings */
77 	uint32_t	nce_ll_extract_start;	/* For mappings */
78 #define	nce_first_mp_to_free	nce_fp_mp
79 	mblk_t		*nce_fp_mp;	/* link layer fast path mp */
80 	mblk_t		*nce_res_mp;	/* DL_UNITDATA_REQ */
81 	mblk_t		*nce_qd_mp;	/* Head outgoing queued packets */
82 #define	nce_last_mp_to_free	nce_qd_mp
83 	mblk_t		*nce_timer_mp;	/* NDP timer mblk */
84 	mblk_t		*nce_mp;	/* mblk we are in, last to be freed */
85 	uint64_t	nce_last;	/* Time last reachable in msec */
86 	uint32_t	nce_refcnt;	/* nce active usage count */
87 	kmutex_t	nce_lock;	/* See comments on top for what */
88 					/* this field protects */
89 	int		nce_unsolicit_count; /* Unsolicited Adv count */
90 	struct nce_s	*nce_fastpath;	/* for fastpath list */
91 	timeout_id_t	nce_timeout_id;
92 	uchar_t		nce_ipversion;	/* IPv4(ARP)/IPv6(NDP) version */
93 	uint_t		nce_defense_count;	/* number of NDP conflicts */
94 	uint_t		nce_defense_time;	/* last time defended (secs) */
95 	uint64_t	nce_init_time;  /* time when it was set to ND_INITIAL */
96 	boolean_t	nce_trace_disable;	/* True when alloc fails */
97 	list_t		nce_cb;
98 	uint_t		nce_cb_walker_cnt;
99 	uint_t		nce_ipif_cnt;	/* number of ipifs with the nce_addr */
100 					/* as their local address */
101 } nce_t;
102 
103 /*
104  * The ndp_g_t structure contains protocol specific information needed
105  * to synchronize and manage neighbor cache entries for IPv4 and IPv6.
106  * There are 2 such structures, ips_ndp4 and ips_ndp6.
107  * ips_ndp6 contains the data structures needed for IPv6 Neighbor Discovery.
108  * ips_ndp4 has IPv4 link layer info in its nce_t structures
109  * Note that the nce_t is not currently used as the arp cache itself;
110  * it is used for the following purposes:
111  *   - queue packets in nce_qd_mp while waiting for arp resolution to complete
112  *   - nce_{res, fp}_mp are used to track DL_UNITDATA request/responses.
113  *   - track state of ARP resolution in the nce_state;
114  *
115  * Locking notes:
116  * ndp_g_lock protects neighbor cache tables access and
117  * insertion/removal of cache entries into/from these tables.
 * nce_lock protects nce_pcnt, nce_rcnt, nce_qd_mp, nce_state, nce_res_mp,
 * nce_refcnt, nce_last, and nce_cb_walker_cnt.
120  * nce_refcnt is incremented for every ire pointing to this nce and
121  * every time ndp_lookup() finds an nce.
122  * Should there be a need to obtain nce_lock and ndp_g_lock, ndp_g_lock is
123  * acquired first.
124  * To avoid becoming exclusive when deleting NCEs, ndp_walk() routine holds
125  * the ndp_g_lock (i.e global lock) and marks NCEs to be deleted with
126  * NCE_F_CONDEMNED.  When all active users of such NCEs are gone the walk
127  * routine passes a list for deletion to nce_ire_delete_list().
128  *
129  * When the link-layer address of some onlink host changes, ARP will send
130  * an AR_CN_ANNOUNCE message to ip so that stale neighbor-cache
131  * information will not get used. This message is processed in ip_arp_news()
132  * by walking the nce list, and updating as appropriate. The ndp_g_hw_change
133  * flag is set by ip_arp_news() to notify nce_t users that ip_arp_news() is
134  * in progress.
135  */
136 typedef	struct ndp_g_s {
137 	kmutex_t	ndp_g_lock;	/* Lock protecting  cache hash table */
138 	nce_t		*nce_mask_entries;	/* mask not all ones */
139 	nce_t		*nce_hash_tbl[NCE_TABLE_SIZE];
140 	int		ndp_g_walker; /* # of active thread walking hash list */
141 	boolean_t	ndp_g_walker_cleanup; /* true implies defer deletion. */
142 	int		ndp_g_hw_change; /* non-zero if nce flush in progress */
143 } ndp_g_t;
144 
/* Increment ndp_g_hw_change of `ndp' while holding its ndp_g_lock. */
#define	NDP_HW_CHANGE_INCR(ndp) {			\
	mutex_enter(&(ndp)->ndp_g_lock);		\
	(ndp)->ndp_g_hw_change += 1;			\
	mutex_exit(&(ndp)->ndp_g_lock);			\
}
150 
/* Decrement ndp_g_hw_change of `ndp' while holding its ndp_g_lock. */
#define	NDP_HW_CHANGE_DECR(ndp) {			\
	mutex_enter(&(ndp)->ndp_g_lock);		\
	(ndp)->ndp_g_hw_change -= 1;			\
	mutex_exit(&(ndp)->ndp_g_lock);			\
}
156 
/* Bit values kept in nce_flags */
#define	NCE_F_PERMANENT		0x0001
#define	NCE_F_MAPPING		0x0002
#define	NCE_F_ISROUTER		0x0004
/*	unused			0x0008 */
#define	NCE_F_NONUD		0x0010
#define	NCE_F_ANYCAST		0x0020
#define	NCE_F_CONDEMNED		0x0040
#define	NCE_F_UNSOL_ADV		0x0080
#define	NCE_F_BCAST		0x0100

/*
 * Every nce_flags bit except NCE_F_CONDEMNED and NCE_F_BCAST.
 */
#define	NCE_EXTERNAL_FLAGS_MASK				\
	(NCE_F_PERMANENT | NCE_F_MAPPING |		\
	NCE_F_ISROUTER | NCE_F_NONUD |			\
	NCE_F_ANYCAST | NCE_F_UNSOL_ADV)
171 
/*
 * True when nce_state lies in the inclusive range ND_REACHABLE..ND_PROBE,
 * i.e. the state is REACHABLE, STALE, DELAY or PROBE.
 */
#define	NCE_ISREACHABLE(nce)					\
	((nce)->nce_state >= ND_REACHABLE &&			\
	(nce)->nce_state <= ND_PROBE)
176 
/* NDP flag bits set in SOL/ADV requests */
#define	NDP_UNICAST		0x01
#define	NDP_ISROUTER		0x02
#define	NDP_SOLICITED		0x04
#define	NDP_ORIDE		0x08
#define	NDP_PROBE		0x10

/* How many packets NDP queues for one neighbor */
#define	ND_MAX_Q		4
186 
187 
/*
 * Reference tracing hooks: they call nce_trace_ref()/nce_untrace_ref() in
 * DEBUG builds and expand to nothing otherwise.
 */
#ifndef DEBUG
#define	NCE_TRACE_REF(nce)
#define	NCE_UNTRACE_REF(nce)
#else
#define	NCE_TRACE_REF(nce)		nce_trace_ref(nce)
#define	NCE_UNTRACE_REF(nce)		nce_untrace_ref(nce)
#endif
195 
/*
 * Take a reference on `nce': acquires nce_lock, bumps nce_refcnt, asserts
 * the count did not wrap to zero, records the hold via NCE_TRACE_REF and
 * drops the lock again.
 */
#define	NCE_REFHOLD(nce) {				\
	mutex_enter(&(nce)->nce_lock);			\
	(nce)->nce_refcnt += 1;				\
	ASSERT((nce)->nce_refcnt != 0);			\
	NCE_TRACE_REF(nce);				\
	mutex_exit(&(nce)->nce_lock);			\
}
203 
/*
 * Take a reference on `nce' without reference tracing: acquires nce_lock,
 * bumps nce_refcnt, asserts the count did not wrap to zero and drops the
 * lock again.
 */
#define	NCE_REFHOLD_NOTR(nce) {				\
	mutex_enter(&(nce)->nce_lock);			\
	(nce)->nce_refcnt += 1;				\
	ASSERT((nce)->nce_refcnt != 0);			\
	mutex_exit(&(nce)->nce_lock);			\
}
210 
/*
 * Take a reference on `nce' when the caller already holds nce_lock.  The
 * lock is neither acquired nor released here.  Like NCE_REFHOLD and
 * NCE_REFHOLD_NOTR, assert that the count did not wrap around to zero.
 */
#define	NCE_REFHOLD_LOCKED(nce) {		\
	ASSERT(MUTEX_HELD(&(nce)->nce_lock));	\
	(nce)->nce_refcnt++;			\
	ASSERT((nce)->nce_refcnt != 0);		\
	NCE_TRACE_REF(nce);			\
}
216 
/*
 * Drop a reference on `nce'.  When the last reference goes away the entry
 * is handed to ndp_inactive() with nce_lock still held; per the original
 * note that routine destroys the mutex, so no mutex_exit is needed on that
 * path.
 */
#define	NCE_REFRELE(nce) {				\
	mutex_enter(&(nce)->nce_lock);			\
	NCE_UNTRACE_REF(nce);				\
	ASSERT((nce)->nce_refcnt != 0);			\
	(nce)->nce_refcnt--;				\
	if ((nce)->nce_refcnt != 0) {			\
		mutex_exit(&(nce)->nce_lock);		\
	} else {					\
		ndp_inactive(nce);			\
	}						\
}
228 
/*
 * Drop a reference on `nce' without reference tracing.  When the count
 * reaches zero ndp_inactive() is called with nce_lock still held and the
 * lock is not released here; otherwise the lock is dropped.
 */
#define	NCE_REFRELE_NOTR(nce) {				\
	mutex_enter(&(nce)->nce_lock);			\
	ASSERT((nce)->nce_refcnt != 0);			\
	(nce)->nce_refcnt--;				\
	if ((nce)->nce_refcnt != 0) {			\
		mutex_exit(&(nce)->nce_lock);		\
	} else {					\
		ndp_inactive(nce);			\
	}						\
}
238 
/*
 * (Re)arm the NDP timer of `nce' to fire after `ms' milliseconds.
 *
 * The caller must not hold nce_lock.  Any timeout that is still recorded
 * in nce_timeout_id is cancelled first, without the lock.  Then, under
 * nce_lock, a new timeout calling ndp_timer(nce) is scheduled unless the
 * entry has been marked NCE_F_CONDEMNED.  A delay that converts to zero
 * ticks is rounded up to one tick.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.  `nce' is fully parenthesized so that any pointer-valued
 * expression may be passed.
 */
#define	NDP_RESTART_TIMER(nce, ms) {					\
	ASSERT(!MUTEX_HELD(&(nce)->nce_lock));				\
	if ((nce)->nce_timeout_id != 0) {				\
		/* Ok to untimeout bad id. we don't hold a lock. */	\
		(void) untimeout((nce)->nce_timeout_id);		\
	}								\
	mutex_enter(&(nce)->nce_lock);					\
	/* Don't start the timer if the nce has been deleted */		\
	if (!((nce)->nce_flags & NCE_F_CONDEMNED)) {			\
		(nce)->nce_timeout_id = timeout(ndp_timer, (nce),	\
		    MSEC_TO_TICK(ms) == 0 ? 1 : MSEC_TO_TICK(ms));	\
	}								\
	mutex_exit(&(nce)->nce_lock);					\
}
252 
/* Counters used with ndp_cache_count() */
typedef struct {
	int	ncc_total;	/* Count of all NCEs */
	int	ncc_host;	/* Count of NCEs that lack the R bit */
} ncc_cache_count_t;
258 
/*
 * Structure for ndp_cache_reclaim().  Every field is a fraction: 0 means
 * reclaim nothing, 1 means reclaim everything, and N means reclaim 1/Nth
 * of all entries.
 */
typedef struct {
	int	ncr_host;	/* Fraction applied to host entries */
} nce_cache_reclaim_t;
266 
267 /*
268  * Structure for nce_delete_hw_changed; specifies an IPv4 address to link-layer
269  * address mapping.  Any route that has a cached copy of a mapping for that
270  * IPv4 address that doesn't match the given mapping must be purged.
271  */
272 typedef struct {
273 	ipaddr_t hwm_addr;	/* IPv4 address */
274 	uint_t hwm_hwlen;	/* Length of hardware address (may be 0) */
275 	uchar_t *hwm_hwaddr;	/* Pointer to new hardware address, if any */
276 } nce_hw_map_t;
277 
/*
 * Offset of the link-layer address, measured from the start of the
 * dl_unitdata_req_t.  A negative ill_sap_length means the address comes
 * first (immediately after the dl_unitdata_req_t) and the SAP follows it;
 * otherwise the address is placed after ill_sap_length bytes of SAP.
 */
#define	NCE_LL_ADDR_OFFSET(ill)	(((ill)->ill_sap_length) >= 0 ? \
	((sizeof (dl_unitdata_req_t)) + (ABS((ill)->ill_sap_length))) : \
	(sizeof (dl_unitdata_req_t)))
282 
/*
 * Offset of the SAP, measured from the start of the dl_unitdata_req_t.
 * A negative ill_sap_length puts the SAP after ill_phys_addr_length bytes
 * of link-layer address; otherwise the SAP immediately follows the
 * dl_unitdata_req_t.
 */
#define	NCE_LL_SAP_OFFSET(ill) (((ill)->ill_sap_length) >= 0 ? \
	(sizeof (dl_unitdata_req_t)) : \
	((sizeof (dl_unitdata_req_t)) + ((ill)->ill_phys_addr_length)))
286 
/*
 * Copy |ill_sap_length| bytes of (ill)->ill_sap into the message `mp' at
 * b_rptr + NCE_LL_SAP_OFFSET(ill).  Nothing is copied when ill_sap_length
 * is zero.  The big-endian variant copies the trailing bytes of ill_sap
 * and the little-endian variant copies the leading bytes.
 *
 * The ASSERTs check that the SAP is no longer than a uint32_t and that the
 * destination range ends at or before b_wptr.
 *
 * `ill' and `mp' are evaluated more than once and must be free of side
 * effects.
 */
#ifdef _BIG_ENDIAN
#define	NCE_LL_SAP_COPY(ill, mp) \
	{ \
	size_t abs_sap_len = ABS((ill)->ill_sap_length); \
	if (abs_sap_len > 0) { \
		ASSERT(abs_sap_len <= sizeof (uint32_t)); \
		ASSERT((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill) + \
		    abs_sap_len <= ((mp)->b_wptr)); \
		bcopy((uint8_t *)&(ill)->ill_sap + \
		    sizeof ((ill)->ill_sap) - abs_sap_len, \
		    ((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill)), \
		    abs_sap_len); \
	} \
	}
#else
#define	NCE_LL_SAP_COPY(ill, mp) \
	{ \
	size_t abs_sap_len = ABS((ill)->ill_sap_length); \
	if (abs_sap_len > 0) { \
		ASSERT(abs_sap_len <= sizeof (uint32_t)); \
		ASSERT((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill) + \
		    abs_sap_len <= ((mp)->b_wptr)); \
		bcopy(&((ill)->ill_sap), \
		    ((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill)), \
		    abs_sap_len); \
	} \
	}
#endif
316 
/*
 * Hash an IPv6 address by exclusive-or'ing address bytes 8, 9, 10, 13, 14
 * and 15 (the six bytes likely to hold the MAC address when the interface
 * identifier is in EUI-64 format) and reducing the result modulo
 * table_size.  Assumes table_size does not exceed 256.
 */
#define	NCE_ADDR_HASH_V6(addr, table_size)				\
	(((addr).s6_addr8[15] ^ (addr).s6_addr8[14] ^			\
	(addr).s6_addr8[13] ^ (addr).s6_addr8[10] ^			\
	(addr).s6_addr8[9] ^ (addr).s6_addr8[8]) % (table_size))
326 
/*
 * Size of the NDP cache entry hash table.  This repeats the identical
 * definition near the top of the _KERNEL section; the two must be kept in
 * step.
 */
#define	NCE_TABLE_SIZE	256
329 
330 extern	void	ndp_cache_count(nce_t *, char *);
331 extern	void	ndp_cache_reclaim(nce_t *, char *);
332 extern	void	ndp_delete(nce_t *);
333 extern	void	ndp_delete_per_ill(nce_t *, uchar_t *);
334 extern	void	ndp_fastpath_flush(nce_t *, char  *);
335 extern	boolean_t ndp_fastpath_update(nce_t *, void  *);
336 extern	nd_opt_hdr_t *ndp_get_option(nd_opt_hdr_t *, int, int);
337 extern	void	ndp_inactive(nce_t *);
338 extern	void	ndp_input(ill_t *, mblk_t *, mblk_t *);
339 extern	boolean_t ndp_lookup_ipaddr(in_addr_t, netstack_t *);
340 extern	nce_t	*ndp_lookup_v6(ill_t *, boolean_t, const in6_addr_t *,
341     boolean_t);
342 extern	nce_t	*ndp_lookup_v4(ill_t *, const in_addr_t *, boolean_t);
343 extern	int	ndp_mcastreq(ill_t *, const in6_addr_t *, uint32_t, uint32_t,
344     mblk_t *);
345 extern	int	ndp_noresolver(ill_t *, const in6_addr_t *);
346 extern	void	ndp_process(nce_t *, uchar_t *, uint32_t, boolean_t);
347 extern	int	ndp_query(ill_t *, lif_nd_req_t *);
348 extern	int	ndp_resolver(ill_t *, const in6_addr_t *, mblk_t *, zoneid_t);
349 extern	int	ndp_sioc_update(ill_t *, lif_nd_req_t *);
350 extern	boolean_t	ndp_verify_optlen(nd_opt_hdr_t *, int);
351 extern	void	ndp_timer(void *);
352 extern	void	ndp_walk(ill_t *, pfi_t, void *, ip_stack_t *);
353 extern	void	ndp_walk_common(ndp_g_t *, ill_t *, pfi_t,
354     void *, boolean_t);
355 extern	boolean_t	ndp_restart_dad(nce_t *);
356 extern	void	ndp_do_recovery(ipif_t *);
357 extern	void	nce_resolv_failed(nce_t *);
358 extern	void	arp_resolv_failed(nce_t *);
359 extern	void	nce_fastpath_list_add(nce_t *);
360 extern	void	nce_fastpath_list_delete(nce_t *);
361 extern	void	nce_fastpath_list_dispatch(ill_t *,
362     boolean_t (*)(nce_t *, void  *), void *);
363 extern	void	nce_queue_mp_common(nce_t *, mblk_t *, boolean_t);
364 extern	void	nce_delete_hw_changed(nce_t *, void *);
365 extern	void	nce_fastpath(nce_t *);
366 extern	int	ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *,
367     const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t,
368     nce_t **);
369 extern	int	ndp_lookup_then_add_v6(ill_t *, boolean_t, uchar_t *,
370     const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, uint32_t,
371     uint16_t, uint16_t, nce_t **);
372 extern	int	ndp_lookup_then_add_v4(ill_t *,
373     const in_addr_t *, uint16_t, nce_t **, nce_t *);
374 extern void	ip_ndp_resolve(nce_t *);
375 
/* Reference tracing routines; declared in DEBUG builds only. */
#if defined(DEBUG)
extern	void	nce_trace_ref(nce_t *);
extern	void	nce_untrace_ref(nce_t *);
#endif	/* DEBUG */
380 
381 #endif	/* _KERNEL */
382 
383 #ifdef	__cplusplus
384 }
385 #endif
386 
387 #endif	/* _INET_IP_NDP_H */
388