xref: /titanic_50/usr/src/uts/common/inet/ip_stack.h (revision 9a411307f0d1eedbc81618ec290e0685284d8a2b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_INET_IP_STACK_H
28 #define	_INET_IP_STACK_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
36 #include <sys/isa_defs.h>
37 #include <sys/md5.h>
38 #include <sys/types.h>
39 #include <inet/mib2.h>
40 #include <inet/nd.h>
41 #include <sys/atomic.h>
42 #include <sys/socket.h>
43 #include <sys/netstack.h>
44 #include <net/if_dl.h>
45 #include <net/if.h>
46 #include <netinet/ip.h>
47 #include <netinet/igmp_var.h>
48 #include <inet/ip.h>
49 #include <sys/list.h>
50 #include <sys/taskq.h>
51 
52 #ifdef _KERNEL
53 #include <netinet/ip6.h>
54 #include <sys/avl.h>
55 #include <sys/vmem.h>
56 #include <sys/squeue.h>
57 #endif	/* _KERNEL */
58 
59 #ifdef _KERNEL
60 
61 
62 /*
63  * IP statistics: bump (IP_STAT) or add n to (IP_STAT_UPDATE) counter x.
64  */
65 #define	IP_STAT(ipst, x)	((ipst)->ips_ip_statistics.x.value.ui64++)
66 #define	IP_STAT_UPDATE(ipst, x, n) \
67 		((ipst)->ips_ip_statistics.x.value.ui64 += (n))
68 
69 typedef struct ip_stat {	/* counters named by x in IP_STAT() */
70 	kstat_named_t	ipsec_fanout_proto;
71 	kstat_named_t	ip_udp_fannorm;
72 	kstat_named_t	ip_udp_fanmb;
73 	kstat_named_t	ip_udp_fanothers;
74 	kstat_named_t	ip_udp_fast_path;
75 	kstat_named_t	ip_udp_slow_path;
76 	kstat_named_t	ip_udp_input_err;
77 	kstat_named_t	ip_tcppullup;
78 	kstat_named_t	ip_tcpoptions;
79 	kstat_named_t	ip_multipkttcp;
80 	kstat_named_t	ip_tcp_fast_path;
81 	kstat_named_t	ip_tcp_slow_path;
82 	kstat_named_t	ip_tcp_input_error;
83 	kstat_named_t	ip_db_ref;
84 	kstat_named_t	ip_notaligned1;
85 	kstat_named_t	ip_notaligned2;
86 	kstat_named_t	ip_multimblk3;
87 	kstat_named_t	ip_multimblk4;
88 	kstat_named_t	ip_ipoptions;
89 	kstat_named_t	ip_classify_fail;
90 	kstat_named_t	ip_opt;
91 	kstat_named_t	ip_udp_rput_local;
92 	kstat_named_t	ipsec_proto_ahesp;
93 	kstat_named_t	ip_conn_flputbq;
94 	kstat_named_t	ip_conn_walk_drain;
95 	kstat_named_t   ip_out_sw_cksum;
96 	kstat_named_t   ip_in_sw_cksum;
97 	kstat_named_t   ip_trash_ire_reclaim_calls;
98 	kstat_named_t   ip_trash_ire_reclaim_success;
99 	kstat_named_t   ip_ire_arp_timer_expired;
100 	kstat_named_t   ip_ire_redirect_timer_expired;
101 	kstat_named_t	ip_ire_pmtu_timer_expired;
102 	kstat_named_t	ip_input_multi_squeue;
103 	kstat_named_t	ip_tcp_in_full_hw_cksum_err;
104 	kstat_named_t	ip_tcp_in_part_hw_cksum_err;
105 	kstat_named_t	ip_tcp_in_sw_cksum_err;
106 	kstat_named_t	ip_tcp_out_sw_cksum_bytes;
107 	kstat_named_t	ip_udp_in_full_hw_cksum_err;
108 	kstat_named_t	ip_udp_in_part_hw_cksum_err;
109 	kstat_named_t	ip_udp_in_sw_cksum_err;
110 	kstat_named_t	ip_udp_out_sw_cksum_bytes;
111 	kstat_named_t	ip_frag_mdt_pkt_out;
112 	kstat_named_t	ip_frag_mdt_discarded;
113 	kstat_named_t	ip_frag_mdt_allocfail;
114 	kstat_named_t	ip_frag_mdt_addpdescfail;
115 	kstat_named_t	ip_frag_mdt_allocd;
116 } ip_stat_t;	/* embedded in struct ip_stack as ips_ip_statistics */
117 
118 
119 /*
120  * IP6 statistics: bump (IP6_STAT) or add n to (IP6_STAT_UPDATE) counter x.
121  */
122 #define	IP6_STAT(ipst, x)	((ipst)->ips_ip6_statistics.x.value.ui64++)
123 #define	IP6_STAT_UPDATE(ipst, x, n)	\
124 	((ipst)->ips_ip6_statistics.x.value.ui64 += (n))
125 
126 typedef struct ip6_stat {	/* counters named by x in IP6_STAT() */
127 	kstat_named_t	ip6_udp_fast_path;
128 	kstat_named_t	ip6_udp_slow_path;
129 	kstat_named_t	ip6_udp_fannorm;
130 	kstat_named_t	ip6_udp_fanmb;
131 	kstat_named_t   ip6_out_sw_cksum;
132 	kstat_named_t   ip6_in_sw_cksum;
133 	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
134 	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
135 	kstat_named_t	ip6_tcp_in_sw_cksum_err;
136 	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
137 	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
138 	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
139 	kstat_named_t	ip6_udp_in_sw_cksum_err;
140 	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
141 	kstat_named_t	ip6_frag_mdt_pkt_out;
142 	kstat_named_t	ip6_frag_mdt_discarded;
143 	kstat_named_t	ip6_frag_mdt_allocfail;
144 	kstat_named_t	ip6_frag_mdt_addpdescfail;
145 	kstat_named_t	ip6_frag_mdt_allocd;
146 } ip6_stat_t;	/* embedded in struct ip_stack as ips_ip6_statistics */
147 
148 typedef struct ire_stats {	/* one each for IPv4 and IPv6 in ip_stack */
149 	uint64_t ire_stats_alloced;	/* # of ires alloced */
150 	uint64_t ire_stats_freed;	/* # of ires freed */
151 	uint64_t ire_stats_inserted;	/* # of ires inserted in the bucket */
152 	uint64_t ire_stats_deleted;	/* # of ires deleted from the bucket */
153 } ire_stats_t;
154 
155 
156 /*
157  * IP stack instances.  Each holds the IP state tied to one netstack_t.
158  */
159 struct ip_stack {
160 	netstack_t	*ips_netstack;	/* Common netstack */
161 
162 	struct ipparam_s	*ips_param_arr; 	/* ndd variable table */
163 	struct ipndp_s		*ips_ndp_arr;
164 
165 	mib2_ipIfStatsEntry_t	ips_ip_mib;	/* SNMP fixed size info */
166 	mib2_icmp_t	ips_icmp_mib;
167 	/*
168 	 * IPv6 mibs when the interface (ill) is not known.
169 	 * When the ill is known the per-interface mib in the ill is used.
170 	 */
171 	mib2_ipIfStatsEntry_t	ips_ip6_mib;
172 	mib2_ipv6IfIcmpEntry_t	ips_icmp6_mib;
173 
174 	struct igmpstat		ips_igmpstat;
175 
176 	kstat_t		*ips_ip_mibkp;	/* kstat exporting ip_mib data */
177 	kstat_t		*ips_icmp_mibkp; /* kstat exporting icmp_mib data */
178 	kstat_t		*ips_ip_kstat;
179 	ip_stat_t	ips_ip_statistics;
180 	kstat_t		*ips_ip6_kstat;
181 	ip6_stat_t	ips_ip6_statistics;
182 
183 /* ip.c */
184 	krwlock_t	ips_ip_g_nd_lock;
185 	kmutex_t	ips_igmp_timer_lock;
186 	kmutex_t	ips_mld_timer_lock;
187 	kmutex_t	ips_ip_mi_lock;
188 	kmutex_t	ips_ip_addr_avail_lock;
189 	krwlock_t	ips_ill_g_lock;
190 	krwlock_t	ips_ipsec_capab_ills_lock;
191 				/* protects the list of IPsec capable ills */
192 	struct ipsec_capab_ill_s *ips_ipsec_capab_ills_ah;
193 	struct ipsec_capab_ill_s *ips_ipsec_capab_ills_esp;
194 
195 	krwlock_t	ips_ill_g_usesrc_lock;
196 
197 	struct ill_group *ips_illgrp_head_v4;	/* Head of IPv4 ill groups */
198 	struct ill_group *ips_illgrp_head_v6;	/* Head of IPv6 ill groups */
199 
200 /* ipclassifier.c - keep in ip_stack_t */
201 	/* ipclassifier hash tables */
202 	struct connf_s	*ips_rts_clients;
203 	struct connf_s	*ips_ipcl_conn_fanout;
204 	struct connf_s	*ips_ipcl_bind_fanout;
205 	struct connf_s	*ips_ipcl_proto_fanout;
206 	struct connf_s	*ips_ipcl_proto_fanout_v6;
207 	struct connf_s	*ips_ipcl_udp_fanout;
208 	struct connf_s	*ips_ipcl_raw_fanout;
209 	uint_t		ips_ipcl_conn_fanout_size;
210 	uint_t		ips_ipcl_bind_fanout_size;
211 	uint_t		ips_ipcl_udp_fanout_size;
212 	uint_t		ips_ipcl_raw_fanout_size;
213 	struct connf_s	*ips_ipcl_globalhash_fanout;
214 	int		ips_conn_g_index;
215 
216 /* ip.c */
217 	/* Following protected by ips_igmp_timer_lock */
218 	int 		ips_igmp_time_to_next;	/* Time since last timeout */
219 	int 		ips_igmp_timer_fired_last;
220 	int		ips_igmp_deferred_next;
221 	timeout_id_t	ips_igmp_timeout_id;
222 	/* Protected by ips_igmp_timer_lock */
223 	boolean_t	ips_igmp_timer_setter_active;
224 
225 	/* Following protected by ips_mld_timer_lock */
226 	int 		ips_mld_time_to_next;	/* Time since last timeout */
227 	int 		ips_mld_timer_fired_last;
228 	int		ips_mld_deferred_next;
229 	timeout_id_t	ips_mld_timeout_id;
230 	/* Protected by ips_mld_timer_lock */
231 	boolean_t	ips_mld_timer_setter_active;
232 
233 	/* Protected by ips_igmp_slowtimeout_lock */
234 	timeout_id_t	ips_igmp_slowtimeout_id;
235 	kmutex_t	ips_igmp_slowtimeout_lock;
236 
237 	/* Protected by ips_mld_slowtimeout_lock */
238 	timeout_id_t	ips_mld_slowtimeout_id;
239 	kmutex_t	ips_mld_slowtimeout_lock;
240 
241 	/* IPv4 forwarding table */
242 	struct radix_node_head *ips_ip_ftable;
243 
244 	/* This is dynamically allocated in ip_ire_init */
245 	struct irb	 *ips_ip_cache_table;
246 	/* This is dynamically allocated in ire_add_mrtun */
247 	struct irb	*ips_ip_mrtun_table;
248 
249 #define	IPV6_ABITS		128
250 #define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */
251 
252 	struct irb	*ips_ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE];
253 	/* This is dynamically allocated in ip_ire_init */
254 	struct irb	*ips_ip_cache_table_v6;
255 
256 	uint32_t	ips_ire_handle;
257 	/*
258 	 * ips_ire_ft_init_lock is used while initializing ip_forwarding_table
259 	 * dynamically in ire_add.
260 	 */
261 	kmutex_t	ips_ire_ft_init_lock;
262 	kmutex_t	ips_ire_mrtun_lock; /* Protects mrtun table and count */
263 	kmutex_t	ips_ire_srcif_table_lock; /* Same, for srcif tables/count */
264 	/*
265 	 * ips_ire_mrtun_count and ips_ire_srcif_table_count (below) determine
266 	 * whether a walk of the reverse tunnel table or of the ills is needed.
267 	 */
268 	kmutex_t	ips_ire_handle_lock;	/* Protects ips_ire_handle */
269 
270 	/* # of ires in reverse tun table */
271 	uint_t		ips_ire_mrtun_count;
272 
273 	/* # of ires in all srcif tables */
274 	uint_t		ips_ire_srcif_table_count;
275 
276 	uint32_t	ips_ip_cache_table_size;
277 	uint32_t	ips_ip6_cache_table_size;
278 	uint32_t	ips_ip6_ftable_hash_size;
279 
280 	ire_stats_t 	ips_ire_stats_v4;	/* IPv4 ire statistics */
281 	ire_stats_t 	ips_ire_stats_v6;	/* IPv6 ire statistics */
282 
283 	/* pending binds */
284 	mblk_t		*ips_ip6_asp_pending_ops;
285 	mblk_t		*ips_ip6_asp_pending_ops_tail;
286 
287 	/* Synchronize updates with table usage */
288 	mblk_t		*ips_ip6_asp_pending_update; /* pending table updates */
289 
290 	boolean_t	ips_ip6_asp_uip;	/* table update in progress */
291 	kmutex_t	ips_ip6_asp_lock;	/* protect all the above */
292 	uint32_t	ips_ip6_asp_refcnt;	/* outstanding references */
293 
294 	struct ip6_asp	*ips_ip6_asp_table;
295 	/* The number of policy entries in the table */
296 	uint_t		ips_ip6_asp_table_count;
297 
298 	int		ips_ip_g_forward;
299 	int		ips_ipv6_forward;
300 
301 	time_t		ips_ip_g_frag_timeout;
302 	clock_t		ips_ip_g_frag_timo_ms;
303 
304 	queue_t		*ips_ip_g_mrouter;
305 
306 	/* Time since last icmp_pkt_err */
307 	clock_t		ips_icmp_pkt_err_last;
308 	/* Number of packets sent in burst */
309 	uint_t		ips_icmp_pkt_err_sent;
310 	/* Used by icmp_send_redirect_v6 for picking random src. */
311 	uint_t		ips_icmp_redirect_v6_src_index;
312 
313 	/* Protected by ips_ip_mi_lock */
314 	void		*ips_ip_g_head;		/* Instance Data List Head */
315 
316 	caddr_t		ips_ip_g_nd;		/* Named Dispatch List Head */
317 
318 	/* Multirouting stuff */
319 	/* Interval (in ms) between consecutive 'bad MTU' warnings */
320 	hrtime_t	ips_ip_multirt_log_interval;
321 	/* Time since last warning issued. */
322 	hrtime_t	ips_multirt_bad_mtu_last_time;
323 
324 	kmutex_t	ips_ip_trash_timer_lock;
325 	timeout_id_t	ips_ip_ire_expire_id;	/* IRE expiration timer. */
326 	struct ipsq_s	*ips_ipsq_g_head;
327 	uint_t		ips_ill_index;	/* Used to assign interface indices */
328 	/* When set search for unused index */
329 	boolean_t	ips_ill_index_wrap;
330 
331 	clock_t		ips_ip_ire_arp_time_elapsed;
332 			/* Time since IRE cache last flushed */
333 	clock_t		ips_ip_ire_rd_time_elapsed;
334 			/* ... redirect IREs last flushed */
335 	clock_t		ips_ip_ire_pmtu_time_elapsed;
336 			/* Time since path mtu increase */
337 
338 	uint_t		ips_ip_redirect_cnt;
339 			/* Num of redirect routes in ftable */
340 	uint_t		ips_ipv6_ire_default_count;
341 			/* Number of IPv6 IRE_DEFAULT entries */
342 	uint_t		ips_ipv6_ire_default_index;
343 			/* Walking IPv6 index used to mod in */
344 
345 	uint_t		ips_loopback_packets;
346 
347 	/* NDP/NCE structures for IPv4 and IPv6 */
348 	struct ndp_g_s	*ips_ndp4;
349 	struct ndp_g_s	*ips_ndp6;
350 
351 	/* ip_mroute stuff */
352 	kmutex_t	ips_ip_g_mrouter_mutex;
353 
354 	struct mrtstat	*ips_mrtstat;	/* Stats for netstat */
355 	int		ips_saved_ip_g_forward;
356 
357 	/* numvifs is only a hint about the max interface being used. */
358 	ushort_t	ips_numvifs;
359 	kmutex_t	ips_numvifs_mutex;
360 
361 	struct vif	*ips_vifs;
362 	struct mfcb	*ips_mfcs;	/* kernel routing table	*/
363 	struct tbf	*ips_tbfs;
364 	/*
365 	 * One-back cache used to locate a tunnel's vif,
366 	 * given a datagram's src ip address.
367 	 */
368 	ipaddr_t	ips_last_encap_src;
369 	struct vif	*ips_last_encap_vif;
370 	kmutex_t	ips_last_encap_lock;	/* Protects the above */
371 
372 	/*
373 	 * ips_reg_vif_num is protected by ips_numvifs_mutex.
374 	 * ips_pim_assert: whether special PIM assert processing is enabled.
375 	 */
376 	ushort_t	ips_reg_vif_num; 	/* Index to Register vif */
377 	int		ips_pim_assert;
378 
379 	union ill_g_head_u *ips_ill_g_heads;   /* ILL List Head */
380 
381 	kstat_t		*ips_loopback_ksp;
382 
383 	uint_t		ips_ipif_src_random;
384 
385 	struct idl_s	*ips_conn_drain_list;	/* Array of conn drain lists */
386 	uint_t		ips_conn_drain_list_cnt; /* Count of conn_drain_list */
387 	int		ips_conn_drain_list_index; /* Next drain_list */
388 
389 	/*
390 	 * ID used to assign next free one.
391 	 * Increases by one. Once it wraps we search for an unused ID.
392 	 */
393 	uint_t		ips_ip_src_id;
394 	boolean_t	ips_srcid_wrapped;
395 
396 	struct srcid_map *ips_srcid_head;
397 	krwlock_t	ips_srcid_lock;
398 
399 	uint64_t	ips_ipif_g_seqid;
400 	union phyint_list_u *ips_phyint_g_list;	/* start of phyint list */
401 
402 	/*
403 	 * Reflects value of FAILBACK variable in IPMP config file
404 	 * /etc/default/mpathd. Default value is B_TRUE.
405 	 * Set to B_FALSE if user disabled failback by configuring
406 	 * "FAILBACK=no". in.mpathd uses SIOCSIPMPFAILBACK ioctl to pass this
407 	 * information to the kernel.
408 	 */
409 	boolean_t ips_ipmp_enable_failback;
410 
411 /* ip_neti.c */
412 	hook_family_t	ips_ipv4root;
413 	hook_family_t	ips_ipv6root;
414 
415 	/*
416 	 * Hooks for firewalling
417 	 */
418 	hook_event_t		ips_ip4_physical_in_event;
419 	hook_event_t		ips_ip4_physical_out_event;
420 	hook_event_t		ips_ip4_forwarding_event;
421 	hook_event_t		ips_ip4_loopback_in_event;
422 	hook_event_t		ips_ip4_loopback_out_event;
423 	hook_event_t		ips_ip4_nic_events;
424 	hook_event_t		ips_ip6_physical_in_event;
425 	hook_event_t		ips_ip6_physical_out_event;
426 	hook_event_t		ips_ip6_forwarding_event;
427 	hook_event_t		ips_ip6_loopback_in_event;
428 	hook_event_t		ips_ip6_loopback_out_event;
429 	hook_event_t		ips_ip6_nic_events;
430 
431 	hook_event_token_t	ips_ipv4firewall_physical_in;
432 	hook_event_token_t	ips_ipv4firewall_physical_out;
433 	hook_event_token_t	ips_ipv4firewall_forwarding;
434 	hook_event_token_t	ips_ipv4firewall_loopback_in;
435 	hook_event_token_t	ips_ipv4firewall_loopback_out;
436 	hook_event_token_t	ips_ipv4nicevents;
437 	hook_event_token_t	ips_ipv6firewall_physical_in;
438 	hook_event_token_t	ips_ipv6firewall_physical_out;
439 	hook_event_token_t	ips_ipv6firewall_forwarding;
440 	hook_event_token_t	ips_ipv6firewall_loopback_in;
441 	hook_event_token_t	ips_ipv6firewall_loopback_out;
442 	hook_event_token_t	ips_ipv6nicevents;
443 
444 	net_data_t		ips_ipv4_net_data;
445 	net_data_t		ips_ipv6_net_data;
446 };
447 typedef struct ip_stack ip_stack_t;
448 
449 /* Finding an ip_stack_t from a queue: via its conn, or via the ill in q_ptr */
450 #define	CONNQ_TO_IPST(_q)	(Q_TO_CONN(_q)->conn_netstack->netstack_ip)
451 #define	ILLQ_TO_IPST(_q)	(((ill_t *)(_q)->q_ptr)->ill_ipst)
452 
453 #else /* _KERNEL */
454 typedef int ip_stack_t;	/* placeholder: struct ip_stack is _KERNEL-only */
455 #endif /* _KERNEL */
456 
457 #ifdef	__cplusplus
458 }
459 #endif
460 
461 #endif	/* _INET_IP_STACK_H */
462