xref: /illumos-gate/usr/src/uts/common/inet/ip_stack.h (revision bfed486ad8de8b8ebc6345a8e10accae08bf2f45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_INET_IP_STACK_H
28 #define	_INET_IP_STACK_H
29 
30 #ifdef	__cplusplus
31 extern "C" {
32 #endif
33 
34 #include <sys/netstack.h>
35 #include <netinet/igmp_var.h>
36 #include <sys/modhash.h>
37 
38 #ifdef _KERNEL
39 #include <sys/list.h>
40 
41 /*
42  * IP statistics.
 * IP_STAT() bumps, and IP_STAT_UPDATE() adds n to, the ui64 value of
 * counter x in (ipst)->ips_ip_statistics.  Both expand to a plain ++/+=;
 * the macros themselves take no lock and use no atomic operation.
43  */
44 #define	IP_STAT(ipst, x)	((ipst)->ips_ip_statistics.x.value.ui64++)
45 #define	IP_STAT_UPDATE(ipst, x, n) \
46 		((ipst)->ips_ip_statistics.x.value.ui64 += (n))
47 
48 typedef struct ip_stat {	/* counters updated via IP_STAT()/IP_STAT_UPDATE() */
49 	kstat_named_t	ipsec_fanout_proto;
50 	kstat_named_t	ip_udp_fannorm;
51 	kstat_named_t	ip_udp_fanmb;
52 	kstat_named_t	ip_udp_fanothers;
53 	kstat_named_t	ip_udp_fast_path;
54 	kstat_named_t	ip_udp_slow_path;
55 	kstat_named_t	ip_udp_input_err;
56 	kstat_named_t	ip_tcppullup;
57 	kstat_named_t	ip_tcpoptions;
58 	kstat_named_t	ip_multipkttcp;
59 	kstat_named_t	ip_tcp_fast_path;
60 	kstat_named_t	ip_tcp_slow_path;
61 	kstat_named_t	ip_tcp_input_error;
62 	kstat_named_t	ip_db_ref;
63 	kstat_named_t	ip_notaligned1;
64 	kstat_named_t	ip_notaligned2;
65 	kstat_named_t	ip_multimblk3;
66 	kstat_named_t	ip_multimblk4;
67 	kstat_named_t	ip_ipoptions;
68 	kstat_named_t	ip_classify_fail;
69 	kstat_named_t	ip_opt;
70 	kstat_named_t	ip_udp_rput_local;
71 	kstat_named_t	ipsec_proto_ahesp;
72 	kstat_named_t	ip_conn_flputbq;
73 	kstat_named_t	ip_conn_walk_drain;
74 	kstat_named_t   ip_out_sw_cksum;
75 	kstat_named_t   ip_in_sw_cksum;
76 	kstat_named_t   ip_trash_ire_reclaim_calls;
77 	kstat_named_t   ip_trash_ire_reclaim_success;
78 	kstat_named_t   ip_ire_arp_timer_expired;
79 	kstat_named_t   ip_ire_redirect_timer_expired;
80 	kstat_named_t	ip_ire_pmtu_timer_expired;
81 	kstat_named_t	ip_input_multi_squeue;
82 	kstat_named_t	ip_tcp_in_full_hw_cksum_err;
83 	kstat_named_t	ip_tcp_in_part_hw_cksum_err;
84 	kstat_named_t	ip_tcp_in_sw_cksum_err;
85 	kstat_named_t	ip_tcp_out_sw_cksum_bytes;
86 	kstat_named_t	ip_udp_in_full_hw_cksum_err;
87 	kstat_named_t	ip_udp_in_part_hw_cksum_err;
88 	kstat_named_t	ip_udp_in_sw_cksum_err;
89 	kstat_named_t	ip_udp_out_sw_cksum_bytes;
90 	kstat_named_t	ip_frag_mdt_pkt_out;
91 	kstat_named_t	ip_frag_mdt_discarded;
92 	kstat_named_t	ip_frag_mdt_allocfail;
93 	kstat_named_t	ip_frag_mdt_addpdescfail;
94 	kstat_named_t	ip_frag_mdt_allocd;
95 } ip_stat_t;
96 
97 
98 /*
99  * IP6 statistics.
 * IP6_STAT() bumps, and IP6_STAT_UPDATE() adds n to, the ui64 value of
 * counter x in (ipst)->ips_ip6_statistics.  Both expand to a plain ++/+=;
 * the macros themselves take no lock and use no atomic operation.
100  */
101 #define	IP6_STAT(ipst, x)	((ipst)->ips_ip6_statistics.x.value.ui64++)
102 #define	IP6_STAT_UPDATE(ipst, x, n)	\
103 	((ipst)->ips_ip6_statistics.x.value.ui64 += (n))
104 
105 typedef struct ip6_stat {	/* counters updated via IP6_STAT()/IP6_STAT_UPDATE() */
106 	kstat_named_t	ip6_udp_fast_path;
107 	kstat_named_t	ip6_udp_slow_path;
108 	kstat_named_t	ip6_udp_fannorm;
109 	kstat_named_t	ip6_udp_fanmb;
110 	kstat_named_t   ip6_out_sw_cksum;
111 	kstat_named_t   ip6_in_sw_cksum;
112 	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
113 	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
114 	kstat_named_t	ip6_tcp_in_sw_cksum_err;
115 	kstat_named_t	ip6_tcp_out_sw_cksum_bytes;
116 	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
117 	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
118 	kstat_named_t	ip6_udp_in_sw_cksum_err;
119 	kstat_named_t	ip6_udp_out_sw_cksum_bytes;
120 	kstat_named_t	ip6_frag_mdt_pkt_out;
121 	kstat_named_t	ip6_frag_mdt_discarded;
122 	kstat_named_t	ip6_frag_mdt_allocfail;
123 	kstat_named_t	ip6_frag_mdt_addpdescfail;
124 	kstat_named_t	ip6_frag_mdt_allocd;
125 } ip6_stat_t;
126 
127 typedef struct ire_stats {	/* embedded in ip_stack as ips_ire_stats_v4/_v6 */
128 	uint64_t ire_stats_alloced;	/* # of ires alloced */
129 	uint64_t ire_stats_freed;	/* # of ires freed */
130 	uint64_t ire_stats_inserted;	/* # of ires inserted in the bucket */
131 	uint64_t ire_stats_deleted;	/* # of ires deleted from the bucket */
132 } ire_stats_t;
133 
134 
135 /*
136  * IP stack instances: the IP state kept for one netstack (see ips_netstack).
 * Members carry the ips_ prefix; the "ip.c", "ipmp.c", ... markers below
 * name the source file each group of members is associated with.
137  */
138 struct ip_stack {
139 	netstack_t	*ips_netstack;	/* Common netstack */
140 
141 	struct ipparam_s	*ips_param_arr; 	/* ndd variable table */
142 	struct ipndp_s		*ips_ndp_arr;
143 
144 	mib2_ipIfStatsEntry_t	ips_ip_mib;	/* SNMP fixed size info */
145 	mib2_icmp_t	ips_icmp_mib;
146 	/*
147 	 * IPv6 mibs when the interface (ill) is not known.
148 	 * When the ill is known the per-interface mib in the ill is used.
149 	 */
150 	mib2_ipIfStatsEntry_t	ips_ip6_mib;
151 	mib2_ipv6IfIcmpEntry_t	ips_icmp6_mib;
152 
153 	struct igmpstat		ips_igmpstat;
154 
155 	kstat_t		*ips_ip_mibkp;	/* kstat exporting ip_mib data */
156 	kstat_t		*ips_icmp_mibkp; /* kstat exporting icmp_mib data */
157 	kstat_t		*ips_ip_kstat;
158 	ip_stat_t	ips_ip_statistics;	/* target of IP_STAT() */
159 	kstat_t		*ips_ip6_kstat;
160 	ip6_stat_t	ips_ip6_statistics;	/* target of IP6_STAT() */
161 
162 /* ip.c */
163 	krwlock_t	ips_ip_g_nd_lock;
164 	kmutex_t	ips_igmp_timer_lock;
165 	kmutex_t	ips_mld_timer_lock;
166 	kmutex_t	ips_ip_mi_lock;
167 	kmutex_t	ips_ip_addr_avail_lock;
168 	krwlock_t	ips_ill_g_lock;
169 	krwlock_t	ips_ipsec_capab_ills_lock;
170 				/* protects the list of IPsec capable ills */
171 	struct ipsec_capab_ill_s *ips_ipsec_capab_ills_ah;
172 	struct ipsec_capab_ill_s *ips_ipsec_capab_ills_esp;
173 
174 	krwlock_t	ips_ill_g_usesrc_lock;
175 
176 	/* Taskq dispatcher for capability operations */
177 	kmutex_t	ips_capab_taskq_lock;
178 	kcondvar_t	ips_capab_taskq_cv;
179 	list_t		ips_capab_taskq_list;
180 	kthread_t	*ips_capab_taskq_thread;
181 	boolean_t	ips_capab_taskq_quit;
182 
183 /* ipclassifier.c - keep in ip_stack_t */
184 	/* ipclassifier hash tables */
185 	struct connf_s	*ips_rts_clients;
186 	struct connf_s	*ips_ipcl_conn_fanout;
187 	struct connf_s	*ips_ipcl_bind_fanout;
188 	struct connf_s	*ips_ipcl_proto_fanout;
189 	struct connf_s	*ips_ipcl_proto_fanout_v6;
190 	struct connf_s	*ips_ipcl_udp_fanout;
191 	struct connf_s	*ips_ipcl_raw_fanout;
192 	uint_t		ips_ipcl_conn_fanout_size;
193 	uint_t		ips_ipcl_bind_fanout_size;
194 	uint_t		ips_ipcl_udp_fanout_size;
195 	uint_t		ips_ipcl_raw_fanout_size;
196 	struct connf_s	*ips_ipcl_globalhash_fanout;
197 	int		ips_conn_g_index;
198 
199 /* ip.c */
200 	/* Following protected by ips_igmp_timer_lock */
201 	int 		ips_igmp_time_to_next;	/* Time since last timeout; NOTE(review): name reads as time until the next timeout -- confirm */
202 	int 		ips_igmp_timer_scheduled_last;
203 	int		ips_igmp_deferred_next;
204 	timeout_id_t	ips_igmp_timeout_id;
205 	boolean_t	ips_igmp_timer_setter_active;
206 
207 	/* Following protected by ips_mld_timer_lock */
208 	int 		ips_mld_time_to_next;	/* Time since last timeout; NOTE(review): name reads as time until the next timeout -- confirm */
209 	int 		ips_mld_timer_scheduled_last;
210 	int		ips_mld_deferred_next;
211 	timeout_id_t	ips_mld_timeout_id;
212 	boolean_t	ips_mld_timer_setter_active;
213 
214 	/* Protected by ips_igmp_slowtimeout_lock */
215 	timeout_id_t	ips_igmp_slowtimeout_id;
216 	kmutex_t	ips_igmp_slowtimeout_lock;
217 
218 	/* Protected by ips_mld_slowtimeout_lock */
219 	timeout_id_t	ips_mld_slowtimeout_id;
220 	kmutex_t	ips_mld_slowtimeout_lock;
221 
222 	/* IPv4 forwarding table */
223 	struct radix_node_head *ips_ip_ftable;
224 
225 	/* This is dynamically allocated in ip_ire_init */
226 	struct irb	 *ips_ip_cache_table;
227 
228 #define	IPV6_ABITS		128
229 #define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */
230 
231 	struct irb	*ips_ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE];
232 	/* This is dynamically allocated in ip_ire_init */
233 	struct irb	*ips_ip_cache_table_v6;
234 
235 	uint32_t	ips_ire_handle;
236 	/*
237 	 * ips_ire_ft_init_lock is used while initializing ip_forwarding_table
238 	 * dynamically in ire_add.
239 	 */
240 	kmutex_t	ips_ire_ft_init_lock;
241 	kmutex_t	ips_ire_handle_lock;	/* Protects ips_ire_handle */
242 
243 	uint32_t	ips_ip_cache_table_size;
244 	uint32_t	ips_ip6_cache_table_size;
245 	uint32_t	ips_ip6_ftable_hash_size;
246 
247 	ire_stats_t 	ips_ire_stats_v4;	/* IPv4 ire statistics */
248 	ire_stats_t 	ips_ire_stats_v6;	/* IPv6 ire statistics */
249 
250 	/* pending binds */
251 	mblk_t		*ips_ip6_asp_pending_ops;
252 	mblk_t		*ips_ip6_asp_pending_ops_tail;
253 
254 	/* Synchronize updates with table usage */
255 	mblk_t		*ips_ip6_asp_pending_update; /* pending table updates */
256 
257 	boolean_t	ips_ip6_asp_uip;	/* table update in progress */
258 	kmutex_t	ips_ip6_asp_lock;	/* protect all the above */
259 	uint32_t	ips_ip6_asp_refcnt;	/* outstanding references */
260 
261 	struct ip6_asp	*ips_ip6_asp_table;
262 	/* The number of policy entries in the table */
263 	uint_t		ips_ip6_asp_table_count;
264 
265 	int		ips_ip_g_forward;
266 	int		ips_ipv6_forward;
267 
268 	time_t		ips_ip_g_frag_timeout;
269 	clock_t		ips_ip_g_frag_timo_ms;
270 
271 	struct conn_s	*ips_ip_g_mrouter;
272 
273 	/* Time since last icmp_pkt_err */
274 	clock_t		ips_icmp_pkt_err_last;
275 	/* Number of packets sent in burst */
276 	uint_t		ips_icmp_pkt_err_sent;
277 
278 	/* Protected by ips_ip_mi_lock */
279 	void		*ips_ip_g_head;		/* Instance Data List Head */
280 
281 	caddr_t		ips_ip_g_nd;		/* Named Dispatch List Head */
282 
283 	/* Multirouting stuff */
284 	/* Interval (in ms) between consecutive 'bad MTU' warnings */
285 	hrtime_t	ips_ip_multirt_log_interval;
286 	/* Time since last warning issued. */
287 	hrtime_t	ips_multirt_bad_mtu_last_time;
288 
289 	struct cgtp_filter_ops *ips_ip_cgtp_filter_ops;	/* CGTP hooks */
290 	boolean_t	ips_ip_cgtp_filter;	/* Enable/disable CGTP hooks */
291 
292 	kmutex_t	ips_ip_trash_timer_lock;
293 	timeout_id_t	ips_ip_ire_expire_id;	/* IRE expiration timer. */
294 	struct ipsq_s	*ips_ipsq_g_head;
295 	uint_t		ips_ill_index;	/* Used to assign interface indices */
296 	/* When set search for unused index */
297 	boolean_t	ips_ill_index_wrap;
298 
299 	clock_t		ips_ip_ire_arp_time_elapsed;
300 			/* Time since IRE cache last flushed */
301 	clock_t		ips_ip_ire_rd_time_elapsed;
302 			/* ... redirect IREs last flushed */
303 	clock_t		ips_ip_ire_pmtu_time_elapsed;
304 			/* Time since path mtu increase */
305 
306 	uint_t		ips_ip_redirect_cnt;
307 			/* Num of redirect routes in ftable */
308 	uint_t		ips_ipv6_ire_default_count;
309 			/* Number of IPv6 IRE_DEFAULT entries */
310 	uint_t		ips_ipv6_ire_default_index;
311 			/* Walking IPv6 index used to mod in */
312 
313 	uint_t		ips_loopback_packets;
314 
315 	/* NDP/NCE structures for IPv4 and IPv6 */
316 	struct ndp_g_s	*ips_ndp4;
317 	struct ndp_g_s	*ips_ndp6;
318 
319 	/* ip_mroute stuff */
320 	kmutex_t	ips_ip_g_mrouter_mutex;
321 
322 	struct mrtstat	*ips_mrtstat;	/* Stats for netstat */
323 	int		ips_saved_ip_g_forward;
324 
325 	/* numvifs is only a hint about the max interface being used. */
326 	ushort_t	ips_numvifs;
327 	kmutex_t	ips_numvifs_mutex;
328 
329 	struct vif	*ips_vifs;
330 	struct mfcb	*ips_mfcs;	/* kernel routing table	*/
331 	struct tbf	*ips_tbfs;
332 	/*
333 	 * One-back cache used to locate a tunnel's vif,
334 	 * given a datagram's src ip address.
335 	 */
336 	ipaddr_t	ips_last_encap_src;
337 	struct vif	*ips_last_encap_vif;
338 	kmutex_t	ips_last_encap_lock;	/* Protects the above */
339 
340 	/*
341 	 * ips_reg_vif_num is protected by ips_numvifs_mutex
342 	 */
343 	/* ips_pim_assert: whether or not special PIM assert processing is enabled. */
344 	ushort_t	ips_reg_vif_num; 	/* Index to Register vif */
345 	int		ips_pim_assert;
346 
347 	union ill_g_head_u *ips_ill_g_heads;   /* ILL List Head */
348 
349 	kstat_t		*ips_loopback_ksp;
350 
351 	struct idl_s	*ips_conn_drain_list;	/* Array of conn drain lists */
352 	uint_t		ips_conn_drain_list_cnt; /* Count of conn_drain_list */
353 	int		ips_conn_drain_list_index; /* Next drain_list */
354 
355 	/*
356 	 * ID used to assign next free one.
357 	 * Increases by one. Once it wraps we search for an unused ID.
358 	 */
359 	uint_t		ips_ip_src_id;
360 	boolean_t	ips_srcid_wrapped;
361 
362 	struct srcid_map *ips_srcid_head;
363 	krwlock_t	ips_srcid_lock;
364 
365 	uint64_t	ips_ipif_g_seqid;
366 	union phyint_list_u *ips_phyint_g_list;	/* start of phyint list */
367 
368 /* ip_neti.c */
369 	hook_family_t	ips_ipv4root;
370 	hook_family_t	ips_ipv6root;
371 
372 	/*
373 	 * Hooks for firewalling
374 	 */
375 	hook_event_t		ips_ip4_physical_in_event;
376 	hook_event_t		ips_ip4_physical_out_event;
377 	hook_event_t		ips_ip4_forwarding_event;
378 	hook_event_t		ips_ip4_loopback_in_event;
379 	hook_event_t		ips_ip4_loopback_out_event;
380 	hook_event_t		ips_ip4_nic_events;
381 	hook_event_t		ips_ip6_physical_in_event;
382 	hook_event_t		ips_ip6_physical_out_event;
383 	hook_event_t		ips_ip6_forwarding_event;
384 	hook_event_t		ips_ip6_loopback_in_event;
385 	hook_event_t		ips_ip6_loopback_out_event;
386 	hook_event_t		ips_ip6_nic_events;
387 
388 	hook_event_token_t	ips_ipv4firewall_physical_in;
389 	hook_event_token_t	ips_ipv4firewall_physical_out;
390 	hook_event_token_t	ips_ipv4firewall_forwarding;
391 	hook_event_token_t	ips_ipv4firewall_loopback_in;
392 	hook_event_token_t	ips_ipv4firewall_loopback_out;
393 	hook_event_token_t	ips_ipv4nicevents;
394 	hook_event_token_t	ips_ipv6firewall_physical_in;
395 	hook_event_token_t	ips_ipv6firewall_physical_out;
396 	hook_event_token_t	ips_ipv6firewall_forwarding;
397 	hook_event_token_t	ips_ipv6firewall_loopback_in;
398 	hook_event_token_t	ips_ipv6firewall_loopback_out;
399 	hook_event_token_t	ips_ipv6nicevents;
400 
401 	net_handle_t		ips_ipv4_net_data;
402 	net_handle_t		ips_ipv6_net_data;
403 
404 	boolean_t		ips_ipobs_enabled;
405 	list_t			ips_ipobs_cb_list;
406 	kmutex_t		ips_ipobs_cb_lock;
407 	uint_t			ips_ipobs_cb_nwalkers;
408 	kcondvar_t		ips_ipobs_cb_cv;
409 
410 	struct __ldi_ident	*ips_ldi_ident;
411 
412 /* ipmp.c */
413 	krwlock_t		ips_ipmp_lock;
414 	mod_hash_t		*ips_ipmp_grp_hash;
415 
416 /* igmp.c */
417 	/* multicast restart timers thread logic */
418 	kmutex_t		ips_mrt_lock;
419 	uint_t			ips_mrt_flags;
420 	kcondvar_t		ips_mrt_cv;
421 	kcondvar_t		ips_mrt_done_cv;
422 	kthread_t		*ips_mrt_thread;
423 };
424 typedef struct ip_stack ip_stack_t;
425 
426 /* Finding an ip_stack_t: from a conn's queue, from a queue whose q_ptr is an ill_t, or from a phyint (via its ipsq) */
427 #define	CONNQ_TO_IPST(_q)	(Q_TO_CONN(_q)->conn_netstack->netstack_ip)
428 #define	ILLQ_TO_IPST(_q)	(((ill_t *)(_q)->q_ptr)->ill_ipst)
429 #define	PHYINT_TO_IPST(phyi)	((phyi)->phyint_ipsq->ipsq_ipst)
430 
431 #else /* _KERNEL */
432 typedef int ip_stack_t;	/* without _KERNEL only this int typedef is declared, not struct ip_stack */
433 #endif /* _KERNEL */
434 
435 #ifdef	__cplusplus
436 }
437 #endif
438 
439 #endif	/* _INET_IP_STACK_H */
440