xref: /illumos-gate/usr/src/uts/common/inet/ip_stack.h (revision d70bcb7258b79267aad36309c42fd499e844458f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright 2019 Joyent, Inc.
29  */
30 
31 #ifndef	_INET_IP_STACK_H
32 #define	_INET_IP_STACK_H
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
38 #include <sys/netstack.h>
39 #include <netinet/igmp_var.h>
40 #include <sys/modhash.h>
41 
42 #ifdef _KERNEL
43 #include <sys/list.h>
44 
45 
46 /*
47  * IP statistics. IP_STAT increments, and IP_STAT_UPDATE adds n to, the ui64 value of the named kstat member x of (ipst)->ips_ip_statistics. The macros use plain ++ and += and take no lock themselves.
48  */
49 #define	IP_STAT(ipst, x)	((ipst)->ips_ip_statistics.x.value.ui64++)
50 #define	IP_STAT_UPDATE(ipst, x, n) \
51 		((ipst)->ips_ip_statistics.x.value.ui64 += (n))
52 
53 typedef struct ip_stat {	/* Named kstat counters; x in IP_STAT()/IP_STAT_UPDATE() names one of these members */
54 	kstat_named_t	ip_udp_fannorm;
55 	kstat_named_t	ip_udp_fanmb;
56 	kstat_named_t	ip_recv_pullup;
57 	kstat_named_t	ip_db_ref;
58 	kstat_named_t	ip_notaligned;
59 	kstat_named_t	ip_multimblk;
60 	kstat_named_t	ip_opt;
61 	kstat_named_t	ipsec_proto_ahesp;
62 	kstat_named_t	ip_conn_flputbq;
63 	kstat_named_t	ip_conn_walk_drain;
64 	kstat_named_t   ip_out_sw_cksum;
65 	kstat_named_t	ip_out_sw_cksum_bytes;
66 	kstat_named_t   ip_in_sw_cksum;
67 	kstat_named_t   ip_ire_reclaim_calls;
68 	kstat_named_t   ip_ire_reclaim_deleted;
69 	kstat_named_t   ip_nce_reclaim_calls;
70 	kstat_named_t   ip_nce_reclaim_deleted;
71 	kstat_named_t   ip_nce_mcast_reclaim_calls;
72 	kstat_named_t   ip_nce_mcast_reclaim_deleted;
73 	kstat_named_t   ip_nce_mcast_reclaim_tqfail;
74 	kstat_named_t   ip_dce_reclaim_calls;
75 	kstat_named_t   ip_dce_reclaim_deleted;
76 	kstat_named_t	ip_tcp_in_full_hw_cksum_err;
77 	kstat_named_t	ip_tcp_in_part_hw_cksum_err;
78 	kstat_named_t	ip_tcp_in_sw_cksum_err;
79 	kstat_named_t	ip_udp_in_full_hw_cksum_err;
80 	kstat_named_t	ip_udp_in_part_hw_cksum_err;
81 	kstat_named_t	ip_udp_in_sw_cksum_err;
82 	kstat_named_t	conn_in_recvdstaddr;
83 	kstat_named_t	conn_in_recvopts;
84 	kstat_named_t	conn_in_recvif;
85 	kstat_named_t	conn_in_recvslla;
86 	kstat_named_t	conn_in_recvucred;
87 	kstat_named_t	conn_in_recvttl;
88 	kstat_named_t	conn_in_recvhopopts;
89 	kstat_named_t	conn_in_recvhoplimit;
90 	kstat_named_t	conn_in_recvdstopts;
91 	kstat_named_t	conn_in_recvrthdrdstopts;
92 	kstat_named_t	conn_in_recvrthdr;
93 	kstat_named_t	conn_in_recvpktinfo;
94 	kstat_named_t	conn_in_recvtclass;
95 	kstat_named_t	conn_in_timestamp;
96 } ip_stat_t;
97 
98 
99 /*
100  * IP6 statistics. IP6_STAT increments, and IP6_STAT_UPDATE adds n to, the ui64 value of the named kstat member x of (ipst)->ips_ip6_statistics. The macros use plain ++ and += and take no lock themselves.
101  */
102 #define	IP6_STAT(ipst, x)	((ipst)->ips_ip6_statistics.x.value.ui64++)
103 #define	IP6_STAT_UPDATE(ipst, x, n)	\
104 	((ipst)->ips_ip6_statistics.x.value.ui64 += (n))
105 
106 typedef struct ip6_stat {	/* Named kstat counters; x in IP6_STAT()/IP6_STAT_UPDATE() names one of these members */
107 	kstat_named_t	ip6_udp_fannorm;
108 	kstat_named_t	ip6_udp_fanmb;
109 	kstat_named_t	ip6_recv_pullup;
110 	kstat_named_t	ip6_db_ref;
111 	kstat_named_t	ip6_notaligned;
112 	kstat_named_t	ip6_multimblk;
113 	kstat_named_t	ipsec_proto_ahesp;
114 	kstat_named_t   ip6_out_sw_cksum;
115 	kstat_named_t	ip6_out_sw_cksum_bytes;
116 	kstat_named_t   ip6_in_sw_cksum;
117 	kstat_named_t	ip6_tcp_in_full_hw_cksum_err;
118 	kstat_named_t	ip6_tcp_in_part_hw_cksum_err;
119 	kstat_named_t	ip6_tcp_in_sw_cksum_err;
120 	kstat_named_t	ip6_udp_in_full_hw_cksum_err;
121 	kstat_named_t	ip6_udp_in_part_hw_cksum_err;
122 	kstat_named_t	ip6_udp_in_sw_cksum_err;
123 } ip6_stat_t;
124 
125 typedef struct ire_stats {	/* IRE life-cycle counters; struct ip_stack keeps one for IPv4 and one for IPv6 */
126 	uint64_t ire_stats_alloced;	/* # of ires alloced */
127 	uint64_t ire_stats_freed;	/* # of ires freed */
128 	uint64_t ire_stats_inserted;	/* # of ires inserted in the bucket */
129 	uint64_t ire_stats_deleted;	/* # of ires deleted from the bucket */
130 } ire_stats_t;
131 
132 #define	TX_FANOUT_SIZE	128	/* must stay a power of two: IDLHASHINDEX masks with (TX_FANOUT_SIZE - 1) */
133 #define	IDLHASHINDEX(X)	\
134 	((((uintptr_t)(X) >> 2) + ((uintptr_t)(X) >> 9)) & (TX_FANOUT_SIZE - 1))	/* hashes X (cast to uintptr_t) to an index in 0 .. TX_FANOUT_SIZE - 1; X is evaluated twice */
135 
136 /* Singly linked (sm_next) list entry associating a local address and zone id with a source id */
137 typedef struct srcid_map {
138 	struct srcid_map	*sm_next;
139 	in6_addr_t		sm_addr;	/* Local address */
140 	uint_t			sm_srcid;	/* source id */
141 	uint_t			sm_refcnt;	/* > 1 ipif with same addr? */
142 	zoneid_t		sm_zoneid;	/* zone id */
143 } srcid_map_t;
144 
145 /*
146  * IP stack instances. Holds the IP module's per-stack state (MIBs, kstats, locks, fanout tables, forwarding tables, hooks); ips_netstack points back at the common netstack, and the *_TO_IPST macros below locate an instance.
147  */
148 struct ip_stack {
149 	netstack_t	*ips_netstack;	/* Common netstack */
150 
151 	uint_t			ips_src_generation;	/* Both IPv4 and IPv6 */
152 
153 	struct mod_prop_info_s	*ips_propinfo_tbl;	/* ip tunables table */
154 
155 	mib2_ipIfStatsEntry_t	ips_ip_mib;	/* SNMP fixed size info */
156 	mib2_icmp_t	ips_icmp_mib;
157 	/*
158 	 * IPv6 mibs when the interface (ill) is not known.
159 	 * When the ill is known the per-interface mib in the ill is used.
160 	 */
161 	mib2_ipIfStatsEntry_t	ips_ip6_mib;
162 	mib2_ipv6IfIcmpEntry_t	ips_icmp6_mib;
163 
164 	struct igmpstat		ips_igmpstat;
165 
166 	kstat_t		*ips_ip_mibkp;	/* kstat exporting ip_mib data */
167 	kstat_t		*ips_icmp_mibkp; /* kstat exporting icmp_mib data */
168 	kstat_t		*ips_ip_kstat;
169 	ip_stat_t	ips_ip_statistics;
170 	kstat_t		*ips_ip6_kstat;
171 	ip6_stat_t	ips_ip6_statistics;
172 
173 /* ip.c */
174 	kmutex_t	ips_igmp_timer_lock;
175 	kmutex_t	ips_mld_timer_lock;
176 	kmutex_t	ips_ip_mi_lock;
177 	kmutex_t	ips_ip_addr_avail_lock;
178 	krwlock_t	ips_ill_g_lock;
179 
180 	krwlock_t	ips_ill_g_usesrc_lock;
181 
182 	/* Taskq dispatcher for capability operations */
183 	kmutex_t	ips_capab_taskq_lock;
184 	kcondvar_t	ips_capab_taskq_cv;
185 	mblk_t		*ips_capab_taskq_head;
186 	mblk_t		*ips_capab_taskq_tail;
187 	kthread_t	*ips_capab_taskq_thread;
188 	boolean_t	ips_capab_taskq_quit;
189 
190 /* ipclassifier.c - keep in ip_stack_t */
191 	/* ipclassifier hash tables */
192 	struct connf_s	*ips_rts_clients;
193 	struct connf_s	*ips_ipcl_conn_fanout;
194 	struct connf_s	*ips_ipcl_bind_fanout;
195 	struct connf_s	*ips_ipcl_proto_fanout_v4;
196 	struct connf_s	*ips_ipcl_proto_fanout_v6;
197 	struct connf_s	*ips_ipcl_udp_fanout;
198 	struct connf_s	*ips_ipcl_raw_fanout;		/* RAW SCTP sockets */
199 	struct connf_s	*ips_ipcl_iptun_fanout;
200 	uint_t		ips_ipcl_conn_fanout_size;
201 	uint_t		ips_ipcl_bind_fanout_size;
202 	uint_t		ips_ipcl_udp_fanout_size;
203 	uint_t		ips_ipcl_raw_fanout_size;
204 	uint_t		ips_ipcl_iptun_fanout_size;
205 	struct connf_s	*ips_ipcl_globalhash_fanout;
206 	int		ips_conn_g_index;
207 
208 /* ip.c */
209 	/* Following protected by ips_igmp_timer_lock */
210 	int		ips_igmp_time_to_next;	/* Time since last timeout */
211 	int		ips_igmp_timer_scheduled_last;
212 	int		ips_igmp_deferred_next;
213 	timeout_id_t	ips_igmp_timeout_id;
214 	boolean_t	ips_igmp_timer_setter_active;
215 	boolean_t	ips_igmp_timer_quiesce;
216 
217 	/* Following protected by ips_mld_timer_lock */
218 	int		ips_mld_time_to_next;	/* Time since last timeout */
219 	int		ips_mld_timer_scheduled_last;
220 	int		ips_mld_deferred_next;
221 	timeout_id_t	ips_mld_timeout_id;
222 	boolean_t	ips_mld_timer_setter_active;
223 	boolean_t	ips_mld_timer_quiesce;
224 
225 	/* Protected by ips_igmp_slowtimeout_lock */
226 	timeout_id_t	ips_igmp_slowtimeout_id;
227 	kmutex_t	ips_igmp_slowtimeout_lock;
228 	boolean_t	ips_igmp_slowtimeout_quiesce;
229 
230 	/* Protected by ips_mld_slowtimeout_lock */
231 	timeout_id_t	ips_mld_slowtimeout_id;
232 	kmutex_t	ips_mld_slowtimeout_lock;
233 	boolean_t	ips_mld_slowtimeout_quiesce;
234 
235 	/* IPv4 forwarding table */
236 	struct radix_node_head *ips_ip_ftable;
237 
238 #define	IPV6_ABITS		128
239 #define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */
240 	struct irb	*ips_ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE];
241 
242 	/*
243 	 * ips_ire_ft_init_lock is used while initializing ip_forwarding_table
244 	 * dynamically in ire_add.
245 	 */
246 	kmutex_t	ips_ire_ft_init_lock;
247 
248 	/*
249 	 * This is the IPv6 counterpart of RADIX_NODE_HEAD_LOCK. It is used
250 	 * to prevent adds and deletes while we are doing a ftable_lookup
251 	 * and extracting the ire_generation.
252 	 */
253 	krwlock_t	ips_ip6_ire_head_lock;
254 
255 	uint32_t	ips_ip6_ftable_hash_size;
256 
257 	ire_stats_t	ips_ire_stats_v4;	/* IPv4 ire statistics */
258 	ire_stats_t	ips_ire_stats_v6;	/* IPv6 ire statistics */
259 
260 	/* Count how many condemned objects for kmem_cache callbacks */
261 	uint32_t	ips_num_ire_condemned;
262 	uint32_t	ips_num_nce_condemned;
263 	uint32_t	ips_num_dce_condemned;
264 
265 	struct ire_s	*ips_ire_reject_v4;	/* For unreachable dests */
266 	struct ire_s	*ips_ire_reject_v6;	/* For unreachable dests */
267 	struct ire_s	*ips_ire_blackhole_v4;	/* For temporary failures */
268 	struct ire_s	*ips_ire_blackhole_v6;	/* For temporary failures */
269 
270 	/* ips_ire_dep_lock protects ire_dep_* relationship between IREs */
271 	krwlock_t	ips_ire_dep_lock;
272 
273 	/* Destination Cache Entries */
274 	struct dce_s	*ips_dce_default;
275 	uint_t		ips_dce_hashsize;
276 	struct dcb_s	*ips_dce_hash_v4;
277 	struct dcb_s	*ips_dce_hash_v6;
278 	uint_t		ips_dce_reclaim_needed;
279 
280 	/* pending binds */
281 	mblk_t		*ips_ip6_asp_pending_ops;
282 	mblk_t		*ips_ip6_asp_pending_ops_tail;
283 
284 	/* Synchronize updates with table usage */
285 	mblk_t		*ips_ip6_asp_pending_update; /* pending table updates */
286 
287 	boolean_t	ips_ip6_asp_uip;	/* table update in progress */
288 	kmutex_t	ips_ip6_asp_lock;	/* protect all the above */
289 	uint32_t	ips_ip6_asp_refcnt;	/* outstanding references */
290 
291 	struct ip6_asp	*ips_ip6_asp_table;
292 	/* The number of policy entries in the table */
293 	uint_t		ips_ip6_asp_table_count;
294 
295 	struct conn_s	*ips_ip_g_mrouter;
296 
297 	/* Time since last icmp_pkt_err */
298 	clock_t		ips_icmp_pkt_err_last;
299 	/* Number of packets sent in burst */
300 	uint_t		ips_icmp_pkt_err_sent;
301 
302 	/* Protected by ips_ip_mi_lock */
303 	void		*ips_ip_g_head;	/* IP Instance Data List Head */
304 	void		*ips_arp_g_head; /* ARP Instance Data List Head */
305 
306 	/* Multirouting stuff */
307 	/* Interval (in ms) between consecutive 'bad MTU' warnings */
308 	hrtime_t	ips_ip_multirt_log_interval;
309 	/* Time since last warning issued. */
310 	hrtime_t	ips_multirt_bad_mtu_last_time;
311 
312 	/*
313 	 * CGTP hooks. Enabling and disabling of hooks is controlled by an
314 	 * IP tunable 'ips_ip_cgtp_filter'.
315 	 */
316 	struct cgtp_filter_ops *ips_ip_cgtp_filter_ops;
317 
318 	struct ipsq_s	*ips_ipsq_g_head;
319 	uint_t		ips_ill_index;	/* Used to assign interface indices */
320 	/* When set search for unused index */
321 	boolean_t	ips_ill_index_wrap;
322 
323 	uint_t		ips_loopback_packets;
324 
325 	/* NDP/NCE structures for IPv4 and IPv6 */
326 	struct ndp_g_s	*ips_ndp4;
327 	struct ndp_g_s	*ips_ndp6;
328 
329 	/* ip_mroute stuff */
330 	kmutex_t	ips_ip_g_mrouter_mutex;
331 
332 	struct mrtstat	*ips_mrtstat;	/* Stats for netstat */
333 	int		ips_saved_ip_forwarding;
334 
335 	/* numvifs is only a hint about the max interface being used. */
336 	ushort_t	ips_numvifs;
337 	kmutex_t	ips_numvifs_mutex;
338 
339 	struct vif	*ips_vifs;
340 	struct mfcb	*ips_mfcs;	/* kernel routing table	*/
341 	struct tbf	*ips_tbfs;
342 	/*
343 	 * One-back cache used to locate a tunnel's vif,
344 	 * given a datagram's src ip address.
345 	 */
346 	ipaddr_t	ips_last_encap_src;
347 	struct vif	*ips_last_encap_vif;
348 	kmutex_t	ips_last_encap_lock;	/* Protects the above */
349 
350 	/*
351 	 * ips_reg_vif_num is protected by ips_numvifs_mutex
352 	 */
353 	/* Whether or not special PIM assert processing is enabled. NOTE(review): this sentence presumably describes ips_pim_assert below rather than ips_reg_vif_num - confirm. */
354 	ushort_t	ips_reg_vif_num;	/* Index to Register vif */
355 	int		ips_pim_assert;
356 
357 	union ill_g_head_u *ips_ill_g_heads;   /* ILL List Head */
358 
359 	kstat_t		*ips_loopback_ksp;
360 
361 	/* Array of conn drain lists */
362 	struct idl_tx_list_s	*ips_idl_tx_list;
363 	uint_t		ips_conn_drain_list_cnt; /* Count of conn_drain_list */
364 
365 	/*
366 	 * ID used to assign next free one.
367 	 * Increases by one. Once it wraps we search for an unused ID.
368 	 */
369 	uint_t		ips_ip_src_id;
370 	boolean_t	ips_srcid_wrapped;
371 
372 	struct srcid_map *ips_srcid_head;
373 	krwlock_t	ips_srcid_lock;
374 
375 	uint64_t	ips_ipif_g_seqid;	/* Used only for sctp_addr.c */
376 	union phyint_list_u *ips_phyint_g_list;	/* start of phyint list */
377 
378 /* ip_netinfo.c */
379 	hook_family_t	ips_ipv4root;
380 	hook_family_t	ips_ipv6root;
381 	hook_family_t	ips_arproot;
382 
383 	net_handle_t		ips_ipv4_net_data;
384 	net_handle_t		ips_ipv6_net_data;
385 	net_handle_t		ips_arp_net_data;
386 
387 	/*
388 	 * Hooks for firewalling
389 	 */
390 	hook_event_t		ips_ip4_physical_in_event;
391 	hook_event_t		ips_ip4_physical_out_event;
392 	hook_event_t		ips_ip4_forwarding_event;
393 	hook_event_t		ips_ip4_loopback_in_event;
394 	hook_event_t		ips_ip4_loopback_out_event;
395 
396 	hook_event_t		ips_ip6_physical_in_event;
397 	hook_event_t		ips_ip6_physical_out_event;
398 	hook_event_t		ips_ip6_forwarding_event;
399 	hook_event_t		ips_ip6_loopback_in_event;
400 	hook_event_t		ips_ip6_loopback_out_event;
401 
402 	hook_event_t		ips_arp_physical_in_event;
403 	hook_event_t		ips_arp_physical_out_event;
404 	hook_event_t		ips_arp_nic_events;
405 
406 	hook_event_token_t	ips_ipv4firewall_physical_in;
407 	hook_event_token_t	ips_ipv4firewall_physical_out;
408 	hook_event_token_t	ips_ipv4firewall_forwarding;
409 	hook_event_token_t	ips_ipv4firewall_loopback_in;
410 	hook_event_token_t	ips_ipv4firewall_loopback_out;
411 
412 	hook_event_token_t	ips_ipv6firewall_physical_in;
413 	hook_event_token_t	ips_ipv6firewall_physical_out;
414 	hook_event_token_t	ips_ipv6firewall_forwarding;
415 	hook_event_token_t	ips_ipv6firewall_loopback_in;
416 	hook_event_token_t	ips_ipv6firewall_loopback_out;
417 
418 	hook_event_t		ips_ip4_nic_events;
419 	hook_event_t		ips_ip6_nic_events;
420 	hook_event_token_t	ips_ipv4nicevents;
421 	hook_event_token_t	ips_ipv6nicevents;
422 
423 	hook_event_token_t	ips_arp_physical_in;
424 	hook_event_token_t	ips_arp_physical_out;
425 	hook_event_token_t	ips_arpnicevents;
426 
427 	net_handle_t		ips_ip4_observe_pr;
428 	net_handle_t		ips_ip6_observe_pr;
429 	hook_event_t		ips_ip4_observe;
430 	hook_event_t		ips_ip6_observe;
431 	hook_event_token_t	ips_ipv4observing;
432 	hook_event_token_t	ips_ipv6observing;
433 
434 	struct __ldi_ident	*ips_ldi_ident;
435 
436 /* ipmp.c */
437 	krwlock_t		ips_ipmp_lock;
438 	mod_hash_t		*ips_ipmp_grp_hash;
439 
440 };
441 typedef struct ip_stack ip_stack_t;
442 
443 /* Finding an ip_stack_t: from a queue whose Q_TO_CONN() conn carries conn_netstack, from a queue whose q_ptr is an ill_t, or from a phyint via its phyint_ipsq */
444 #define	CONNQ_TO_IPST(_q)	(Q_TO_CONN(_q)->conn_netstack->netstack_ip)
445 #define	ILLQ_TO_IPST(_q)	(((ill_t *)(_q)->q_ptr)->ill_ipst)
446 #define	PHYINT_TO_IPST(phyi)	((phyi)->phyint_ipsq->ipsq_ipst)
447 
448 #else /* _KERNEL */
449 typedef int ip_stack_t;	/* non-_KERNEL builds: struct ip_stack is not defined here, so the name is only a placeholder type */
450 #endif /* _KERNEL */
451 
452 #ifdef	__cplusplus
453 }
454 #endif
455 
456 #endif	/* _INET_IP_STACK_H */
457