xref: /illumos-gate/usr/src/uts/common/inet/udp_impl.h (revision b1593d50e783f7d66722dde093752b74ffa95176)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #ifndef	_UDP_IMPL_H
27 #define	_UDP_IMPL_H
28 
29 /*
30  * UDP implementation private declarations.  These interfaces are
31  * used to build the IP module and are not meant to be accessed
32  * by any modules except IP itself.  They are undocumented and are
33  * subject to change without notice.
34  */
35 
36 #ifdef	__cplusplus
37 extern "C" {
38 #endif
39 
40 #ifdef _KERNEL
41 
42 #include <sys/int_types.h>
43 #include <sys/netstack.h>
44 
45 #include <netinet/in.h>
46 #include <netinet/ip6.h>
47 
48 #include <inet/common.h>
49 #include <inet/ip.h>
50 #include <inet/optcom.h>
51 
52 #define	UDP_MOD_ID		5607
53 
/*
 * Per-endpoint boolean flags, packed into one 32-bit word.  Every flag is
 * a single bit; the trailing pad field accounts for the bits that are not
 * yet assigned so that the fields add up to exactly 32.
 */
typedef struct udp_bits_s {
	uint32_t udpb_debug : 1;		/* SO_DEBUG "socket" option. */
	uint32_t udpb_dontroute : 1;		/* SO_DONTROUTE "socket" option. */
	uint32_t udpb_broadcast : 1;		/* SO_BROADCAST "socket" option. */
	uint32_t udpb_useloopback : 1;		/* SO_USELOOPBACK "socket" option */

	uint32_t udpb_reuseaddr : 1;		/* SO_REUSEADDR "socket" option. */
	uint32_t udpb_dgram_errind : 1;		/* SO_DGRAM_ERRIND option */
	uint32_t udpb_recvdstaddr : 1;		/* IP_RECVDSTADDR option */
	uint32_t udpb_recvopts : 1;		/* IP_RECVOPTS option */

	uint32_t udpb_unspec_source : 1;	/* IP*_UNSPEC_SRC option */
	uint32_t udpb_ip_recvpktinfo : 1;	/* IPV6_RECVPKTINFO option  */
	uint32_t udpb_ipv6_recvhoplimit : 1;	/* IPV6_RECVHOPLIMIT option */
	uint32_t udpb_ipv6_recvhopopts : 1;	/* IPV6_RECVHOPOPTS option */

	uint32_t udpb_ipv6_recvdstopts : 1;	/* IPV6_RECVDSTOPTS option */
	uint32_t udpb_ipv6_recvrthdr : 1;	/* IPV6_RECVRTHDR option */
	uint32_t udpb_ipv6_recvtclass : 1;	/* IPV6_RECVTCLASS */
	uint32_t udpb_ipv6_recvpathmtu : 1;	/* IPV6_RECVPATHMTU */

	uint32_t udpb_anon_priv_bind : 1;
	uint32_t udpb_exclbind : 1;		/* ``exclusive'' binding */
	uint32_t udpb_recvif : 1;		/* IP_RECVIF option */
	uint32_t udpb_recvslla : 1;		/* IP_RECVSLLA option */

	uint32_t udpb_recvttl : 1;		/* IP_RECVTTL option */
	uint32_t udpb_recvucred : 1;		/* IP_RECVUCRED option */
	uint32_t udpb_old_ipv6_recvdstopts : 1;	/* old form of IPV6_DSTOPTS */
	uint32_t udpb_ipv6_recvrthdrdstopts : 1; /* IPV6_RECVRTHDRDSTOPTS */

	uint32_t udpb_rcvhdr : 1;		/* UDP_RCVHDR option */
	uint32_t udpb_issocket : 1;		/* socket mode */
	uint32_t udpb_direct_sockfs : 1;	/* direct calls to/from sockfs */
	uint32_t udpb_timestamp : 1;		/* SO_TIMESTAMP "socket" option */

	uint32_t udpb_nat_t_endpoint : 1;	/* UDP_NAT_T_ENDPOINT option */
	uint32_t udpb_pad_to_bit_31 : 3;	/* unused; fills out the word */
} udp_bits_t;
96 
/*
 * Shorthand names for the udp_bits_t flags.  Each macro expands to a
 * member access through a field named udp_bits, so it can only be used
 * as a member designator on an object that embeds a udp_bits_t under that
 * name (udp_t does), e.g. udp->udp_debug.  The grouping below mirrors the
 * order of the bit-fields in udp_bits_t.
 */
#define	udp_debug	udp_bits.udpb_debug
#define	udp_dontroute	udp_bits.udpb_dontroute
#define	udp_broadcast	udp_bits.udpb_broadcast
#define	udp_useloopback	udp_bits.udpb_useloopback

#define	udp_reuseaddr		udp_bits.udpb_reuseaddr
#define	udp_dgram_errind	udp_bits.udpb_dgram_errind
#define	udp_recvdstaddr		udp_bits.udpb_recvdstaddr
#define	udp_recvopts		udp_bits.udpb_recvopts

#define	udp_unspec_source	udp_bits.udpb_unspec_source
#define	udp_ip_recvpktinfo	udp_bits.udpb_ip_recvpktinfo
#define	udp_ipv6_recvhoplimit	udp_bits.udpb_ipv6_recvhoplimit
#define	udp_ipv6_recvhopopts	udp_bits.udpb_ipv6_recvhopopts

#define	udp_ipv6_recvdstopts	udp_bits.udpb_ipv6_recvdstopts
#define	udp_ipv6_recvrthdr	udp_bits.udpb_ipv6_recvrthdr
#define	udp_ipv6_recvtclass	udp_bits.udpb_ipv6_recvtclass
#define	udp_ipv6_recvpathmtu	udp_bits.udpb_ipv6_recvpathmtu

#define	udp_anon_priv_bind	udp_bits.udpb_anon_priv_bind
#define	udp_exclbind		udp_bits.udpb_exclbind
#define	udp_recvif		udp_bits.udpb_recvif
#define	udp_recvslla		udp_bits.udpb_recvslla

#define	udp_recvttl		udp_bits.udpb_recvttl
#define	udp_recvucred		udp_bits.udpb_recvucred
#define	udp_old_ipv6_recvdstopts	udp_bits.udpb_old_ipv6_recvdstopts
#define	udp_ipv6_recvrthdrdstopts	udp_bits.udpb_ipv6_recvrthdrdstopts

#define	udp_rcvhdr		udp_bits.udpb_rcvhdr
#define	udp_issocket		udp_bits.udpb_issocket
#define	udp_direct_sockfs	udp_bits.udpb_direct_sockfs
#define	udp_timestamp		udp_bits.udpb_timestamp

#define	udp_nat_t_endpoint	udp_bits.udpb_nat_t_endpoint
133 
/*
 * Bind hash list size and hash function.  The size has to be a power of 2
 * because UDP_BIND_HASH() reduces the port with a mask of (size - 1)
 * instead of a modulo.
 *
 * lport is passed through ntohs() before masking, i.e. it is expected in
 * network byte order.  Both arguments are parenthesized in the expansion
 * so that expression arguments (for example a size of "1 << 9") keep
 * their intended grouping.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport, size) \
	((ntohs((uint16_t)(lport))) & ((size) - 1))
141 
142 /* UDP bind fanout hash structure. */
143 typedef struct udp_fanout_s {
144 	struct udp_s *uf_udp;
145 	kmutex_t uf_lock;
146 #if defined(_LP64) || defined(_I32LPx)
147 	char	uf_pad[48];
148 #else
149 	char	uf_pad[56];
150 #endif
151 } udp_fanout_t;
152 
/*
 * dev_q is the write side queue of the entity below IP.
 *
 * When dev_q has no q_next and its q_first is NULL, the canput() check is
 * skipped and the queue is reported as not blocked; per the original
 * note, that is the "driver directly below IP with nothing queued" case.
 * In every other case (a module below IP, or something already queued)
 * the answer comes from canput().
 *
 * Note that dev_q is evaluated more than once.
 */
#define	DEV_Q_FLOW_BLOCKED(dev_q)					\
	(!((dev_q)->q_next == NULL && (dev_q)->q_first == NULL) &&	\
	!canput(dev_q))
163 
/*
 * Kstats.
 *
 * Named counters for UDP.  An instance is embedded in struct udp_stack as
 * us_statistics, and the UDP_STAT()/UDP_STAT_UPDATE() macros update a
 * member's value.ui64.  The last two members exist only in DEBUG builds
 * (presumably the ones meant to be bumped through UDP_DBGSTAT(), which is
 * a no-op otherwise).
 */
typedef struct udp_stat {			/* Class "net" kstats */
	kstat_named_t	udp_ip_send;
	kstat_named_t	udp_ip_ire_send;
	kstat_named_t	udp_ire_null;
	kstat_named_t	udp_drain;
	kstat_named_t	udp_sock_fallback;
	kstat_named_t	udp_rrw_busy;
	kstat_named_t	udp_rrw_msgcnt;
	kstat_named_t	udp_out_sw_cksum;
	kstat_named_t	udp_out_sw_cksum_bytes;
	kstat_named_t	udp_out_opt;
	kstat_named_t	udp_out_err_notconn;
	kstat_named_t	udp_out_err_output;
	kstat_named_t	udp_out_err_tudr;
	kstat_named_t	udp_in_pktinfo;
	kstat_named_t	udp_in_recvdstaddr;
	kstat_named_t	udp_in_recvopts;
	kstat_named_t	udp_in_recvif;
	kstat_named_t	udp_in_recvslla;
	kstat_named_t	udp_in_recvucred;
	kstat_named_t	udp_in_recvttl;
	kstat_named_t	udp_in_recvhopopts;
	kstat_named_t	udp_in_recvhoplimit;
	kstat_named_t	udp_in_recvdstopts;
	kstat_named_t	udp_in_recvrtdstopts;
	kstat_named_t	udp_in_recvrthdr;
	kstat_named_t	udp_in_recvpktinfo;
	kstat_named_t	udp_in_recvtclass;
	kstat_named_t	udp_in_timestamp;
	kstat_named_t	udp_ip_rcvpktinfo;
	kstat_named_t	udp_cookie_coll;
#ifdef DEBUG
	/* Only present when built with DEBUG. */
	kstat_named_t	udp_data_conn;
	kstat_named_t	udp_data_notconn;
#endif

} udp_stat_t;
202 
/*
 * Named Dispatch Parameter Management Structure.
 *
 * One entry of the table that struct udp_stack refers to through
 * us_param_arr; the us_* accessor macros in this header read
 * udp_param_value of fixed entries in that table.
 */
typedef struct udpparam_s {
	uint32_t udp_param_min;		/* presumably lowest settable value */
	uint32_t udp_param_max;		/* presumably highest settable value */
	uint32_t udp_param_value;	/* current value of the parameter */
	char	*udp_param_name;	/* parameter name */
} udpparam_t;
210 
/* Number of slots in us_epriv_ports[] of struct udp_stack. */
#define	UDP_NUM_EPRIV_PORTS	64

/*
 * UDP stack instances
 */
struct udp_stack {
	netstack_t	*us_netstack;	/* Common netstack */

	uint_t		us_bind_fanout_size;
	udp_fanout_t	*us_bind_fanout;

	int		us_num_epriv_ports;
	in_port_t	us_epriv_ports[UDP_NUM_EPRIV_PORTS];

	/* Hint not protected by any lock */
	in_port_t	us_next_port_to_try;

	IDP		us_nd;	/* Points to table of UDP ND variables. */
	udpparam_t	*us_param_arr; 	/* ndd variable table */

	kstat_t		*us_mibkp;	/* kstats exporting mib data */
	kstat_t		*us_kstat;
	udp_stat_t	us_statistics;

	mib2_udp_t	us_udp_mib;	/* SNMP fixed size info */

/*
 * The smallest anonymous port in the privileged port range in which UDP
 * looks for a free port.  Used by the UDP_ANONPRIVBIND option.
 */
	in_port_t	us_min_anonpriv_port;

	ldi_ident_t	us_ldi_ident;
};

typedef struct udp_stack udp_stack_t;
247 
/*
 * Internal udp control structure, one per open stream.
 *
 * The locking annotations below are carried over from the original
 * comments; each "Following protected by ..." note applies to the fields
 * up to the next such note.
 */
typedef	struct udp_s {
	krwlock_t	udp_rwlock;	/* Protects most of udp_t */
	t_scalar_t	udp_pending_op;	/* The current TPI operation */
	/*
	 * Following fields up to udp_ipversion protected by conn_lock,
	 * and the fanout lock, i.e. uf_lock.  Need both locks to change the
	 * field; either lock is sufficient for reading the field.
	 */
	uint32_t	udp_state;	/* TPI state */
	in_port_t	udp_port;	/* Port bound to this stream */
	in_port_t	udp_dstport;	/* Connected port */
	in6_addr_t	udp_v6src;	/* Source address of this stream */
	in6_addr_t	udp_bound_v6src; /* Explicitly bound address */
	in6_addr_t	udp_v6dst;	/* Connected destination */
	/*
	 * IP format that packets transmitted from this struct should use.
	 * Value can be IP4_VERSION or IPV6_VERSION.
	 */
	ushort_t	udp_ipversion;

	/* Written to only once at the time of opening the endpoint */
	sa_family_t	udp_family;	/* Family from socket() call */

	/* Following protected by udp_rwlock */
	uint32_t	udp_flowinfo;	/* Connected flow id and tclass */
	uint32_t	udp_max_hdr_len; /* For write offset in stream head */
	uint32_t	udp_ip_snd_options_len; /* Len of IPv4 options */
	uchar_t		*udp_ip_snd_options;    /* Ptr to IPv4 options */
	uint32_t	udp_ip_rcv_options_len; /* Len of IPv4 options recvd */
	uchar_t		*udp_ip_rcv_options;    /* Ptr to IPv4 options recvd */
	uchar_t		udp_multicast_ttl;	/* IP*_MULTICAST_TTL/HOPS */
	ipaddr_t	udp_multicast_if_addr;  /* IP_MULTICAST_IF option */
	uint_t		udp_multicast_if_index;	/* IPV6_MULTICAST_IF option */
	int		udp_bound_if;		/* IP*_BOUND_IF option */

	/* Written to only once at the time of opening the endpoint */
	conn_t		*udp_connp;

	/* Following protected by udp_rwlock */
	udp_bits_t	udp_bits;		/* Bit fields defined above */
	uint8_t		udp_type_of_service;	/* IP_TOS option */
	uint8_t		udp_ttl;		/* TTL or hoplimit */
	ip6_pkt_t	udp_sticky_ipp;		/* Sticky options */
	uint8_t		*udp_sticky_hdrs;	/* Prebuilt IPv6 hdrs */
	uint_t		udp_sticky_hdrs_len;	/* Incl. ip6h and any ip6i */

	/* Following 2 fields protected by the uf_lock */
	struct udp_s	*udp_bind_hash; /* Bind hash chain */
	struct udp_s	**udp_ptpbhn; /* Pointer to previous bind hash next. */

	/*
	 * NOTE(review): the comments in this region name three different
	 * locks around the receive list (udp_drain_lock "for udp_rcv_list",
	 * udp_rwlock for the list head/tail, and udp_recv_lock).  Confirm
	 * against the implementation which one actually guards
	 * udp_rcv_list_head/tail and the counters before relying on them.
	 */
	kmutex_t	udp_drain_lock;		/* lock for udp_rcv_list */
	/* Protected by udp_drain_lock */
	boolean_t	udp_drain_qfull;	/* drain queue is full */

	/* Following protected by udp_rwlock */
	mblk_t		*udp_rcv_list_head;	/* b_next chain of mblks */
	mblk_t		*udp_rcv_list_tail;	/* last mblk in chain */
	kmutex_t	udp_recv_lock;		/* recv lock */
	uint_t		udp_rcv_cnt;		/* total data in rcv_list */
	uint_t		udp_rcv_msgcnt;		/* total msgs in rcv_list */
	size_t		udp_rcv_disply_hiwat;	/* user's view of rcvbuf */
	size_t		udp_rcv_hiwat;		/* receive high watermark */
	size_t		udp_rcv_lowat;		/* receive low watermark */
	size_t		udp_xmit_hiwat;		/* Send buffer high watermark */
	size_t		udp_xmit_lowat;		/* Send buffer low watermark */
	uint_t		udp_label_len;		/* length of security label */
	uint_t		udp_label_len_v6;	/* len of v6 security label */
	in6_addr_t 	udp_v6lastdst;		/* most recent destination */
	in_port_t	udp_lastdstport;	/* most recent dest port */

	uint64_t	udp_open_time;	/* time when this was opened */
	pid_t		udp_open_pid;	/* process id when this was opened */
	udp_stack_t	*udp_us;		/* Stack instance for zone */
	int		udp_delayed_error;
	mblk_t		*udp_fallback_queue_head;
	mblk_t		*udp_fallback_queue_tail;
	struct sockaddr_storage	udp_delayed_addr;
} udp_t;
327 
/*
 * UDP protocol header, aligned form: source port, destination port,
 * length and checksum, in that order.
 */
typedef	struct udpahdr_s {
	in_port_t	uha_src_port;		/* Source port */
	in_port_t	uha_dst_port;		/* Destination port */
	uint16_t	uha_length;		/* UDP length */
	uint16_t	uha_checksum;		/* UDP checksum */
} udpha_t;
336 
/*
 * Accessors for the tunables held in the table that us_param_arr points
 * to.  Each name expands to a member access, so it is used as a member
 * designator on a udp_stack_t, e.g. us->us_xmit_hiwat.
 *
 * The indices are positional: they have to stay in step with the order of
 * the entries in that table, which is not defined in this header.
 */
#define	us_wroff_extra			us_param_arr[0].udp_param_value
#define	us_ipv4_ttl			us_param_arr[1].udp_param_value
#define	us_ipv6_hoplimit		us_param_arr[2].udp_param_value
#define	us_smallest_nonpriv_port	us_param_arr[3].udp_param_value
#define	us_do_checksum			us_param_arr[4].udp_param_value
#define	us_smallest_anon_port		us_param_arr[5].udp_param_value
#define	us_largest_anon_port		us_param_arr[6].udp_param_value
#define	us_xmit_hiwat			us_param_arr[7].udp_param_value
#define	us_xmit_lowat			us_param_arr[8].udp_param_value
#define	us_recv_hiwat			us_param_arr[9].udp_param_value
#define	us_max_buf			us_param_arr[10].udp_param_value
348 
349 
/*
 * Counter helpers for the named kstats in us_statistics.  "us" is a
 * pointer to the stack instance and "x" is the name of a udp_stat_t
 * member.
 *
 * UDP_STAT() adds one to the counter and UDP_STAT_UPDATE() adds n; both
 * are plain (non-atomic) read-modify-write operations on value.ui64.
 *
 * UDP_DBGSTAT() counts only in DEBUG builds.  In non-DEBUG builds it
 * expands to ((void)0) rather than to nothing, so that it is still a
 * valid expression and "UDP_DBGSTAT(us, x);" never degenerates into a
 * bare ";" (e.g. as the sole body of an if).  Its arguments are not
 * evaluated in that case, which also means x may name a member that only
 * exists under DEBUG.
 */
#define	UDP_STAT(us, x)		((us)->us_statistics.x.value.ui64++)
#define	UDP_STAT_UPDATE(us, x, n)	\
			((us)->us_statistics.x.value.ui64 += (n))
#ifdef DEBUG
#define	UDP_DBGSTAT(us, x)	UDP_STAT(us, x)
#else
#define	UDP_DBGSTAT(us, x)	((void)0)
#endif /* DEBUG */
358 
/*
 * Functions exported by the UDP implementation.  Only the declarations
 * live here; see the definitions for the meaning of each argument.
 */
extern int	udp_opt_default(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
extern int	udp_tpi_opt_get(queue_t *, t_scalar_t, t_scalar_t, uchar_t *);
extern int	udp_tpi_opt_set(queue_t *, uint_t, int, int, uint_t, uchar_t *,
		    uint_t *, uchar_t *, void *, cred_t *, mblk_t *);
extern mblk_t	*udp_snmp_get(queue_t *, mblk_t *);
extern int	udp_snmp_set(queue_t *, t_scalar_t, t_scalar_t, uchar_t *, int);
extern void	udp_close_free(conn_t *);
extern void	udp_quiesce_conn(conn_t *);
extern void	udp_ddi_g_init(void);
extern void	udp_ddi_g_destroy(void);
extern void	udp_g_q_inactive(udp_stack_t *);
extern void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
extern void	udp_wput(queue_t *, mblk_t *);
373 
/*
 * Option database object for UDP.  It describes the set of options to
 * search and is passed to the {sock,tpi}optcom_req() interface routines,
 * which use it for option management and to reach the associated
 * methods.
 */
extern optdb_obj_t	udp_opt_obj;
extern uint_t		udp_max_optsize;
381 
/*
 * Declarations expressed in terms of the sock_* types
 * (sock_lower_handle_t, sock_downcalls_t): the endpoint constructor, the
 * fallback routine and the UDP downcall table.  Presumably these form the
 * interface between sockfs and UDP -- confirm against the definitions.
 */
extern sock_lower_handle_t udp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);
extern int udp_fallback(sock_lower_handle_t, queue_t *, boolean_t,
    so_proto_quiesced_cb_t);

extern sock_downcalls_t sock_udp_downcalls;
388 
389 #endif	/*  _KERNEL */
390 
391 #ifdef	__cplusplus
392 }
393 #endif
394 
395 #endif	/* _UDP_IMPL_H */
396