xref: /freebsd/sys/sys/sockbuf.h (revision d8773fdcbfa363da3e14ec3661b565edbfce03e0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 #ifndef _SYS_SOCKBUF_H_
32 #define _SYS_SOCKBUF_H_
33 
/*
 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 *
 * Every live flag occupies its own bit so that flags can be OR-ed together
 * and tested with a mask.  Entries marked "was" record bits that previously
 * carried the named flag.
 *
 * NOTE(review): sb_flags is declared `short' in struct sockbuf, so
 * SB_TLS_RX_RESYNC (0x8000) lands on its sign bit.  Test flags with
 * `(sb_flags & FLAG) != 0' and never with an ordering comparison.
 */
#define	SB_TLS_RX	0x01		/* using KTLS on RX */
#define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
#define	SB_WAIT		0x04		/* someone is waiting for data/space */
#define	SB_SEL		0x08		/* someone is selecting */
#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
#define	SB_UPCALL	0x20		/* someone wants an upcall */
/* was	SB_NOINTR	0x40		*/
#define	SB_AIO		0x80		/* AIO operations queued */
#define	SB_KNOTE	0x100		/* kernel note attached */
#define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
#define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
#define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
/* was	SB_STOP		0x1000		*/
#define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
#define	SB_SPLICED	0x4000		/* socket buffer is spliced;
					   previously used for SB_TLS_IFNET */
#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */

/*
 * SBS_* state bits.
 * NOTE(review): presumably stored in sb_state rather than sb_flags -- confirm.
 */
#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */
58 
59 #if defined(_KERNEL) || defined(_WANT_SOCKET)
60 #include <sys/_lock.h>
61 #include <sys/_mutex.h>
62 #include <sys/_sx.h>
63 #include <sys/_task.h>
64 
#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */

/*
 * Forward declarations.  This header only ever refers to these types
 * through pointers, so their full definitions are not required here.
 */
struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;
74 
75 /*
76  * Socket buffer
77  *
78  * A buffer starts with the fields that are accessed by I/O multiplexing
79  * APIs like select(2), kevent(2) or AIO and thus are shared between different
80  * buffer implementations.  They are protected by the SOCK_RECVBUF_LOCK()
81  * or SOCK_SENDBUF_LOCK() of the owning socket.
82  *
83  * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
84  * methods.
85  *
86  * Protocol specific implementations follow in a union.
87  */
88 struct sockbuf {
89 	struct	selinfo *sb_sel;	/* process selecting read/write */
90 	short	sb_state;		/* socket state on sockbuf */
91 	short	sb_flags;		/* flags, see above */
92 	u_int	sb_acc;			/* available chars in buffer */
93 	u_int	sb_ccc;			/* claimed chars in buffer */
94 	u_int	sb_mbcnt;		/* chars of mbufs used */
95 	u_int	sb_ctl;			/* non-data chars in buffer */
96 	u_int	sb_hiwat;		/* max actual char count */
97 	u_int	sb_lowat;		/* low water mark */
98 	u_int	sb_mbmax;		/* max chars of mbufs to use */
99 	sbintime_t sb_timeo;		/* timeout for read/write */
100 	int	(*sb_upcall)(struct socket *, void *, int);
101 	void	*sb_upcallarg;
102 	TAILQ_HEAD(, kaiocb) sb_aiojobq;	/* pending AIO ops */
103 	struct	task sb_aiotask;		/* AIO task */
104 	union {
105 		/*
106 		 * Classic BSD one-size-fits-all socket buffer, capable of
107 		 * doing streams and datagrams. The stream part is able
108 		 * to perform special features:
109 		 * - not ready data (sendfile)
110 		 * - TLS
111 		 */
112 		struct {
113 			/* compat: sockbuf lock pointer */
114 			struct	mtx *sb_mtx;
115 			/* first and last mbufs in the chain */
116 			struct	mbuf *sb_mb;
117 			struct	mbuf *sb_mbtail;
118 			/* first mbuf of last record in socket buffer */
119 			struct	mbuf *sb_lastrecord;
120 			/* pointer to data to send next (TCP */
121 			struct	mbuf *sb_sndptr;
122 			/* pointer to first not ready buffer */
123 			struct	mbuf *sb_fnrdy;
124 			/* byte offset of ptr into chain, used with sb_sndptr */
125 			u_int	sb_sndptroff;
126 			/* TLS */
127 			u_int	sb_tlscc;	/* TLS chain characters */
128 			u_int	sb_tlsdcc;	/* characters being decrypted */
129 			struct	mbuf *sb_mtls;	/*  TLS mbuf chain */
130 			struct	mbuf *sb_mtlstail; /* last mbuf in TLS chain */
131 			uint64_t sb_tls_seqno;	/* TLS seqno */
132 			/* TLS state, locked by sockbuf and sock I/O mutexes. */
133 			struct	ktls_session *sb_tls_info;
134 		};
135 		/*
136 		 * PF_UNIX/SOCK_STREAM and PF_UNIX/SOCK_SEQPACKET
137 		 * A simple stream buffer with not ready data pointer.
138 		 */
139 		struct {
140 			STAILQ_HEAD(, mbuf)	uxst_mbq;
141 			struct mbuf		*uxst_fnrdy;
142 			struct socket		*uxst_peer;
143 			u_int			uxst_flags;
144 #define	UXST_PEER_AIO	0x1
145 #define	UXST_PEER_SEL	0x2
146 		};
147 		/*
148 		 * PF_UNIX/SOCK_DGRAM
149 		 *
150 		 * Local protocol, thus we should buffer on the receive side
151 		 * only.  However, in one to many configuration we don't want
152 		 * a single receive buffer to be shared.  So we would link
153 		 * send buffers onto receive buffer.  All the fields are locked
154 		 * by the receive buffer lock.
155 		 */
156 		struct {
157 			/*
158 			 * For receive buffer: own queue of this buffer for
159 			 * unconnected sends.  For send buffer: queue lended
160 			 * to the peer receive buffer, to isolate ourselves
161 			 * from other senders.
162 			 */
163 			STAILQ_HEAD(, mbuf)	uxdg_mb;
164 			/* For receive buffer: datagram seen via MSG_PEEK. */
165 			struct mbuf		*uxdg_peeked;
166 			/*
167 			 * For receive buffer: queue of send buffers of
168 			 * connected peers.  For send buffer: linkage on
169 			 * connected peer receive buffer queue.
170 			 */
171 			union {
172 				TAILQ_HEAD(, sockbuf)	uxdg_conns;
173 				TAILQ_ENTRY(sockbuf)	uxdg_clist;
174 			};
175 			/* Counters for this buffer uxdg_mb chain + peeked. */
176 			u_int uxdg_cc;
177 			u_int uxdg_ctl;
178 			u_int uxdg_mbcnt;
179 		};
180 		/*
181 		 * Netlink socket.
182 		 */
183 		struct {
184 			TAILQ_HEAD(, nl_buf)	nl_queue;
185 		};
186 	};
187 };
188 
189 #endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
190 #ifdef _KERNEL
191 
/*
 * 'which' values for KPIs that operate on one buffer of a socket:
 * SO_RCV selects the receive buffer, SO_SND the send buffer.
 */
typedef enum { SO_RCV, SO_SND } sb_which;
194 
/*
 * Per-socket buffer mutex used to protect most fields in the socket buffer.
 * These make use of the mutex pointer embedded in struct sockbuf, which
 * currently just references mutexes in the containing socket.  The
 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
 * these locking macros.
 *
 * Every macro below expands its argument through SOCKBUF_MTX(), i.e. it
 * reads (_sb)->sb_mtx and hands that pointer to the matching mtx_*()
 * primitive.
 */
#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
208 
/*
 * Socket buffer private mbuf(9) flags.
 *
 * These alias the protocol-private mbuf flag bits M_PROTO1 and M_PROTO2;
 * M_NOTAVAIL is the mask covering both of them.
 */
#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
215 
216 void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
217 void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
218 void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
219 void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
220 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
221 	    struct mbuf *m0, struct mbuf *control);
222 int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
223 	    struct mbuf *m0, struct mbuf *control);
224 int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
225 	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
226 void	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
227 	    struct mbuf *control, int flags);
228 void	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
229 	    struct mbuf *control, int flags);
230 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
231 void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
232 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
233 struct mbuf *
234 	sbcreatecontrol(const void *p, u_int size, int type, int level,
235 	    int wait);
236 void	sbdestroy(struct socket *, sb_which);
237 void	sbdrop(struct sockbuf *sb, int len);
238 void	sbdrop_locked(struct sockbuf *sb, int len);
239 struct mbuf *
240 	sbcut_locked(struct sockbuf *sb, int len);
241 void	sbdroprecord(struct sockbuf *sb);
242 void	sbdroprecord_locked(struct sockbuf *sb);
243 void	sbflush(struct sockbuf *sb);
244 void	sbflush_locked(struct sockbuf *sb);
245 void	sbrelease(struct socket *, sb_which);
246 void	sbrelease_locked(struct socket *, sb_which);
247 int	sbsetopt(struct socket *so, struct sockopt *);
248 bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
249 	    struct thread *td);
250 bool	sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
251 	    u_long buf_max, struct thread *td);
252 void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
253 struct mbuf *
254 	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
255 struct mbuf *
256 	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
257 int	sbwait(struct socket *, sb_which);
258 void	sballoc(struct sockbuf *, struct mbuf *);
259 void	sbfree(struct sockbuf *, struct mbuf *);
260 void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
261 void	sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
262 int	sbready(struct sockbuf *, struct mbuf *, int);
263 
264 /*
265  * Return how much data is available to be taken out of socket
266  * buffer right now.
267  */
268 static inline u_int
sbavail(struct sockbuf * sb)269 sbavail(struct sockbuf *sb)
270 {
271 
272 #if 0
273 	SOCKBUF_LOCK_ASSERT(sb);
274 #endif
275 	return (sb->sb_acc);
276 }
277 
278 /*
279  * Return how much data sits there in the socket buffer
280  * It might be that some data is not yet ready to be read.
281  */
282 static inline u_int
sbused(struct sockbuf * sb)283 sbused(struct sockbuf *sb)
284 {
285 
286 #if 0
287 	SOCKBUF_LOCK_ASSERT(sb);
288 #endif
289 	return (sb->sb_ccc);
290 }
291 
292 /*
293  * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
294  * This is problematical if the fields are unsigned, as the space might
295  * still be negative (ccc > hiwat or mbcnt > mbmax).
296  */
297 static inline long
sbspace(struct sockbuf * sb)298 sbspace(struct sockbuf *sb)
299 {
300 	int bleft, mleft;		/* size should match sockbuf fields */
301 
302 #if 0
303 	SOCKBUF_LOCK_ASSERT(sb);
304 #endif
305 
306 	bleft = sb->sb_hiwat - sb->sb_ccc;
307 	mleft = sb->sb_mbmax - sb->sb_mbcnt;
308 
309 	return ((bleft < mleft) ? bleft : mleft);
310 }
311 
/*
 * If the mbuf chain head (sb_mb) is NULL, also clear the tail and
 * last-record pointers; otherwise leave the buffer untouched.
 *
 * The argument is expanded more than once, so pass an expression without
 * side effects.
 */
#define SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb == NULL) {					\
		(sb)->sb_mbtail = NULL;					\
		(sb)->sb_lastrecord = NULL;				\
	}								\
} while (/*CONSTCOND*/0)
318 
/*
 * Socket buffer consistency checks.
 *
 * With SOCKBUF_DEBUG defined, each macro calls the matching checker with
 * the call site's __FILE__ and __LINE__.  Without it, the macros expand to
 * an empty statement and do not evaluate their argument.
 */
#ifdef SOCKBUF_DEBUG
void	sblastrecordchk(struct sockbuf *, const char *, int);
void	sblastmbufchk(struct sockbuf *, const char *, int);
void	sbcheck(struct sockbuf *, const char *, int);
#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
#else
#define	SBLASTRECORDCHK(sb)	do {} while (0)
#define	SBLASTMBUFCHK(sb)	do {} while (0)
#define	SBCHECK(sb)		do {} while (0)
#endif /* SOCKBUF_DEBUG */
331 
332 #endif /* _KERNEL */
333 
334 #endif /* _SYS_SOCKBUF_H_ */
335