/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31 #ifndef _SYS_SOCKBUF_H_
32 #define _SYS_SOCKBUF_H_
33
/*
 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 *
 * Each constant occupies a distinct bit; together they cover bits 0-15.
 */
#define	SB_TLS_RX		0x01	/* using KTLS on RX */
#define	SB_TLS_RX_RUNNING	0x02	/* KTLS RX operation running */
#define	SB_WAIT			0x04	/* someone is waiting for data/space */
#define	SB_SEL			0x08	/* someone is selecting */
#define	SB_ASYNC		0x10	/* ASYNC I/O, need signals */
#define	SB_UPCALL		0x20	/* someone wants an upcall */
#define	SB_NOINTR		0x40	/* operations not interruptible */
#define	SB_AIO			0x80	/* AIO operations queued */
#define	SB_KNOTE		0x100	/* kernel note attached */
#define	SB_NOCOALESCE		0x200	/* don't coalesce new data into existing mbufs */
#define	SB_IN_TOE		0x400	/* socket buffer is in the middle of an operation */
#define	SB_AUTOSIZE		0x800	/* automatically size socket buffer */
#define	SB_STOP			0x1000	/* backpressure indicator */
#define	SB_AIO_RUNNING		0x2000	/* AIO operation running */
#define	SB_SPLICED		0x4000	/* socket buffer is spliced;
					   previously used for SB_TLS_IFNET */
#define	SB_TLS_RX_RESYNC	0x8000	/* KTLS RX lost HW sync */
54
#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */
58
59 #if defined(_KERNEL) || defined(_WANT_SOCKET)
60 #include <sys/_lock.h>
61 #include <sys/_mutex.h>
62 #include <sys/_sx.h>
63 #include <sys/_task.h>
64
#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */
66
/* Forward declarations; only pointers to these types are used below. */
struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;
74
75 /*
76 * Socket buffer
77 *
78 * A buffer starts with the fields that are accessed by I/O multiplexing
79 * APIs like select(2), kevent(2) or AIO and thus are shared between different
80 * buffer implementations. They are protected by the SOCK_RECVBUF_LOCK()
81 * or SOCK_SENDBUF_LOCK() of the owning socket.
82 *
83 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
84 * methods.
85 *
86 * Protocol specific implementations follow in a union.
87 */
88 struct sockbuf {
89 struct selinfo *sb_sel; /* process selecting read/write */
90 short sb_state; /* socket state on sockbuf */
91 short sb_flags; /* flags, see above */
92 u_int sb_acc; /* available chars in buffer */
93 u_int sb_ccc; /* claimed chars in buffer */
94 u_int sb_mbcnt; /* chars of mbufs used */
95 u_int sb_ctl; /* non-data chars in buffer */
96 u_int sb_hiwat; /* max actual char count */
97 u_int sb_lowat; /* low water mark */
98 u_int sb_mbmax; /* max chars of mbufs to use */
99 sbintime_t sb_timeo; /* timeout for read/write */
100 int (*sb_upcall)(struct socket *, void *, int);
101 void *sb_upcallarg;
102 TAILQ_HEAD(, kaiocb) sb_aiojobq; /* pending AIO ops */
103 struct task sb_aiotask; /* AIO task */
104 union {
105 /*
106 * Classic BSD one-size-fits-all socket buffer, capable of
107 * doing streams and datagrams. The stream part is able
108 * to perform special features:
109 * - not ready data (sendfile)
110 * - TLS
111 */
112 struct {
113 /* compat: sockbuf lock pointer */
114 struct mtx *sb_mtx;
115 /* first and last mbufs in the chain */
116 struct mbuf *sb_mb;
117 struct mbuf *sb_mbtail;
118 /* first mbuf of last record in socket buffer */
119 struct mbuf *sb_lastrecord;
120 /* pointer to data to send next (TCP */
121 struct mbuf *sb_sndptr;
122 /* pointer to first not ready buffer */
123 struct mbuf *sb_fnrdy;
124 /* byte offset of ptr into chain, used with sb_sndptr */
125 u_int sb_sndptroff;
126 /* TLS */
127 u_int sb_tlscc; /* TLS chain characters */
128 u_int sb_tlsdcc; /* characters being decrypted */
129 struct mbuf *sb_mtls; /* TLS mbuf chain */
130 struct mbuf *sb_mtlstail; /* last mbuf in TLS chain */
131 uint64_t sb_tls_seqno; /* TLS seqno */
132 /* TLS state, locked by sockbuf and sock I/O mutexes. */
133 struct ktls_session *sb_tls_info;
134 };
135 /*
136 * PF_UNIX/SOCK_DGRAM
137 *
138 * Local protocol, thus we should buffer on the receive side
139 * only. However, in one to many configuration we don't want
140 * a single receive buffer to be shared. So we would link
141 * send buffers onto receive buffer. All the fields are locked
142 * by the receive buffer lock.
143 */
144 struct {
145 /*
146 * For receive buffer: own queue of this buffer for
147 * unconnected sends. For send buffer: queue lended
148 * to the peer receive buffer, to isolate ourselves
149 * from other senders.
150 */
151 STAILQ_HEAD(, mbuf) uxdg_mb;
152 /* For receive buffer: datagram seen via MSG_PEEK. */
153 struct mbuf *uxdg_peeked;
154 /*
155 * For receive buffer: queue of send buffers of
156 * connected peers. For send buffer: linkage on
157 * connected peer receive buffer queue.
158 */
159 union {
160 TAILQ_HEAD(, sockbuf) uxdg_conns;
161 TAILQ_ENTRY(sockbuf) uxdg_clist;
162 };
163 /* Counters for this buffer uxdg_mb chain + peeked. */
164 u_int uxdg_cc;
165 u_int uxdg_ctl;
166 u_int uxdg_mbcnt;
167 };
168 /*
169 * Netlink socket.
170 */
171 struct {
172 TAILQ_HEAD(, nl_buf) nl_queue;
173 };
174 };
175 };
176
177 #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */
178 #ifdef _KERNEL
179
/*
 * 'which' values for KPIs that operate on one buffer of a socket:
 * SO_RCV (0) names the receive side, SO_SND (1) the send side.
 */
typedef enum { SO_RCV, SO_SND } sb_which;
182
/*
 * Per-socket buffer mutex used to protect most fields in the socket buffer.
 * These make use of the mutex pointer embedded in struct sockbuf, which
 * currently just references mutexes in the containing socket.  The
 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
 * these locking macros.
 *
 * Every macro below expands through SOCKBUF_MTX(), i.e. it operates on the
 * sb_mtx member of the buffer passed in.
 */
#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
196
/*
 * Socket buffer private mbuf(9) flags.
 *
 * M_NOTAVAIL is the union of the two private flags, so it tests for either.
 */
#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
203
204 void sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
205 void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
206 void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
207 void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
208 int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
209 struct mbuf *m0, struct mbuf *control);
210 int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
211 struct mbuf *m0, struct mbuf *control);
212 int sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
213 const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
214 void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
215 struct mbuf *control, int flags);
216 void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
217 struct mbuf *control, int flags);
218 void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
219 void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
220 void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
221 struct mbuf *
222 sbcreatecontrol(const void *p, u_int size, int type, int level,
223 int wait);
224 void sbdestroy(struct socket *, sb_which);
225 void sbdrop(struct sockbuf *sb, int len);
226 void sbdrop_locked(struct sockbuf *sb, int len);
227 struct mbuf *
228 sbcut_locked(struct sockbuf *sb, int len);
229 void sbdroprecord(struct sockbuf *sb);
230 void sbdroprecord_locked(struct sockbuf *sb);
231 void sbflush(struct sockbuf *sb);
232 void sbflush_locked(struct sockbuf *sb);
233 void sbrelease(struct socket *, sb_which);
234 void sbrelease_locked(struct socket *, sb_which);
235 int sbsetopt(struct socket *so, struct sockopt *);
236 bool sbreserve_locked(struct socket *so, sb_which which, u_long cc,
237 struct thread *td);
238 bool sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
239 u_long buf_max, struct thread *td);
240 void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
241 struct mbuf *
242 sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
243 struct mbuf *
244 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
245 int sbwait(struct socket *, sb_which);
246 void sballoc(struct sockbuf *, struct mbuf *);
247 void sbfree(struct sockbuf *, struct mbuf *);
248 void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
249 void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
250 int sbready(struct sockbuf *, struct mbuf *, int);
251
252 /*
253 * Return how much data is available to be taken out of socket
254 * buffer right now.
255 */
256 static inline u_int
sbavail(struct sockbuf * sb)257 sbavail(struct sockbuf *sb)
258 {
259
260 #if 0
261 SOCKBUF_LOCK_ASSERT(sb);
262 #endif
263 return (sb->sb_acc);
264 }
265
266 /*
267 * Return how much data sits there in the socket buffer
268 * It might be that some data is not yet ready to be read.
269 */
270 static inline u_int
sbused(struct sockbuf * sb)271 sbused(struct sockbuf *sb)
272 {
273
274 #if 0
275 SOCKBUF_LOCK_ASSERT(sb);
276 #endif
277 return (sb->sb_ccc);
278 }
279
280 /*
281 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
282 * This is problematical if the fields are unsigned, as the space might
283 * still be negative (ccc > hiwat or mbcnt > mbmax).
284 */
285 static inline long
sbspace(struct sockbuf * sb)286 sbspace(struct sockbuf *sb)
287 {
288 int bleft, mleft; /* size should match sockbuf fields */
289
290 #if 0
291 SOCKBUF_LOCK_ASSERT(sb);
292 #endif
293
294 if (sb->sb_flags & SB_STOP)
295 return(0);
296
297 bleft = sb->sb_hiwat - sb->sb_ccc;
298 mleft = sb->sb_mbmax - sb->sb_mbcnt;
299
300 return ((bleft < mleft) ? bleft : mleft);
301 }
302
/*
 * If the buffer's mbuf chain is empty (sb_mb == NULL), also reset the
 * sb_mbtail and sb_lastrecord pointers to NULL.  Does nothing otherwise.
 *
 * The argument is evaluated more than once; pass an expression without
 * side effects.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb == NULL) {					\
		(sb)->sb_mbtail = NULL;					\
		(sb)->sb_lastrecord = NULL;				\
	}								\
} while (/*CONSTCOND*/0)
309
/*
 * Consistency-check hooks.  With SOCKBUF_DEBUG defined they call the checker
 * functions with the caller's file and line; otherwise they expand to empty
 * statements and do not evaluate their argument.
 */
#ifdef SOCKBUF_DEBUG
void	sblastrecordchk(struct sockbuf *, const char *, int);
void	sblastmbufchk(struct sockbuf *, const char *, int);
void	sbcheck(struct sockbuf *, const char *, int);
#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
#else
#define	SBLASTRECORDCHK(sb)	do {} while (0)
#define	SBLASTMBUFCHK(sb)	do {} while (0)
#define	SBCHECK(sb)		do {} while (0)
#endif /* SOCKBUF_DEBUG */
322
323 #endif /* _KERNEL */
324
325 #endif /* _SYS_SOCKBUF_H_ */
326