xref: /freebsd/sys/netinet/tcp_log_buf.h (revision 52c2bb75163559a6e2866ad374a7de67a4ea1273)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2016-2018 Netflix, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 #ifndef __tcp_log_buf_h__
31 #define __tcp_log_buf_h__
32 
/* Size, in bytes, of the "reason" character arrays used by this header. */
#define	TCP_LOG_REASON_LEN	32
/* Version number associated with the log buffer format. */
#define	TCP_LOG_BUF_VER		(6)

/*
 * Any structure that can show up on its own in a data stream (as opposed
 * to being embedded inside another structure) is forced to 8-byte
 * alignment.  (struct tcp_log_buffer) carries 8-byte uint64_t members and
 * therefore needs that alignment to work properly on every platform.
 */
#define	ALIGN_TCP_LOG		__aligned(8)
44 
/* Snapshot of a socket buffer's state. */
struct tcp_log_sockbuf
{
	uint32_t	tls_sb_acc;	/* sb->sb_acc: available chars */
	uint32_t	tls_sb_ccc;	/* sb->sb_ccc: claimed chars */
	uint32_t	tls_sb_spare;	/* spare, not otherwise described */
};
52 
/* Length of the function-name buffers in (struct tcp_log_verbose). */
#define	TCP_FUNC_LEN	32

/*
 * Verbose details which may optionally be appended to an event record:
 * who called tcp_output(), and the function/line that produced the trace.
 * The trailing pad brings the members up to a multiple of 8 bytes.
 */
struct tcp_log_verbose
{
	/* tcp_output() caller */
	char		tlv_snd_frm[TCP_FUNC_LEN];
	/* Function that generated the trace */
	char		tlv_trace_func[TCP_FUNC_LEN];
	/* Line number that generated the trace */
	uint32_t	tlv_trace_line;
	uint8_t		_pad[4];
} ALIGN_TCP_LOG;
63 
/* RACK-internal state recorded with an event. */
struct tcp_log_rack
{
	uint32_t	tlr_rack_rtt;	/* rc_rack_rtt */
	uint8_t		tlr_state;	/* Internal RACK state */
	uint8_t		_pad[3];	/* Explicit padding up to 8 bytes */
};
71 
/*
 * BBR state recorded with an event.
 *
 * With natural alignment the members pack without implicit padding:
 * 4 x 64-bit (32 bytes), 14 x 32-bit (56), 3 x 16-bit (6), 6 x 8-bit (6)
 * and one trailing 32-bit member at offset 100, 104 bytes in total.
 * Keep the member order unchanged so that layout is preserved.
 *
 * NOTE(review): the flex1..flex8 members look like event-specific
 * scratch fields; their meaning is not defined in this header.
 */
struct tcp_log_bbr {
	/* 64-bit members */
	uint64_t	cur_del_rate;
	uint64_t	delRate;
	uint64_t	rttProp;
	uint64_t	bw_inuse;

	/* 32-bit members */
	uint32_t	inflight;
	uint32_t	applimited;
	uint32_t	delivered;
	uint32_t	timeStamp;
	uint32_t	epoch;
	uint32_t	lt_epoch;
	uint32_t	pkts_out;
	uint32_t	flex1;
	uint32_t	flex2;
	uint32_t	flex3;
	uint32_t	flex4;
	uint32_t	flex5;
	uint32_t	flex6;
	uint32_t	lost;

	/* 16-bit members */
	uint16_t	pacing_gain;
	uint16_t	cwnd_gain;
	uint16_t	flex7;

	/* 8-bit members */
	uint8_t		bbr_state;
	uint8_t		bbr_substate;
	uint8_t		inhpts;
	uint8_t		ininput;
	uint8_t		use_lt_bw;
	uint8_t		flex8;

	/* Trailing 32-bit member */
	uint32_t	pkt_epoch;
};
102 
103 /* Per-stack stack-specific info. */
104 union tcp_log_stackspecific
105 {
106 	struct tcp_log_rack u_rack;
107 	struct tcp_log_bbr u_bbr;
108 };
109 
110 struct tcp_log_buffer
111 {
112 	/* Event basics */
113 	struct timeval	tlb_tv;		/* Timestamp of trace */
114 	uint32_t	tlb_ticks;	/* Timestamp of trace */
115 	uint32_t	tlb_sn;		/* Serial number */
116 	uint8_t		tlb_stackid;	/* Stack ID */
117 	uint8_t		tlb_eventid;	/* Event ID */
118 	uint16_t	tlb_eventflags;	/* Flags for the record */
119 #define	TLB_FLAG_RXBUF		0x0001	/* Includes receive buffer info */
120 #define	TLB_FLAG_TXBUF		0x0002	/* Includes send buffer info */
121 #define	TLB_FLAG_HDR		0x0004	/* Includes a TCP header */
122 #define	TLB_FLAG_VERBOSE	0x0008	/* Includes function/line numbers */
123 #define	TLB_FLAG_STACKINFO	0x0010	/* Includes stack-specific info */
124 	int		tlb_errno;	/* Event error (if any) */
125 
126 	/* Internal session state */
127 	struct tcp_log_sockbuf tlb_rxbuf; /* Receive buffer */
128 	struct tcp_log_sockbuf tlb_txbuf; /* Send buffer */
129 
130 	int		tlb_state;	/* TCPCB t_state */
131 	uint32_t	tlb_starttime;	/* TCPCB t_starttime */
132 	uint32_t	tlb_iss;		/* TCPCB iss */
133 	uint32_t	tlb_flags;	/* TCPCB flags */
134 	uint32_t	tlb_snd_una;	/* TCPCB snd_una */
135 	uint32_t	tlb_snd_max;	/* TCPCB snd_max */
136 	uint32_t	tlb_snd_cwnd;	/* TCPCB snd_cwnd */
137 	uint32_t	tlb_snd_nxt;	/* TCPCB snd_nxt */
138 	uint32_t	tlb_snd_recover;/* TCPCB snd_recover */
139 	uint32_t	tlb_snd_wnd;	/* TCPCB snd_wnd */
140 	uint32_t	tlb_snd_ssthresh; /* TCPCB snd_ssthresh */
141 	uint32_t	tlb_srtt;	/* TCPCB t_srtt */
142 	uint32_t	tlb_rttvar;	/* TCPCB t_rttvar */
143 	uint32_t	tlb_rcv_up;	/* TCPCB rcv_up */
144 	uint32_t	tlb_rcv_adv;	/* TCPCB rcv_adv */
145 	uint32_t	tlb_rcv_nxt;	/* TCPCB rcv_nxt */
146 	tcp_seq		tlb_sack_newdata; /* TCPCB sack_newdata */
147 	uint32_t       	tlb_rcv_wnd;	/* TCPCB rcv_wnd */
148 	uint32_t	tlb_dupacks;	/* TCPCB t_dupacks */
149 	int		tlb_segqlen;	/* TCPCB segqlen */
150 	int		tlb_snd_numholes; /* TCPCB snd_numholes */
151 	uint32_t 	tlb_flex1; /* Event specific information */
152 	uint32_t 	tlb_flex2; /* Event specific information */
153 	uint8_t		tlb_snd_scale:4, /* TCPCB snd_scale */
154 			tlb_rcv_scale:4; /* TCPCB rcv_scale */
155 	uint8_t		_pad[3];	/* Padding */
156 
157 	/* Per-stack info */
158 	union tcp_log_stackspecific tlb_stackinfo;
159 #define	tlb_rack	tlb_stackinfo.u_rack
160 
161 	/* The packet */
162 	uint32_t	tlb_len;	/* The packet's data length */
163 	struct tcphdr	tlb_th;		/* The TCP header */
164 	uint8_t		tlb_opts[TCP_MAXOLEN]; /* The TCP options */
165 
166 	/* Verbose information (optional) */
167 	struct tcp_log_verbose tlb_verbose[0];
168 } ALIGN_TCP_LOG;
169 
/*
 * Event identifiers.  Every value is written out explicitly; the values
 * run sequentially from TCP_LOG_IN (1) up to TCP_LOG_END, which must
 * remain the last entry.
 */
enum tcp_log_events {
	TCP_LOG_IN		=  1,	/* Incoming packet */
	TCP_LOG_OUT		=  2,	/* Transmit (without other event) */
	TCP_LOG_RTO		=  3,	/* Retransmit timeout */
	TCP_LOG_TF_ACK		=  4,	/* Transmit due to TF_ACK */
	TCP_LOG_BAD_RETRAN	=  5,	/* Detected bad retransmission */
	TCP_LOG_PRR		=  6,	/* Doing PRR */
	TCP_LOG_REORDER		=  7,	/* Detected reorder */
	TCP_LOG_PACER		=  8,	/* Pacer sending a packet */
	BBR_LOG_BBRUPD		=  9,	/* We updated BBR info */
	BBR_LOG_BBRSND		= 10,	/* We did a slot calculation and
					   sending is done */
	BBR_LOG_ACKCLEAR	= 11,	/* An ack clears all outstanding */
	BBR_LOG_INQUEUE		= 12,	/* The tcb had a packet input to it */
	BBR_LOG_TIMERSTAR	= 13,	/* Start a timer */
	BBR_LOG_TIMERCANC	= 14,	/* Cancel a timer */
	BBR_LOG_ENTREC		= 15,	/* Entered recovery */
	BBR_LOG_EXITREC		= 16,	/* Exited recovery */
	BBR_LOG_CWND		= 17,	/* Cwnd change */
	BBR_LOG_BWSAMP		= 18,	/* LT B/W sample has been made */
	BBR_LOG_MSGSIZE		= 19,	/* We received an EMSGSIZE error */
	BBR_LOG_BBRRTT		= 20,	/* BBR RTT is updated */
	BBR_LOG_JUSTRET		= 21,	/* We just returned out of output */
	BBR_LOG_STATE		= 22,	/* A BBR state change occurred */
	BBR_LOG_PKT_EPOCH	= 23,	/* A BBR packet epoch occurred */
	BBR_LOG_PERSIST		= 24,	/* BBR changed to/from a persists */
	TCP_LOG_FLOWEND		= 25,	/* End of a flow */
	BBR_LOG_RTO		= 26,	/* BBR's timeout includes BBR info */
	BBR_LOG_DOSEG_DONE	= 27,	/* pacer do_segment completes */
	BBR_LOG_EXIT_GAIN	= 28,	/* pacer do_segment completes
					   NOTE(review): same text as 27;
					   likely a copy/paste -- confirm */
	BBR_LOG_THRESH_CALC	= 29,	/* Doing threshold calculation */
	BBR_LOG_EXTRACWNDGAIN	= 30,	/* Removed */
	TCP_LOG_USERSEND	= 31,	/* User level sends data */
	UNUSED_32		= 32,	/* Unused */
	UNUSED_33		= 33,	/* Unused */
	BBR_LOG_TIME_EPOCH	= 34,	/* A time based epoch occurred */
	BBR_LOG_TO_PROCESS	= 35,	/* A to was processed */
	BBR_LOG_BBRTSO		= 36,	/* TSO update */
	BBR_LOG_PACERDIAG	= 37,	/* Pacer diag insert */
	BBR_LOG_LOWGAIN		= 38,	/* Low gain accounting */
	BBR_LOG_PROGRESS	= 39,	/* Progress timer event */
	TCP_LOG_SOCKET_OPT	= 40,	/* A socket option is set */
	BBR_LOG_TIMERPREP	= 41,	/* A BBR var to debug out TLP issues */
	BBR_LOG_ENOBUF_JMP	= 42,	/* We had an enobuf jump */
	BBR_LOG_PACING_CALC	= 43,	/* Calc the pacing time */
	BBR_LOG_RTT_SHRINKS	= 44,	/* We had a log reduction of rttProp */
	BBR_LOG_BW_RED_EV	= 45,	/* B/W reduction events */
	BBR_LOG_REDUCE		= 46,	/* Old bbr log reduce for 4.1 and
					   earlier */
	TCP_LOG_RTT		= 47,	/* An rtt (in useconds) is being
					   sampled and applied to the srtt
					   algo */
	BBR_LOG_SETTINGS_CHG	= 48,	/* Settings changed for loss response */
	BBR_LOG_SRTT_GAIN_EVENT	= 49,	/* SRTT gaining */
	TCP_LOG_REASS		= 50,	/* Reassembly buffer logging */
	TCP_LOG_END		= 51	/* End (keep at end) */
};
223 
/* Logging states. */
enum tcp_log_states {
	/* Deactivate and clear tracing */
	TCP_LOG_STATE_CLEAR	= -1,
	/* Pause */
	TCP_LOG_STATE_OFF	= 0,
	/* Keep the trailing events */
	TCP_LOG_STATE_TAIL	= 1,
	/* Keep the leading events */
	TCP_LOG_STATE_HEAD	= 2,
	/* Keep the leading events, and automatically dump them to the device */
	TCP_LOG_STATE_HEAD_AUTO	= 3,
	/* Continually dump the data when full */
	TCP_LOG_STATE_CONTINUAL	= 4,
	/*
	 * Keep the trailing events, and automatically dump them when the
	 * session ends
	 */
	TCP_LOG_STATE_TAIL_AUTO	= 5,
};
237 
/*
 * Error number to log when it is not known whether the operation
 * succeeded.
 */
#define	ERRNO_UNK	(-1)
240 
241 /*
242  * If the user included dev/tcp_log/tcp_log_dev.h, then include our private
243  * headers. Otherwise, there is no reason to pollute all the files with an
244  * additional include.
245  *
246  * This structure is aligned to an 8-byte boundary to match the alignment
247  * requirements of (struct tcp_log_buffer).
248  */
249 #ifdef __tcp_log_dev_h__
250 struct tcp_log_header {
251 	struct tcp_log_common_header tlh_common;
252 #define	tlh_version	tlh_common.tlch_version
253 #define	tlh_type	tlh_common.tlch_type
254 #define	tlh_length	tlh_common.tlch_length
255 	struct in_endpoints	tlh_ie;
256 	struct timeval		tlh_offset;	/* Uptime -> UTC offset */
257 	char			tlh_id[TCP_LOG_ID_LEN];
258 	char			tlh_reason[TCP_LOG_REASON_LEN];
259 	uint8_t		tlh_af;
260 	uint8_t		_pad[7];
261 } ALIGN_TCP_LOG;
262 
263 #ifdef _KERNEL
264 struct tcp_log_dev_log_queue {
265 	struct tcp_log_dev_queue tldl_common;
266 	char			tldl_id[TCP_LOG_ID_LEN];
267 	char			tldl_reason[TCP_LOG_REASON_LEN];
268 	struct in_endpoints	tldl_ie;
269 	struct tcp_log_stailq	tldl_entries;
270 	int			tldl_count;
271 	uint8_t			tldl_af;
272 };
273 #endif /* _KERNEL */
274 #endif /* __tcp_log_dev_h__ */
275 
276 #ifdef _KERNEL
277 
/* Default limits: one per session, one global. */
#define	TCP_LOG_BUF_DEFAULT_SESSION_LIMIT	10000
#define	TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT	1000000
280 
/*
 * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always
 * tries to record verbose information: the call to tcp_log_event_()
 * carries tp->t_output_caller plus the __func__ and __LINE__ of the
 * place where the macro is used.
 *
 * Arguments are those of TCP_LOG_EVENT, followed by:
 * tv, which is handed to tcp_log_event_() unchanged (a
 *     const struct timeval *; TCP_LOG_EVENT passes NULL).
 *
 * Nothing is done when tp->t_logstate is TCP_LOG_STATE_OFF.
 *
 * tp is parenthesized so any pointer-valued expression may be passed;
 * note that it is evaluated more than once, so it must not have side
 * effects.
 */
#define	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
	do {								\
		if ((tp)->t_logstate != TCP_LOG_STATE_OFF)		\
			tcp_log_event_((tp), th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    (tp)->t_output_caller, __func__, __LINE__,	\
			    tv);					\
	} while (0)
292 
/*
 * TCP_LOG_EVENT: This is a macro so we can capture function/line
 * information when needed.
 *
 * Prototype:
 * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
 *     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
 *     union tcp_log_stackspecific *stackinfo, int th_hostorder)
 *
 * tp is mandatory and must be write locked.
 * th is optional; if present, it will appear in the record.
 * rxbuf and txbuf are optional; if present, they will appear in the record.
 * eventid is mandatory.
 * errornum is mandatory (it indicates the success or failure of the
 *     operation associated with the event).
 * len indicates the length of the packet. If no packet, use 0.
 * stackinfo is optional; if present, it will appear in the record.
 * th_hostorder is handed to tcp_log_event_() unchanged (presumably it
 *     tells whether th is already in host byte order -- confirm against
 *     the implementation).
 *
 * No timestamp is supplied by this macro: the tv argument of
 * tcp_log_event_() is always NULL.
 *
 * If TCP_LOG_FORCEVERBOSE is defined, every event goes through
 * TCP_LOG_EVENT_VERBOSE.  Otherwise the verbose path is taken only when
 * tcp_log_verbose is set, and the plain path only while tp->t_logstate
 * is not TCP_LOG_STATE_OFF.
 *
 * tp is parenthesized so any pointer-valued expression may be passed;
 * it may be evaluated more than once, so it must not have side effects.
 */
#ifdef TCP_LOG_FORCEVERBOSE
/*
 * TCP_LOG_EVENT_VERBOSE takes one more argument (tv) than TCP_LOG_EVENT,
 * so forward explicitly rather than aliasing the macro name.
 */
#define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
	TCP_LOG_EVENT_VERBOSE((tp), th, rxbuf, txbuf, eventid,		\
	    errornum, len, stackinfo, th_hostorder, NULL)
#else
#define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
	do {								\
		if (tcp_log_verbose)					\
			TCP_LOG_EVENT_VERBOSE((tp), th, rxbuf, txbuf,	\
			    eventid, errornum, len, stackinfo,		\
			    th_hostorder, NULL);			\
		else if ((tp)->t_logstate != TCP_LOG_STATE_OFF)		\
			tcp_log_event_((tp), th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    NULL, NULL, 0, NULL);			\
	} while (0)
#endif /* TCP_LOG_FORCEVERBOSE */
/*
 * TCP_LOG_EVENTP: Record an event without verbose information (the
 * caller/function arguments of tcp_log_event_() are NULL and the line
 * is 0), but with a caller-supplied tv, which is handed to
 * tcp_log_event_() unchanged.  The remaining arguments are those of
 * TCP_LOG_EVENT.
 *
 * Nothing is done when tp->t_logstate is TCP_LOG_STATE_OFF.
 *
 * tp is parenthesized so any pointer-valued expression may be passed;
 * it is evaluated more than once, so it must not have side effects.
 */
#define	TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
	do {								\
		if ((tp)->t_logstate != TCP_LOG_STATE_OFF)		\
			tcp_log_event_((tp), th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    NULL, NULL, 0, tv);				\
	} while (0)
333 
334 
335 #ifdef TCP_BLACKBOX
336 extern bool tcp_log_verbose;
337 void tcp_log_drain(struct tcpcb *tp);
338 int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force);
339 void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason);
340 struct tcp_log_buffer *tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
341     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
342     union tcp_log_stackspecific *stackinfo, int th_hostorder,
343     const char *output_caller, const char *func, int line, const struct timeval *tv);
344 size_t tcp_log_get_id(struct tcpcb *tp, char *buf);
345 u_int tcp_log_get_id_cnt(struct tcpcb *tp);
346 int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp);
347 void tcp_log_init(void);
348 int tcp_log_set_id(struct tcpcb *tp, char *id);
349 int tcp_log_state_change(struct tcpcb *tp, int state);
350 void tcp_log_tcpcbinit(struct tcpcb *tp);
351 void tcp_log_tcpcbfini(struct tcpcb *tp);
352 void tcp_log_flowend(struct tcpcb *tp);
353 #else /* !TCP_BLACKBOX */
/* Without TCP_BLACKBOX, verbose logging is a compile-time constant "off". */
#define tcp_log_verbose	(false)

/*
 * Stand-in for tcp_log_event_() when TCP_BLACKBOX is not configured, so
 * the TCP_LOG_EVENT* macros still compile.  It records nothing, ignores
 * every argument and always returns NULL.
 */
static inline struct tcp_log_buffer *
tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
    struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
    union tcp_log_stackspecific *stackinfo, int th_hostorder,
    const char *output_caller, const char *func, int line,
    const struct timeval *tv)
{

	/* Every argument is deliberately unused. */
	(void)tp;
	(void)th;
	(void)rxbuf;
	(void)txbuf;
	(void)eventid;
	(void)errornum;
	(void)len;
	(void)stackinfo;
	(void)th_hostorder;
	(void)output_caller;
	(void)func;
	(void)line;
	(void)tv;
	return (NULL);
}
366 #endif /* TCP_BLACKBOX */
367 
368 #endif	/* _KERNEL */
369 #endif	/* __tcp_log_buf_h__ */
370