xref: /freebsd/sys/netinet/tcp_log_buf.h (revision 0dc78204a7dcbc51bfab4c701e1335084548bc05)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2016-2020 Netflix, Inc.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #ifndef __tcp_log_buf_h__
29 #define __tcp_log_buf_h__
30 
/* Size, in bytes, of the fixed-width "reason" string buffers (e.g. tlh_reason). */
#define	TCP_LOG_REASON_LEN	32
/* Size, in bytes, of the fixed-width "tag" string buffers (e.g. tlh_tag). */
#define	TCP_LOG_TAG_LEN		32
/*
 * Version number of the log buffer format.
 * NOTE(review): presumably bumped whenever the record layout changes and
 * reported through tlh_version -- confirm against the producer.
 */
#define	TCP_LOG_BUF_VER		(9)
34 
/*
 * struct tcp_log_buffer carries 8-byte uint64_t members, so it needs 8-byte
 * alignment to work properly on every platform.  Any structure that can show
 * up on its own in a data stream (rather than embedded inside another
 * structure) is tagged with this attribute to enforce that alignment.
 */
#define	ALIGN_TCP_LOG		__aligned(8)
43 
/*
 * Snapshot of a socket buffer's state: three 32-bit words, 12 bytes total.
 */
struct tcp_log_sockbuf {
	uint32_t	tls_sb_acc;	/* sb->sb_acc: available chars */
	uint32_t	tls_sb_ccc;	/* sb->sb_ccc: claimed chars */
	uint32_t	tls_sb_spare;	/* spare word */
};
51 
/*
 * Optional verbose trailer that may be appended to an event log record.
 * The explicit padding keeps the structure a multiple of 8 bytes
 * (32 + 32 + 4 + 4 = 72).
 */
#define	TCP_FUNC_LEN	32	/* size of each function-name buffer below */
struct tcp_log_verbose {
	char		tlv_snd_frm[TCP_FUNC_LEN];	/* tcp_output() caller */
	char		tlv_trace_func[TCP_FUNC_LEN];	/* function that generated
							   the trace */
	uint32_t	tlv_trace_line;	/* line number that generated the trace */
	uint8_t		_pad[4];	/* explicit tail padding */
} ALIGN_TCP_LOG;
62 
/*
 * Stack-specific event payload.  Members are laid out widest-first
 * (4 x 64-bit, 14 x 32-bit, 3 x 16-bit, 6 x 8-bit) followed by one trailing
 * 32-bit word, for 104 bytes in total.  Do not reorder or resize members:
 * this is the first member of union tcp_log_stackspecific and the other
 * views overlay it.
 */
struct tcp_log_bbr {
	/* 64-bit members (bytes 0-31) */
	uint64_t	cur_del_rate;
	uint64_t	delRate;
	uint64_t	rttProp;
	uint64_t	bw_inuse;

	/* 32-bit members (bytes 32-87) */
	uint32_t	inflight;
	uint32_t	applimited;
	uint32_t	delivered;
	uint32_t	timeStamp;
	uint32_t	epoch;
	uint32_t	lt_epoch;
	uint32_t	pkts_out;
	uint32_t	flex1;
	uint32_t	flex2;
	uint32_t	flex3;
	uint32_t	flex4;
	uint32_t	flex5;
	uint32_t	flex6;
	uint32_t	lost;

	/* 16-bit members (bytes 88-93) */
	uint16_t	pacing_gain;
	uint16_t	cwnd_gain;
	uint16_t	flex7;

	/* 8-bit members (bytes 94-99) */
	uint8_t		bbr_state;
	uint8_t		bbr_substate;
	uint8_t		inhpts;
	uint8_t		__spare;
	uint8_t		use_lt_bw;
	uint8_t		flex8;

	/* trailing 32-bit member (bytes 100-103) */
	uint32_t	pkt_epoch;
};
93 
/*
 * "Raw" view of the stack-specific payload: arrays whose element sizes and
 * counts shadow the member sequence of struct tcp_log_bbr, so the same
 * 104 bytes can be addressed by index instead of by name.
 */
struct tcp_log_raw {
	uint64_t	u64_flex[4];	/* bytes 0-31 */
	uint32_t	u32_flex[14];	/* bytes 32-87 */
	uint16_t	u16_flex[3];	/* bytes 88-93 */
	uint8_t		u8_flex[6];	/* bytes 94-99 */
	uint32_t	u32_flex2[1];	/* bytes 100-103 */
};
102 
/* Payload viewed purely as thirteen 64-bit words (13 * 8 = 104 bytes). */
struct tcp_log_uint64 {
	uint64_t	u64_flex[13];
};
106 
/*
 * Payload describing one sendfile request: an offset, a length and a set
 * of flags.
 */
struct tcp_log_sendfile {
	uint64_t	offset;
	uint64_t	length;
	uint32_t	flags;
};
112 
113 /*
114  * tcp_log_stackspecific is currently being used as "event specific" log
115  * info by all stacks (i.e. struct tcp_log_bbr is used for generic event
116  * logging). Until this is cleaned up more generically and throughout,
117  * allow events to use the same space in the union.
118  */
119 union tcp_log_stackspecific
120 {
121 	struct tcp_log_bbr u_bbr;
122 	struct tcp_log_sendfile u_sf;
123 	struct tcp_log_raw u_raw;	/* "raw" log access */
124 	struct tcp_log_uint64 u64_raw;	/* just u64's - used by process info */
125 };
126 
127 typedef union tcp_log_stackspecific tcp_log_eventspecific_t;
128 
129 struct tcp_log_buffer
130 {
131 	/* Event basics */
132 	struct timeval	tlb_tv;		/* Timestamp of trace */
133 	uint32_t	tlb_ticks;	/* Timestamp of trace */
134 	uint32_t	tlb_sn;		/* Serial number */
135 	uint8_t		tlb_stackid;	/* Stack ID */
136 	uint8_t		tlb_eventid;	/* Event ID */
137 	uint16_t	tlb_eventflags;	/* Flags for the record */
138 #define	TLB_FLAG_RXBUF		0x0001	/* Includes receive buffer info */
139 #define	TLB_FLAG_TXBUF		0x0002	/* Includes send buffer info */
140 #define	TLB_FLAG_HDR		0x0004	/* Includes a TCP header */
141 #define	TLB_FLAG_VERBOSE	0x0008	/* Includes function/line numbers */
142 #define	TLB_FLAG_STACKINFO	0x0010	/* Includes stack-specific info */
143 	int		tlb_errno;	/* Event error (if any) */
144 
145 	/* Internal session state */
146 	struct tcp_log_sockbuf tlb_rxbuf; /* Receive buffer */
147 	struct tcp_log_sockbuf tlb_txbuf; /* Send buffer */
148 
149 	int		tlb_state;	/* TCPCB t_state */
150 	uint32_t	tlb_starttime;	/* TCPCB t_starttime */
151 	uint32_t	tlb_iss;	/* TCPCB iss */
152 	uint32_t	tlb_flags;	/* TCPCB flags */
153 	uint32_t	tlb_snd_una;	/* TCPCB snd_una */
154 	uint32_t	tlb_snd_max;	/* TCPCB snd_max */
155 	uint32_t	tlb_snd_cwnd;	/* TCPCB snd_cwnd */
156 	uint32_t	tlb_snd_nxt;	/* TCPCB snd_nxt */
157 	uint32_t	tlb_snd_recover;/* TCPCB snd_recover */
158 	uint32_t	tlb_snd_wnd;	/* TCPCB snd_wnd */
159 	uint32_t	tlb_snd_ssthresh; /* TCPCB snd_ssthresh */
160 	uint32_t	tlb_srtt;	/* TCPCB t_srtt */
161 	uint32_t	tlb_rttvar;	/* TCPCB t_rttvar */
162 	uint32_t	tlb_rcv_up;	/* TCPCB rcv_up */
163 	uint32_t	tlb_rcv_adv;	/* TCPCB rcv_adv */
164 	uint32_t	tlb_flags2;	/* TCPCB t_flags2 */
165 	uint32_t	tlb_rcv_nxt;	/* TCPCB rcv_nxt */
166 	uint32_t	tlb_rcv_wnd;	/* TCPCB rcv_wnd */
167 	uint32_t	tlb_dupacks;	/* TCPCB t_dupacks */
168 	int		tlb_segqlen;	/* TCPCB segqlen */
169 	int		tlb_snd_numholes; /* TCPCB snd_numholes */
170 	uint32_t	tlb_flex1;	/* Event specific information */
171 	uint32_t	tlb_flex2;	/* Event specific information */
172 	uint32_t	tlb_fbyte_in;	/* TCPCB first byte in time */
173 	uint32_t	tlb_fbyte_out;	/* TCPCB first byte out time */
174 	uint8_t		tlb_snd_scale:4, /* TCPCB snd_scale */
175 			tlb_rcv_scale:4; /* TCPCB rcv_scale */
176 	uint8_t		_pad[3];	/* Padding */
177 	/* Per-stack info */
178 	union tcp_log_stackspecific tlb_stackinfo;
179 
180 	/* The packet */
181 	uint32_t	tlb_len;	/* The packet's data length */
182 	struct tcphdr	tlb_th;		/* The TCP header */
183 	uint8_t		tlb_opts[TCP_MAXOLEN]; /* The TCP options */
184 
185 	/* Verbose information (optional) */
186 	struct tcp_log_verbose tlb_verbose[0];
187 } ALIGN_TCP_LOG;
188 
/*
 * Event identifiers.  Every value is spelled out so that the numbering
 * cannot shift by accident; new events go immediately before TCP_LOG_END,
 * which must stay last.
 */
enum tcp_log_events {
	TCP_LOG_IN		= 1,	/* Incoming packet */
	TCP_LOG_OUT		= 2,	/* Transmit (without other event) */
	TCP_LOG_RTO		= 3,	/* Retransmit timeout */
	TCP_LOG_SB_WAKE		= 4,	/* Awaken socket buffer */
	TCP_UNUSED_5		= 5,	/* Unused; historical label: detected bad retransmission */
	TCP_LOG_PRR		= 6,	/* Doing PRR */
	TCP_UNUSED_7		= 7,	/* Unused; historical label: detected reorder */
	TCP_LOG_HPTS		= 8,	/* Hpts sending a packet */
	BBR_LOG_BBRUPD		= 9,	/* We updated BBR info */
	BBR_LOG_BBRSND		= 10,	/* We did a slot calculation and sending is done */
	BBR_LOG_ACKCLEAR	= 11,	/* An ack clears all outstanding */
	TCP_UNUSED_12		= 12,	/* Unused; historical label: the tcb had a packet input to it */
	BBR_LOG_TIMERSTAR	= 13,	/* Start a timer */
	BBR_LOG_TIMERCANC	= 14,	/* Cancel a timer */
	BBR_LOG_ENTREC		= 15,	/* Entered recovery */
	BBR_LOG_EXITREC		= 16,	/* Exited recovery */
	BBR_LOG_CWND		= 17,	/* Cwnd change */
	BBR_LOG_BWSAMP		= 18,	/* LT B/W sample has been made */
	BBR_LOG_MSGSIZE		= 19,	/* We received an EMSGSIZE error */
	BBR_LOG_BBRRTT		= 20,	/* BBR RTT is updated */
	BBR_LOG_JUSTRET		= 21,	/* We just returned out of output */
	BBR_LOG_STATE		= 22,	/* A BBR state change occurred */
	BBR_LOG_PKT_EPOCH	= 23,	/* A BBR packet epoch occurred */
	BBR_LOG_PERSIST		= 24,	/* BBR changed to/from a persists */
	TCP_LOG_FLOWEND		= 25,	/* End of a flow */
	BBR_LOG_RTO		= 26,	/* BBR's timeout includes BBR info */
	BBR_LOG_DOSEG_DONE	= 27,	/* hpts do_segment completes */
	BBR_LOG_EXIT_GAIN	= 28,	/* Exit from a gain state -- TODO confirm exact meaning */
	BBR_LOG_THRESH_CALC	= 29,	/* Doing threshold calculation */
	TCP_LOG_MAPCHG		= 30,	/* Map changes to the sendmap */
	TCP_LOG_USERSEND	= 31,	/* User level sends data */
	BBR_RSM_CLEARED		= 32,	/* RSM cleared of ACK flags */
	BBR_LOG_STATE_TARGET	= 33,	/* Log of target at state */
	BBR_LOG_TIME_EPOCH	= 34,	/* A time based epoch occurred */
	BBR_LOG_TO_PROCESS	= 35,	/* A timeout was processed */
	BBR_LOG_BBRTSO		= 36,	/* TSO update */
	BBR_LOG_HPTSDIAG	= 37,	/* Hpts diag insert */
	BBR_LOG_LOWGAIN		= 38,	/* Low gain accounting */
	BBR_LOG_PROGRESS	= 39,	/* Progress timer event */
	TCP_LOG_SOCKET_OPT	= 40,	/* A socket option is set */
	BBR_LOG_TIMERPREP	= 41,	/* A BBR var to debug out TLP issues */
	BBR_LOG_ENOBUF_JMP	= 42,	/* We had an enobuf jump */
	BBR_LOG_HPTSI_CALC	= 43,	/* Calc the hptsi time */
	BBR_LOG_RTT_SHRINKS	= 44,	/* We had a log reduction of rttProp */
	BBR_LOG_BW_RED_EV	= 45,	/* B/W reduction events */
	BBR_LOG_REDUCE		= 46,	/* Old bbr log reduce for 4.1 and earlier */
	TCP_LOG_RTT		= 47,	/* An rtt (in useconds) is being sampled and applied to the srtt algo */
	BBR_LOG_SETTINGS_CHG	= 48,	/* Settings changed for loss response */
	TCP_UNUSED_49		= 49,	/* Unused; historical label: SRTT gaining */
	TCP_LOG_REASS		= 50,	/* Reassembly buffer logging */
	TCP_HDWR_PACE_SIZE	= 51,	/* TCP pacing size set (rl and rack use this) */
	BBR_LOG_HDWR_PACE	= 52,	/* TCP hardware pacing log */
	BBR_LOG_TSTMP_VAL	= 53,	/* Temp debug timestamp validation */
	TCP_LOG_CONNEND		= 54,	/* End of connection */
	TCP_LOG_LRO		= 55,	/* LRO entry */
	TCP_SACK_FILTER_RES	= 56,	/* Results of SACK filter */
	TCP_UNUSED_57		= 57,	/* Unused; historical label: sack attack detection */
	TCP_TIMELY_WORK		= 58,	/* Logs regarding Timely CC tweaks */
	TCP_UNUSED_59		= 59,	/* Unused; historical label: user space event data */
	TCP_LOG_SENDFILE	= 60,	/* sendfile() logging for TCP connections */
	TCP_LOG_REQ_T		= 61,	/* Logging of request tracking */
	TCP_LOG_ACCOUNTING	= 62,	/* Log of TCP accounting data */
	TCP_LOG_FSB		= 63,	/* FSB information */
	RACK_DSACK_HANDLING	= 64,	/* Handling of DSACK in rack for reordering window */
	TCP_HYSTART		= 65,	/* TCP Hystart logging */
	TCP_CHG_QUERY		= 66,	/* Change query during fnc_init() */
	TCP_RACK_LOG_COLLAPSE	= 67,	/* Window collapse by peer */
	TCP_RACK_TP_TRIGGERED	= 68,	/* A rack tracepoint is triggered */
	TCP_HYBRID_PACING_LOG	= 69,	/* Hybrid pacing log */
	TCP_LOG_PRU		= 70,	/* TCP protocol user request */
	TCP_UNUSED_71		= 71,	/* Unused; historical label: old TCP policer detection */
	TCP_PCM_MEASURE		= 72,	/* TCP Path Capacity Measurement */
	TCP_LOG_END		= 73	/* End (keep at end) */
};
264 
/*
 * Logging states.  Values at or below TCP_LOG_STATE_OFF mean "not
 * logging"; positively numbered states represent active logging modes.
 */
enum tcp_log_states {
	/* Log ratio evaluation yielded an OFF result. Only used for tlb_logstate */
	TCP_LOG_STATE_RATIO_OFF	= -2,
	/*
	 * Deactivate and clear tracing. Passed to tcp_log_state_change()
	 * but never stored in any logstate variable
	 */
	TCP_LOG_STATE_CLEAR	= -1,
	/* Pause */
	TCP_LOG_STATE_OFF	= 0,

	/* Keep the trailing events */
	TCP_LOG_STATE_TAIL	= 1,
	/* Keep the leading events */
	TCP_LOG_STATE_HEAD	= 2,
	/* Keep the leading events, and automatically dump them to the device */
	TCP_LOG_STATE_HEAD_AUTO	= 3,
	/* Continually dump the data when full */
	TCP_LOG_STATE_CONTINUAL	= 4,
	/* Keep the trailing events, and automatically dump them when the session ends */
	TCP_LOG_STATE_TAIL_AUTO	= 5,
	/* Log only if the BB point has been configured */
	TCP_LOG_VIA_BBPOINTS	= 6
};
285 
/*
 * Error number to record when it is not known whether the operation
 * succeeded.
 */
#define	ERRNO_UNK	(-1)
288 
289 /*
290  * If the user included dev/tcp_log/tcp_log_dev.h, then include our private
291  * headers. Otherwise, there is no reason to pollute all the files with an
292  * additional include.
293  *
294  * This structure is aligned to an 8-byte boundary to match the alignment
295  * requirements of (struct tcp_log_buffer).
296  */
297 #ifdef __tcp_log_dev_h__
298 struct tcp_log_header {
299 	struct tcp_log_common_header tlh_common;
300 #define	tlh_version	tlh_common.tlch_version
301 #define	tlh_type	tlh_common.tlch_type
302 #define	tlh_length	tlh_common.tlch_length
303 	struct in_endpoints	tlh_ie;
304 	struct timeval		tlh_offset;	/* Uptime -> UTC offset */
305 	char			tlh_id[TCP_LOG_ID_LEN];
306 	char			tlh_reason[TCP_LOG_REASON_LEN];
307 	char			tlh_tag[TCP_LOG_TAG_LEN];
308 	uint8_t		tlh_af;
309 	uint8_t		_pad[7];
310 } ALIGN_TCP_LOG;
311 
312 #ifdef _KERNEL
313 struct tcp_log_dev_log_queue {
314 	struct tcp_log_dev_queue tldl_common;
315 	char			tldl_id[TCP_LOG_ID_LEN];
316 	char			tldl_reason[TCP_LOG_REASON_LEN];
317 	char			tldl_tag[TCP_LOG_TAG_LEN];
318 	struct in_endpoints	tldl_ie;
319 	struct tcp_log_stailq	tldl_entries;
320 	int			tldl_count;
321 	uint8_t			tldl_af;
322 };
323 #endif /* _KERNEL */
324 #endif /* __tcp_log_dev_h__ */
325 
/*
 * BBPOINT identifiers usable together with the TCP_LOG_VIA_BBPOINTS
 * logging state.
 */
#define	TCP_BBPOINT_NONE		0	/* no point selected */
#define	TCP_BBPOINT_REQ_LEVEL_LOGGING	1
332 
333 /*********************/
334 /* TCP Trace points */
335 /*********************/
336 /*
337  * TCP trace points are interesting points within
338  * the TCP code that the author/debugger may want
339  * to have BB logging enabled if we hit that point.
340  * In order to enable a trace point you set the
341  * sysctl var net.inet.tcp.bb.tp.number to
342  * one of the numbers listed below. You also
343  * must make sure net.inet.tcp.bb.tp.bbmode is
344  * non-zero, the default is 4 for continuous tracing.
 * You also set the number of connections for which you want
 * BB logs in net.inet.tcp.bb.tp.count.
347  *
348  * Count will decrement every time BB logging is assigned
349  * to a connection that hit your tracepoint.
350  *
351  * You can enable all trace points by setting the number
352  * to 0xffffffff. You can disable all trace points by
353  * setting number to zero (or count to 0).
354  *
355  * Below are the enumerated list of tracepoints that
356  * have currently been defined in the code. Add more
 * as you add a call to tcp_trace_point(tp, <name>);
 * where <name> is defined below.
359  */
/* Trace point numbers (plain ordinals, not bit flags; 0x9 is not assigned). */
#define	TCP_TP_HWENOBUF		0x00000001	/* Hit enobufs while doing hardware pacing */
#define	TCP_TP_ENOBUF		0x00000002	/* Hit enobufs with software pacing */
#define	TCP_TP_COLLAPSED_WND	0x00000003	/* A peer collapses its rwnd on us */
#define	TCP_TP_COLLAPSED_RXT	0x00000004	/* We actually retransmit a collapsed window rsm */
#define	TCP_TP_REQ_LOG_FAIL	0x00000005	/* Tried to allocate a Request log but had no space */
#define	TCP_TP_RESET_RCV	0x00000006	/* Triggers when we receive a RST */
#define	TCP_TP_POLICER_DET	0x00000007	/* We detect a policer */
#define	TCP_TP_EXCESS_RXT	TCP_TP_POLICER_DET	/* alias of TCP_TP_POLICER_DET */
#define	TCP_TP_SAD_TRIGGERED	0x00000008	/* Sack Attack Detection triggers */
#define	TCP_TP_SAD_SUSPECT	0x0000000a	/* A sack has suspicious information in it */
#define	TCP_TP_PACED_BOTTOM	0x0000000b	/* We have paced at the bottom */
371 
372 #ifdef _KERNEL
373 
/*
 * Trace point configuration consulted by tcp_trace_point():
 *  - tcp_trace_point_config:  trace point number to match, or 0xffffffff
 *    to match every trace point.
 *  - tcp_trace_point_bb_mode: logging state applied to a connection that
 *    hits the trace point; zero disables trace points.
 *  - tcp_trace_point_count:   remaining number of connections that may
 *    still be switched to BB logging; decremented on each hit.
 */
extern uint32_t	tcp_trace_point_config;
extern uint32_t	tcp_trace_point_bb_mode;
extern int32_t	tcp_trace_point_count;
377 
378 /*
379  * Returns true if any sort of BB logging is enabled,
380  * commonly used throughout the codebase.
381  */
382 static inline int
tcp_bblogging_on(struct tcpcb * tp)383 tcp_bblogging_on(struct tcpcb *tp)
384 {
385 	if (tp->_t_logstate <= TCP_LOG_STATE_OFF)
386 		return (0);
387 	if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS)
388 		return (0);
389 	return (1);
390 }
391 
392 /*
393  * Returns true if we match a specific bbpoint when
394  * in TCP_LOG_VIA_BBPOINTS, but also returns true
395  * for all the other logging states.
396  */
397 static inline int
tcp_bblogging_point_on(struct tcpcb * tp,uint8_t bbpoint)398 tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint)
399 {
400 	if (tp->_t_logstate <= TCP_LOG_STATE_OFF)
401 		return (0);
402 	if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) &&
403 	    (tp->_t_logpoint == bbpoint))
404 		return (1);
405 	else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS)
406 		return (0);
407 	return (1);
408 }
409 
410 static inline void
tcp_set_bblog_state(struct tcpcb * tp,uint8_t ls,uint8_t bbpoint)411 tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint)
412 {
413 	if ((ls == TCP_LOG_VIA_BBPOINTS) &&
414 	    (tp->_t_logstate == TCP_LOG_STATE_OFF)){
415 		/*
416 		 * We don't allow a BBPOINTS set to override
417 		 * other types of BB logging set by other means such
418 		 * as the bb_ratio/bb_state URL parameters. In other
419 		 * words BBlogging must be *off* in order to turn on
420 		 * a BBpoint.
421 		 */
422 		tp->_t_logpoint = bbpoint;
423 		tp->_t_logstate = ls;
424 	} else if (ls < TCP_LOG_VIA_BBPOINTS) {
425 		tp->_t_logpoint = TCP_BBPOINT_NONE;
426 		tp->_t_logstate = ls;
427 	}
428 }
429 
430 static inline uint32_t
tcp_get_bblog_state(struct tcpcb * tp)431 tcp_get_bblog_state(struct tcpcb *tp)
432 {
433 	return (tp->_t_logstate);
434 }
435 
/*
 * Trace point hook.  When trace point num is the configured one (or all
 * points are enabled via 0xffffffff), a logging mode is configured, the
 * remaining-connections count is positive and the connection is not
 * already logging, consume one unit of the count and switch the
 * connection to the configured logging mode.  Compiles to an empty
 * function without TCP_BLACKBOX.
 */
static inline void
tcp_trace_point(struct tcpcb *tp, int num)
{
#ifdef TCP_BLACKBOX
	int prev;

	/* Only the configured point, or the wildcard, is of interest. */
	if ((tcp_trace_point_config != num) &&
	    (tcp_trace_point_config != 0xffffffff))
		return;
	if (tcp_trace_point_bb_mode == 0)
		return;
	if (tcp_trace_point_count <= 0)
		return;
	if (tcp_bblogging_on(tp) != 0)
		return;

	/* prev receives the result of the atomic add of -1 to the count. */
	prev = atomic_fetchadd_int(&tcp_trace_point_count, -1);
	if (prev > 0) {
		tcp_set_bblog_state(tp, tcp_trace_point_bb_mode, TCP_BBPOINT_NONE);
	} else {
		/* Lost a race; make sure the count is zero now. */
		tcp_trace_point_count = 0;
	}
#endif
}
456 
/* Default log buffer limits: one for a single session, one global. */
#define	TCP_LOG_BUF_DEFAULT_SESSION_LIMIT	5000
#define	TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT	5000000
459 
/*
 * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always
 * tries to record verbose information: the connection's t_output_caller
 * plus the function name and line number of the call site are passed on
 * to tcp_log_event().  Nothing is logged unless tcp_bblogging_on(tp) is
 * true.
 *
 * tv is handed to tcp_log_event() unchanged (TCP_LOG_EVENT passes NULL).
 *
 * The tp argument is parenthesized before member access so that any
 * pointer-valued expression (e.g. "&cb" or "base + i") expands correctly.
 */
#define	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
	do {								\
		if (tcp_bblogging_on(tp))				\
			tcp_log_event(tp, th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    (tp)->t_output_caller, __func__, __LINE__,	\
			    tv);					\
	} while (0)
471 
472 /*
473  * TCP_LOG_EVENT: This is a macro so we can capture function/line
474  * information when needed. You can use the macro when you are not
475  * doing a lot of prep in the stack specific information i.e. you
476  * don't add extras (stackinfo). If you are adding extras which
477  * means filling out a stack variable instead use the tcp_log_event()
478  * function but enclose the call to the log (and all the setup) in a
479  * if (tcp_bblogging_on(tp)) {
480  *   ... setup and logging call ...
481  * }
482  *
 * Always use the tcp_bblogging_on() inline since sometimes the definitions
 * do change.
485  *
486  * BBlogging also supports the concept of a BBpoint. The idea behind this
487  * is that when you set a specific BBpoint on and turn the logging into
488  * the BBpoint mode (TCP_LOG_VIA_BBPOINTS) you will be defining very very
489  * few of these points to come out. The point is specific to a code you
490  * want tied to that one BB logging. This allows you to turn on a much broader
491  * scale set of limited logging on more connections without overwhelming the
492  * I/O system with too much BBlogs. This of course means you need to be quite
493  * careful on how many BBlogs go with each point, but you can have multiple points
494  * only one of which is active at a time.
495  *
 * To define a point you add it above under the define for TCP_BBPOINT_NONE (which
 * is the default, i.e. no point is defined). You then, for your point, use the
498  * tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint) inline to enclose
499  * your call to tcp_log_event.  Do not use one of the TCP_LOGGING macros else
500  * your point will never come out. You specify your defined point in the bbpoint
501  * side of the inline. An example of this you can find in rack where the
502  * TCP_BBPOINT_REQ_LEVEL_LOGGING is used. There a specific set of logs are generated
503  * for each request that tcp is tracking.
504  *
505  * When turning on BB logging use the inline:
506  * tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint)
507  * the ls field is the logging state TCP_LOG_STATE_CONTINUAL etc. The
508  * bbpoint field is ignored unless the ls field is set to TCP_LOG_VIA_BBPOINTS.
509  * Currently there is only a socket option that turns on the non-BBPOINT
510  * logging.
511  *
512  * Prototype:
 * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
 *     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
 *     union tcp_log_stackspecific *stackinfo, int th_hostorder)
516  *
517  * tp is mandatory and must be write locked.
518  * th is optional; if present, it will appear in the record.
519  * rxbuf and txbuf are optional; if present, they will appear in the record.
520  * eventid is mandatory.
521  * errornum is mandatory (it indicates the success or failure of the
522  *     operation associated with the event).
523  * len indicates the length of the packet. If no packet, use 0.
524  * stackinfo is optional; if present, it will appear in the record.
525  */
526 struct tcpcb;
#ifdef TCP_LOG_FORCEVERBOSE
/*
 * Forced-verbose build: every TCP_LOG_EVENT() becomes a verbose event.
 * TCP_LOG_EVENT_VERBOSE() takes one more argument (tv) than
 * TCP_LOG_EVENT(), so the nine caller-supplied arguments are forwarded
 * and NULL is supplied for tv, exactly as the non-forced verbose path
 * below does.
 */
#define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
	TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum,	\
	    len, stackinfo, th_hostorder, NULL)
#else
/*
 * Normal build: take the verbose path when tcp_log_verbose is set;
 * otherwise log without caller/function/line information, and only if
 * tcp_bblogging_on(tp) is true.
 */
#define	TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \
	do {								\
		if (tcp_log_verbose)					\
			TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf,	\
			    eventid, errornum, len, stackinfo,		\
			    th_hostorder, NULL);			\
		else if (tcp_bblogging_on(tp))				\
			tcp_log_event(tp, th, rxbuf, txbuf, eventid,	\
			    errornum, len, stackinfo, th_hostorder,	\
			    NULL, NULL, 0, NULL);			\
	} while (0)
#endif /* TCP_LOG_FORCEVERBOSE */
/*
 * TCP_LOG_EVENTP: non-verbose logging that forwards a caller-supplied tv
 * to tcp_log_event().  Nothing is logged unless tcp_bblogging_on(tp) is
 * true; the value returned by tcp_log_event() is discarded.
 */
#define	TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \
	do {								\
		if (tcp_bblogging_on(tp)) {				\
			(void)tcp_log_event(tp, th, rxbuf, txbuf,	\
			    eventid, errornum, len, stackinfo,		\
			    th_hostorder, NULL, NULL, 0, tv);		\
		}							\
	} while (0)
549 
550 #ifdef TCP_BLACKBOX
551 extern bool tcp_log_verbose;
552 void tcp_log_drain(struct tcpcb *tp);
553 int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force);
554 void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason);
555 struct tcp_log_buffer *tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
556     struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
557     union tcp_log_stackspecific *stackinfo, int th_hostorder,
558     const char *output_caller, const char *func, int line, const struct timeval *tv);
559 size_t tcp_log_get_id(struct tcpcb *tp, char *buf);
560 size_t tcp_log_get_tag(struct tcpcb *tp, char *buf);
561 u_int tcp_log_get_id_cnt(struct tcpcb *tp);
562 int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp);
563 void tcp_log_init(void);
564 int tcp_log_set_id(struct tcpcb *tp, char *id);
565 int tcp_log_set_tag(struct tcpcb *tp, char *tag);
566 int tcp_log_state_change(struct tcpcb *tp, int state);
567 void tcp_log_tcpcbinit(struct tcpcb *tp);
568 void tcp_log_tcpcbfini(struct tcpcb *tp);
569 void tcp_log_flowend(struct tcpcb *tp);
570 void tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes,
571     int flags);
572 int tcp_log_apply_ratio(struct tcpcb *tp, int ratio);
573 #ifdef DDB
574 void db_print_bblog_entries(struct tcp_log_stailq *log_entries, int indent);
575 #endif
576 #else /* !TCP_BLACKBOX */
/* Without TCP_BLACKBOX, verbose logging is a compile-time constant "off". */
#define	tcp_log_verbose	(false)

/*
 * Stub used when TCP_BLACKBOX is not compiled in: accepts the same
 * arguments as the real tcp_log_event(), ignores all of them and always
 * returns NULL.
 */
static inline struct tcp_log_buffer *
tcp_log_event(struct tcpcb *tp, struct tcphdr *th,
    struct sockbuf *rxbuf, struct sockbuf *txbuf,
    uint8_t eventid, int errornum, uint32_t len,
    union tcp_log_stackspecific *stackinfo, int th_hostorder,
    const char *output_caller, const char *func, int line,
    const struct timeval *tv)
{
	return (NULL);
}
589 #endif /* TCP_BLACKBOX */
590 
591 #endif	/* _KERNEL */
592 #endif	/* __tcp_log_buf_h__ */
593