1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2016-2018 Netflix, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 #ifndef __tcp_log_buf_h__ 31 #define __tcp_log_buf_h__ 32 33 #define TCP_LOG_REASON_LEN 32 34 #define TCP_LOG_TAG_LEN 32 35 #define TCP_LOG_BUF_VER (9) 36 37 /* 38 * Because the (struct tcp_log_buffer) includes 8-byte uint64_t's, it requires 39 * 8-byte alignment to work properly on all platforms. Therefore, we will 40 * enforce 8-byte alignment for all the structures that may appear by 41 * themselves (instead of being embedded in another structure) in a data 42 * stream. 43 */ 44 #define ALIGN_TCP_LOG __aligned(8) 45 46 /* Information about the socketbuffer state. */ 47 struct tcp_log_sockbuf 48 { 49 uint32_t tls_sb_acc; /* available chars (sb->sb_acc) */ 50 uint32_t tls_sb_ccc; /* claimed chars (sb->sb_ccc) */ 51 uint32_t tls_sb_spare; /* spare */ 52 }; 53 54 /* Optional, verbose information that may be appended to an event log. */ 55 struct tcp_log_verbose 56 { 57 #define TCP_FUNC_LEN 32 58 char tlv_snd_frm[TCP_FUNC_LEN]; /* tcp_output() caller */ 59 char tlv_trace_func[TCP_FUNC_LEN]; /* Function that 60 generated trace */ 61 uint32_t tlv_trace_line; /* Line number that generated trace */ 62 uint8_t _pad[4]; 63 } ALIGN_TCP_LOG; 64 65 /* Internal RACK state variables. */ 66 struct tcp_log_rack 67 { 68 uint32_t tlr_rack_rtt; /* rc_rack_rtt */ 69 uint8_t tlr_state; /* Internal RACK state */ 70 uint8_t _pad[3]; /* Padding */ 71 }; 72 73 struct tcp_log_bbr { 74 uint64_t cur_del_rate; 75 uint64_t delRate; 76 uint64_t rttProp; 77 uint64_t bw_inuse; 78 uint32_t inflight; 79 uint32_t applimited; 80 uint32_t delivered; 81 uint32_t timeStamp; 82 uint32_t epoch; 83 uint32_t lt_epoch; 84 uint32_t pkts_out; 85 uint32_t flex1; 86 uint32_t flex2; 87 uint32_t flex3; 88 uint32_t flex4; 89 uint32_t flex5; 90 uint32_t flex6; 91 uint32_t lost; 92 uint16_t pacing_gain; 93 uint16_t cwnd_gain; 94 uint16_t flex7; 95 uint8_t bbr_state; 96 uint8_t bbr_substate; 97 uint8_t inhpts; 98 uint8_t __spare; 99 uint8_t use_lt_bw; 100 uint8_t flex8; 101 uint32_t pkt_epoch; 102 }; 103 104 /* Per-stack stack-specific info. */ 105 union tcp_log_stackspecific 106 { 107 struct tcp_log_rack u_rack; 108 struct tcp_log_bbr u_bbr; 109 }; 110 111 struct tcp_log_buffer 112 { 113 /* Event basics */ 114 struct timeval tlb_tv; /* Timestamp of trace */ 115 uint32_t tlb_ticks; /* Timestamp of trace */ 116 uint32_t tlb_sn; /* Serial number */ 117 uint8_t tlb_stackid; /* Stack ID */ 118 uint8_t tlb_eventid; /* Event ID */ 119 uint16_t tlb_eventflags; /* Flags for the record */ 120 #define TLB_FLAG_RXBUF 0x0001 /* Includes receive buffer info */ 121 #define TLB_FLAG_TXBUF 0x0002 /* Includes send buffer info */ 122 #define TLB_FLAG_HDR 0x0004 /* Includes a TCP header */ 123 #define TLB_FLAG_VERBOSE 0x0008 /* Includes function/line numbers */ 124 #define TLB_FLAG_STACKINFO 0x0010 /* Includes stack-specific info */ 125 int tlb_errno; /* Event error (if any) */ 126 127 /* Internal session state */ 128 struct tcp_log_sockbuf tlb_rxbuf; /* Receive buffer */ 129 struct tcp_log_sockbuf tlb_txbuf; /* Send buffer */ 130 131 int tlb_state; /* TCPCB t_state */ 132 uint32_t tlb_starttime; /* TCPCB t_starttime */ 133 uint32_t tlb_iss; /* TCPCB iss */ 134 uint32_t tlb_flags; /* TCPCB flags */ 135 uint32_t tlb_snd_una; /* TCPCB snd_una */ 136 uint32_t tlb_snd_max; /* TCPCB snd_max */ 137 uint32_t tlb_snd_cwnd; /* TCPCB snd_cwnd */ 138 uint32_t tlb_snd_nxt; /* TCPCB snd_nxt */ 139 uint32_t tlb_snd_recover;/* TCPCB snd_recover */ 140 uint32_t tlb_snd_wnd; /* TCPCB snd_wnd */ 141 uint32_t tlb_snd_ssthresh; /* TCPCB snd_ssthresh */ 142 uint32_t tlb_srtt; /* TCPCB t_srtt */ 143 uint32_t tlb_rttvar; /* TCPCB t_rttvar */ 144 uint32_t tlb_rcv_up; /* TCPCB rcv_up */ 145 uint32_t tlb_rcv_adv; /* TCPCB rcv_adv */ 146 uint32_t tlb_flags2; /* TCPCB t_flags2 */ 147 uint32_t tlb_rcv_nxt; /* TCPCB rcv_nxt */ 148 uint32_t tlb_rcv_wnd; /* TCPCB rcv_wnd */ 149 uint32_t tlb_dupacks; /* TCPCB t_dupacks */ 150 int tlb_segqlen; /* TCPCB segqlen */ 151 int tlb_snd_numholes; /* TCPCB snd_numholes */ 152 uint32_t tlb_flex1; /* Event specific information */ 153 uint32_t tlb_flex2; /* Event specific information */ 154 uint32_t tlb_fbyte_in; /* TCPCB first byte in time */ 155 uint32_t tlb_fbyte_out; /* TCPCB first byte out time */ 156 uint8_t tlb_snd_scale:4, /* TCPCB snd_scale */ 157 tlb_rcv_scale:4; /* TCPCB rcv_scale */ 158 uint8_t _pad[3]; /* Padding */ 159 160 /* Per-stack info */ 161 union tcp_log_stackspecific tlb_stackinfo; 162 #define tlb_rack tlb_stackinfo.u_rack 163 164 /* The packet */ 165 uint32_t tlb_len; /* The packet's data length */ 166 struct tcphdr tlb_th; /* The TCP header */ 167 uint8_t tlb_opts[TCP_MAXOLEN]; /* The TCP options */ 168 169 /* Verbose information (optional) */ 170 struct tcp_log_verbose tlb_verbose[0]; 171 } ALIGN_TCP_LOG; 172 173 enum tcp_log_events { 174 TCP_LOG_IN = 1, /* Incoming packet 1 */ 175 TCP_LOG_OUT, /* Transmit (without other event) 2 */ 176 TCP_LOG_RTO, /* Retransmit timeout 3 */ 177 TCP_LOG_SB_WAKE, /* Awaken socket buffer 4 */ 178 TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */ 179 TCP_LOG_PRR, /* Doing PRR 6 */ 180 TCP_LOG_REORDER, /* Detected reorder 7 */ 181 TCP_LOG_HPTS, /* Hpts sending a packet 8 */ 182 BBR_LOG_BBRUPD, /* We updated BBR info 9 */ 183 BBR_LOG_BBRSND, /* We did a slot calculation and sending is done 10 */ 184 BBR_LOG_ACKCLEAR, /* A ack clears all outstanding 11 */ 185 BBR_LOG_INQUEUE, /* The tcb had a packet input to it 12 */ 186 BBR_LOG_TIMERSTAR, /* Start a timer 13 */ 187 BBR_LOG_TIMERCANC, /* Cancel a timer 14 */ 188 BBR_LOG_ENTREC, /* Entered recovery 15 */ 189 BBR_LOG_EXITREC, /* Exited recovery 16 */ 190 BBR_LOG_CWND, /* Cwnd change 17 */ 191 BBR_LOG_BWSAMP, /* LT B/W sample has been made 18 */ 192 BBR_LOG_MSGSIZE, /* We received a EMSGSIZE error 19 */ 193 BBR_LOG_BBRRTT, /* BBR RTT is updated 20 */ 194 BBR_LOG_JUSTRET, /* We just returned out of output 21 */ 195 BBR_LOG_STATE, /* A BBR state change occurred 22 */ 196 BBR_LOG_PKT_EPOCH, /* A BBR packet epoch occurred 23 */ 197 BBR_LOG_PERSIST, /* BBR changed to/from a persists 24 */ 198 TCP_LOG_FLOWEND, /* End of a flow 25 */ 199 BBR_LOG_RTO, /* BBR's timeout includes BBR info 26 */ 200 BBR_LOG_DOSEG_DONE, /* hpts do_segment completes 27 */ 201 BBR_LOG_EXIT_GAIN, /* hpts do_segment completes 28 */ 202 BBR_LOG_THRESH_CALC, /* Doing threshold calculation 29 */ 203 TCP_LOG_MAPCHG, /* Map Changes to the sendmap 30 */ 204 TCP_LOG_USERSEND, /* User level sends data 31 */ 205 BBR_RSM_CLEARED, /* RSM cleared of ACK flags 32 */ 206 BBR_LOG_STATE_TARGET, /* Log of target at state 33 */ 207 BBR_LOG_TIME_EPOCH, /* A timed based Epoch occurred 34 */ 208 BBR_LOG_TO_PROCESS, /* A to was processed 35 */ 209 BBR_LOG_BBRTSO, /* TSO update 36 */ 210 BBR_LOG_HPTSDIAG, /* Hpts diag insert 37 */ 211 BBR_LOG_LOWGAIN, /* Low gain accounting 38 */ 212 BBR_LOG_PROGRESS, /* Progress timer event 39 */ 213 TCP_LOG_SOCKET_OPT, /* A socket option is set 40 */ 214 BBR_LOG_TIMERPREP, /* A BBR var to debug out TLP issues 41 */ 215 BBR_LOG_ENOBUF_JMP, /* We had a enobuf jump 42 */ 216 BBR_LOG_HPTSI_CALC, /* calc the hptsi time 43 */ 217 BBR_LOG_RTT_SHRINKS, /* We had a log reduction of rttProp 44 */ 218 BBR_LOG_BW_RED_EV, /* B/W reduction events 45 */ 219 BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/ 220 TCP_LOG_RTT, /* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */ 221 BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */ 222 BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining -- now not used 49 */ 223 TCP_LOG_REASS, /* Reassembly buffer logging 50 */ 224 TCP_HDWR_PACE_SIZE, /* TCP pacing size set (rl and rack uses this) 51 */ 225 BBR_LOG_HDWR_PACE, /* TCP Hardware pacing log 52 */ 226 BBR_LOG_TSTMP_VAL, /* Temp debug timestamp validation 53 */ 227 TCP_LOG_CONNEND, /* End of connection 54 */ 228 TCP_LOG_LRO, /* LRO entry 55 */ 229 TCP_SACK_FILTER_RES, /* Results of SACK Filter 56 */ 230 TCP_SAD_DETECTION, /* Sack Attack Detection 57 */ 231 TCP_TIMELY_WORK, /* Logs regarding Timely CC tweaks 58 */ 232 TCP_LOG_USER_EVENT, /* User space event data 59 */ 233 TCP_LOG_SENDFILE, /* sendfile() logging for TCP connections 60 */ 234 TCP_LOG_HTTP_T, /* logging of http request tracking 61 */ 235 TCP_LOG_ACCOUNTING, /* Log of TCP Accounting data 62 */ 236 TCP_LOG_FSB, /* FSB information 63 */ 237 RACK_DSACK_HANDLING, /* Handling of DSACK in rack for reordering window 64 */ 238 TCP_HYSTART, /* TCP Hystart logging 65 */ 239 TCP_LOG_END /* End (keep at end) 66 */ 240 }; 241 242 enum tcp_log_states { 243 TCP_LOG_STATE_CLEAR = -1, /* Deactivate and clear tracing */ 244 TCP_LOG_STATE_OFF = 0, /* Pause */ 245 TCP_LOG_STATE_TAIL=1, /* Keep the trailing events */ 246 TCP_LOG_STATE_HEAD=2, /* Keep the leading events */ 247 TCP_LOG_STATE_HEAD_AUTO=3, /* Keep the leading events, and 248 automatically dump them to the 249 device */ 250 TCP_LOG_STATE_CONTINUAL=4, /* Continually dump the data when full */ 251 TCP_LOG_STATE_TAIL_AUTO=5, /* Keep the trailing events, and 252 automatically dump them when the 253 session ends */ 254 }; 255 256 /* Use this if we don't know whether the operation succeeded. */ 257 #define ERRNO_UNK (-1) 258 259 /* 260 * If the user included dev/tcp_log/tcp_log_dev.h, then include our private 261 * headers. Otherwise, there is no reason to pollute all the files with an 262 * additional include. 263 * 264 * This structure is aligned to an 8-byte boundary to match the alignment 265 * requirements of (struct tcp_log_buffer). 266 */ 267 #ifdef __tcp_log_dev_h__ 268 struct tcp_log_header { 269 struct tcp_log_common_header tlh_common; 270 #define tlh_version tlh_common.tlch_version 271 #define tlh_type tlh_common.tlch_type 272 #define tlh_length tlh_common.tlch_length 273 struct in_endpoints tlh_ie; 274 struct timeval tlh_offset; /* Uptime -> UTC offset */ 275 char tlh_id[TCP_LOG_ID_LEN]; 276 char tlh_reason[TCP_LOG_REASON_LEN]; 277 char tlh_tag[TCP_LOG_TAG_LEN]; 278 uint8_t tlh_af; 279 uint8_t _pad[7]; 280 } ALIGN_TCP_LOG; 281 282 #ifdef _KERNEL 283 struct tcp_log_dev_log_queue { 284 struct tcp_log_dev_queue tldl_common; 285 char tldl_id[TCP_LOG_ID_LEN]; 286 char tldl_reason[TCP_LOG_REASON_LEN]; 287 char tldl_tag[TCP_LOG_TAG_LEN]; 288 struct in_endpoints tldl_ie; 289 struct tcp_log_stailq tldl_entries; 290 int tldl_count; 291 uint8_t tldl_af; 292 }; 293 #endif /* _KERNEL */ 294 #endif /* __tcp_log_dev_h__ */ 295 296 #ifdef _KERNEL 297 298 #define TCP_LOG_BUF_DEFAULT_SESSION_LIMIT 5000 299 #define TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT 5000000 300 301 /* 302 * TCP_LOG_EVENT_VERBOSE: The same as TCP_LOG_EVENT, except it always 303 * tries to record verbose information. 304 */ 305 #define TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ 306 do { \ 307 if (tp->t_logstate != TCP_LOG_STATE_OFF) \ 308 tcp_log_event_(tp, th, rxbuf, txbuf, eventid, \ 309 errornum, len, stackinfo, th_hostorder, \ 310 tp->t_output_caller, __func__, __LINE__, tv);\ 311 } while (0) 312 313 /* 314 * TCP_LOG_EVENT: This is a macro so we can capture function/line 315 * information when needed. 316 * 317 * Prototype: 318 * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, 319 * struct sockbuf *txbuf, uint8_t eventid, int errornum, 320 * union tcp_log_stackspecific *stackinfo) 321 * 322 * tp is mandatory and must be write locked. 323 * th is optional; if present, it will appear in the record. 324 * rxbuf and txbuf are optional; if present, they will appear in the record. 325 * eventid is mandatory. 326 * errornum is mandatory (it indicates the success or failure of the 327 * operation associated with the event). 328 * len indicates the length of the packet. If no packet, use 0. 329 * stackinfo is optional; if present, it will appear in the record. 330 */ 331 #ifdef TCP_LOG_FORCEVERBOSE 332 #define TCP_LOG_EVENT TCP_LOG_EVENT_VERBOSE 333 #else 334 #define TCP_LOG_EVENT(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder) \ 335 do { \ 336 if (tcp_log_verbose) \ 337 TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, \ 338 eventid, errornum, len, stackinfo, \ 339 th_hostorder, NULL); \ 340 else if (tp->t_logstate != TCP_LOG_STATE_OFF) \ 341 tcp_log_event_(tp, th, rxbuf, txbuf, eventid, \ 342 errornum, len, stackinfo, th_hostorder, \ 343 NULL, NULL, 0, NULL); \ 344 } while (0) 345 #endif /* TCP_LOG_FORCEVERBOSE */ 346 #define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ 347 do { \ 348 if (tp->t_logstate != TCP_LOG_STATE_OFF) \ 349 tcp_log_event_(tp, th, rxbuf, txbuf, eventid, \ 350 errornum, len, stackinfo, th_hostorder, \ 351 NULL, NULL, 0, tv); \ 352 } while (0) 353 354 #ifdef TCP_BLACKBOX 355 extern bool tcp_log_verbose; 356 void tcp_log_drain(struct tcpcb *tp); 357 int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force); 358 void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason); 359 struct tcp_log_buffer *tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, 360 struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, 361 union tcp_log_stackspecific *stackinfo, int th_hostorder, 362 const char *output_caller, const char *func, int line, const struct timeval *tv); 363 size_t tcp_log_get_id(struct tcpcb *tp, char *buf); 364 size_t tcp_log_get_tag(struct tcpcb *tp, char *buf); 365 u_int tcp_log_get_id_cnt(struct tcpcb *tp); 366 int tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp); 367 void tcp_log_init(void); 368 int tcp_log_set_id(struct tcpcb *tp, char *id); 369 int tcp_log_set_tag(struct tcpcb *tp, char *tag); 370 int tcp_log_state_change(struct tcpcb *tp, int state); 371 void tcp_log_tcpcbinit(struct tcpcb *tp); 372 void tcp_log_tcpcbfini(struct tcpcb *tp); 373 void tcp_log_flowend(struct tcpcb *tp); 374 #else /* !TCP_BLACKBOX */ 375 #define tcp_log_verbose (false) 376 377 static inline struct tcp_log_buffer * 378 tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, 379 struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, 380 union tcp_log_stackspecific *stackinfo, int th_hostorder, 381 const char *output_caller, const char *func, int line, 382 const struct timeval *tv) 383 { 384 385 return (NULL); 386 } 387 #endif /* TCP_BLACKBOX */ 388 389 #endif /* _KERNEL */ 390 #endif /* __tcp_log_buf_h__ */ 391