1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2016-2018 Netflix, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_inet.h" 33 #include <sys/param.h> 34 #include <sys/arb.h> 35 #include <sys/hash.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/qmath.h> 41 #include <sys/queue.h> 42 #include <sys/refcount.h> 43 #include <sys/rwlock.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/tree.h> 48 #include <sys/stats.h> /* Must come after qmath.h and tree.h */ 49 #include <sys/counter.h> 50 #include <dev/tcp_log/tcp_log_dev.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/vnet.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_var.h> 59 #include <netinet/tcp_var.h> 60 #include <netinet/tcp_log_buf.h> 61 #include <netinet/tcp_hpts.h> 62 63 /* Default expiry time */ 64 #define TCP_LOG_EXPIRE_TIME ((sbintime_t)60 * SBT_1S) 65 66 /* Max interval at which to run the expiry timer */ 67 #define TCP_LOG_EXPIRE_INTVL ((sbintime_t)5 * SBT_1S) 68 69 bool tcp_log_verbose; 70 static uma_zone_t tcp_log_id_bucket_zone, tcp_log_id_node_zone, tcp_log_zone; 71 static int tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT; 72 static uint32_t tcp_log_version = TCP_LOG_BUF_VER; 73 RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket); 74 static struct tcp_log_id_tree tcp_log_id_head; 75 static STAILQ_HEAD(, tcp_log_id_node) tcp_log_expireq_head = 76 STAILQ_HEAD_INITIALIZER(tcp_log_expireq_head); 77 static struct mtx tcp_log_expireq_mtx; 78 static struct callout tcp_log_expireq_callout; 79 static u_long tcp_log_auto_ratio = 0; 80 static volatile u_long tcp_log_auto_ratio_cur = 0; 81 static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL; 82 static bool tcp_log_auto_all = false; 83 static uint32_t tcp_disable_all_bb_logs = 0; 84 85 RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) 86 87 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 88 "TCP Black Box controls"); 89 90 SYSCTL_NODE(_net_inet_tcp_bb, OID_AUTO, tp, 
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 91 "TCP Black Box Trace Point controls"); 92 93 SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose, 94 0, "Force verbose logging for TCP traces"); 95 96 SYSCTL_INT(_net_inet_tcp_bb, OID_AUTO, log_session_limit, 97 CTLFLAG_RW, &tcp_log_session_limit, 0, 98 "Maximum number of events maintained for each TCP session"); 99 100 uint32_t tcp_trace_point_config = 0; 101 SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, number, CTLFLAG_RW, 102 &tcp_trace_point_config, TCP_LOG_STATE_HEAD_AUTO, 103 "What is the trace point number to activate (0=none, 0xffffffff = all)?"); 104 105 uint32_t tcp_trace_point_bb_mode = TCP_LOG_STATE_CONTINUAL; 106 SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, bbmode, CTLFLAG_RW, 107 &tcp_trace_point_bb_mode, TCP_LOG_STATE_HEAD_AUTO, 108 "What is BB logging mode that is activated?"); 109 110 int32_t tcp_trace_point_count = 0; 111 SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, count, CTLFLAG_RW, 112 &tcp_trace_point_count, TCP_LOG_STATE_HEAD_AUTO, 113 "How many connections will have BB logging turned on that hit the tracepoint?"); 114 115 116 117 SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW, 118 &tcp_log_zone, "Maximum number of events maintained for all TCP sessions"); 119 120 SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD, 121 &tcp_log_zone, "Current number of events maintained for all TCP sessions"); 122 123 SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW, 124 &tcp_log_id_bucket_zone, "Maximum number of log IDs"); 125 126 SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD, 127 &tcp_log_id_bucket_zone, "Current number of log IDs"); 128 129 SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW, 130 &tcp_log_id_node_zone, "Maximum number of tcpcbs with log IDs"); 131 132 SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD, 133 &tcp_log_id_node_zone, "Current number of tcpcbs with log IDs"); 134 135 SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version, 136 0, "Version of log formats exported"); 137 138 SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW, 139 &tcp_disable_all_bb_logs, 0, 140 "Disable all BB logging for all connections"); 141 142 SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW, 143 &tcp_log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions"); 144 145 SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_auto_mode, CTLFLAG_RW, 146 &tcp_log_auto_mode, 0, 147 "Logging mode for auto-selected sessions (default is TCP_LOG_STATE_TAIL)"); 148 149 SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_auto_all, CTLFLAG_RW, 150 &tcp_log_auto_all, 0, 151 "Auto-select from all sessions (rather than just those with IDs)"); 152 153 #ifdef TCPLOG_DEBUG_COUNTERS 154 counter_u64_t tcp_log_queued; 155 counter_u64_t tcp_log_que_fail1; 156 counter_u64_t tcp_log_que_fail2; 157 counter_u64_t tcp_log_que_fail3; 158 counter_u64_t tcp_log_que_fail4; 159 counter_u64_t tcp_log_que_fail5; 160 counter_u64_t tcp_log_que_copyout; 161 counter_u64_t tcp_log_que_read; 162 counter_u64_t tcp_log_que_freed; 163 164 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, queued, CTLFLAG_RD, 165 &tcp_log_queued, "Number of entries queued"); 166 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail1, CTLFLAG_RD, 167 &tcp_log_que_fail1, "Number of entries queued but fail 1"); 168 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail2, CTLFLAG_RD, 169 &tcp_log_que_fail2, "Number of entries 
queued but fail 2"); 170 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail3, CTLFLAG_RD, 171 &tcp_log_que_fail3, "Number of entries queued but fail 3"); 172 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail4, CTLFLAG_RD, 173 &tcp_log_que_fail4, "Number of entries queued but fail 4"); 174 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail5, CTLFLAG_RD, 175 &tcp_log_que_fail5, "Number of entries queued but fail 4"); 176 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, copyout, CTLFLAG_RD, 177 &tcp_log_que_copyout, "Number of entries copied out"); 178 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, read, CTLFLAG_RD, 179 &tcp_log_que_read, "Number of entries read from the queue"); 180 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, CTLFLAG_RD, 181 &tcp_log_que_freed, "Number of entries freed after reading"); 182 #endif 183 184 #ifdef INVARIANTS 185 #define TCPLOG_DEBUG_RINGBUF 186 #endif 187 /* Number of requests to consider a PBCID "active". */ 188 #define ACTIVE_REQUEST_COUNT 10 189 190 /* Statistic tracking for "active" PBCIDs. */ 191 static counter_u64_t tcp_log_pcb_ids_cur; 192 static counter_u64_t tcp_log_pcb_ids_tot; 193 194 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD, 195 &tcp_log_pcb_ids_cur, "Number of pcb IDs allocated in the system"); 196 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD, 197 &tcp_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated"); 198 199 struct tcp_log_mem 200 { 201 STAILQ_ENTRY(tcp_log_mem) tlm_queue; 202 struct tcp_log_buffer tlm_buf; 203 struct tcp_log_verbose tlm_v; 204 #ifdef TCPLOG_DEBUG_RINGBUF 205 volatile int tlm_refcnt; 206 #endif 207 }; 208 209 /* 60 bytes for the header, + 16 bytes for padding */ 210 static uint8_t zerobuf[76]; 211 212 /* 213 * Lock order: 214 * 1. TCPID_TREE 215 * 2. TCPID_BUCKET 216 * 3. INP 217 * 218 * Rules: 219 * A. You need a lock on the Tree to add/remove buckets. 220 * B. You need a lock on the bucket to add/remove nodes from the bucket. 221 * C. To change information in a node, you need the INP lock if the tln_closed 222 * field is false. Otherwise, you need the bucket lock. (Note that the 223 * tln_closed field can change at any point, so you need to recheck the 224 * entry after acquiring the INP lock.) 225 * D. To remove a node from the bucket, you must have that entry locked, 226 * according to the criteria of Rule C. Also, the node must not be on 227 * the expiry queue. 228 * E. The exception to C is the expiry queue fields, which are locked by 229 * the TCPLOG_EXPIREQ lock. 230 * 231 * Buckets have a reference count. Each node is a reference. Further, 232 * other callers may add reference counts to keep a bucket from disappearing. 233 * You can add a reference as long as you own a lock sufficient to keep the 234 * bucket from disappearing. For example, a common use is: 235 * a. Have a locked INP, but need to lock the TCPID_BUCKET. 236 * b. Add a refcount on the bucket. (Safe because the INP lock prevents 237 * the TCPID_BUCKET from going away.) 238 * c. Drop the INP lock. 239 * d. Acquire a lock on the TCPID_BUCKET. 240 * e. Acquire a lock on the INP. 241 * f. Drop the refcount on the bucket. 242 * (At this point, the bucket may disappear.) 243 * 244 * Expire queue lock: 245 * You can acquire this with either the bucket or INP lock. Don't reverse it. 246 * When the expire code has committed to freeing a node, it resets the expiry 247 * time to SBT_MAX. That is the signal to everyone else that they should 248 * leave that node alone. 
249 */ 250 static struct rwlock tcp_id_tree_lock; 251 #define TCPID_TREE_WLOCK() rw_wlock(&tcp_id_tree_lock) 252 #define TCPID_TREE_RLOCK() rw_rlock(&tcp_id_tree_lock) 253 #define TCPID_TREE_UPGRADE() rw_try_upgrade(&tcp_id_tree_lock) 254 #define TCPID_TREE_WUNLOCK() rw_wunlock(&tcp_id_tree_lock) 255 #define TCPID_TREE_RUNLOCK() rw_runlock(&tcp_id_tree_lock) 256 #define TCPID_TREE_WLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_WLOCKED) 257 #define TCPID_TREE_RLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_RLOCKED) 258 #define TCPID_TREE_UNLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_UNLOCKED) 259 260 #define TCPID_BUCKET_LOCK_INIT(tlb) mtx_init(&((tlb)->tlb_mtx), "tcp log id bucket", NULL, MTX_DEF) 261 #define TCPID_BUCKET_LOCK_DESTROY(tlb) mtx_destroy(&((tlb)->tlb_mtx)) 262 #define TCPID_BUCKET_LOCK(tlb) mtx_lock(&((tlb)->tlb_mtx)) 263 #define TCPID_BUCKET_UNLOCK(tlb) mtx_unlock(&((tlb)->tlb_mtx)) 264 #define TCPID_BUCKET_LOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_OWNED) 265 #define TCPID_BUCKET_UNLOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_NOTOWNED) 266 267 #define TCPID_BUCKET_REF(tlb) refcount_acquire(&((tlb)->tlb_refcnt)) 268 #define TCPID_BUCKET_UNREF(tlb) refcount_release(&((tlb)->tlb_refcnt)) 269 270 #define TCPLOG_EXPIREQ_LOCK() mtx_lock(&tcp_log_expireq_mtx) 271 #define TCPLOG_EXPIREQ_UNLOCK() mtx_unlock(&tcp_log_expireq_mtx) 272 273 SLIST_HEAD(tcp_log_id_head, tcp_log_id_node); 274 275 struct tcp_log_id_bucket 276 { 277 /* 278 * tlb_id must be first. This lets us use strcmp on 279 * (struct tcp_log_id_bucket *) and (char *) interchangeably. 280 */ 281 char tlb_id[TCP_LOG_ID_LEN]; 282 char tlb_tag[TCP_LOG_TAG_LEN]; 283 RB_ENTRY(tcp_log_id_bucket) tlb_rb; 284 struct tcp_log_id_head tlb_head; 285 struct mtx tlb_mtx; 286 volatile u_int tlb_refcnt; 287 volatile u_int tlb_reqcnt; 288 uint32_t tlb_loglimit; 289 int8_t tlb_logstate; 290 }; 291 292 struct tcp_log_id_node 293 { 294 SLIST_ENTRY(tcp_log_id_node) tln_list; 295 STAILQ_ENTRY(tcp_log_id_node) tln_expireq; /* Locked by the expireq lock */ 296 sbintime_t tln_expiretime; /* Locked by the expireq lock */ 297 298 /* 299 * If INP is NULL, that means the connection has closed. We've 300 * saved the connection endpoint information and the log entries 301 * in the tln_ie and tln_entries members. We've also saved a pointer 302 * to the enclosing bucket here. If INP is not NULL, the information is 303 * in the PCB and not here. 304 */ 305 struct inpcb *tln_inp; 306 struct tcpcb *tln_tp; 307 struct tcp_log_id_bucket *tln_bucket; 308 struct in_endpoints tln_ie; 309 struct tcp_log_stailq tln_entries; 310 int tln_count; 311 volatile int tln_closed; 312 uint8_t tln_af; 313 }; 314 315 enum tree_lock_state { 316 TREE_UNLOCKED = 0, 317 TREE_RLOCKED, 318 TREE_WLOCKED, 319 }; 320 321 /* Do we want to select this session for auto-logging? */ 322 static __inline bool 323 tcp_log_selectauto(void) 324 { 325 326 /* 327 * If we are doing auto-capturing, figure out whether we will capture 328 * this session. 
329 */ 330 if (tcp_log_auto_ratio && 331 (tcp_disable_all_bb_logs == 0) && 332 (atomic_fetchadd_long(&tcp_log_auto_ratio_cur, 1) % 333 tcp_log_auto_ratio) == 0) 334 return (true); 335 return (false); 336 } 337 338 static __inline int 339 tcp_log_id_cmp(struct tcp_log_id_bucket *a, struct tcp_log_id_bucket *b) 340 { 341 KASSERT(a != NULL, ("tcp_log_id_cmp: argument a is unexpectedly NULL")); 342 KASSERT(b != NULL, ("tcp_log_id_cmp: argument b is unexpectedly NULL")); 343 return strncmp(a->tlb_id, b->tlb_id, TCP_LOG_ID_LEN); 344 } 345 346 RB_GENERATE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) 347 348 static __inline void 349 tcp_log_id_validate_tree_lock(int tree_locked) 350 { 351 352 #ifdef INVARIANTS 353 switch (tree_locked) { 354 case TREE_WLOCKED: 355 TCPID_TREE_WLOCK_ASSERT(); 356 break; 357 case TREE_RLOCKED: 358 TCPID_TREE_RLOCK_ASSERT(); 359 break; 360 case TREE_UNLOCKED: 361 TCPID_TREE_UNLOCK_ASSERT(); 362 break; 363 default: 364 kassert_panic("%s:%d: unknown tree lock state", __func__, 365 __LINE__); 366 } 367 #endif 368 } 369 370 static __inline void 371 tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb) 372 { 373 374 TCPID_TREE_WLOCK_ASSERT(); 375 KASSERT(SLIST_EMPTY(&tlb->tlb_head), 376 ("%s: Attempt to remove non-empty bucket", __func__)); 377 if (RB_REMOVE(tcp_log_id_tree, &tcp_log_id_head, tlb) == NULL) { 378 #ifdef INVARIANTS 379 kassert_panic("%s:%d: error removing element from tree", 380 __func__, __LINE__); 381 #endif 382 } 383 TCPID_BUCKET_LOCK_DESTROY(tlb); 384 counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); 385 uma_zfree(tcp_log_id_bucket_zone, tlb); 386 } 387 388 /* 389 * Call with a referenced and locked bucket. 390 * Will return true if the bucket was freed; otherwise, false. 391 * tlb: The bucket to unreference. 392 * tree_locked: A pointer to the state of the tree lock. If the tree lock 393 * state changes, the function will update it. 394 * inp: If not NULL and the function needs to drop the inp lock to relock the 395 * tree, it will do so. (The caller must ensure inp will not become invalid, 396 * probably by holding a reference to it.) 397 */ 398 static bool 399 tcp_log_unref_bucket(struct tcp_log_id_bucket *tlb, int *tree_locked, 400 struct inpcb *inp) 401 { 402 403 KASSERT(tlb != NULL, ("%s: called with NULL tlb", __func__)); 404 KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", 405 __func__)); 406 407 tcp_log_id_validate_tree_lock(*tree_locked); 408 409 /* 410 * Did we hold the last reference on the tlb? If so, we may need 411 * to free it. (Note that we can realistically only execute the 412 * loop twice: once without a write lock and once with a write 413 * lock.) 414 */ 415 while (TCPID_BUCKET_UNREF(tlb)) { 416 /* 417 * We need a write lock on the tree to free this. 418 * If we can upgrade the tree lock, this is "easy". If we 419 * can't upgrade the tree lock, we need to do this the 420 * "hard" way: unwind all our locks and relock everything. 421 * In the meantime, anything could have changed. We even 422 * need to validate that we still need to free the bucket. 
423 */ 424 if (*tree_locked == TREE_RLOCKED && TCPID_TREE_UPGRADE()) 425 *tree_locked = TREE_WLOCKED; 426 else if (*tree_locked != TREE_WLOCKED) { 427 TCPID_BUCKET_REF(tlb); 428 if (inp != NULL) 429 INP_WUNLOCK(inp); 430 TCPID_BUCKET_UNLOCK(tlb); 431 if (*tree_locked == TREE_RLOCKED) 432 TCPID_TREE_RUNLOCK(); 433 TCPID_TREE_WLOCK(); 434 *tree_locked = TREE_WLOCKED; 435 TCPID_BUCKET_LOCK(tlb); 436 if (inp != NULL) 437 INP_WLOCK(inp); 438 continue; 439 } 440 441 /* 442 * We have an empty bucket and a write lock on the tree. 443 * Remove the empty bucket. 444 */ 445 tcp_log_remove_bucket(tlb); 446 return (true); 447 } 448 return (false); 449 } 450 451 /* 452 * Call with a locked bucket. This function will release the lock on the 453 * bucket before returning. 454 * 455 * The caller is responsible for freeing the tp->t_lin/tln node! 456 * 457 * Note: one of tp or both tlb and tln must be supplied. 458 * 459 * inp: A pointer to the inp. If the function needs to drop the inp lock to 460 * acquire the tree write lock, it will do so. (The caller must ensure inp 461 * will not become invalid, probably by holding a reference to it.) 462 * tp: A pointer to the tcpcb. (optional; if specified, tlb and tln are ignored) 463 * tlb: A pointer to the bucket. (optional; ignored if tp is specified) 464 * tln: A pointer to the node. (optional; ignored if tp is specified) 465 * tree_locked: A pointer to the state of the tree lock. If the tree lock 466 * state changes, the function will update it. 467 * 468 * Will return true if the INP lock was reacquired; otherwise, false. 469 */ 470 static bool 471 tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp, 472 struct tcp_log_id_bucket *tlb, struct tcp_log_id_node *tln, 473 int *tree_locked) 474 { 475 int orig_tree_locked; 476 477 KASSERT(tp != NULL || (tlb != NULL && tln != NULL), 478 ("%s: called with tp=%p, tlb=%p, tln=%p", __func__, 479 tp, tlb, tln)); 480 KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", 481 __func__)); 482 483 if (tp != NULL) { 484 tlb = tp->t_lib; 485 tln = tp->t_lin; 486 KASSERT(tlb != NULL, ("%s: unexpectedly NULL tlb", __func__)); 487 KASSERT(tln != NULL, ("%s: unexpectedly NULL tln", __func__)); 488 } 489 490 tcp_log_id_validate_tree_lock(*tree_locked); 491 TCPID_BUCKET_LOCK_ASSERT(tlb); 492 493 /* 494 * Remove the node, clear the log bucket and node from the TCPCB, and 495 * decrement the bucket refcount. In the process, if this is the 496 * last reference, the bucket will be freed. 
497 */ 498 SLIST_REMOVE(&tlb->tlb_head, tln, tcp_log_id_node, tln_list); 499 if (tp != NULL) { 500 tp->t_lib = NULL; 501 tp->t_lin = NULL; 502 } 503 orig_tree_locked = *tree_locked; 504 if (!tcp_log_unref_bucket(tlb, tree_locked, inp)) 505 TCPID_BUCKET_UNLOCK(tlb); 506 return (*tree_locked != orig_tree_locked); 507 } 508 509 #define RECHECK_INP_CLEAN(cleanup) do { \ 510 if (inp->inp_flags & INP_DROPPED) { \ 511 rv = ECONNRESET; \ 512 cleanup; \ 513 goto done; \ 514 } \ 515 tp = intotcpcb(inp); \ 516 } while (0) 517 518 #define RECHECK_INP() RECHECK_INP_CLEAN(/* noop */) 519 520 static void 521 tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp) 522 { 523 524 INP_WLOCK_ASSERT(tptoinpcb(tp)); 525 526 #ifdef STATS 527 if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL) 528 (void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id)); 529 #endif 530 } 531 532 static void 533 tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb) 534 { 535 536 atomic_fetchadd_int(&tlb->tlb_reqcnt, 1); 537 } 538 539 int 540 tcp_log_apply_ratio(struct tcpcb *tp, int ratio) 541 { 542 struct tcp_log_id_bucket *tlb; 543 struct inpcb *inp = tptoinpcb(tp); 544 uint32_t hash, ratio_hash_thresh; 545 int rv, tree_locked; 546 547 rv = 0; 548 tree_locked = TREE_UNLOCKED; 549 tlb = tp->t_lib; 550 551 INP_WLOCK_ASSERT(inp); 552 if (tlb == NULL) { 553 INP_WUNLOCK(inp); 554 return (EOPNOTSUPP); 555 } 556 ratio_hash_thresh = max(1, UINT32_MAX / ratio); 557 TCPID_BUCKET_REF(tlb); 558 INP_WUNLOCK(inp); 559 TCPID_BUCKET_LOCK(tlb); 560 561 hash = hash32_buf(tlb->tlb_id, strlen(tlb->tlb_id), 0); 562 if (hash > ratio_hash_thresh && tp->_t_logstate == TCP_LOG_STATE_OFF && 563 tlb->tlb_logstate == TCP_LOG_STATE_OFF) { 564 /* 565 * Ratio decision not to log this log ID (and this connection by 566 * way of association). We only apply a log ratio log disable 567 * decision if it would not interfere with a log enable decision 568 * made elsewhere e.g. tcp_log_selectauto() or setsockopt(). 569 */ 570 tlb->tlb_logstate = TCP_LOG_STATE_RATIO_OFF; 571 INP_WLOCK(inp); 572 RECHECK_INP(); 573 (void)tcp_log_state_change(tp, TCP_LOG_STATE_OFF); 574 done: 575 INP_WUNLOCK(inp); 576 } 577 578 INP_UNLOCK_ASSERT(inp); 579 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 580 TCPID_BUCKET_UNLOCK(tlb); 581 582 if (tree_locked == TREE_WLOCKED) { 583 TCPID_TREE_WLOCK_ASSERT(); 584 TCPID_TREE_WUNLOCK(); 585 } else if (tree_locked == TREE_RLOCKED) { 586 TCPID_TREE_RLOCK_ASSERT(); 587 TCPID_TREE_RUNLOCK(); 588 } else 589 TCPID_TREE_UNLOCK_ASSERT(); 590 591 return (rv); 592 } 593 594 /* 595 * Associate the specified tag with a particular TCP log ID. 596 * Called with INPCB locked. Returns with it unlocked. 597 * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID. 
598 */ 599 int 600 tcp_log_set_tag(struct tcpcb *tp, char *tag) 601 { 602 struct inpcb *inp = tptoinpcb(tp); 603 struct tcp_log_id_bucket *tlb; 604 int tree_locked; 605 606 INP_WLOCK_ASSERT(inp); 607 608 tree_locked = TREE_UNLOCKED; 609 tlb = tp->t_lib; 610 if (tlb == NULL) { 611 INP_WUNLOCK(inp); 612 return (EOPNOTSUPP); 613 } 614 615 TCPID_BUCKET_REF(tlb); 616 INP_WUNLOCK(inp); 617 TCPID_BUCKET_LOCK(tlb); 618 strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN); 619 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 620 TCPID_BUCKET_UNLOCK(tlb); 621 622 if (tree_locked == TREE_WLOCKED) { 623 TCPID_TREE_WLOCK_ASSERT(); 624 TCPID_TREE_WUNLOCK(); 625 } else if (tree_locked == TREE_RLOCKED) { 626 TCPID_TREE_RLOCK_ASSERT(); 627 TCPID_TREE_RUNLOCK(); 628 } else 629 TCPID_TREE_UNLOCK_ASSERT(); 630 631 return (0); 632 } 633 634 /* 635 * Set the TCP log ID for a TCPCB. 636 * Called with INPCB locked. Returns with it unlocked. 637 */ 638 int 639 tcp_log_set_id(struct tcpcb *tp, char *id) 640 { 641 struct tcp_log_id_bucket *tlb, *tmp_tlb; 642 struct tcp_log_id_node *tln; 643 struct inpcb *inp = tptoinpcb(tp); 644 int tree_locked, rv; 645 bool bucket_locked, same; 646 647 tlb = NULL; 648 tln = NULL; 649 tree_locked = TREE_UNLOCKED; 650 bucket_locked = false; 651 652 restart: 653 INP_WLOCK_ASSERT(inp); 654 /* See if the ID is unchanged. */ 655 same = ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) || 656 (tp->t_lib == NULL && *id == 0)); 657 if (tp->_t_logstate && STAILQ_FIRST(&tp->t_logs) && !same) { 658 /* 659 * There are residual logs left we may 660 * be changing id's so dump what we can. 661 */ 662 switch(tp->_t_logstate) { 663 case TCP_LOG_STATE_HEAD_AUTO: 664 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at id switch", 665 M_NOWAIT, false); 666 break; 667 case TCP_LOG_STATE_TAIL_AUTO: 668 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at id switch", 669 M_NOWAIT, false); 670 break; 671 case TCP_LOG_STATE_CONTINUAL: 672 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual at id switch", 673 M_NOWAIT, false); 674 break; 675 case TCP_LOG_VIA_BBPOINTS: 676 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints at id switch", 677 M_NOWAIT, false); 678 break; 679 } 680 } 681 if (same) { 682 if (tp->t_lib != NULL) { 683 tcp_log_increment_reqcnt(tp->t_lib); 684 if ((tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) && 685 (tp->t_log_state_set == 0)) { 686 /* Clone in any logging */ 687 688 tp->_t_logstate = tp->t_lib->tlb_logstate; 689 } 690 if ((tp->t_lib->tlb_loglimit) && 691 (tp->t_log_state_set == 0)) { 692 /* We also have a limit set */ 693 694 tp->t_loglimit = tp->t_lib->tlb_loglimit; 695 } 696 } 697 rv = 0; 698 goto done; 699 } 700 701 /* 702 * If the TCPCB had a previous ID, we need to extricate it from 703 * the previous list. 704 * 705 * Drop the TCPCB lock and lock the tree and the bucket. 706 * Because this is called in the socket context, we (theoretically) 707 * don't need to worry about the INPCB completely going away 708 * while we are gone. 709 */ 710 if (tp->t_lib != NULL) { 711 tlb = tp->t_lib; 712 TCPID_BUCKET_REF(tlb); 713 INP_WUNLOCK(inp); 714 715 if (tree_locked == TREE_UNLOCKED) { 716 TCPID_TREE_RLOCK(); 717 tree_locked = TREE_RLOCKED; 718 } 719 TCPID_BUCKET_LOCK(tlb); 720 bucket_locked = true; 721 INP_WLOCK(inp); 722 723 /* 724 * Unreference the bucket. If our bucket went away, it is no 725 * longer locked or valid. 
726 */ 727 if (tcp_log_unref_bucket(tlb, &tree_locked, inp)) { 728 bucket_locked = false; 729 tlb = NULL; 730 } 731 732 /* Validate the INP. */ 733 RECHECK_INP(); 734 735 /* 736 * Evaluate whether the bucket changed while we were unlocked. 737 * 738 * Possible scenarios here: 739 * 1. Bucket is unchanged and the same one we started with. 740 * 2. The TCPCB no longer has a bucket and our bucket was 741 * freed. 742 * 3. The TCPCB has a new bucket, whether ours was freed. 743 * 4. The TCPCB no longer has a bucket and our bucket was 744 * not freed. 745 * 746 * In cases 2-4, we will start over. In case 1, we will 747 * proceed here to remove the bucket. 748 */ 749 if (tlb == NULL || tp->t_lib != tlb) { 750 KASSERT(bucket_locked || tlb == NULL, 751 ("%s: bucket_locked (%d) and tlb (%p) are " 752 "inconsistent", __func__, bucket_locked, tlb)); 753 754 if (bucket_locked) { 755 TCPID_BUCKET_UNLOCK(tlb); 756 bucket_locked = false; 757 tlb = NULL; 758 } 759 goto restart; 760 } 761 762 /* 763 * Store the (struct tcp_log_id_node) for reuse. Then, remove 764 * it from the bucket. In the process, we may end up relocking. 765 * If so, we need to validate that the INP is still valid, and 766 * the TCPCB entries match we expect. 767 * 768 * We will clear tlb and change the bucket_locked state just 769 * before calling tcp_log_remove_id_node(), since that function 770 * will unlock the bucket. 771 */ 772 if (tln != NULL) 773 uma_zfree(tcp_log_id_node_zone, tln); 774 tln = tp->t_lin; 775 tlb = NULL; 776 bucket_locked = false; 777 if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) { 778 RECHECK_INP(); 779 780 /* 781 * If the TCPCB moved to a new bucket while we had 782 * dropped the lock, restart. 783 */ 784 if (tp->t_lib != NULL || tp->t_lin != NULL) 785 goto restart; 786 } 787 788 /* 789 * Yay! We successfully removed the TCPCB from its old 790 * bucket. Phew! 791 * 792 * On to bigger and better things... 793 */ 794 } 795 796 /* At this point, the TCPCB should not be in any bucket. */ 797 KASSERT(tp->t_lib == NULL, ("%s: tp->t_lib is not NULL", __func__)); 798 799 /* 800 * If the new ID is not empty, we need to now assign this TCPCB to a 801 * new bucket. 802 */ 803 if (*id) { 804 /* Get a new tln, if we don't already have one to reuse. */ 805 if (tln == NULL) { 806 tln = uma_zalloc(tcp_log_id_node_zone, 807 M_NOWAIT | M_ZERO); 808 if (tln == NULL) { 809 rv = ENOBUFS; 810 goto done; 811 } 812 tln->tln_inp = inp; 813 tln->tln_tp = tp; 814 } 815 816 /* 817 * Drop the INP lock for a bit. We don't need it, and dropping 818 * it prevents lock order reversals. 819 */ 820 INP_WUNLOCK(inp); 821 822 /* Make sure we have at least a read lock on the tree. */ 823 tcp_log_id_validate_tree_lock(tree_locked); 824 if (tree_locked == TREE_UNLOCKED) { 825 TCPID_TREE_RLOCK(); 826 tree_locked = TREE_RLOCKED; 827 } 828 829 refind: 830 /* 831 * Remember that we constructed (struct tcp_log_id_node) so 832 * we can safely cast the id to it for the purposes of finding. 833 */ 834 KASSERT(tlb == NULL, ("%s:%d tlb unexpectedly non-NULL", 835 __func__, __LINE__)); 836 tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head, 837 (struct tcp_log_id_bucket *) id); 838 839 /* 840 * If we didn't find a matching bucket, we need to add a new 841 * one. This requires a write lock. But, of course, we will 842 * need to recheck some things when we re-acquire the lock. 
843 */ 844 if (tmp_tlb == NULL && tree_locked != TREE_WLOCKED) { 845 tree_locked = TREE_WLOCKED; 846 if (!TCPID_TREE_UPGRADE()) { 847 TCPID_TREE_RUNLOCK(); 848 TCPID_TREE_WLOCK(); 849 850 /* 851 * The tree may have changed while we were 852 * unlocked. 853 */ 854 goto refind; 855 } 856 } 857 858 /* If we need to add a new bucket, do it now. */ 859 if (tmp_tlb == NULL) { 860 /* Allocate new bucket. */ 861 tlb = uma_zalloc(tcp_log_id_bucket_zone, M_NOWAIT); 862 if (tlb == NULL) { 863 rv = ENOBUFS; 864 goto done_noinp; 865 } 866 counter_u64_add(tcp_log_pcb_ids_cur, 1); 867 counter_u64_add(tcp_log_pcb_ids_tot, 1); 868 869 if ((tcp_log_auto_all == false) && 870 tcp_log_auto_mode && 871 tcp_log_selectauto()) { 872 /* Save off the log state */ 873 tlb->tlb_logstate = tcp_log_auto_mode; 874 } else 875 tlb->tlb_logstate = TCP_LOG_STATE_OFF; 876 tlb->tlb_loglimit = 0; 877 tlb->tlb_tag[0] = '\0'; /* Default to an empty tag. */ 878 879 /* 880 * Copy the ID to the bucket. 881 * NB: Don't use strlcpy() unless you are sure 882 * we've always validated NULL termination. 883 * 884 * TODO: When I'm done writing this, see if we 885 * we have correctly validated NULL termination and 886 * can use strlcpy(). :-) 887 */ 888 strncpy(tlb->tlb_id, id, TCP_LOG_ID_LEN - 1); 889 tlb->tlb_id[TCP_LOG_ID_LEN - 1] = '\0'; 890 891 /* 892 * Take the refcount for the first node and go ahead 893 * and lock this. Note that we zero the tlb_mtx 894 * structure, since 0xdeadc0de flips the right bits 895 * for the code to think that this mutex has already 896 * been initialized. :-( 897 */ 898 SLIST_INIT(&tlb->tlb_head); 899 refcount_init(&tlb->tlb_refcnt, 1); 900 tlb->tlb_reqcnt = 1; 901 memset(&tlb->tlb_mtx, 0, sizeof(struct mtx)); 902 TCPID_BUCKET_LOCK_INIT(tlb); 903 TCPID_BUCKET_LOCK(tlb); 904 bucket_locked = true; 905 906 #define FREE_NEW_TLB() do { \ 907 TCPID_BUCKET_LOCK_DESTROY(tlb); \ 908 uma_zfree(tcp_log_id_bucket_zone, tlb); \ 909 counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); \ 910 counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1); \ 911 bucket_locked = false; \ 912 tlb = NULL; \ 913 } while (0) 914 /* 915 * Relock the INP and make sure we are still 916 * unassigned. 917 */ 918 INP_WLOCK(inp); 919 RECHECK_INP_CLEAN(FREE_NEW_TLB()); 920 if (tp->t_lib != NULL) { 921 FREE_NEW_TLB(); 922 goto restart; 923 } 924 925 /* Add the new bucket to the tree. */ 926 tmp_tlb = RB_INSERT(tcp_log_id_tree, &tcp_log_id_head, 927 tlb); 928 KASSERT(tmp_tlb == NULL, 929 ("%s: Unexpected conflicting bucket (%p) while " 930 "adding new bucket (%p)", __func__, tmp_tlb, tlb)); 931 932 /* 933 * If we found a conflicting bucket, free the new 934 * one we made and fall through to use the existing 935 * bucket. 936 */ 937 if (tmp_tlb != NULL) { 938 FREE_NEW_TLB(); 939 INP_WUNLOCK(inp); 940 } 941 #undef FREE_NEW_TLB 942 } 943 944 /* If we found an existing bucket, use it. */ 945 if (tmp_tlb != NULL) { 946 tlb = tmp_tlb; 947 TCPID_BUCKET_LOCK(tlb); 948 bucket_locked = true; 949 950 /* 951 * Relock the INP and make sure we are still 952 * unassigned. 953 */ 954 INP_UNLOCK_ASSERT(inp); 955 INP_WLOCK(inp); 956 RECHECK_INP(); 957 if (tp->t_lib != NULL) { 958 TCPID_BUCKET_UNLOCK(tlb); 959 bucket_locked = false; 960 tlb = NULL; 961 goto restart; 962 } 963 964 /* Take a reference on the bucket. */ 965 TCPID_BUCKET_REF(tlb); 966 967 /* Record the request. */ 968 tcp_log_increment_reqcnt(tlb); 969 } 970 971 tcp_log_grow_tlb(tlb->tlb_id, tp); 972 973 /* Add the new node to the list. 
*/ 974 SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list); 975 tp->t_lib = tlb; 976 tp->t_lin = tln; 977 if (tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) { 978 /* Clone in any logging */ 979 980 tp->_t_logstate = tp->t_lib->tlb_logstate; 981 } 982 if (tp->t_lib->tlb_loglimit) { 983 /* The loglimit too */ 984 985 tp->t_loglimit = tp->t_lib->tlb_loglimit; 986 } 987 tln = NULL; 988 } 989 990 rv = 0; 991 992 done: 993 /* Unlock things, as needed, and return. */ 994 INP_WUNLOCK(inp); 995 done_noinp: 996 INP_UNLOCK_ASSERT(inp); 997 if (bucket_locked) { 998 TCPID_BUCKET_LOCK_ASSERT(tlb); 999 TCPID_BUCKET_UNLOCK(tlb); 1000 } else if (tlb != NULL) 1001 TCPID_BUCKET_UNLOCK_ASSERT(tlb); 1002 if (tree_locked == TREE_WLOCKED) { 1003 TCPID_TREE_WLOCK_ASSERT(); 1004 TCPID_TREE_WUNLOCK(); 1005 } else if (tree_locked == TREE_RLOCKED) { 1006 TCPID_TREE_RLOCK_ASSERT(); 1007 TCPID_TREE_RUNLOCK(); 1008 } else 1009 TCPID_TREE_UNLOCK_ASSERT(); 1010 if (tln != NULL) 1011 uma_zfree(tcp_log_id_node_zone, tln); 1012 return (rv); 1013 } 1014 1015 /* 1016 * Get the TCP log ID for a TCPCB. 1017 * Called with INPCB locked. 1018 * 'buf' must point to a buffer that is at least TCP_LOG_ID_LEN bytes long. 1019 * Returns number of bytes copied. 1020 */ 1021 size_t 1022 tcp_log_get_id(struct tcpcb *tp, char *buf) 1023 { 1024 size_t len; 1025 1026 INP_LOCK_ASSERT(tptoinpcb(tp)); 1027 if (tp->t_lib != NULL) { 1028 len = strlcpy(buf, tp->t_lib->tlb_id, TCP_LOG_ID_LEN); 1029 KASSERT(len < TCP_LOG_ID_LEN, 1030 ("%s:%d: tp->t_lib->tlb_id too long (%zu)", 1031 __func__, __LINE__, len)); 1032 } else { 1033 *buf = '\0'; 1034 len = 0; 1035 } 1036 return (len); 1037 } 1038 1039 /* 1040 * Get the tag associated with the TCPCB's log ID. 1041 * Called with INPCB locked. Returns with it unlocked. 1042 * 'buf' must point to a buffer that is at least TCP_LOG_TAG_LEN bytes long. 1043 * Returns number of bytes copied. 1044 */ 1045 size_t 1046 tcp_log_get_tag(struct tcpcb *tp, char *buf) 1047 { 1048 struct inpcb *inp = tptoinpcb(tp); 1049 struct tcp_log_id_bucket *tlb; 1050 size_t len; 1051 int tree_locked; 1052 1053 INP_WLOCK_ASSERT(inp); 1054 1055 tree_locked = TREE_UNLOCKED; 1056 tlb = tp->t_lib; 1057 1058 if (tlb != NULL) { 1059 TCPID_BUCKET_REF(tlb); 1060 INP_WUNLOCK(inp); 1061 TCPID_BUCKET_LOCK(tlb); 1062 len = strlcpy(buf, tlb->tlb_tag, TCP_LOG_TAG_LEN); 1063 KASSERT(len < TCP_LOG_TAG_LEN, 1064 ("%s:%d: tp->t_lib->tlb_tag too long (%zu)", 1065 __func__, __LINE__, len)); 1066 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 1067 TCPID_BUCKET_UNLOCK(tlb); 1068 1069 if (tree_locked == TREE_WLOCKED) { 1070 TCPID_TREE_WLOCK_ASSERT(); 1071 TCPID_TREE_WUNLOCK(); 1072 } else if (tree_locked == TREE_RLOCKED) { 1073 TCPID_TREE_RLOCK_ASSERT(); 1074 TCPID_TREE_RUNLOCK(); 1075 } else 1076 TCPID_TREE_UNLOCK_ASSERT(); 1077 } else { 1078 INP_WUNLOCK(inp); 1079 *buf = '\0'; 1080 len = 0; 1081 } 1082 1083 return (len); 1084 } 1085 1086 /* 1087 * Get number of connections with the same log ID. 1088 * Log ID is taken from given TCPCB. 1089 * Called with INPCB locked. 1090 */ 1091 u_int 1092 tcp_log_get_id_cnt(struct tcpcb *tp) 1093 { 1094 1095 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1096 return ((tp->t_lib == NULL) ? 0 : tp->t_lib->tlb_refcnt); 1097 } 1098 1099 #ifdef TCPLOG_DEBUG_RINGBUF 1100 /* 1101 * Functions/macros to increment/decrement reference count for a log 1102 * entry. This should catch when we do a double-free/double-remove or 1103 * a double-add. 
1104 */ 1105 static inline void 1106 _tcp_log_entry_refcnt_add(struct tcp_log_mem *log_entry, const char *func, 1107 int line) 1108 { 1109 int refcnt; 1110 1111 refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, 1); 1112 if (refcnt != 0) 1113 panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 0)", 1114 func, line, log_entry, refcnt); 1115 } 1116 #define tcp_log_entry_refcnt_add(l) \ 1117 _tcp_log_entry_refcnt_add((l), __func__, __LINE__) 1118 1119 static inline void 1120 _tcp_log_entry_refcnt_rem(struct tcp_log_mem *log_entry, const char *func, 1121 int line) 1122 { 1123 int refcnt; 1124 1125 refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, -1); 1126 if (refcnt != 1) 1127 panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 1)", 1128 func, line, log_entry, refcnt); 1129 } 1130 #define tcp_log_entry_refcnt_rem(l) \ 1131 _tcp_log_entry_refcnt_rem((l), __func__, __LINE__) 1132 1133 #else /* !TCPLOG_DEBUG_RINGBUF */ 1134 1135 #define tcp_log_entry_refcnt_add(l) 1136 #define tcp_log_entry_refcnt_rem(l) 1137 1138 #endif 1139 1140 /* 1141 * Cleanup after removing a log entry, but only decrement the count if we 1142 * are running INVARIANTS. 1143 */ 1144 static inline void 1145 tcp_log_free_log_common(struct tcp_log_mem *log_entry, int *count __unused) 1146 { 1147 1148 uma_zfree(tcp_log_zone, log_entry); 1149 #ifdef INVARIANTS 1150 (*count)--; 1151 KASSERT(*count >= 0, 1152 ("%s: count unexpectedly negative", __func__)); 1153 #endif 1154 } 1155 1156 static void 1157 tcp_log_free_entries(struct tcp_log_stailq *head, int *count) 1158 { 1159 struct tcp_log_mem *log_entry; 1160 1161 /* Free the entries. */ 1162 while ((log_entry = STAILQ_FIRST(head)) != NULL) { 1163 STAILQ_REMOVE_HEAD(head, tlm_queue); 1164 tcp_log_entry_refcnt_rem(log_entry); 1165 tcp_log_free_log_common(log_entry, count); 1166 } 1167 } 1168 1169 /* Cleanup after removing a log entry. */ 1170 static inline void 1171 tcp_log_remove_log_cleanup(struct tcpcb *tp, struct tcp_log_mem *log_entry) 1172 { 1173 uma_zfree(tcp_log_zone, log_entry); 1174 tp->t_lognum--; 1175 KASSERT(tp->t_lognum >= 0, 1176 ("%s: tp->t_lognum unexpectedly negative", __func__)); 1177 } 1178 1179 /* Remove a log entry from the head of a list. */ 1180 static inline void 1181 tcp_log_remove_log_head(struct tcpcb *tp, struct tcp_log_mem *log_entry) 1182 { 1183 1184 KASSERT(log_entry == STAILQ_FIRST(&tp->t_logs), 1185 ("%s: attempt to remove non-HEAD log entry", __func__)); 1186 STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); 1187 tcp_log_entry_refcnt_rem(log_entry); 1188 tcp_log_remove_log_cleanup(tp, log_entry); 1189 } 1190 1191 #ifdef TCPLOG_DEBUG_RINGBUF 1192 /* 1193 * Initialize the log entry's reference count, which we want to 1194 * survive allocations. 1195 */ 1196 static int 1197 tcp_log_zone_init(void *mem, int size, int flags __unused) 1198 { 1199 struct tcp_log_mem *tlm; 1200 1201 KASSERT(size >= sizeof(struct tcp_log_mem), 1202 ("%s: unexpectedly short (%d) allocation", __func__, size)); 1203 tlm = (struct tcp_log_mem *)mem; 1204 tlm->tlm_refcnt = 0; 1205 return (0); 1206 } 1207 1208 /* 1209 * Double check that the refcnt is zero on allocation and return. 
1210 */ 1211 static int 1212 tcp_log_zone_ctor(void *mem, int size, void *args __unused, int flags __unused) 1213 { 1214 struct tcp_log_mem *tlm; 1215 1216 KASSERT(size >= sizeof(struct tcp_log_mem), 1217 ("%s: unexpectedly short (%d) allocation", __func__, size)); 1218 tlm = (struct tcp_log_mem *)mem; 1219 if (tlm->tlm_refcnt != 0) 1220 panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", 1221 __func__, __LINE__, tlm, tlm->tlm_refcnt); 1222 return (0); 1223 } 1224 1225 static void 1226 tcp_log_zone_dtor(void *mem, int size, void *args __unused) 1227 { 1228 struct tcp_log_mem *tlm; 1229 1230 KASSERT(size >= sizeof(struct tcp_log_mem), 1231 ("%s: unexpectedly short (%d) allocation", __func__, size)); 1232 tlm = (struct tcp_log_mem *)mem; 1233 if (tlm->tlm_refcnt != 0) 1234 panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", 1235 __func__, __LINE__, tlm, tlm->tlm_refcnt); 1236 } 1237 #endif /* TCPLOG_DEBUG_RINGBUF */ 1238 1239 /* Do global initialization. */ 1240 void 1241 tcp_log_init(void) 1242 { 1243 1244 tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem), 1245 #ifdef TCPLOG_DEBUG_RINGBUF 1246 tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init, 1247 #else 1248 NULL, NULL, NULL, 1249 #endif 1250 NULL, UMA_ALIGN_PTR, 0); 1251 (void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT); 1252 tcp_log_id_bucket_zone = uma_zcreate("tcp_log_id_bucket", 1253 sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL, 1254 UMA_ALIGN_PTR, 0); 1255 tcp_log_id_node_zone = uma_zcreate("tcp_log_id_node", 1256 sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL, 1257 UMA_ALIGN_PTR, 0); 1258 #ifdef TCPLOG_DEBUG_COUNTERS 1259 tcp_log_queued = counter_u64_alloc(M_WAITOK); 1260 tcp_log_que_fail1 = counter_u64_alloc(M_WAITOK); 1261 tcp_log_que_fail2 = counter_u64_alloc(M_WAITOK); 1262 tcp_log_que_fail3 = counter_u64_alloc(M_WAITOK); 1263 tcp_log_que_fail4 = counter_u64_alloc(M_WAITOK); 1264 tcp_log_que_fail5 = counter_u64_alloc(M_WAITOK); 1265 tcp_log_que_copyout = counter_u64_alloc(M_WAITOK); 1266 tcp_log_que_read = counter_u64_alloc(M_WAITOK); 1267 tcp_log_que_freed = counter_u64_alloc(M_WAITOK); 1268 #endif 1269 tcp_log_pcb_ids_cur = counter_u64_alloc(M_WAITOK); 1270 tcp_log_pcb_ids_tot = counter_u64_alloc(M_WAITOK); 1271 1272 rw_init_flags(&tcp_id_tree_lock, "TCP ID tree", RW_NEW); 1273 mtx_init(&tcp_log_expireq_mtx, "TCP log expireq", NULL, MTX_DEF); 1274 callout_init(&tcp_log_expireq_callout, 1); 1275 } 1276 1277 /* Do per-TCPCB initialization. */ 1278 void 1279 tcp_log_tcpcbinit(struct tcpcb *tp) 1280 { 1281 1282 /* A new TCPCB should start out zero-initialized. */ 1283 STAILQ_INIT(&tp->t_logs); 1284 1285 /* 1286 * If we are doing auto-capturing, figure out whether we will capture 1287 * this session. 1288 */ 1289 tp->t_loglimit = tcp_log_session_limit; 1290 if ((tcp_log_auto_all == true) && 1291 tcp_log_auto_mode && 1292 tcp_log_selectauto()) { 1293 tp->_t_logstate = tcp_log_auto_mode; 1294 tp->t_flags2 |= TF2_LOG_AUTO; 1295 } 1296 } 1297 1298 /* Remove entries */ 1299 static void 1300 tcp_log_expire(void *unused __unused) 1301 { 1302 struct tcp_log_id_bucket *tlb; 1303 struct tcp_log_id_node *tln; 1304 sbintime_t expiry_limit; 1305 int tree_locked; 1306 1307 TCPLOG_EXPIREQ_LOCK(); 1308 if (callout_pending(&tcp_log_expireq_callout)) { 1309 /* Callout was reset. */ 1310 TCPLOG_EXPIREQ_UNLOCK(); 1311 return; 1312 } 1313 1314 /* 1315 * Process entries until we reach one that expires too far in the 1316 * future. Look one second in the future. 
1317 */ 1318 expiry_limit = getsbinuptime() + SBT_1S; 1319 tree_locked = TREE_UNLOCKED; 1320 1321 while ((tln = STAILQ_FIRST(&tcp_log_expireq_head)) != NULL && 1322 tln->tln_expiretime <= expiry_limit) { 1323 if (!callout_active(&tcp_log_expireq_callout)) { 1324 /* 1325 * Callout was stopped. I guess we should 1326 * just quit at this point. 1327 */ 1328 TCPLOG_EXPIREQ_UNLOCK(); 1329 return; 1330 } 1331 1332 /* 1333 * Remove the node from the head of the list and unlock 1334 * the list. Change the expiry time to SBT_MAX as a signal 1335 * to other threads that we now own this. 1336 */ 1337 STAILQ_REMOVE_HEAD(&tcp_log_expireq_head, tln_expireq); 1338 tln->tln_expiretime = SBT_MAX; 1339 TCPLOG_EXPIREQ_UNLOCK(); 1340 1341 /* 1342 * Remove the node from the bucket. 1343 */ 1344 tlb = tln->tln_bucket; 1345 TCPID_BUCKET_LOCK(tlb); 1346 if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked)) { 1347 tcp_log_id_validate_tree_lock(tree_locked); 1348 if (tree_locked == TREE_WLOCKED) 1349 TCPID_TREE_WUNLOCK(); 1350 else 1351 TCPID_TREE_RUNLOCK(); 1352 tree_locked = TREE_UNLOCKED; 1353 } 1354 1355 /* Drop the INP reference. */ 1356 INP_WLOCK(tln->tln_inp); 1357 if (!in_pcbrele_wlocked(tln->tln_inp)) 1358 INP_WUNLOCK(tln->tln_inp); 1359 1360 /* Free the log records. */ 1361 tcp_log_free_entries(&tln->tln_entries, &tln->tln_count); 1362 1363 /* Free the node. */ 1364 uma_zfree(tcp_log_id_node_zone, tln); 1365 1366 /* Relock the expiry queue. */ 1367 TCPLOG_EXPIREQ_LOCK(); 1368 } 1369 1370 /* 1371 * We've expired all the entries we can. Do we need to reschedule 1372 * ourselves? 1373 */ 1374 callout_deactivate(&tcp_log_expireq_callout); 1375 if (tln != NULL) { 1376 /* 1377 * Get max(now + TCP_LOG_EXPIRE_INTVL, tln->tln_expiretime) and 1378 * set the next callout to that. (This helps ensure we generally 1379 * run the callout no more often than desired.) 1380 */ 1381 expiry_limit = getsbinuptime() + TCP_LOG_EXPIRE_INTVL; 1382 if (expiry_limit < tln->tln_expiretime) 1383 expiry_limit = tln->tln_expiretime; 1384 callout_reset_sbt(&tcp_log_expireq_callout, expiry_limit, 1385 SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE); 1386 } 1387 1388 /* We're done. */ 1389 TCPLOG_EXPIREQ_UNLOCK(); 1390 return; 1391 } 1392 1393 /* 1394 * Move log data from the TCPCB to a new node. This will reset the TCPCB log 1395 * entries and log count; however, it will not touch other things from the 1396 * TCPCB (e.g. t_lin, t_lib). 1397 * 1398 * NOTE: Must hold a lock on the INP. 1399 */ 1400 static void 1401 tcp_log_move_tp_to_node(struct tcpcb *tp, struct tcp_log_id_node *tln) 1402 { 1403 struct inpcb *inp = tptoinpcb(tp); 1404 1405 INP_WLOCK_ASSERT(inp); 1406 1407 tln->tln_ie = inp->inp_inc.inc_ie; 1408 if (inp->inp_inc.inc_flags & INC_ISIPV6) 1409 tln->tln_af = AF_INET6; 1410 else 1411 tln->tln_af = AF_INET; 1412 tln->tln_entries = tp->t_logs; 1413 tln->tln_count = tp->t_lognum; 1414 tln->tln_bucket = tp->t_lib; 1415 1416 /* Clear information from the PCB. 
*/ 1417 STAILQ_INIT(&tp->t_logs); 1418 tp->t_lognum = 0; 1419 } 1420 1421 /* Do per-TCPCB cleanup */ 1422 void 1423 tcp_log_tcpcbfini(struct tcpcb *tp) 1424 { 1425 struct tcp_log_id_node *tln, *tln_first; 1426 struct tcp_log_mem *log_entry; 1427 sbintime_t callouttime; 1428 1429 1430 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1431 #ifdef TCP_ACCOUNTING 1432 if (tp->_t_logstate) { 1433 struct tcp_log_buffer *lgb; 1434 union tcp_log_stackspecific log; 1435 struct timeval tv; 1436 int i; 1437 1438 memset(&log, 0, sizeof(log)); 1439 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { 1440 for (i = 0; i<TCP_NUM_CNT_COUNTERS; i++) { 1441 log.u_raw.u64_flex[i] = tp->tcp_cnt_counters[i]; 1442 } 1443 lgb = tcp_log_event(tp, NULL, 1444 NULL, 1445 NULL, 1446 TCP_LOG_ACCOUNTING, 0, 1447 0, &log, false, NULL, NULL, 0, &tv); 1448 lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; 1449 lgb->tlb_flex2 = 1; 1450 for (i = 0; i<TCP_NUM_CNT_COUNTERS; i++) { 1451 log.u_raw.u64_flex[i] = tp->tcp_proc_time[i]; 1452 } 1453 lgb = tcp_log_event(tp, NULL, 1454 NULL, 1455 NULL, 1456 TCP_LOG_ACCOUNTING, 0, 1457 0, &log, false, NULL, NULL, 0, &tv); 1458 if (tptoinpcb(tp)->inp_flags2 & INP_MBUF_ACKCMP) 1459 lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; 1460 else 1461 lgb->tlb_flex1 = TCP_NUM_PROC_COUNTERS; 1462 lgb->tlb_flex2 = 2; 1463 } 1464 log.u_bbr.timeStamp = tcp_get_usecs(&tv); 1465 log.u_bbr.cur_del_rate = tp->t_end_info; 1466 TCP_LOG_EVENTP(tp, NULL, 1467 NULL, 1468 NULL, 1469 TCP_LOG_CONNEND, 0, 1470 0, &log, false, &tv); 1471 } 1472 #endif 1473 /* 1474 * If we were gathering packets to be automatically dumped, try to do 1475 * it now. If this succeeds, the log information in the TCPCB will be 1476 * cleared. Otherwise, we'll handle the log information as we do 1477 * for other states. 1478 */ 1479 switch(tp->_t_logstate) { 1480 case TCP_LOG_STATE_HEAD_AUTO: 1481 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", 1482 M_NOWAIT, false); 1483 break; 1484 case TCP_LOG_STATE_TAIL_AUTO: 1485 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail", 1486 M_NOWAIT, false); 1487 break; 1488 case TCP_LOG_VIA_BBPOINTS: 1489 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", 1490 M_NOWAIT, false); 1491 break; 1492 case TCP_LOG_STATE_CONTINUAL: 1493 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1494 M_NOWAIT, false); 1495 break; 1496 } 1497 1498 /* 1499 * There are two ways we could keep logs: per-socket or per-ID. If 1500 * we are tracking logs with an ID, then the logs survive the 1501 * destruction of the TCPCB. 1502 * 1503 * If the TCPCB is associated with an ID node, move the logs from the 1504 * TCPCB to the ID node. In theory, this is safe, for reasons which I 1505 * will now explain for my own benefit when I next need to figure out 1506 * this code. :-) 1507 * 1508 * We own the INP lock. Therefore, no one else can change the contents 1509 * of this node (Rule C). Further, no one can remove this node from 1510 * the bucket while we hold the lock (Rule D). Basically, no one can 1511 * mess with this node. That leaves two states in which we could be: 1512 * 1513 * 1. Another thread is currently waiting to acquire the INP lock, with 1514 * plans to do something with this node. When we drop the INP lock, 1515 * they will have a chance to do that. They will recheck the 1516 * tln_closed field (see note to Rule C) and then acquire the 1517 * bucket lock before proceeding further. 1518 * 1519 * 2. Another thread will try to acquire a lock at some point in the 1520 * future. 
If they try to acquire a lock before we set the 1521 * tln_closed field, they will follow state #1. If they try to 1522 * acquire a lock after we set the tln_closed field, they will be 1523 * able to make changes to the node, at will, following Rule C. 1524 * 1525 * Therefore, we currently own this node and can make any changes 1526 * we want. But, as soon as we set the tln_closed field to true, we 1527 * have effectively dropped our lock on the node. (For this reason, we 1528 * also need to make sure our writes are ordered correctly. An atomic 1529 * operation with "release" semantics should be sufficient.) 1530 */ 1531 1532 if (tp->t_lin != NULL) { 1533 struct inpcb *inp = tptoinpcb(tp); 1534 1535 /* Copy the relevant information to the log entry. */ 1536 tln = tp->t_lin; 1537 KASSERT(tln->tln_inp == inp, 1538 ("%s: Mismatched inp (tln->tln_inp=%p, tp inpcb=%p)", 1539 __func__, tln->tln_inp, inp)); 1540 tcp_log_move_tp_to_node(tp, tln); 1541 1542 /* Clear information from the PCB. */ 1543 tp->t_lin = NULL; 1544 tp->t_lib = NULL; 1545 1546 /* 1547 * Take a reference on the INP. This ensures that the INP 1548 * remains valid while the node is on the expiry queue. This 1549 * ensures the INP is valid for other threads that may be 1550 * racing to lock this node when we move it to the expire 1551 * queue. 1552 */ 1553 in_pcbref(inp); 1554 1555 /* 1556 * Store the entry on the expiry list. The exact behavior 1557 * depends on whether we have entries to keep. If so, we 1558 * put the entry at the tail of the list and expire in 1559 * TCP_LOG_EXPIRE_TIME. Otherwise, we expire "now" and put 1560 * the entry at the head of the list. (Handling the cleanup 1561 * via the expiry timer lets us avoid locking messy-ness here.) 1562 */ 1563 tln->tln_expiretime = getsbinuptime(); 1564 TCPLOG_EXPIREQ_LOCK(); 1565 if (tln->tln_count) { 1566 tln->tln_expiretime += TCP_LOG_EXPIRE_TIME; 1567 if (STAILQ_EMPTY(&tcp_log_expireq_head) && 1568 !callout_active(&tcp_log_expireq_callout)) { 1569 /* 1570 * We are adding the first entry and a callout 1571 * is not currently scheduled; therefore, we 1572 * need to schedule one. 1573 */ 1574 callout_reset_sbt(&tcp_log_expireq_callout, 1575 tln->tln_expiretime, SBT_1S, tcp_log_expire, 1576 NULL, C_ABSOLUTE); 1577 } 1578 STAILQ_INSERT_TAIL(&tcp_log_expireq_head, tln, 1579 tln_expireq); 1580 } else { 1581 callouttime = tln->tln_expiretime + 1582 TCP_LOG_EXPIRE_INTVL; 1583 tln_first = STAILQ_FIRST(&tcp_log_expireq_head); 1584 1585 if ((tln_first == NULL || 1586 callouttime < tln_first->tln_expiretime) && 1587 (callout_pending(&tcp_log_expireq_callout) || 1588 !callout_active(&tcp_log_expireq_callout))) { 1589 /* 1590 * The list is empty, or we want to run the 1591 * expire code before the first entry's timer 1592 * fires. Also, we are in a case where a callout 1593 * is not actively running. We want to reset 1594 * the callout to occur sooner. 1595 */ 1596 callout_reset_sbt(&tcp_log_expireq_callout, 1597 callouttime, SBT_1S, tcp_log_expire, NULL, 1598 C_ABSOLUTE); 1599 } 1600 1601 /* 1602 * Insert to the head, or just after the head, as 1603 * appropriate. (This might result in small 1604 * mis-orderings as a bunch of "expire now" entries 1605 * gather at the start of the list, but that should 1606 * not produce big problems, since the expire timer 1607 * will walk through all of them.) 
1608 */ 1609 if (tln_first == NULL || 1610 tln->tln_expiretime < tln_first->tln_expiretime) 1611 STAILQ_INSERT_HEAD(&tcp_log_expireq_head, tln, 1612 tln_expireq); 1613 else 1614 STAILQ_INSERT_AFTER(&tcp_log_expireq_head, 1615 tln_first, tln, tln_expireq); 1616 } 1617 TCPLOG_EXPIREQ_UNLOCK(); 1618 1619 /* 1620 * We are done messing with the tln. After this point, we 1621 * can't touch it. (Note that the "release" semantics should 1622 * be included with the TCPLOG_EXPIREQ_UNLOCK() call above. 1623 * Therefore, they should be unnecessary here. However, it 1624 * seems like a good idea to include them anyway, since we 1625 * really are releasing a lock here.) 1626 */ 1627 atomic_store_rel_int(&tln->tln_closed, 1); 1628 } else { 1629 /* Remove log entries. */ 1630 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 1631 tcp_log_remove_log_head(tp, log_entry); 1632 KASSERT(tp->t_lognum == 0, 1633 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 1634 __func__, tp->t_lognum)); 1635 } 1636 1637 /* 1638 * Change the log state to off (just in case anything tries to sneak 1639 * in a last-minute log). 1640 */ 1641 tp->_t_logstate = TCP_LOG_STATE_OFF; 1642 } 1643 1644 static void 1645 tcp_log_purge_tp_logbuf(struct tcpcb *tp) 1646 { 1647 struct tcp_log_mem *log_entry; 1648 1649 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1650 if (tp->t_lognum == 0) 1651 return; 1652 1653 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 1654 tcp_log_remove_log_head(tp, log_entry); 1655 KASSERT(tp->t_lognum == 0, 1656 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 1657 __func__, tp->t_lognum)); 1658 tp->_t_logstate = TCP_LOG_STATE_OFF; 1659 } 1660 1661 /* 1662 * This logs an event for a TCP socket. Normally, this is called via 1663 * TCP_LOG_EVENT or TCP_LOG_EVENT_VERBOSE. See the documentation for 1664 * TCP_LOG_EVENT(). 1665 */ 1666 1667 struct tcp_log_buffer * 1668 tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, 1669 struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, 1670 union tcp_log_stackspecific *stackinfo, int th_hostorder, 1671 const char *output_caller, const char *func, int line, const struct timeval *itv) 1672 { 1673 struct tcp_log_mem *log_entry; 1674 struct tcp_log_buffer *log_buf; 1675 int attempt_count = 0; 1676 struct tcp_log_verbose *log_verbose; 1677 uint32_t logsn; 1678 1679 KASSERT((func == NULL && line == 0) || (func != NULL && line > 0), 1680 ("%s called with inconsistent func (%p) and line (%d) arguments", 1681 __func__, func, line)); 1682 1683 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1684 if (tcp_disable_all_bb_logs) { 1685 /* 1686 * The global shutdown logging 1687 * switch has been thrown. Call 1688 * the purge function that frees 1689 * purges out the logs and 1690 * turns off logging. 1691 */ 1692 tcp_log_purge_tp_logbuf(tp); 1693 return (NULL); 1694 } 1695 KASSERT(tp->_t_logstate == TCP_LOG_STATE_HEAD || 1696 tp->_t_logstate == TCP_LOG_STATE_TAIL || 1697 tp->_t_logstate == TCP_LOG_STATE_CONTINUAL || 1698 tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO || 1699 tp->_t_logstate == TCP_LOG_VIA_BBPOINTS || 1700 tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO, 1701 ("%s called with unexpected tp->_t_logstate (%d)", __func__, 1702 tp->_t_logstate)); 1703 1704 /* 1705 * Get the serial number. We do this early so it will 1706 * increment even if we end up skipping the log entry for some 1707 * reason. 1708 */ 1709 logsn = tp->t_logsn++; 1710 1711 /* 1712 * Can we get a new log entry? If so, increment the lognum counter 1713 * here. 
1714 */ 1715 retry: 1716 if (tp->t_lognum < tp->t_loglimit) { 1717 if ((log_entry = uma_zalloc(tcp_log_zone, M_NOWAIT)) != NULL) 1718 tp->t_lognum++; 1719 } else 1720 log_entry = NULL; 1721 1722 /* Do we need to try to reuse? */ 1723 if (log_entry == NULL) { 1724 /* 1725 * Sacrifice auto-logged sessions without a log ID if 1726 * tcp_log_auto_all is false. (If they don't have a log 1727 * ID by now, it is probable that either they won't get one 1728 * or we are resource-constrained.) 1729 */ 1730 if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && 1731 !tcp_log_auto_all) { 1732 if (tcp_log_state_change(tp, TCP_LOG_STATE_CLEAR)) { 1733 #ifdef INVARIANTS 1734 panic("%s:%d: tcp_log_state_change() failed " 1735 "to set tp %p to TCP_LOG_STATE_CLEAR", 1736 __func__, __LINE__, tp); 1737 #endif 1738 tp->_t_logstate = TCP_LOG_STATE_OFF; 1739 } 1740 return (NULL); 1741 } 1742 /* 1743 * If we are in TCP_LOG_STATE_HEAD_AUTO state, try to dump 1744 * the buffers. If successful, deactivate tracing. Otherwise, 1745 * leave it active so we will retry. 1746 */ 1747 if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO && 1748 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", 1749 M_NOWAIT, false)) { 1750 tp->_t_logstate = TCP_LOG_STATE_OFF; 1751 return(NULL); 1752 } else if ((tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) && 1753 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1754 M_NOWAIT, false)) { 1755 if (attempt_count == 0) { 1756 attempt_count++; 1757 goto retry; 1758 } 1759 #ifdef TCPLOG_DEBUG_COUNTERS 1760 counter_u64_add(tcp_log_que_fail4, 1); 1761 #endif 1762 return(NULL); 1763 1764 } else if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) && 1765 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", 1766 M_NOWAIT, false)) { 1767 if (attempt_count == 0) { 1768 attempt_count++; 1769 goto retry; 1770 } 1771 #ifdef TCPLOG_DEBUG_COUNTERS 1772 counter_u64_add(tcp_log_que_fail4, 1); 1773 #endif 1774 return(NULL); 1775 } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) 1776 return(NULL); 1777 1778 /* If in HEAD state, just deactivate the tracing and return. */ 1779 if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { 1780 tp->_t_logstate = TCP_LOG_STATE_OFF; 1781 return(NULL); 1782 } 1783 /* 1784 * Get a buffer to reuse. If that fails, just give up. 1785 * (We can't log anything without a buffer in which to 1786 * put it.) 1787 * 1788 * Note that we don't change the t_lognum counter 1789 * here. Because we are re-using the buffer, the total 1790 * number won't change. 1791 */ 1792 if ((log_entry = STAILQ_FIRST(&tp->t_logs)) == NULL) 1793 return(NULL); 1794 STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); 1795 tcp_log_entry_refcnt_rem(log_entry); 1796 } 1797 1798 KASSERT(log_entry != NULL, 1799 ("%s: log_entry unexpectedly NULL", __func__)); 1800 1801 /* Extract the log buffer and verbose buffer pointers. */ 1802 log_buf = &log_entry->tlm_buf; 1803 log_verbose = &log_entry->tlm_v; 1804 1805 /* Basic entries. 
*/ 1806 if (itv == NULL) 1807 microuptime(&log_buf->tlb_tv); 1808 else 1809 memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval)); 1810 log_buf->tlb_ticks = ticks; 1811 log_buf->tlb_sn = logsn; 1812 log_buf->tlb_stackid = tp->t_fb->tfb_id; 1813 log_buf->tlb_eventid = eventid; 1814 log_buf->tlb_eventflags = 0; 1815 log_buf->tlb_errno = errornum; 1816 1817 /* Socket buffers */ 1818 if (rxbuf != NULL) { 1819 log_buf->tlb_eventflags |= TLB_FLAG_RXBUF; 1820 log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc; 1821 log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc; 1822 log_buf->tlb_rxbuf.tls_sb_spare = 0; 1823 } else { 1824 log_buf->tlb_rxbuf.tls_sb_acc = 0; 1825 log_buf->tlb_rxbuf.tls_sb_ccc = 0; 1826 } 1827 if (txbuf != NULL) { 1828 log_buf->tlb_eventflags |= TLB_FLAG_TXBUF; 1829 log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc; 1830 log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc; 1831 log_buf->tlb_txbuf.tls_sb_spare = 0; 1832 } else { 1833 log_buf->tlb_txbuf.tls_sb_acc = 0; 1834 log_buf->tlb_txbuf.tls_sb_ccc = 0; 1835 } 1836 /* Copy values from tp to the log entry. */ 1837 #define COPY_STAT(f) log_buf->tlb_ ## f = tp->f 1838 #define COPY_STAT_T(f) log_buf->tlb_ ## f = tp->t_ ## f 1839 COPY_STAT_T(state); 1840 COPY_STAT_T(starttime); 1841 COPY_STAT(iss); 1842 COPY_STAT_T(flags); 1843 COPY_STAT(snd_una); 1844 COPY_STAT(snd_max); 1845 COPY_STAT(snd_cwnd); 1846 COPY_STAT(snd_nxt); 1847 COPY_STAT(snd_recover); 1848 COPY_STAT(snd_wnd); 1849 COPY_STAT(snd_ssthresh); 1850 COPY_STAT_T(srtt); 1851 COPY_STAT_T(rttvar); 1852 COPY_STAT(rcv_up); 1853 COPY_STAT(rcv_adv); 1854 COPY_STAT(rcv_nxt); 1855 COPY_STAT(rcv_wnd); 1856 COPY_STAT_T(dupacks); 1857 COPY_STAT_T(segqlen); 1858 COPY_STAT(snd_numholes); 1859 COPY_STAT(snd_scale); 1860 COPY_STAT(rcv_scale); 1861 COPY_STAT_T(flags2); 1862 COPY_STAT_T(fbyte_in); 1863 COPY_STAT_T(fbyte_out); 1864 #undef COPY_STAT 1865 #undef COPY_STAT_T 1866 /* Copy stack-specific info. */ 1867 if (stackinfo != NULL) { 1868 memcpy(&log_buf->tlb_stackinfo, stackinfo, 1869 sizeof(log_buf->tlb_stackinfo)); 1870 log_buf->tlb_eventflags |= TLB_FLAG_STACKINFO; 1871 } 1872 1873 /* The packet */ 1874 log_buf->tlb_len = len; 1875 if (th) { 1876 int optlen; 1877 1878 log_buf->tlb_eventflags |= TLB_FLAG_HDR; 1879 log_buf->tlb_th = *th; 1880 if (th_hostorder) 1881 tcp_fields_to_net(&log_buf->tlb_th); 1882 optlen = (th->th_off << 2) - sizeof (struct tcphdr); 1883 if (optlen > 0) 1884 memcpy(log_buf->tlb_opts, th + 1, optlen); 1885 } else { 1886 memset(&log_buf->tlb_th, 0, sizeof(*th)); 1887 } 1888 1889 /* Verbose information */ 1890 if (func != NULL) { 1891 log_buf->tlb_eventflags |= TLB_FLAG_VERBOSE; 1892 if (output_caller != NULL) 1893 strlcpy(log_verbose->tlv_snd_frm, output_caller, 1894 TCP_FUNC_LEN); 1895 else 1896 *log_verbose->tlv_snd_frm = 0; 1897 strlcpy(log_verbose->tlv_trace_func, func, TCP_FUNC_LEN); 1898 log_verbose->tlv_trace_line = line; 1899 } 1900 1901 /* Insert the new log at the tail. */ 1902 STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue); 1903 tcp_log_entry_refcnt_add(log_entry); 1904 return (log_buf); 1905 } 1906 1907 /* 1908 * Change the logging state for a TCPCB. Returns 0 on success or an 1909 * error code on failure. 
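* Possible errors: EINVAL for an unrecognized state, ECANCELED when
* the connection's log ID bucket has been probabilistically opted out
* of logging (TCP_LOG_STATE_RATIO_OFF), and EBUSY when BB logging has
* been disabled globally via the disable_all sysctl. Except in the
* EINVAL case, the TF2_LOG_AUTO flag is also cleared.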
1910 */
1911 int
1912 tcp_log_state_change(struct tcpcb *tp, int state)
1913 {
1914 struct tcp_log_mem *log_entry;
1915 int rv;
1916
1917 INP_WLOCK_ASSERT(tptoinpcb(tp));
1918 rv = 0;
1919 switch(state) {
1920 case TCP_LOG_STATE_CLEAR:
1921 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
1922 tcp_log_remove_log_head(tp, log_entry);
1923 /* Fall through */
1924
1925 case TCP_LOG_STATE_OFF:
1926 tp->_t_logstate = TCP_LOG_STATE_OFF;
1927 break;
1928
1929 case TCP_LOG_STATE_TAIL:
1930 case TCP_LOG_STATE_HEAD:
1931 case TCP_LOG_STATE_CONTINUAL:
1932 case TCP_LOG_VIA_BBPOINTS:
1933 case TCP_LOG_STATE_HEAD_AUTO:
1934 case TCP_LOG_STATE_TAIL_AUTO:
1935 /*
1936 * When the RATIO_OFF state is set for the bucket, the log ID
1937 * this tp is associated with has been probabilistically opted
1938 * out of logging per tcp_log_apply_ratio().
1939 */
1940 if (tp->t_lib == NULL ||
1941 tp->t_lib->tlb_logstate != TCP_LOG_STATE_RATIO_OFF) {
1942 tp->_t_logstate = state;
1943 } else {
1944 rv = ECANCELED;
1945 tp->_t_logstate = TCP_LOG_STATE_OFF;
1946 }
1947 break;
1948
1949 default:
1950 return (EINVAL);
1951 }
1952 if (tcp_disable_all_bb_logs) {
1953 /* We are prohibited from doing any logs */
1954 tp->_t_logstate = TCP_LOG_STATE_OFF;
1955 rv = EBUSY;
1956 }
1957 tp->t_flags2 &= ~(TF2_LOG_AUTO);
1958
1959 return (rv);
1960 }
1961
1962 /* If tcp_drain() is called, flush half the log entries. */
1963 void
1964 tcp_log_drain(struct tcpcb *tp)
1965 {
1966 struct tcp_log_mem *log_entry, *next;
1967 int target, skip;
1968
1969 INP_WLOCK_ASSERT(tptoinpcb(tp));
1970 if ((target = tp->t_lognum / 2) == 0)
1971 return;
1972
1973 /*
1974 * XXXRRS: At this point, I don't think it is wise that
1975 * we do this. All that a drain call means is that
1976 * we are hitting one of the system mbuf limits. Neither
1977 * BB logging nor the freeing of BB log entries will
1978 * create any more mbufs, and this really has nothing to
1979 * do with the system running out of mbufs. For now I
1980 * am changing this to free any "AUTO" sessions by dumping
1981 * their logs out. But this should either be changed
1982 * so that it gets called when we hit the BB limit
1983 * or it should just not get called (one of the two),
1984 * since I don't think the mbuf <-> BB log cleanup
1985 * is the right thing to do here.
1986 */
1987 /*
1988 * If we are logging the "head" packets, we want to discard
1989 * from the tail of the queue. Otherwise, we want to discard
1990 * from the head.
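* The auto, continual, and BBpoint modes are handled differently
* below: rather than discarding entries, we hand the whole queue to
* tcp_log_dump_tp_logbuf() so it can be pushed out through the log
* device.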
1991 */ 1992 if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { 1993 skip = tp->t_lognum - target; 1994 STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue) 1995 if (!--skip) 1996 break; 1997 KASSERT(log_entry != NULL, 1998 ("%s: skipped through all entries!", __func__)); 1999 if (log_entry == NULL) 2000 return; 2001 while ((next = STAILQ_NEXT(log_entry, tlm_queue)) != NULL) { 2002 STAILQ_REMOVE_AFTER(&tp->t_logs, log_entry, tlm_queue); 2003 tcp_log_entry_refcnt_rem(next); 2004 tcp_log_remove_log_cleanup(tp, next); 2005 #ifdef INVARIANTS 2006 target--; 2007 #endif 2008 } 2009 KASSERT(target == 0, 2010 ("%s: After removing from tail, target was %d", __func__, 2011 target)); 2012 } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) { 2013 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at drain", 2014 M_NOWAIT, false); 2015 } else if (tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO) { 2016 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at drain", 2017 M_NOWAIT, false); 2018 } else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) { 2019 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", 2020 M_NOWAIT, false); 2021 } else if (tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) { 2022 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 2023 M_NOWAIT, false); 2024 } else { 2025 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL && 2026 target--) 2027 tcp_log_remove_log_head(tp, log_entry); 2028 KASSERT(target <= 0, 2029 ("%s: After removing from head, target was %d", __func__, 2030 target)); 2031 KASSERT(tp->t_lognum > 0, 2032 ("%s: After removing from head, tp->t_lognum was %d", 2033 __func__, target)); 2034 KASSERT(log_entry != NULL, 2035 ("%s: After removing from head, the tailq was empty", 2036 __func__)); 2037 } 2038 } 2039 2040 static inline int 2041 tcp_log_copyout(struct sockopt *sopt, void *src, void *dst, size_t len) 2042 { 2043 2044 if (sopt->sopt_td != NULL) 2045 return (copyout(src, dst, len)); 2046 bcopy(src, dst, len); 2047 return (0); 2048 } 2049 2050 static int 2051 tcp_log_logs_to_buf(struct sockopt *sopt, struct tcp_log_stailq *log_tailqp, 2052 struct tcp_log_buffer **end, int count) 2053 { 2054 struct tcp_log_buffer *out_entry; 2055 struct tcp_log_mem *log_entry; 2056 size_t entrysize; 2057 int error; 2058 #ifdef INVARIANTS 2059 int orig_count = count; 2060 #endif 2061 2062 /* Copy the data out. */ 2063 error = 0; 2064 out_entry = (struct tcp_log_buffer *) sopt->sopt_val; 2065 STAILQ_FOREACH(log_entry, log_tailqp, tlm_queue) { 2066 count--; 2067 KASSERT(count >= 0, 2068 ("%s:%d: Exceeded expected count (%d) processing list %p", 2069 __func__, __LINE__, orig_count, log_tailqp)); 2070 2071 #ifdef TCPLOG_DEBUG_COUNTERS 2072 counter_u64_add(tcp_log_que_copyout, 1); 2073 #endif 2074 2075 /* 2076 * Skip copying out the header if it isn't present. 2077 * Instead, copy out zeros (to ensure we don't leak info). 2078 * TODO: Make sure we truly do zero everything we don't 2079 * explicitly set. 2080 */ 2081 if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR) 2082 entrysize = sizeof(struct tcp_log_buffer); 2083 else 2084 entrysize = offsetof(struct tcp_log_buffer, tlb_th); 2085 error = tcp_log_copyout(sopt, &log_entry->tlm_buf, out_entry, 2086 entrysize); 2087 if (error) 2088 break; 2089 if (!(log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)) { 2090 error = tcp_log_copyout(sopt, zerobuf, 2091 ((uint8_t *)out_entry) + entrysize, 2092 sizeof(struct tcp_log_buffer) - entrysize); 2093 } 2094 2095 /* 2096 * Copy out the verbose bit, if needed. 
Either way, 2097 * increment the output pointer the correct amount. 2098 */ 2099 if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) { 2100 error = tcp_log_copyout(sopt, &log_entry->tlm_v, 2101 out_entry->tlb_verbose, 2102 sizeof(struct tcp_log_verbose)); 2103 if (error) 2104 break; 2105 out_entry = (struct tcp_log_buffer *) 2106 (((uint8_t *) (out_entry + 1)) + 2107 sizeof(struct tcp_log_verbose)); 2108 } else 2109 out_entry++; 2110 } 2111 *end = out_entry; 2112 KASSERT(error || count == 0, 2113 ("%s:%d: Less than expected count (%d) processing list %p" 2114 " (%d remain)", __func__, __LINE__, orig_count, 2115 log_tailqp, count)); 2116 2117 return (error); 2118 } 2119 2120 /* 2121 * Copy out the buffer. Note that we do incremental copying, so 2122 * sooptcopyout() won't work. However, the goal is to produce the same 2123 * end result as if we copied in the entire user buffer, updated it, 2124 * and then used sooptcopyout() to copy it out. 2125 * 2126 * NOTE: This should be called with a write lock on the PCB; however, 2127 * the function will drop it after it extracts the data from the TCPCB. 2128 */ 2129 int 2130 tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp) 2131 { 2132 struct tcp_log_stailq log_tailq; 2133 struct tcp_log_mem *log_entry, *log_next; 2134 struct tcp_log_buffer *out_entry; 2135 struct inpcb *inp = tptoinpcb(tp); 2136 size_t outsize, entrysize; 2137 int error, outnum; 2138 2139 INP_WLOCK_ASSERT(inp); 2140 2141 /* 2142 * Determine which log entries will fit in the buffer. As an 2143 * optimization, skip this if all the entries will clearly fit 2144 * in the buffer. (However, get an exact size if we are using 2145 * INVARIANTS.) 2146 */ 2147 #ifndef INVARIANTS 2148 if (sopt->sopt_valsize / (sizeof(struct tcp_log_buffer) + 2149 sizeof(struct tcp_log_verbose)) >= tp->t_lognum) { 2150 log_entry = STAILQ_LAST(&tp->t_logs, tcp_log_mem, tlm_queue); 2151 log_next = NULL; 2152 outsize = 0; 2153 outnum = tp->t_lognum; 2154 } else { 2155 #endif 2156 outsize = outnum = 0; 2157 log_entry = NULL; 2158 STAILQ_FOREACH(log_next, &tp->t_logs, tlm_queue) { 2159 entrysize = sizeof(struct tcp_log_buffer); 2160 if (log_next->tlm_buf.tlb_eventflags & 2161 TLB_FLAG_VERBOSE) 2162 entrysize += sizeof(struct tcp_log_verbose); 2163 if ((sopt->sopt_valsize - outsize) < entrysize) 2164 break; 2165 outsize += entrysize; 2166 outnum++; 2167 log_entry = log_next; 2168 } 2169 KASSERT(outsize <= sopt->sopt_valsize, 2170 ("%s: calculated output size (%zu) greater than available" 2171 "space (%zu)", __func__, outsize, sopt->sopt_valsize)); 2172 #ifndef INVARIANTS 2173 } 2174 #endif 2175 2176 /* 2177 * Copy traditional sooptcopyout() behavior: if sopt->sopt_val 2178 * is NULL, silently skip the copy. However, in this case, we 2179 * will leave the list alone and return. Functionally, this 2180 * gives userspace a way to poll for an approximate buffer 2181 * size they will need to get the log entries. 2182 */ 2183 if (sopt->sopt_val == NULL) { 2184 INP_WUNLOCK(inp); 2185 if (outsize == 0) { 2186 outsize = outnum * (sizeof(struct tcp_log_buffer) + 2187 sizeof(struct tcp_log_verbose)); 2188 } 2189 if (sopt->sopt_valsize > outsize) 2190 sopt->sopt_valsize = outsize; 2191 return (0); 2192 } 2193 2194 /* 2195 * Break apart the list. We'll save the ones we want to copy 2196 * out locally and remove them from the TCPCB list. We can 2197 * then drop the INPCB lock while we do the copyout. 2198 * 2199 * There are roughly three cases: 2200 * 1. There was nothing to copy out. 
That's easy: drop the 2201 * lock and return. 2202 * 2. We are copying out the entire list. Again, that's easy: 2203 * move the whole list. 2204 * 3. We are copying out a partial list. That's harder. We 2205 * need to update the list book-keeping entries. 2206 */ 2207 if (log_entry != NULL && log_next == NULL) { 2208 /* Move entire list. */ 2209 KASSERT(outnum == tp->t_lognum, 2210 ("%s:%d: outnum (%d) should match tp->t_lognum (%d)", 2211 __func__, __LINE__, outnum, tp->t_lognum)); 2212 log_tailq = tp->t_logs; 2213 tp->t_lognum = 0; 2214 STAILQ_INIT(&tp->t_logs); 2215 } else if (log_entry != NULL) { 2216 /* Move partial list. */ 2217 KASSERT(outnum < tp->t_lognum, 2218 ("%s:%d: outnum (%d) not less than tp->t_lognum (%d)", 2219 __func__, __LINE__, outnum, tp->t_lognum)); 2220 STAILQ_FIRST(&log_tailq) = STAILQ_FIRST(&tp->t_logs); 2221 STAILQ_FIRST(&tp->t_logs) = STAILQ_NEXT(log_entry, tlm_queue); 2222 KASSERT(STAILQ_NEXT(log_entry, tlm_queue) != NULL, 2223 ("%s:%d: tp->t_logs is unexpectedly shorter than expected" 2224 "(tp: %p, log_tailq: %p, outnum: %d, tp->t_lognum: %d)", 2225 __func__, __LINE__, tp, &log_tailq, outnum, tp->t_lognum)); 2226 STAILQ_NEXT(log_entry, tlm_queue) = NULL; 2227 log_tailq.stqh_last = &STAILQ_NEXT(log_entry, tlm_queue); 2228 tp->t_lognum -= outnum; 2229 } else 2230 STAILQ_INIT(&log_tailq); 2231 2232 /* Drop the PCB lock. */ 2233 INP_WUNLOCK(inp); 2234 2235 /* Copy the data out. */ 2236 error = tcp_log_logs_to_buf(sopt, &log_tailq, &out_entry, outnum); 2237 2238 if (error) { 2239 /* Restore list */ 2240 INP_WLOCK(inp); 2241 if ((inp->inp_flags & INP_DROPPED) == 0) { 2242 tp = intotcpcb(inp); 2243 2244 /* Merge the two lists. */ 2245 STAILQ_CONCAT(&log_tailq, &tp->t_logs); 2246 tp->t_logs = log_tailq; 2247 tp->t_lognum += outnum; 2248 } 2249 INP_WUNLOCK(inp); 2250 } else { 2251 /* Sanity check entries */ 2252 KASSERT(((caddr_t)out_entry - (caddr_t)sopt->sopt_val) == 2253 outsize, ("%s: Actual output size (%zu) != " 2254 "calculated output size (%zu)", __func__, 2255 (size_t)((caddr_t)out_entry - (caddr_t)sopt->sopt_val), 2256 outsize)); 2257 2258 /* Free the entries we just copied out. */ 2259 STAILQ_FOREACH_SAFE(log_entry, &log_tailq, tlm_queue, log_next) { 2260 tcp_log_entry_refcnt_rem(log_entry); 2261 uma_zfree(tcp_log_zone, log_entry); 2262 } 2263 } 2264 2265 sopt->sopt_valsize = (size_t)((caddr_t)out_entry - 2266 (caddr_t)sopt->sopt_val); 2267 return (error); 2268 } 2269 2270 static void 2271 tcp_log_free_queue(struct tcp_log_dev_queue *param) 2272 { 2273 struct tcp_log_dev_log_queue *entry; 2274 2275 KASSERT(param != NULL, ("%s: called with NULL param", __func__)); 2276 if (param == NULL) 2277 return; 2278 2279 entry = (struct tcp_log_dev_log_queue *)param; 2280 2281 /* Free the entries. */ 2282 tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count); 2283 2284 /* Free the buffer, if it is allocated. */ 2285 if (entry->tldl_common.tldq_buf != NULL) 2286 free(entry->tldl_common.tldq_buf, M_TCPLOGDEV); 2287 2288 /* Free the queue entry. */ 2289 free(entry, M_TCPLOGDEV); 2290 } 2291 2292 static struct tcp_log_common_header * 2293 tcp_log_expandlogbuf(struct tcp_log_dev_queue *param) 2294 { 2295 struct tcp_log_dev_log_queue *entry; 2296 struct tcp_log_header *hdr; 2297 uint8_t *end; 2298 struct sockopt sopt; 2299 int error; 2300 2301 entry = (struct tcp_log_dev_log_queue *)param; 2302 2303 /* Take a worst-case guess at space needs. 
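* Each of the tldl_count entries may or may not be followed by a
* verbose block, so header + count * (buffer + verbose) can overshoot,
* but never undershoot, the space tcp_log_logs_to_buf() will actually
* consume.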
*/ 2304 sopt.sopt_valsize = sizeof(struct tcp_log_header) + 2305 entry->tldl_count * (sizeof(struct tcp_log_buffer) + 2306 sizeof(struct tcp_log_verbose)); 2307 hdr = malloc(sopt.sopt_valsize, M_TCPLOGDEV, M_NOWAIT); 2308 if (hdr == NULL) { 2309 #ifdef TCPLOG_DEBUG_COUNTERS 2310 counter_u64_add(tcp_log_que_fail5, entry->tldl_count); 2311 #endif 2312 return (NULL); 2313 } 2314 sopt.sopt_val = hdr + 1; 2315 sopt.sopt_valsize -= sizeof(struct tcp_log_header); 2316 sopt.sopt_td = NULL; 2317 2318 error = tcp_log_logs_to_buf(&sopt, &entry->tldl_entries, 2319 (struct tcp_log_buffer **)&end, entry->tldl_count); 2320 if (error) { 2321 free(hdr, M_TCPLOGDEV); 2322 return (NULL); 2323 } 2324 2325 /* Free the entries. */ 2326 tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count); 2327 entry->tldl_count = 0; 2328 2329 memset(hdr, 0, sizeof(struct tcp_log_header)); 2330 hdr->tlh_version = TCP_LOG_BUF_VER; 2331 hdr->tlh_type = TCP_LOG_DEV_TYPE_BBR; 2332 hdr->tlh_length = end - (uint8_t *)hdr; 2333 hdr->tlh_ie = entry->tldl_ie; 2334 hdr->tlh_af = entry->tldl_af; 2335 getboottime(&hdr->tlh_offset); 2336 strlcpy(hdr->tlh_id, entry->tldl_id, TCP_LOG_ID_LEN); 2337 strlcpy(hdr->tlh_tag, entry->tldl_tag, TCP_LOG_TAG_LEN); 2338 strlcpy(hdr->tlh_reason, entry->tldl_reason, TCP_LOG_REASON_LEN); 2339 return ((struct tcp_log_common_header *)hdr); 2340 } 2341 2342 /* 2343 * Queue the tcpcb's log buffer for transmission via the log buffer facility. 2344 * 2345 * NOTE: This should be called with a write lock on the PCB. 2346 * 2347 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop 2348 * and reacquire the INP lock if it needs to do so. 2349 * 2350 * If force is false, this will only dump auto-logged sessions if 2351 * tcp_log_auto_all is true or if there is a log ID defined for the session. 2352 */ 2353 int 2354 tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force) 2355 { 2356 struct tcp_log_dev_log_queue *entry; 2357 struct inpcb *inp = tptoinpcb(tp); 2358 #ifdef TCPLOG_DEBUG_COUNTERS 2359 int num_entries; 2360 #endif 2361 2362 INP_WLOCK_ASSERT(inp); 2363 2364 /* If there are no log entries, there is nothing to do. */ 2365 if (tp->t_lognum == 0) 2366 return (0); 2367 2368 /* Check for a log ID. */ 2369 if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && 2370 !tcp_log_auto_all && !force) { 2371 struct tcp_log_mem *log_entry; 2372 2373 /* 2374 * We needed a log ID and none was found. Free the log entries 2375 * and return success. Also, cancel further logging. If the 2376 * session doesn't have a log ID by now, we'll assume it isn't 2377 * going to get one. 2378 */ 2379 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 2380 tcp_log_remove_log_head(tp, log_entry); 2381 KASSERT(tp->t_lognum == 0, 2382 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 2383 __func__, tp->t_lognum)); 2384 tp->_t_logstate = TCP_LOG_STATE_OFF; 2385 return (0); 2386 } 2387 2388 /* 2389 * Allocate memory. If we must wait, we'll need to drop the locks 2390 * and reacquire them (and do all the related business that goes 2391 * along with that). 
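* Concretely: we first try M_NOWAIT while holding the INP lock; only
* if that fails and the caller allowed M_WAITOK do we unlock, perform
* a sleeping allocation, relock, and then re-check that the connection
* has not been dropped and still has entries to dump.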
2392 */
2393 entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
2394 M_NOWAIT);
2395 if (entry == NULL && (how & M_NOWAIT)) {
2396 #ifdef TCPLOG_DEBUG_COUNTERS
2397 counter_u64_add(tcp_log_que_fail3, 1);
2398 #endif
2399 return (ENOBUFS);
2400 }
2401 if (entry == NULL) {
2402 INP_WUNLOCK(inp);
2403 entry = malloc(sizeof(struct tcp_log_dev_log_queue),
2404 M_TCPLOGDEV, M_WAITOK);
2405 INP_WLOCK(inp);
2406 /*
2407 * Note that this check is slightly overly-restrictive in
2408 * that the TCB can survive either of these events.
2409 * However, there is currently not a good way to ensure
2410 * that is the case. So, if we hit this M_WAIT path, we
2411 * may end up dropping some entries. That seems like a
2412 * small price to pay for safety.
2413 */
2414 if (inp->inp_flags & INP_DROPPED) {
2415 free(entry, M_TCPLOGDEV);
2416 #ifdef TCPLOG_DEBUG_COUNTERS
2417 counter_u64_add(tcp_log_que_fail2, 1);
2418 #endif
2419 return (ECONNRESET);
2420 }
2421 tp = intotcpcb(inp);
2422 if (tp->t_lognum == 0) {
2423 free(entry, M_TCPLOGDEV);
2424 return (0);
2425 }
2426 }
2427
2428 /* Fill in the unique parts of the queue entry. */
2429 if (tp->t_lib != NULL) {
2430 strlcpy(entry->tldl_id, tp->t_lib->tlb_id, TCP_LOG_ID_LEN);
2431 strlcpy(entry->tldl_tag, tp->t_lib->tlb_tag, TCP_LOG_TAG_LEN);
2432 } else {
2433 strlcpy(entry->tldl_id, "UNKNOWN", TCP_LOG_ID_LEN);
2434 strlcpy(entry->tldl_tag, "UNKNOWN", TCP_LOG_TAG_LEN);
2435 }
2436 if (reason != NULL)
2437 strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
2438 else
2439 strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
2440 entry->tldl_ie = inp->inp_inc.inc_ie;
2441 if (inp->inp_inc.inc_flags & INC_ISIPV6)
2442 entry->tldl_af = AF_INET6;
2443 else
2444 entry->tldl_af = AF_INET;
2445 entry->tldl_entries = tp->t_logs;
2446 entry->tldl_count = tp->t_lognum;
2447
2448 /* Fill in the common parts of the queue entry. */
2449 entry->tldl_common.tldq_buf = NULL;
2450 entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
2451 entry->tldl_common.tldq_dtor = tcp_log_free_queue;
2452
2453 /* Clear the log data from the TCPCB. */
2454 #ifdef TCPLOG_DEBUG_COUNTERS
2455 num_entries = tp->t_lognum;
2456 #endif
2457 tp->t_lognum = 0;
2458 STAILQ_INIT(&tp->t_logs);
2459
2460 /* Add the entry. If no one is listening, free the entry. */
2461 if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) {
2462 tcp_log_free_queue((struct tcp_log_dev_queue *)entry);
2463 #ifdef TCPLOG_DEBUG_COUNTERS
2464 counter_u64_add(tcp_log_que_fail1, num_entries);
2465 } else {
2466 counter_u64_add(tcp_log_queued, num_entries);
2467 #endif
2468 }
2469 return (0);
2470 }
2471
2472 /*
2473 * Queue the log_id_node's log buffers for transmission via the log buffer
2474 * facility.
2475 *
2476 * NOTE: This should be called with the bucket locked and referenced.
2477 *
2478 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
2479 * and reacquire the bucket lock if it needs to do so. (The caller must
2480 * ensure that the tln is no longer on any lists so no one else will mess
2481 * with this while the lock is dropped!)
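* On success, ownership of the node's log entries passes to the queued
* entry (tldl_entries and tldl_count are copied below), so the caller
* must not free or otherwise touch tln->tln_entries afterwards.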
2482 */
2483 static int
2484 tcp_log_dump_node_logbuf(struct tcp_log_id_node *tln, char *reason, int how)
2485 {
2486 struct tcp_log_dev_log_queue *entry;
2487 struct tcp_log_id_bucket *tlb;
2488
2489 tlb = tln->tln_bucket;
2490 TCPID_BUCKET_LOCK_ASSERT(tlb);
2491 KASSERT(tlb->tlb_refcnt > 0,
2492 ("%s:%d: Called with unreferenced bucket (tln=%p, tlb=%p)",
2493 __func__, __LINE__, tln, tlb));
2494 KASSERT(tln->tln_closed,
2495 ("%s:%d: Called for node with tln_closed==false (tln=%p)",
2496 __func__, __LINE__, tln));
2497
2498 /* If there are no log entries, there is nothing to do. */
2499 if (tln->tln_count == 0)
2500 return (0);
2501
2502 /*
2503 * Allocate memory. If we must wait, we'll need to drop the locks
2504 * and reacquire them (and do all the related business that goes
2505 * along with that).
2506 */
2507 entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
2508 M_NOWAIT);
2509 if (entry == NULL && (how & M_NOWAIT))
2510 return (ENOBUFS);
2511 if (entry == NULL) {
2512 TCPID_BUCKET_UNLOCK(tlb);
2513 entry = malloc(sizeof(struct tcp_log_dev_log_queue),
2514 M_TCPLOGDEV, M_WAITOK);
2515 TCPID_BUCKET_LOCK(tlb);
2516 }
2517
2518 /* Fill in the common parts of the queue entry. */
2519 entry->tldl_common.tldq_buf = NULL;
2520 entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
2521 entry->tldl_common.tldq_dtor = tcp_log_free_queue;
2522
2523 /* Fill in the unique parts of the queue entry. */
2524 strlcpy(entry->tldl_id, tlb->tlb_id, TCP_LOG_ID_LEN);
2525 strlcpy(entry->tldl_tag, tlb->tlb_tag, TCP_LOG_TAG_LEN);
2526 if (reason != NULL)
2527 strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
2528 else
2529 strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
2530 entry->tldl_ie = tln->tln_ie;
2531 entry->tldl_entries = tln->tln_entries;
2532 entry->tldl_count = tln->tln_count;
2533 entry->tldl_af = tln->tln_af;
2534
2535 /* Add the entry. If no one is listening, free the entry. */
2536 if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry))
2537 tcp_log_free_queue((struct tcp_log_dev_queue *)entry);
2538
2539 return (0);
2540 }
2541
2542 /*
2543 * Queue the log buffers for all sessions in a bucket for transmission via
2544 * the log buffer facility.
2545 *
2546 * NOTE: This should be called with a locked bucket; however, the function
2547 * will drop the lock.
2548 */
2549 #define LOCAL_SAVE 10
2550 static void
2551 tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason)
2552 {
2553 struct tcp_log_id_node local_entries[LOCAL_SAVE];
2554 struct inpcb *inp;
2555 struct tcpcb *tp;
2556 struct tcp_log_id_node *cur_tln, *prev_tln, *tmp_tln;
2557 int i, num_local_entries, tree_locked;
2558 bool expireq_locked;
2559
2560 TCPID_BUCKET_LOCK_ASSERT(tlb);
2561
2562 /*
2563 * Take a reference on the bucket to keep it from disappearing until
2564 * we are done.
2565 */
2566 TCPID_BUCKET_REF(tlb);
2567
2568 /*
2569 * We'll try to create these without dropping locks. However, we
2570 * might very well need to drop locks to get memory. If that's the
2571 * case, we'll save up to 10 entries on the stack, and sacrifice the rest.
2572 * (Otherwise, we need to worry about finding our place again in a
2573 * potentially changed list. It just doesn't seem worth the trouble
2574 * to do that.)
2575 */
2576 expireq_locked = false;
2577 num_local_entries = 0;
2578 prev_tln = NULL;
2579 tree_locked = TREE_UNLOCKED;
2580 SLIST_FOREACH_SAFE(cur_tln, &tlb->tlb_head, tln_list, tmp_tln) {
2581 /*
2582 * If this isn't associated with a TCPCB, we can pull it off
2583 * the list now.
We need to be careful that the expire timer
2584 * hasn't already taken ownership (tln_expiretime == SBT_MAX).
2585 * If so, we let the expire timer code free the data.
2586 */
2587 if (cur_tln->tln_closed) {
2588 no_inp:
2589 /*
2590 * Get the expireq lock so we can get a consistent
2591 * read of tln_expiretime and so we can remove this
2592 * from the expireq.
2593 */
2594 if (!expireq_locked) {
2595 TCPLOG_EXPIREQ_LOCK();
2596 expireq_locked = true;
2597 }
2598
2599 /*
2600 * We ignore entries with tln_expiretime == SBT_MAX.
2601 * The expire timer code already owns those.
2602 */
2603 KASSERT(cur_tln->tln_expiretime > (sbintime_t) 0,
2604 ("%s:%d: node on the expire queue without positive "
2605 "expire time", __func__, __LINE__));
2606 if (cur_tln->tln_expiretime == SBT_MAX) {
2607 prev_tln = cur_tln;
2608 continue;
2609 }
2610
2611 /* Remove the entry from the expireq. */
2612 STAILQ_REMOVE(&tcp_log_expireq_head, cur_tln,
2613 tcp_log_id_node, tln_expireq);
2614
2615 /* Remove the entry from the bucket. */
2616 if (prev_tln != NULL)
2617 SLIST_REMOVE_AFTER(prev_tln, tln_list);
2618 else
2619 SLIST_REMOVE_HEAD(&tlb->tlb_head, tln_list);
2620
2621 /*
2622 * Drop the INP and bucket reference counts. Due to
2623 * lock-ordering rules, we need to drop the expire
2624 * queue lock.
2625 */
2626 TCPLOG_EXPIREQ_UNLOCK();
2627 expireq_locked = false;
2628
2629 /* Drop the INP reference. */
2630 INP_WLOCK(cur_tln->tln_inp);
2631 if (!in_pcbrele_wlocked(cur_tln->tln_inp))
2632 INP_WUNLOCK(cur_tln->tln_inp);
2633
2634 if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
2635 #ifdef INVARIANTS
2636 panic("%s: Bucket refcount unexpectedly 0.",
2637 __func__);
2638 #endif
2639 /*
2640 * Recover as best we can: free the entry we
2641 * own.
2642 */
2643 tcp_log_free_entries(&cur_tln->tln_entries,
2644 &cur_tln->tln_count);
2645 uma_zfree(tcp_log_id_node_zone, cur_tln);
2646 goto done;
2647 }
2648
2649 if (tcp_log_dump_node_logbuf(cur_tln, reason,
2650 M_NOWAIT)) {
2651 /*
2652 * If we have space, save the entries locally.
2653 * Otherwise, free them.
2654 */
2655 if (num_local_entries < LOCAL_SAVE) {
2656 local_entries[num_local_entries] =
2657 *cur_tln;
2658 num_local_entries++;
2659 } else {
2660 tcp_log_free_entries(
2661 &cur_tln->tln_entries,
2662 &cur_tln->tln_count);
2663 }
2664 }
2665
2666 /* No matter what, we are done with the node now. */
2667 uma_zfree(tcp_log_id_node_zone, cur_tln);
2668
2669 /*
2670 * Because we removed this entry from the list, prev_tln
2671 * (which tracks the previous entry still on the tlb
2672 * list) remains unchanged.
2673 */
2674 continue;
2675 }
2676
2677 /*
2678 * If we get to this point, the session data is still held in
2679 * the TCPCB. So, we need to pull the data out of that.
2680 *
2681 * We will need to drop the expireq lock so we can lock the INP.
2682 * We can then try to extract the data the "easy" way. If that
2683 * fails, we'll save the log entries for later.
2684 */
2685 if (expireq_locked) {
2686 TCPLOG_EXPIREQ_UNLOCK();
2687 expireq_locked = false;
2688 }
2689
2690 /* Lock the INP and then re-check the state. */
2691 inp = cur_tln->tln_inp;
2692 INP_WLOCK(inp);
2693 /*
2694 * If we caught this while it was transitioning, the data
2695 * might have moved from the TCPCB to the tln (signified by
2696 * setting tln_closed to true). If so, treat this like an
2697 * inactive connection.
2698 */
2699 if (cur_tln->tln_closed) {
2700 /*
2701 * It looks like we may have caught this connection
2702 * while it was transitioning from active to inactive.
2703 * Treat this like an inactive connection.
2704 */
2705 INP_WUNLOCK(inp);
2706 goto no_inp;
2707 }
2708
2709 /*
2710 * Try to dump the data from the tp without dropping the lock.
2711 * If this fails, try to save off the data locally.
2712 */
2713 tp = cur_tln->tln_tp;
2714 if (tcp_log_dump_tp_logbuf(tp, reason, M_NOWAIT, true) &&
2715 num_local_entries < LOCAL_SAVE) {
2716 tcp_log_move_tp_to_node(tp,
2717 &local_entries[num_local_entries]);
2718 local_entries[num_local_entries].tln_closed = 1;
2719 KASSERT(local_entries[num_local_entries].tln_bucket ==
2720 tlb, ("%s: %d: bucket mismatch for node %p",
2721 __func__, __LINE__, cur_tln));
2722 num_local_entries++;
2723 }
2724
2725 INP_WUNLOCK(inp);
2726
2727 /*
2728 * We are going to leave the current tln on the list. It will
2729 * become the previous tln.
2730 */
2731 prev_tln = cur_tln;
2732 }
2733
2734 /* Drop our locks, if any. */
2735 KASSERT(tree_locked == TREE_UNLOCKED,
2736 ("%s: %d: tree unexpectedly locked", __func__, __LINE__));
2737 switch (tree_locked) {
2738 case TREE_WLOCKED:
2739 TCPID_TREE_WUNLOCK();
2740 tree_locked = TREE_UNLOCKED;
2741 break;
2742 case TREE_RLOCKED:
2743 TCPID_TREE_RUNLOCK();
2744 tree_locked = TREE_UNLOCKED;
2745 break;
2746 }
2747 if (expireq_locked) {
2748 TCPLOG_EXPIREQ_UNLOCK();
2749 expireq_locked = false;
2750 }
2751
2752 /*
2753 * Try again for any saved entries. tcp_log_dump_node_logbuf() is
2754 * guaranteed to free the log entries within the node. And, since
2755 * the node itself is on our stack, we don't need to free it.
2756 */
2757 for (i = 0; i < num_local_entries; i++)
2758 tcp_log_dump_node_logbuf(&local_entries[i], reason, M_WAITOK);
2759
2760 /* Drop our reference. */
2761 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
2762 TCPID_BUCKET_UNLOCK(tlb);
2763
2764 done:
2765 /* Drop our locks, if any. */
2766 switch (tree_locked) {
2767 case TREE_WLOCKED:
2768 TCPID_TREE_WUNLOCK();
2769 break;
2770 case TREE_RLOCKED:
2771 TCPID_TREE_RUNLOCK();
2772 break;
2773 }
2774 if (expireq_locked)
2775 TCPLOG_EXPIREQ_UNLOCK();
2776 }
2777 #undef LOCAL_SAVE
2778
2779 /*
2780 * Queue the log buffers for all sessions in a bucket for transmission via
2781 * the log buffer facility.
2782 *
2783 * NOTE: This should be called with a locked INP; however, the function
2784 * will drop the lock.
2785 */
2786 void
2787 tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason)
2788 {
2789 struct inpcb *inp = tptoinpcb(tp);
2790 struct tcp_log_id_bucket *tlb;
2791 int tree_locked;
2792
2793 /* Figure out our bucket and lock it. */
2794 INP_WLOCK_ASSERT(inp);
2795 tlb = tp->t_lib;
2796 if (tlb == NULL) {
2797 /*
2798 * No bucket; treat this like a request to dump a single
2799 * session's traces.
2800 */
2801 (void)tcp_log_dump_tp_logbuf(tp, reason, M_WAITOK, true);
2802 INP_WUNLOCK(inp);
2803 return;
2804 }
2805 TCPID_BUCKET_REF(tlb);
2806 INP_WUNLOCK(inp);
2807 TCPID_BUCKET_LOCK(tlb);
2808
2809 /* If we are the last reference, we have nothing more to do here. */
2810 tree_locked = TREE_UNLOCKED;
2811 if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
2812 switch (tree_locked) {
2813 case TREE_WLOCKED:
2814 TCPID_TREE_WUNLOCK();
2815 break;
2816 case TREE_RLOCKED:
2817 TCPID_TREE_RUNLOCK();
2818 break;
2819 }
2820 return;
2821 }
2822
2823 /* Turn this over to tcp_log_dumpbucketlogs() to finish the work. */
2824 tcp_log_dumpbucketlogs(tlb, reason);
2825 }
2826
2827 /*
2828 * Mark the end of a flow with the current stack.
A stack can add 2829 * stack-specific info to this trace event by overriding this 2830 * function (see bbr_log_flowend() for example). 2831 */ 2832 void 2833 tcp_log_flowend(struct tcpcb *tp) 2834 { 2835 if (tp->_t_logstate != TCP_LOG_STATE_OFF) { 2836 struct socket *so = tptosocket(tp); 2837 TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd, 2838 TCP_LOG_FLOWEND, 0, 0, NULL, false); 2839 } 2840 } 2841 2842 void 2843 tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) 2844 { 2845 struct inpcb *inp; 2846 struct tcpcb *tp; 2847 2848 inp = sotoinpcb(so); 2849 KASSERT(inp != NULL, ("tcp_log_sendfile: inp == NULL")); 2850 2851 /* quick check to see if logging is enabled for this connection */ 2852 tp = intotcpcb(inp); 2853 if ((inp->inp_flags & INP_DROPPED) || 2854 (tp->_t_logstate == TCP_LOG_STATE_OFF)) { 2855 return; 2856 } 2857 2858 INP_WLOCK(inp); 2859 /* double check log state now that we have the lock */ 2860 if (inp->inp_flags & INP_DROPPED) 2861 goto done; 2862 if (tp->_t_logstate != TCP_LOG_STATE_OFF) { 2863 struct timeval tv; 2864 tcp_log_eventspecific_t log; 2865 2866 microuptime(&tv); 2867 log.u_sf.offset = offset; 2868 log.u_sf.length = nbytes; 2869 log.u_sf.flags = flags; 2870 2871 TCP_LOG_EVENTP(tp, NULL, 2872 &tptosocket(tp)->so_rcv, 2873 &tptosocket(tp)->so_snd, 2874 TCP_LOG_SENDFILE, 0, 0, &log, false, &tv); 2875 } 2876 done: 2877 INP_WUNLOCK(inp); 2878 } 2879