1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2016-2018 Netflix, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/lock.h> 35 #include <sys/malloc.h> 36 #include <sys/mutex.h> 37 #include <sys/queue.h> 38 #include <sys/refcount.h> 39 #include <sys/rwlock.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 #include <sys/sysctl.h> 43 #include <sys/tree.h> 44 #include <sys/counter.h> 45 46 #include <dev/tcp_log/tcp_log_dev.h> 47 48 #include <net/if.h> 49 #include <net/if_var.h> 50 #include <net/vnet.h> 51 52 #include <netinet/in.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/tcp_var.h> 56 #include <netinet/tcp_log_buf.h> 57 58 /* Default expiry time */ 59 #define TCP_LOG_EXPIRE_TIME ((sbintime_t)60 * SBT_1S) 60 61 /* Max interval at which to run the expiry timer */ 62 #define TCP_LOG_EXPIRE_INTVL ((sbintime_t)5 * SBT_1S) 63 64 bool tcp_log_verbose; 65 static uma_zone_t tcp_log_bucket_zone, tcp_log_node_zone, tcp_log_zone; 66 static int tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT; 67 static uint32_t tcp_log_version = TCP_LOG_BUF_VER; 68 RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket); 69 static struct tcp_log_id_tree tcp_log_id_head; 70 static STAILQ_HEAD(, tcp_log_id_node) tcp_log_expireq_head = 71 STAILQ_HEAD_INITIALIZER(tcp_log_expireq_head); 72 static struct mtx tcp_log_expireq_mtx; 73 static struct callout tcp_log_expireq_callout; 74 static u_long tcp_log_auto_ratio = 0; 75 static volatile u_long tcp_log_auto_ratio_cur = 0; 76 static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL; 77 static bool tcp_log_auto_all = false; 78 79 RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) 80 81 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW, 0, "TCP Black Box controls"); 82 83 SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose, 84 0, "Force verbose logging for TCP traces"); 85 86 SYSCTL_INT(_net_inet_tcp_bb, OID_AUTO, log_session_limit, 87 CTLFLAG_RW, &tcp_log_session_limit, 0, 88 "Maximum number of events maintained for each TCP session"); 89 90 
SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW,
    &tcp_log_zone, "Maximum number of events maintained for all TCP sessions");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD,
    &tcp_log_zone, "Current number of events maintained for all TCP sessions");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW,
    &tcp_log_bucket_zone, "Maximum number of log IDs");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD,
    &tcp_log_bucket_zone, "Current number of log IDs");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW,
    &tcp_log_node_zone, "Maximum number of tcpcbs with log IDs");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD,
    &tcp_log_node_zone, "Current number of tcpcbs with log IDs");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version,
    0, "Version of log formats exported");

SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW,
    &tcp_log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_auto_mode, CTLFLAG_RW,
    &tcp_log_auto_mode, TCP_LOG_STATE_HEAD_AUTO,
    "Logging mode for auto-selected sessions (default is TCP_LOG_STATE_HEAD_AUTO)");

SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_auto_all, CTLFLAG_RW,
    &tcp_log_auto_all, false,
    "Auto-select from all sessions (rather than just those with IDs)");

#ifdef TCPLOG_DEBUG_COUNTERS
counter_u64_t tcp_log_queued;
counter_u64_t tcp_log_que_fail1;
counter_u64_t tcp_log_que_fail2;
counter_u64_t tcp_log_que_fail3;
counter_u64_t tcp_log_que_fail4;
counter_u64_t tcp_log_que_fail5;
counter_u64_t tcp_log_que_copyout;
counter_u64_t tcp_log_que_read;
counter_u64_t tcp_log_que_freed;

SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, queued, CTLFLAG_RD,
    &tcp_log_queued, "Number of entries queued");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail1, CTLFLAG_RD,
    &tcp_log_que_fail1, "Number of entries queued but fail 1");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail2, CTLFLAG_RD,
    &tcp_log_que_fail2, "Number of entries queued but fail 2");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail3, CTLFLAG_RD,
    &tcp_log_que_fail3, "Number of entries queued but fail 3");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail4, CTLFLAG_RD,
    &tcp_log_que_fail4, "Number of entries queued but fail 4");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail5, CTLFLAG_RD,
    &tcp_log_que_fail5, "Number of entries queued but fail 5");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, copyout, CTLFLAG_RD,
    &tcp_log_que_copyout, "Number of entries copied out");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, read, CTLFLAG_RD,
    &tcp_log_que_read, "Number of entries read from the queue");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, CTLFLAG_RD,
    &tcp_log_que_freed, "Number of entries freed after reading");
#endif

#ifdef INVARIANTS
#define TCPLOG_DEBUG_RINGBUF
#endif

struct tcp_log_mem
{
    STAILQ_ENTRY(tcp_log_mem) tlm_queue;
    struct tcp_log_buffer tlm_buf;
    struct tcp_log_verbose tlm_v;
#ifdef TCPLOG_DEBUG_RINGBUF
    volatile int tlm_refcnt;
#endif
};

/* 60 bytes for the header, + 16 bytes for padding */
static uint8_t zerobuf[76];

/*
 * Lock order:
 * 1. 
TCPID_TREE 173 * 2. TCPID_BUCKET 174 * 3. INP 175 * 176 * Rules: 177 * A. You need a lock on the Tree to add/remove buckets. 178 * B. You need a lock on the bucket to add/remove nodes from the bucket. 179 * C. To change information in a node, you need the INP lock if the tln_closed 180 * field is false. Otherwise, you need the bucket lock. (Note that the 181 * tln_closed field can change at any point, so you need to recheck the 182 * entry after acquiring the INP lock.) 183 * D. To remove a node from the bucket, you must have that entry locked, 184 * according to the criteria of Rule C. Also, the node must not be on 185 * the expiry queue. 186 * E. The exception to C is the expiry queue fields, which are locked by 187 * the TCPLOG_EXPIREQ lock. 188 * 189 * Buckets have a reference count. Each node is a reference. Further, 190 * other callers may add reference counts to keep a bucket from disappearing. 191 * You can add a reference as long as you own a lock sufficient to keep the 192 * bucket from disappearing. For example, a common use is: 193 * a. Have a locked INP, but need to lock the TCPID_BUCKET. 194 * b. Add a refcount on the bucket. (Safe because the INP lock prevents 195 * the TCPID_BUCKET from going away.) 196 * c. Drop the INP lock. 197 * d. Acquire a lock on the TCPID_BUCKET. 198 * e. Acquire a lock on the INP. 199 * f. Drop the refcount on the bucket. 200 * (At this point, the bucket may disappear.) 201 * 202 * Expire queue lock: 203 * You can acquire this with either the bucket or INP lock. Don't reverse it. 204 * When the expire code has committed to freeing a node, it resets the expiry 205 * time to SBT_MAX. That is the signal to everyone else that they should 206 * leave that node alone. 207 */ 208 static struct rwlock tcp_id_tree_lock; 209 #define TCPID_TREE_WLOCK() rw_wlock(&tcp_id_tree_lock) 210 #define TCPID_TREE_RLOCK() rw_rlock(&tcp_id_tree_lock) 211 #define TCPID_TREE_UPGRADE() rw_try_upgrade(&tcp_id_tree_lock) 212 #define TCPID_TREE_WUNLOCK() rw_wunlock(&tcp_id_tree_lock) 213 #define TCPID_TREE_RUNLOCK() rw_runlock(&tcp_id_tree_lock) 214 #define TCPID_TREE_WLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_WLOCKED) 215 #define TCPID_TREE_RLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_RLOCKED) 216 #define TCPID_TREE_UNLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_UNLOCKED) 217 218 #define TCPID_BUCKET_LOCK_INIT(tlb) mtx_init(&((tlb)->tlb_mtx), "tcp log id bucket", NULL, MTX_DEF) 219 #define TCPID_BUCKET_LOCK_DESTROY(tlb) mtx_destroy(&((tlb)->tlb_mtx)) 220 #define TCPID_BUCKET_LOCK(tlb) mtx_lock(&((tlb)->tlb_mtx)) 221 #define TCPID_BUCKET_UNLOCK(tlb) mtx_unlock(&((tlb)->tlb_mtx)) 222 #define TCPID_BUCKET_LOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_OWNED) 223 #define TCPID_BUCKET_UNLOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_NOTOWNED) 224 225 #define TCPID_BUCKET_REF(tlb) refcount_acquire(&((tlb)->tlb_refcnt)) 226 #define TCPID_BUCKET_UNREF(tlb) refcount_release(&((tlb)->tlb_refcnt)) 227 228 #define TCPLOG_EXPIREQ_LOCK() mtx_lock(&tcp_log_expireq_mtx) 229 #define TCPLOG_EXPIREQ_UNLOCK() mtx_unlock(&tcp_log_expireq_mtx) 230 231 SLIST_HEAD(tcp_log_id_head, tcp_log_id_node); 232 233 struct tcp_log_id_bucket 234 { 235 /* 236 * tlb_id must be first. This lets us use strcmp on 237 * (struct tcp_log_id_bucket *) and (char *) interchangeably. 
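 *
 * For illustration, this is the lookup pattern that depends on that
 * layout (the same pattern tcp_log_set_id() uses further down): because
 * tlb_id is the first member, a bare ID string can stand in for a
 * bucket when searching the tree, and tcp_log_id_cmp() only ever looks
 * at tlb_id:
 *
 *	tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head,
 *	    (struct tcp_log_id_bucket *)id);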
238 */ 239 char tlb_id[TCP_LOG_ID_LEN]; 240 RB_ENTRY(tcp_log_id_bucket) tlb_rb; 241 struct tcp_log_id_head tlb_head; 242 struct mtx tlb_mtx; 243 volatile u_int tlb_refcnt; 244 }; 245 246 struct tcp_log_id_node 247 { 248 SLIST_ENTRY(tcp_log_id_node) tln_list; 249 STAILQ_ENTRY(tcp_log_id_node) tln_expireq; /* Locked by the expireq lock */ 250 sbintime_t tln_expiretime; /* Locked by the expireq lock */ 251 252 /* 253 * If INP is NULL, that means the connection has closed. We've 254 * saved the connection endpoint information and the log entries 255 * in the tln_ie and tln_entries members. We've also saved a pointer 256 * to the enclosing bucket here. If INP is not NULL, the information is 257 * in the PCB and not here. 258 */ 259 struct inpcb *tln_inp; 260 struct tcpcb *tln_tp; 261 struct tcp_log_id_bucket *tln_bucket; 262 struct in_endpoints tln_ie; 263 struct tcp_log_stailq tln_entries; 264 int tln_count; 265 volatile int tln_closed; 266 uint8_t tln_af; 267 }; 268 269 enum tree_lock_state { 270 TREE_UNLOCKED = 0, 271 TREE_RLOCKED, 272 TREE_WLOCKED, 273 }; 274 275 /* Do we want to select this session for auto-logging? */ 276 static __inline bool 277 tcp_log_selectauto(void) 278 { 279 280 /* 281 * If we are doing auto-capturing, figure out whether we will capture 282 * this session. 283 */ 284 if (tcp_log_auto_ratio && 285 (atomic_fetchadd_long(&tcp_log_auto_ratio_cur, 1) % 286 tcp_log_auto_ratio) == 0) 287 return (true); 288 return (false); 289 } 290 291 static __inline int 292 tcp_log_id_cmp(struct tcp_log_id_bucket *a, struct tcp_log_id_bucket *b) 293 { 294 KASSERT(a != NULL, ("tcp_log_id_cmp: argument a is unexpectedly NULL")); 295 KASSERT(b != NULL, ("tcp_log_id_cmp: argument b is unexpectedly NULL")); 296 return strncmp(a->tlb_id, b->tlb_id, TCP_LOG_ID_LEN); 297 } 298 299 RB_GENERATE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) 300 301 static __inline void 302 tcp_log_id_validate_tree_lock(int tree_locked) 303 { 304 305 #ifdef INVARIANTS 306 switch (tree_locked) { 307 case TREE_WLOCKED: 308 TCPID_TREE_WLOCK_ASSERT(); 309 break; 310 case TREE_RLOCKED: 311 TCPID_TREE_RLOCK_ASSERT(); 312 break; 313 case TREE_UNLOCKED: 314 TCPID_TREE_UNLOCK_ASSERT(); 315 break; 316 default: 317 kassert_panic("%s:%d: unknown tree lock state", __func__, 318 __LINE__); 319 } 320 #endif 321 } 322 323 static __inline void 324 tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb) 325 { 326 327 TCPID_TREE_WLOCK_ASSERT(); 328 KASSERT(SLIST_EMPTY(&tlb->tlb_head), 329 ("%s: Attempt to remove non-empty bucket", __func__)); 330 if (RB_REMOVE(tcp_log_id_tree, &tcp_log_id_head, tlb) == NULL) { 331 #ifdef INVARIANTS 332 kassert_panic("%s:%d: error removing element from tree", 333 __func__, __LINE__); 334 #endif 335 } 336 TCPID_BUCKET_LOCK_DESTROY(tlb); 337 uma_zfree(tcp_log_bucket_zone, tlb); 338 } 339 340 /* 341 * Call with a referenced and locked bucket. 342 * Will return true if the bucket was freed; otherwise, false. 343 * tlb: The bucket to unreference. 344 * tree_locked: A pointer to the state of the tree lock. If the tree lock 345 * state changes, the function will update it. 346 * inp: If not NULL and the function needs to drop the inp lock to relock the 347 * tree, it will do so. (The caller must ensure inp will not become invalid, 348 * probably by holding a reference to it.) 
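 *
 * A sketch of the pattern described above (modeled on tcp_log_set_id()
 * below; the surrounding caller is illustrative rather than a verbatim
 * copy):
 *
 *	TCPID_BUCKET_REF(tlb);
 *	INP_WUNLOCK(inp);
 *	TCPID_TREE_RLOCK();
 *	tree_locked = TREE_RLOCKED;
 *	TCPID_BUCKET_LOCK(tlb);
 *	INP_WLOCK(inp);
 *	if (tcp_log_unref_bucket(tlb, &tree_locked, inp))
 *		tlb = NULL;	(the bucket and its lock are gone)
 *
 * Whatever happens, tree_locked reflects the tree lock actually held
 * when the function returns.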
349 */ 350 static bool 351 tcp_log_unref_bucket(struct tcp_log_id_bucket *tlb, int *tree_locked, 352 struct inpcb *inp) 353 { 354 355 KASSERT(tlb != NULL, ("%s: called with NULL tlb", __func__)); 356 KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", 357 __func__)); 358 359 tcp_log_id_validate_tree_lock(*tree_locked); 360 361 /* 362 * Did we hold the last reference on the tlb? If so, we may need 363 * to free it. (Note that we can realistically only execute the 364 * loop twice: once without a write lock and once with a write 365 * lock.) 366 */ 367 while (TCPID_BUCKET_UNREF(tlb)) { 368 /* 369 * We need a write lock on the tree to free this. 370 * If we can upgrade the tree lock, this is "easy". If we 371 * can't upgrade the tree lock, we need to do this the 372 * "hard" way: unwind all our locks and relock everything. 373 * In the meantime, anything could have changed. We even 374 * need to validate that we still need to free the bucket. 375 */ 376 if (*tree_locked == TREE_RLOCKED && TCPID_TREE_UPGRADE()) 377 *tree_locked = TREE_WLOCKED; 378 else if (*tree_locked != TREE_WLOCKED) { 379 TCPID_BUCKET_REF(tlb); 380 if (inp != NULL) 381 INP_WUNLOCK(inp); 382 TCPID_BUCKET_UNLOCK(tlb); 383 if (*tree_locked == TREE_RLOCKED) 384 TCPID_TREE_RUNLOCK(); 385 TCPID_TREE_WLOCK(); 386 *tree_locked = TREE_WLOCKED; 387 TCPID_BUCKET_LOCK(tlb); 388 if (inp != NULL) 389 INP_WLOCK(inp); 390 continue; 391 } 392 393 /* 394 * We have an empty bucket and a write lock on the tree. 395 * Remove the empty bucket. 396 */ 397 tcp_log_remove_bucket(tlb); 398 return (true); 399 } 400 return (false); 401 } 402 403 /* 404 * Call with a locked bucket. This function will release the lock on the 405 * bucket before returning. 406 * 407 * The caller is responsible for freeing the tp->t_lin/tln node! 408 * 409 * Note: one of tp or both tlb and tln must be supplied. 410 * 411 * inp: A pointer to the inp. If the function needs to drop the inp lock to 412 * acquire the tree write lock, it will do so. (The caller must ensure inp 413 * will not become invalid, probably by holding a reference to it.) 414 * tp: A pointer to the tcpcb. (optional; if specified, tlb and tln are ignored) 415 * tlb: A pointer to the bucket. (optional; ignored if tp is specified) 416 * tln: A pointer to the node. (optional; ignored if tp is specified) 417 * tree_locked: A pointer to the state of the tree lock. If the tree lock 418 * state changes, the function will update it. 419 * 420 * Will return true if the INP lock was reacquired; otherwise, false. 421 */ 422 static bool 423 tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp, 424 struct tcp_log_id_bucket *tlb, struct tcp_log_id_node *tln, 425 int *tree_locked) 426 { 427 int orig_tree_locked; 428 429 KASSERT(tp != NULL || (tlb != NULL && tln != NULL), 430 ("%s: called with tp=%p, tlb=%p, tln=%p", __func__, 431 tp, tlb, tln)); 432 KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", 433 __func__)); 434 435 if (tp != NULL) { 436 tlb = tp->t_lib; 437 tln = tp->t_lin; 438 KASSERT(tlb != NULL, ("%s: unexpectedly NULL tlb", __func__)); 439 KASSERT(tln != NULL, ("%s: unexpectedly NULL tln", __func__)); 440 } 441 442 tcp_log_id_validate_tree_lock(*tree_locked); 443 TCPID_BUCKET_LOCK_ASSERT(tlb); 444 445 /* 446 * Remove the node, clear the log bucket and node from the TCPCB, and 447 * decrement the bucket refcount. In the process, if this is the 448 * last reference, the bucket will be freed. 
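 *
 * (For reference, tcp_log_set_id() below consumes the return value of
 * this function like so; a true return means the INP lock was dropped
 * and reacquired, so the caller must revalidate its state:)
 *
 *	if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) {
 *		RECHECK_INP();
 *		if (tp->t_lib != NULL || tp->t_lin != NULL)
 *			goto restart;
 *	}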
449 */ 450 SLIST_REMOVE(&tlb->tlb_head, tln, tcp_log_id_node, tln_list); 451 if (tp != NULL) { 452 tp->t_lib = NULL; 453 tp->t_lin = NULL; 454 } 455 orig_tree_locked = *tree_locked; 456 if (!tcp_log_unref_bucket(tlb, tree_locked, inp)) 457 TCPID_BUCKET_UNLOCK(tlb); 458 return (*tree_locked != orig_tree_locked); 459 } 460 461 #define RECHECK_INP_CLEAN(cleanup) do { \ 462 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ 463 rv = ECONNRESET; \ 464 cleanup; \ 465 goto done; \ 466 } \ 467 tp = intotcpcb(inp); \ 468 } while (0) 469 470 #define RECHECK_INP() RECHECK_INP_CLEAN(/* noop */) 471 472 static void 473 tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp) 474 { 475 476 INP_WLOCK_ASSERT(tp->t_inpcb); 477 478 #ifdef NETFLIX 479 if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL) 480 (void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id)); 481 #endif 482 } 483 484 /* 485 * Set the TCP log ID for a TCPCB. 486 * Called with INPCB locked. Returns with it unlocked. 487 */ 488 int 489 tcp_log_set_id(struct tcpcb *tp, char *id) 490 { 491 struct tcp_log_id_bucket *tlb, *tmp_tlb; 492 struct tcp_log_id_node *tln; 493 struct inpcb *inp; 494 int tree_locked, rv; 495 bool bucket_locked; 496 497 tlb = NULL; 498 tln = NULL; 499 inp = tp->t_inpcb; 500 tree_locked = TREE_UNLOCKED; 501 bucket_locked = false; 502 503 restart: 504 INP_WLOCK_ASSERT(inp); 505 506 /* See if the ID is unchanged. */ 507 if ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) || 508 (tp->t_lib == NULL && *id == 0)) { 509 rv = 0; 510 goto done; 511 } 512 513 /* 514 * If the TCPCB had a previous ID, we need to extricate it from 515 * the previous list. 516 * 517 * Drop the TCPCB lock and lock the tree and the bucket. 518 * Because this is called in the socket context, we (theoretically) 519 * don't need to worry about the INPCB completely going away 520 * while we are gone. 521 */ 522 if (tp->t_lib != NULL) { 523 tlb = tp->t_lib; 524 TCPID_BUCKET_REF(tlb); 525 INP_WUNLOCK(inp); 526 527 if (tree_locked == TREE_UNLOCKED) { 528 TCPID_TREE_RLOCK(); 529 tree_locked = TREE_RLOCKED; 530 } 531 TCPID_BUCKET_LOCK(tlb); 532 bucket_locked = true; 533 INP_WLOCK(inp); 534 535 /* 536 * Unreference the bucket. If our bucket went away, it is no 537 * longer locked or valid. 538 */ 539 if (tcp_log_unref_bucket(tlb, &tree_locked, inp)) { 540 bucket_locked = false; 541 tlb = NULL; 542 } 543 544 /* Validate the INP. */ 545 RECHECK_INP(); 546 547 /* 548 * Evaluate whether the bucket changed while we were unlocked. 549 * 550 * Possible scenarios here: 551 * 1. Bucket is unchanged and the same one we started with. 552 * 2. The TCPCB no longer has a bucket and our bucket was 553 * freed. 554 * 3. The TCPCB has a new bucket, whether ours was freed. 555 * 4. The TCPCB no longer has a bucket and our bucket was 556 * not freed. 557 * 558 * In cases 2-4, we will start over. In case 1, we will 559 * proceed here to remove the bucket. 560 */ 561 if (tlb == NULL || tp->t_lib != tlb) { 562 KASSERT(bucket_locked || tlb == NULL, 563 ("%s: bucket_locked (%d) and tlb (%p) are " 564 "inconsistent", __func__, bucket_locked, tlb)); 565 566 if (bucket_locked) { 567 TCPID_BUCKET_UNLOCK(tlb); 568 bucket_locked = false; 569 tlb = NULL; 570 } 571 goto restart; 572 } 573 574 /* 575 * Store the (struct tcp_log_id_node) for reuse. Then, remove 576 * it from the bucket. In the process, we may end up relocking. 577 * If so, we need to validate that the INP is still valid, and 578 * the TCPCB entries match we expect. 
579 * 580 * We will clear tlb and change the bucket_locked state just 581 * before calling tcp_log_remove_id_node(), since that function 582 * will unlock the bucket. 583 */ 584 if (tln != NULL) 585 uma_zfree(tcp_log_node_zone, tln); 586 tln = tp->t_lin; 587 tlb = NULL; 588 bucket_locked = false; 589 if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) { 590 RECHECK_INP(); 591 592 /* 593 * If the TCPCB moved to a new bucket while we had 594 * dropped the lock, restart. 595 */ 596 if (tp->t_lib != NULL || tp->t_lin != NULL) 597 goto restart; 598 } 599 600 /* 601 * Yay! We successfully removed the TCPCB from its old 602 * bucket. Phew! 603 * 604 * On to bigger and better things... 605 */ 606 } 607 608 /* At this point, the TCPCB should not be in any bucket. */ 609 KASSERT(tp->t_lib == NULL, ("%s: tp->t_lib is not NULL", __func__)); 610 611 /* 612 * If the new ID is not empty, we need to now assign this TCPCB to a 613 * new bucket. 614 */ 615 if (*id) { 616 /* Get a new tln, if we don't already have one to reuse. */ 617 if (tln == NULL) { 618 tln = uma_zalloc(tcp_log_node_zone, M_NOWAIT | M_ZERO); 619 if (tln == NULL) { 620 rv = ENOBUFS; 621 goto done; 622 } 623 tln->tln_inp = inp; 624 tln->tln_tp = tp; 625 } 626 627 /* 628 * Drop the INP lock for a bit. We don't need it, and dropping 629 * it prevents lock order reversals. 630 */ 631 INP_WUNLOCK(inp); 632 633 /* Make sure we have at least a read lock on the tree. */ 634 tcp_log_id_validate_tree_lock(tree_locked); 635 if (tree_locked == TREE_UNLOCKED) { 636 TCPID_TREE_RLOCK(); 637 tree_locked = TREE_RLOCKED; 638 } 639 640 refind: 641 /* 642 * Remember that we constructed (struct tcp_log_id_node) so 643 * we can safely cast the id to it for the purposes of finding. 644 */ 645 KASSERT(tlb == NULL, ("%s:%d tlb unexpectedly non-NULL", 646 __func__, __LINE__)); 647 tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head, 648 (struct tcp_log_id_bucket *) id); 649 650 /* 651 * If we didn't find a matching bucket, we need to add a new 652 * one. This requires a write lock. But, of course, we will 653 * need to recheck some things when we re-acquire the lock. 654 */ 655 if (tmp_tlb == NULL && tree_locked != TREE_WLOCKED) { 656 tree_locked = TREE_WLOCKED; 657 if (!TCPID_TREE_UPGRADE()) { 658 TCPID_TREE_RUNLOCK(); 659 TCPID_TREE_WLOCK(); 660 661 /* 662 * The tree may have changed while we were 663 * unlocked. 664 */ 665 goto refind; 666 } 667 } 668 669 /* If we need to add a new bucket, do it now. */ 670 if (tmp_tlb == NULL) { 671 /* Allocate new bucket. */ 672 tlb = uma_zalloc(tcp_log_bucket_zone, M_NOWAIT); 673 if (tlb == NULL) { 674 rv = ENOBUFS; 675 goto done_noinp; 676 } 677 678 /* 679 * Copy the ID to the bucket. 680 * NB: Don't use strlcpy() unless you are sure 681 * we've always validated NULL termination. 682 * 683 * TODO: When I'm done writing this, see if we 684 * we have correctly validated NULL termination and 685 * can use strlcpy(). :-) 686 */ 687 strncpy(tlb->tlb_id, id, TCP_LOG_ID_LEN - 1); 688 tlb->tlb_id[TCP_LOG_ID_LEN - 1] = '\0'; 689 690 /* 691 * Take the refcount for the first node and go ahead 692 * and lock this. Note that we zero the tlb_mtx 693 * structure, since 0xdeadc0de flips the right bits 694 * for the code to think that this mutex has already 695 * been initialized. 
:-( 696 */ 697 SLIST_INIT(&tlb->tlb_head); 698 refcount_init(&tlb->tlb_refcnt, 1); 699 memset(&tlb->tlb_mtx, 0, sizeof(struct mtx)); 700 TCPID_BUCKET_LOCK_INIT(tlb); 701 TCPID_BUCKET_LOCK(tlb); 702 bucket_locked = true; 703 704 #define FREE_NEW_TLB() do { \ 705 TCPID_BUCKET_LOCK_DESTROY(tlb); \ 706 uma_zfree(tcp_log_bucket_zone, tlb); \ 707 bucket_locked = false; \ 708 tlb = NULL; \ 709 } while (0) 710 /* 711 * Relock the INP and make sure we are still 712 * unassigned. 713 */ 714 INP_WLOCK(inp); 715 RECHECK_INP_CLEAN(FREE_NEW_TLB()); 716 if (tp->t_lib != NULL) { 717 FREE_NEW_TLB(); 718 goto restart; 719 } 720 721 /* Add the new bucket to the tree. */ 722 tmp_tlb = RB_INSERT(tcp_log_id_tree, &tcp_log_id_head, 723 tlb); 724 KASSERT(tmp_tlb == NULL, 725 ("%s: Unexpected conflicting bucket (%p) while " 726 "adding new bucket (%p)", __func__, tmp_tlb, tlb)); 727 728 /* 729 * If we found a conflicting bucket, free the new 730 * one we made and fall through to use the existing 731 * bucket. 732 */ 733 if (tmp_tlb != NULL) { 734 FREE_NEW_TLB(); 735 INP_WUNLOCK(inp); 736 } 737 #undef FREE_NEW_TLB 738 } 739 740 /* If we found an existing bucket, use it. */ 741 if (tmp_tlb != NULL) { 742 tlb = tmp_tlb; 743 TCPID_BUCKET_LOCK(tlb); 744 bucket_locked = true; 745 746 /* 747 * Relock the INP and make sure we are still 748 * unassigned. 749 */ 750 INP_UNLOCK_ASSERT(inp); 751 INP_WLOCK(inp); 752 RECHECK_INP(); 753 if (tp->t_lib != NULL) { 754 TCPID_BUCKET_UNLOCK(tlb); 755 tlb = NULL; 756 goto restart; 757 } 758 759 /* Take a reference on the bucket. */ 760 TCPID_BUCKET_REF(tlb); 761 } 762 763 tcp_log_grow_tlb(tlb->tlb_id, tp); 764 765 /* Add the new node to the list. */ 766 SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list); 767 tp->t_lib = tlb; 768 tp->t_lin = tln; 769 tln = NULL; 770 } 771 772 rv = 0; 773 774 done: 775 /* Unlock things, as needed, and return. */ 776 INP_WUNLOCK(inp); 777 done_noinp: 778 INP_UNLOCK_ASSERT(inp); 779 if (bucket_locked) { 780 TCPID_BUCKET_LOCK_ASSERT(tlb); 781 TCPID_BUCKET_UNLOCK(tlb); 782 } else if (tlb != NULL) 783 TCPID_BUCKET_UNLOCK_ASSERT(tlb); 784 if (tree_locked == TREE_WLOCKED) { 785 TCPID_TREE_WLOCK_ASSERT(); 786 TCPID_TREE_WUNLOCK(); 787 } else if (tree_locked == TREE_RLOCKED) { 788 TCPID_TREE_RLOCK_ASSERT(); 789 TCPID_TREE_RUNLOCK(); 790 } else 791 TCPID_TREE_UNLOCK_ASSERT(); 792 if (tln != NULL) 793 uma_zfree(tcp_log_node_zone, tln); 794 return (rv); 795 } 796 797 /* 798 * Get the TCP log ID for a TCPCB. 799 * Called with INPCB locked. 800 * 'buf' must point to a buffer that is at least TCP_LOG_ID_LEN bytes long. 801 * Returns number of bytes copied. 802 */ 803 size_t 804 tcp_log_get_id(struct tcpcb *tp, char *buf) 805 { 806 size_t len; 807 808 INP_LOCK_ASSERT(tp->t_inpcb); 809 if (tp->t_lib != NULL) { 810 len = strlcpy(buf, tp->t_lib->tlb_id, TCP_LOG_ID_LEN); 811 KASSERT(len < TCP_LOG_ID_LEN, 812 ("%s:%d: tp->t_lib->tlb_id too long (%zu)", 813 __func__, __LINE__, len)); 814 } else { 815 *buf = '\0'; 816 len = 0; 817 } 818 return (len); 819 } 820 821 /* 822 * Get number of connections with the same log ID. 823 * Log ID is taken from given TCPCB. 824 * Called with INPCB locked. 825 */ 826 u_int 827 tcp_log_get_id_cnt(struct tcpcb *tp) 828 { 829 830 INP_WLOCK_ASSERT(tp->t_inpcb); 831 return ((tp->t_lib == NULL) ? 0 : tp->t_lib->tlb_refcnt); 832 } 833 834 #ifdef TCPLOG_DEBUG_RINGBUF 835 /* 836 * Functions/macros to increment/decrement reference count for a log 837 * entry. This should catch when we do a double-free/double-remove or 838 * a double-add. 
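 *
 * The intended pairing, as used throughout this file (sketch): take a
 * reference when an entry is placed on a queue and drop it when the
 * entry is removed, so that a double add or double remove trips the
 * panics below:
 *
 *	STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue);
 *	tcp_log_entry_refcnt_add(log_entry);
 *	...
 *	STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue);
 *	tcp_log_entry_refcnt_rem(log_entry);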
839 */ 840 static inline void 841 _tcp_log_entry_refcnt_add(struct tcp_log_mem *log_entry, const char *func, 842 int line) 843 { 844 int refcnt; 845 846 refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, 1); 847 if (refcnt != 0) 848 panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 0)", 849 func, line, log_entry, refcnt); 850 } 851 #define tcp_log_entry_refcnt_add(l) \ 852 _tcp_log_entry_refcnt_add((l), __func__, __LINE__) 853 854 static inline void 855 _tcp_log_entry_refcnt_rem(struct tcp_log_mem *log_entry, const char *func, 856 int line) 857 { 858 int refcnt; 859 860 refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, -1); 861 if (refcnt != 1) 862 panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 1)", 863 func, line, log_entry, refcnt); 864 } 865 #define tcp_log_entry_refcnt_rem(l) \ 866 _tcp_log_entry_refcnt_rem((l), __func__, __LINE__) 867 868 #else /* !TCPLOG_DEBUG_RINGBUF */ 869 870 #define tcp_log_entry_refcnt_add(l) 871 #define tcp_log_entry_refcnt_rem(l) 872 873 #endif 874 875 /* 876 * Cleanup after removing a log entry, but only decrement the count if we 877 * are running INVARIANTS. 878 */ 879 static inline void 880 tcp_log_free_log_common(struct tcp_log_mem *log_entry, int *count __unused) 881 { 882 883 uma_zfree(tcp_log_zone, log_entry); 884 #ifdef INVARIANTS 885 (*count)--; 886 KASSERT(*count >= 0, 887 ("%s: count unexpectedly negative", __func__)); 888 #endif 889 } 890 891 static void 892 tcp_log_free_entries(struct tcp_log_stailq *head, int *count) 893 { 894 struct tcp_log_mem *log_entry; 895 896 /* Free the entries. */ 897 while ((log_entry = STAILQ_FIRST(head)) != NULL) { 898 STAILQ_REMOVE_HEAD(head, tlm_queue); 899 tcp_log_entry_refcnt_rem(log_entry); 900 tcp_log_free_log_common(log_entry, count); 901 } 902 } 903 904 /* Cleanup after removing a log entry. */ 905 static inline void 906 tcp_log_remove_log_cleanup(struct tcpcb *tp, struct tcp_log_mem *log_entry) 907 { 908 uma_zfree(tcp_log_zone, log_entry); 909 tp->t_lognum--; 910 KASSERT(tp->t_lognum >= 0, 911 ("%s: tp->t_lognum unexpectedly negative", __func__)); 912 } 913 914 /* Remove a log entry from the head of a list. */ 915 static inline void 916 tcp_log_remove_log_head(struct tcpcb *tp, struct tcp_log_mem *log_entry) 917 { 918 919 KASSERT(log_entry == STAILQ_FIRST(&tp->t_logs), 920 ("%s: attempt to remove non-HEAD log entry", __func__)); 921 STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); 922 tcp_log_entry_refcnt_rem(log_entry); 923 tcp_log_remove_log_cleanup(tp, log_entry); 924 } 925 926 #ifdef TCPLOG_DEBUG_RINGBUF 927 /* 928 * Initialize the log entry's reference count, which we want to 929 * survive allocations. 930 */ 931 static int 932 tcp_log_zone_init(void *mem, int size, int flags __unused) 933 { 934 struct tcp_log_mem *tlm; 935 936 KASSERT(size >= sizeof(struct tcp_log_mem), 937 ("%s: unexpectedly short (%d) allocation", __func__, size)); 938 tlm = (struct tcp_log_mem *)mem; 939 tlm->tlm_refcnt = 0; 940 return (0); 941 } 942 943 /* 944 * Double check that the refcnt is zero on allocation and return. 
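 *
 * (These init/ctor/dtor hooks are only wired up when
 * TCPLOG_DEBUG_RINGBUF is defined; see the uma_zcreate() call in
 * tcp_log_init() below, which does roughly:
 *
 *	tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem),
 *	    tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init,
 *	    NULL, UMA_ALIGN_PTR, 0);
 * )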
945 */ 946 static int 947 tcp_log_zone_ctor(void *mem, int size, void *args __unused, int flags __unused) 948 { 949 struct tcp_log_mem *tlm; 950 951 KASSERT(size >= sizeof(struct tcp_log_mem), 952 ("%s: unexpectedly short (%d) allocation", __func__, size)); 953 tlm = (struct tcp_log_mem *)mem; 954 if (tlm->tlm_refcnt != 0) 955 panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", 956 __func__, __LINE__, tlm, tlm->tlm_refcnt); 957 return (0); 958 } 959 960 static void 961 tcp_log_zone_dtor(void *mem, int size, void *args __unused) 962 { 963 struct tcp_log_mem *tlm; 964 965 KASSERT(size >= sizeof(struct tcp_log_mem), 966 ("%s: unexpectedly short (%d) allocation", __func__, size)); 967 tlm = (struct tcp_log_mem *)mem; 968 if (tlm->tlm_refcnt != 0) 969 panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", 970 __func__, __LINE__, tlm, tlm->tlm_refcnt); 971 } 972 #endif /* TCPLOG_DEBUG_RINGBUF */ 973 974 /* Do global initialization. */ 975 void 976 tcp_log_init(void) 977 { 978 979 tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem), 980 #ifdef TCPLOG_DEBUG_RINGBUF 981 tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init, 982 #else 983 NULL, NULL, NULL, 984 #endif 985 NULL, UMA_ALIGN_PTR, 0); 986 (void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT); 987 tcp_log_bucket_zone = uma_zcreate("tcp_log_bucket", 988 sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL, 989 UMA_ALIGN_PTR, 0); 990 tcp_log_node_zone = uma_zcreate("tcp_log_node", 991 sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL, 992 UMA_ALIGN_PTR, 0); 993 #ifdef TCPLOG_DEBUG_COUNTERS 994 tcp_log_queued = counter_u64_alloc(M_WAITOK); 995 tcp_log_que_fail1 = counter_u64_alloc(M_WAITOK); 996 tcp_log_que_fail2 = counter_u64_alloc(M_WAITOK); 997 tcp_log_que_fail3 = counter_u64_alloc(M_WAITOK); 998 tcp_log_que_fail4 = counter_u64_alloc(M_WAITOK); 999 tcp_log_que_fail5 = counter_u64_alloc(M_WAITOK); 1000 tcp_log_que_copyout = counter_u64_alloc(M_WAITOK); 1001 tcp_log_que_read = counter_u64_alloc(M_WAITOK); 1002 tcp_log_que_freed = counter_u64_alloc(M_WAITOK); 1003 #endif 1004 1005 rw_init_flags(&tcp_id_tree_lock, "TCP ID tree", RW_NEW); 1006 mtx_init(&tcp_log_expireq_mtx, "TCP log expireq", NULL, MTX_DEF); 1007 callout_init(&tcp_log_expireq_callout, 1); 1008 } 1009 1010 /* Do per-TCPCB initialization. */ 1011 void 1012 tcp_log_tcpcbinit(struct tcpcb *tp) 1013 { 1014 1015 /* A new TCPCB should start out zero-initialized. */ 1016 STAILQ_INIT(&tp->t_logs); 1017 1018 /* 1019 * If we are doing auto-capturing, figure out whether we will capture 1020 * this session. 1021 */ 1022 if (tcp_log_selectauto()) { 1023 tp->t_logstate = tcp_log_auto_mode; 1024 tp->t_flags2 |= TF2_LOG_AUTO; 1025 } 1026 } 1027 1028 1029 /* Remove entries */ 1030 static void 1031 tcp_log_expire(void *unused __unused) 1032 { 1033 struct tcp_log_id_bucket *tlb; 1034 struct tcp_log_id_node *tln; 1035 sbintime_t expiry_limit; 1036 int tree_locked; 1037 1038 TCPLOG_EXPIREQ_LOCK(); 1039 if (callout_pending(&tcp_log_expireq_callout)) { 1040 /* Callout was reset. */ 1041 TCPLOG_EXPIREQ_UNLOCK(); 1042 return; 1043 } 1044 1045 /* 1046 * Process entries until we reach one that expires too far in the 1047 * future. Look one second in the future. 1048 */ 1049 expiry_limit = getsbinuptime() + SBT_1S; 1050 tree_locked = TREE_UNLOCKED; 1051 1052 while ((tln = STAILQ_FIRST(&tcp_log_expireq_head)) != NULL && 1053 tln->tln_expiretime <= expiry_limit) { 1054 if (!callout_active(&tcp_log_expireq_callout)) { 1055 /* 1056 * Callout was stopped. 
I guess we should 1057 * just quit at this point. 1058 */ 1059 TCPLOG_EXPIREQ_UNLOCK(); 1060 return; 1061 } 1062 1063 /* 1064 * Remove the node from the head of the list and unlock 1065 * the list. Change the expiry time to SBT_MAX as a signal 1066 * to other threads that we now own this. 1067 */ 1068 STAILQ_REMOVE_HEAD(&tcp_log_expireq_head, tln_expireq); 1069 tln->tln_expiretime = SBT_MAX; 1070 TCPLOG_EXPIREQ_UNLOCK(); 1071 1072 /* 1073 * Remove the node from the bucket. 1074 */ 1075 tlb = tln->tln_bucket; 1076 TCPID_BUCKET_LOCK(tlb); 1077 if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked)) { 1078 tcp_log_id_validate_tree_lock(tree_locked); 1079 if (tree_locked == TREE_WLOCKED) 1080 TCPID_TREE_WUNLOCK(); 1081 else 1082 TCPID_TREE_RUNLOCK(); 1083 tree_locked = TREE_UNLOCKED; 1084 } 1085 1086 /* Drop the INP reference. */ 1087 INP_WLOCK(tln->tln_inp); 1088 if (!in_pcbrele_wlocked(tln->tln_inp)) 1089 INP_WUNLOCK(tln->tln_inp); 1090 1091 /* Free the log records. */ 1092 tcp_log_free_entries(&tln->tln_entries, &tln->tln_count); 1093 1094 /* Free the node. */ 1095 uma_zfree(tcp_log_node_zone, tln); 1096 1097 /* Relock the expiry queue. */ 1098 TCPLOG_EXPIREQ_LOCK(); 1099 } 1100 1101 /* 1102 * We've expired all the entries we can. Do we need to reschedule 1103 * ourselves? 1104 */ 1105 callout_deactivate(&tcp_log_expireq_callout); 1106 if (tln != NULL) { 1107 /* 1108 * Get max(now + TCP_LOG_EXPIRE_INTVL, tln->tln_expiretime) and 1109 * set the next callout to that. (This helps ensure we generally 1110 * run the callout no more often than desired.) 1111 */ 1112 expiry_limit = getsbinuptime() + TCP_LOG_EXPIRE_INTVL; 1113 if (expiry_limit < tln->tln_expiretime) 1114 expiry_limit = tln->tln_expiretime; 1115 callout_reset_sbt(&tcp_log_expireq_callout, expiry_limit, 1116 SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE); 1117 } 1118 1119 /* We're done. */ 1120 TCPLOG_EXPIREQ_UNLOCK(); 1121 return; 1122 } 1123 1124 /* 1125 * Move log data from the TCPCB to a new node. This will reset the TCPCB log 1126 * entries and log count; however, it will not touch other things from the 1127 * TCPCB (e.g. t_lin, t_lib). 1128 * 1129 * NOTE: Must hold a lock on the INP. 1130 */ 1131 static void 1132 tcp_log_move_tp_to_node(struct tcpcb *tp, struct tcp_log_id_node *tln) 1133 { 1134 1135 INP_WLOCK_ASSERT(tp->t_inpcb); 1136 1137 tln->tln_ie = tp->t_inpcb->inp_inc.inc_ie; 1138 if (tp->t_inpcb->inp_inc.inc_flags & INC_ISIPV6) 1139 tln->tln_af = AF_INET6; 1140 else 1141 tln->tln_af = AF_INET; 1142 tln->tln_entries = tp->t_logs; 1143 tln->tln_count = tp->t_lognum; 1144 tln->tln_bucket = tp->t_lib; 1145 1146 /* Clear information from the PCB. */ 1147 STAILQ_INIT(&tp->t_logs); 1148 tp->t_lognum = 0; 1149 } 1150 1151 /* Do per-TCPCB cleanup */ 1152 void 1153 tcp_log_tcpcbfini(struct tcpcb *tp) 1154 { 1155 struct tcp_log_id_node *tln, *tln_first; 1156 struct tcp_log_mem *log_entry; 1157 sbintime_t callouttime; 1158 1159 INP_WLOCK_ASSERT(tp->t_inpcb); 1160 1161 /* 1162 * If we were gathering packets to be automatically dumped, try to do 1163 * it now. If this succeeds, the log information in the TCPCB will be 1164 * cleared. Otherwise, we'll handle the log information as we do 1165 * for other states. 
1166 */ 1167 switch(tp->t_logstate) { 1168 case TCP_LOG_STATE_HEAD_AUTO: 1169 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", 1170 M_NOWAIT, false); 1171 break; 1172 case TCP_LOG_STATE_TAIL_AUTO: 1173 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail", 1174 M_NOWAIT, false); 1175 break; 1176 case TCP_LOG_STATE_CONTINUAL: 1177 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1178 M_NOWAIT, false); 1179 break; 1180 } 1181 1182 /* 1183 * There are two ways we could keep logs: per-socket or per-ID. If 1184 * we are tracking logs with an ID, then the logs survive the 1185 * destruction of the TCPCB. 1186 * 1187 * If the TCPCB is associated with an ID node, move the logs from the 1188 * TCPCB to the ID node. In theory, this is safe, for reasons which I 1189 * will now explain for my own benefit when I next need to figure out 1190 * this code. :-) 1191 * 1192 * We own the INP lock. Therefore, no one else can change the contents 1193 * of this node (Rule C). Further, no one can remove this node from 1194 * the bucket while we hold the lock (Rule D). Basically, no one can 1195 * mess with this node. That leaves two states in which we could be: 1196 * 1197 * 1. Another thread is currently waiting to acquire the INP lock, with 1198 * plans to do something with this node. When we drop the INP lock, 1199 * they will have a chance to do that. They will recheck the 1200 * tln_closed field (see note to Rule C) and then acquire the 1201 * bucket lock before proceeding further. 1202 * 1203 * 2. Another thread will try to acquire a lock at some point in the 1204 * future. If they try to acquire a lock before we set the 1205 * tln_closed field, they will follow state #1. If they try to 1206 * acquire a lock after we set the tln_closed field, they will be 1207 * able to make changes to the node, at will, following Rule C. 1208 * 1209 * Therefore, we currently own this node and can make any changes 1210 * we want. But, as soon as we set the tln_closed field to true, we 1211 * have effectively dropped our lock on the node. (For this reason, we 1212 * also need to make sure our writes are ordered correctly. An atomic 1213 * operation with "release" semantics should be sufficient.) 1214 */ 1215 1216 if (tp->t_lin != NULL) { 1217 /* Copy the relevant information to the log entry. */ 1218 tln = tp->t_lin; 1219 KASSERT(tln->tln_inp == tp->t_inpcb, 1220 ("%s: Mismatched inp (tln->tln_inp=%p, tp->t_inpcb=%p)", 1221 __func__, tln->tln_inp, tp->t_inpcb)); 1222 tcp_log_move_tp_to_node(tp, tln); 1223 1224 /* Clear information from the PCB. */ 1225 tp->t_lin = NULL; 1226 tp->t_lib = NULL; 1227 1228 /* 1229 * Take a reference on the INP. This ensures that the INP 1230 * remains valid while the node is on the expiry queue. This 1231 * ensures the INP is valid for other threads that may be 1232 * racing to lock this node when we move it to the expire 1233 * queue. 1234 */ 1235 in_pcbref(tp->t_inpcb); 1236 1237 /* 1238 * Store the entry on the expiry list. The exact behavior 1239 * depends on whether we have entries to keep. If so, we 1240 * put the entry at the tail of the list and expire in 1241 * TCP_LOG_EXPIRE_TIME. Otherwise, we expire "now" and put 1242 * the entry at the head of the list. (Handling the cleanup 1243 * via the expiry timer lets us avoid locking messy-ness here.) 
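 *
 * Concretely, with the defaults defined at the top of this file
 * (TCP_LOG_EXPIRE_TIME = 60s, TCP_LOG_EXPIRE_INTVL = 5s), the two cases
 * work out to:
 *
 *	entries to keep:  tln_expiretime = now + 60s, insert at the
 *	                  tail; the callout is started for that time if
 *	                  the queue was empty and no callout was active.
 *	nothing to keep:  tln_expiretime = now, insert at (or just
 *	                  after) the head; the callout is pulled in to
 *	                  roughly now + 5s when needed.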
1244 */ 1245 tln->tln_expiretime = getsbinuptime(); 1246 TCPLOG_EXPIREQ_LOCK(); 1247 if (tln->tln_count) { 1248 tln->tln_expiretime += TCP_LOG_EXPIRE_TIME; 1249 if (STAILQ_EMPTY(&tcp_log_expireq_head) && 1250 !callout_active(&tcp_log_expireq_callout)) { 1251 /* 1252 * We are adding the first entry and a callout 1253 * is not currently scheduled; therefore, we 1254 * need to schedule one. 1255 */ 1256 callout_reset_sbt(&tcp_log_expireq_callout, 1257 tln->tln_expiretime, SBT_1S, tcp_log_expire, 1258 NULL, C_ABSOLUTE); 1259 } 1260 STAILQ_INSERT_TAIL(&tcp_log_expireq_head, tln, 1261 tln_expireq); 1262 } else { 1263 callouttime = tln->tln_expiretime + 1264 TCP_LOG_EXPIRE_INTVL; 1265 tln_first = STAILQ_FIRST(&tcp_log_expireq_head); 1266 1267 if ((tln_first == NULL || 1268 callouttime < tln_first->tln_expiretime) && 1269 (callout_pending(&tcp_log_expireq_callout) || 1270 !callout_active(&tcp_log_expireq_callout))) { 1271 /* 1272 * The list is empty, or we want to run the 1273 * expire code before the first entry's timer 1274 * fires. Also, we are in a case where a callout 1275 * is not actively running. We want to reset 1276 * the callout to occur sooner. 1277 */ 1278 callout_reset_sbt(&tcp_log_expireq_callout, 1279 callouttime, SBT_1S, tcp_log_expire, NULL, 1280 C_ABSOLUTE); 1281 } 1282 1283 /* 1284 * Insert to the head, or just after the head, as 1285 * appropriate. (This might result in small 1286 * mis-orderings as a bunch of "expire now" entries 1287 * gather at the start of the list, but that should 1288 * not produce big problems, since the expire timer 1289 * will walk through all of them.) 1290 */ 1291 if (tln_first == NULL || 1292 tln->tln_expiretime < tln_first->tln_expiretime) 1293 STAILQ_INSERT_HEAD(&tcp_log_expireq_head, tln, 1294 tln_expireq); 1295 else 1296 STAILQ_INSERT_AFTER(&tcp_log_expireq_head, 1297 tln_first, tln, tln_expireq); 1298 } 1299 TCPLOG_EXPIREQ_UNLOCK(); 1300 1301 /* 1302 * We are done messing with the tln. After this point, we 1303 * can't touch it. (Note that the "release" semantics should 1304 * be included with the TCPLOG_EXPIREQ_UNLOCK() call above. 1305 * Therefore, they should be unnecessary here. However, it 1306 * seems like a good idea to include them anyway, since we 1307 * really are releasing a lock here.) 1308 */ 1309 atomic_store_rel_int(&tln->tln_closed, 1); 1310 } else { 1311 /* Remove log entries. */ 1312 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 1313 tcp_log_remove_log_head(tp, log_entry); 1314 KASSERT(tp->t_lognum == 0, 1315 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 1316 __func__, tp->t_lognum)); 1317 } 1318 1319 /* 1320 * Change the log state to off (just in case anything tries to sneak 1321 * in a last-minute log). 1322 */ 1323 tp->t_logstate = TCP_LOG_STATE_OFF; 1324 } 1325 1326 /* 1327 * This logs an event for a TCP socket. Normally, this is called via 1328 * TCP_LOG_EVENT or TCP_LOG_EVENT_VERBOSE. See the documentation for 1329 * TCP_LOG_EVENT(). 
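 *
 * A sketch of a direct call, mirroring the parameter list below (most
 * callers go through the TCP_LOG_EVENT() macro instead, which supplies
 * the caller/function/line details; 'so' and 'tlen' here are
 * hypothetical locals in the calling stack):
 *
 *	if (tp->t_logstate != TCP_LOG_STATE_OFF)
 *		(void)tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd,
 *		    TCP_LOG_IN, 0, tlen, NULL, 1, NULL, __func__,
 *		    __LINE__, NULL);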
1330 */ 1331 1332 struct tcp_log_buffer * 1333 tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, 1334 struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, 1335 union tcp_log_stackspecific *stackinfo, int th_hostorder, 1336 const char *output_caller, const char *func, int line, const struct timeval *itv) 1337 { 1338 struct tcp_log_mem *log_entry; 1339 struct tcp_log_buffer *log_buf; 1340 int attempt_count = 0; 1341 struct tcp_log_verbose *log_verbose; 1342 uint32_t logsn; 1343 1344 KASSERT((func == NULL && line == 0) || (func != NULL && line > 0), 1345 ("%s called with inconsistent func (%p) and line (%d) arguments", 1346 __func__, func, line)); 1347 1348 INP_WLOCK_ASSERT(tp->t_inpcb); 1349 1350 KASSERT(tp->t_logstate == TCP_LOG_STATE_HEAD || 1351 tp->t_logstate == TCP_LOG_STATE_TAIL || 1352 tp->t_logstate == TCP_LOG_STATE_CONTINUAL || 1353 tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO || 1354 tp->t_logstate == TCP_LOG_STATE_TAIL_AUTO, 1355 ("%s called with unexpected tp->t_logstate (%d)", __func__, 1356 tp->t_logstate)); 1357 1358 /* 1359 * Get the serial number. We do this early so it will 1360 * increment even if we end up skipping the log entry for some 1361 * reason. 1362 */ 1363 logsn = tp->t_logsn++; 1364 1365 /* 1366 * Can we get a new log entry? If so, increment the lognum counter 1367 * here. 1368 */ 1369 retry: 1370 if (tp->t_lognum < tcp_log_session_limit) { 1371 if ((log_entry = uma_zalloc(tcp_log_zone, M_NOWAIT)) != NULL) 1372 tp->t_lognum++; 1373 } else 1374 log_entry = NULL; 1375 1376 /* Do we need to try to reuse? */ 1377 if (log_entry == NULL) { 1378 /* 1379 * Sacrifice auto-logged sessions without a log ID if 1380 * tcp_log_auto_all is false. (If they don't have a log 1381 * ID by now, it is probable that either they won't get one 1382 * or we are resource-constrained.) 1383 */ 1384 if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && 1385 !tcp_log_auto_all) { 1386 if (tcp_log_state_change(tp, TCP_LOG_STATE_CLEAR)) { 1387 #ifdef INVARIANTS 1388 panic("%s:%d: tcp_log_state_change() failed " 1389 "to set tp %p to TCP_LOG_STATE_CLEAR", 1390 __func__, __LINE__, tp); 1391 #endif 1392 tp->t_logstate = TCP_LOG_STATE_OFF; 1393 } 1394 return (NULL); 1395 } 1396 /* 1397 * If we are in TCP_LOG_STATE_HEAD_AUTO state, try to dump 1398 * the buffers. If successful, deactivate tracing. Otherwise, 1399 * leave it active so we will retry. 1400 */ 1401 if (tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO && 1402 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", 1403 M_NOWAIT, false)) { 1404 tp->t_logstate = TCP_LOG_STATE_OFF; 1405 return(NULL); 1406 } else if ((tp->t_logstate == TCP_LOG_STATE_CONTINUAL) && 1407 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1408 M_NOWAIT, false)) { 1409 if (attempt_count == 0) { 1410 attempt_count++; 1411 goto retry; 1412 } 1413 #ifdef TCPLOG_DEBUG_COUNTERS 1414 counter_u64_add(tcp_log_que_fail4, 1); 1415 #endif 1416 return(NULL); 1417 } else if (tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO) 1418 return(NULL); 1419 1420 /* If in HEAD state, just deactivate the tracing and return. */ 1421 if (tp->t_logstate == TCP_LOG_STATE_HEAD) { 1422 tp->t_logstate = TCP_LOG_STATE_OFF; 1423 return(NULL); 1424 } 1425 1426 /* 1427 * Get a buffer to reuse. If that fails, just give up. 1428 * (We can't log anything without a buffer in which to 1429 * put it.) 1430 * 1431 * Note that we don't change the t_lognum counter 1432 * here. Because we are re-using the buffer, the total 1433 * number won't change. 
1434 */ 1435 if ((log_entry = STAILQ_FIRST(&tp->t_logs)) == NULL) 1436 return(NULL); 1437 STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); 1438 tcp_log_entry_refcnt_rem(log_entry); 1439 } 1440 1441 KASSERT(log_entry != NULL, 1442 ("%s: log_entry unexpectedly NULL", __func__)); 1443 1444 /* Extract the log buffer and verbose buffer pointers. */ 1445 log_buf = &log_entry->tlm_buf; 1446 log_verbose = &log_entry->tlm_v; 1447 1448 /* Basic entries. */ 1449 if (itv == NULL) 1450 getmicrouptime(&log_buf->tlb_tv); 1451 else 1452 memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval)); 1453 log_buf->tlb_ticks = ticks; 1454 log_buf->tlb_sn = logsn; 1455 log_buf->tlb_stackid = tp->t_fb->tfb_id; 1456 log_buf->tlb_eventid = eventid; 1457 log_buf->tlb_eventflags = 0; 1458 log_buf->tlb_errno = errornum; 1459 1460 /* Socket buffers */ 1461 if (rxbuf != NULL) { 1462 log_buf->tlb_eventflags |= TLB_FLAG_RXBUF; 1463 log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc; 1464 log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc; 1465 log_buf->tlb_rxbuf.tls_sb_spare = 0; 1466 } 1467 if (txbuf != NULL) { 1468 log_buf->tlb_eventflags |= TLB_FLAG_TXBUF; 1469 log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc; 1470 log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc; 1471 log_buf->tlb_txbuf.tls_sb_spare = 0; 1472 } 1473 /* Copy values from tp to the log entry. */ 1474 #define COPY_STAT(f) log_buf->tlb_ ## f = tp->f 1475 #define COPY_STAT_T(f) log_buf->tlb_ ## f = tp->t_ ## f 1476 COPY_STAT_T(state); 1477 COPY_STAT_T(starttime); 1478 COPY_STAT(iss); 1479 COPY_STAT_T(flags); 1480 COPY_STAT(snd_una); 1481 COPY_STAT(snd_max); 1482 COPY_STAT(snd_cwnd); 1483 COPY_STAT(snd_nxt); 1484 COPY_STAT(snd_recover); 1485 COPY_STAT(snd_wnd); 1486 COPY_STAT(snd_ssthresh); 1487 COPY_STAT_T(srtt); 1488 COPY_STAT_T(rttvar); 1489 COPY_STAT(rcv_up); 1490 COPY_STAT(rcv_adv); 1491 COPY_STAT(rcv_nxt); 1492 COPY_STAT(sack_newdata); 1493 COPY_STAT(rcv_wnd); 1494 COPY_STAT_T(dupacks); 1495 COPY_STAT_T(segqlen); 1496 COPY_STAT(snd_numholes); 1497 COPY_STAT(snd_scale); 1498 COPY_STAT(rcv_scale); 1499 #undef COPY_STAT 1500 #undef COPY_STAT_T 1501 log_buf->tlb_flex1 = 0; 1502 log_buf->tlb_flex2 = 0; 1503 /* Copy stack-specific info. */ 1504 if (stackinfo != NULL) { 1505 memcpy(&log_buf->tlb_stackinfo, stackinfo, 1506 sizeof(log_buf->tlb_stackinfo)); 1507 log_buf->tlb_eventflags |= TLB_FLAG_STACKINFO; 1508 } 1509 1510 /* The packet */ 1511 log_buf->tlb_len = len; 1512 if (th) { 1513 int optlen; 1514 1515 log_buf->tlb_eventflags |= TLB_FLAG_HDR; 1516 log_buf->tlb_th = *th; 1517 if (th_hostorder) 1518 tcp_fields_to_net(&log_buf->tlb_th); 1519 optlen = (th->th_off << 2) - sizeof (struct tcphdr); 1520 if (optlen > 0) 1521 memcpy(log_buf->tlb_opts, th + 1, optlen); 1522 } 1523 1524 /* Verbose information */ 1525 if (func != NULL) { 1526 log_buf->tlb_eventflags |= TLB_FLAG_VERBOSE; 1527 if (output_caller != NULL) 1528 strlcpy(log_verbose->tlv_snd_frm, output_caller, 1529 TCP_FUNC_LEN); 1530 else 1531 *log_verbose->tlv_snd_frm = 0; 1532 strlcpy(log_verbose->tlv_trace_func, func, TCP_FUNC_LEN); 1533 log_verbose->tlv_trace_line = line; 1534 } 1535 1536 /* Insert the new log at the tail. */ 1537 STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue); 1538 tcp_log_entry_refcnt_add(log_entry); 1539 return (log_buf); 1540 } 1541 1542 /* 1543 * Change the logging state for a TCPCB. Returns 0 on success or an 1544 * error code on failure. 
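 *
 * A sketch of kernel-side use (illustrative; the INP write lock must be
 * held, as asserted below; userspace typically reaches this path via
 * the TCP_LOG socket option):
 *
 *	INP_WLOCK(inp);
 *	error = tcp_log_state_change(intotcpcb(inp), TCP_LOG_STATE_TAIL);
 *	INP_WUNLOCK(inp);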
1545 */ 1546 int 1547 tcp_log_state_change(struct tcpcb *tp, int state) 1548 { 1549 struct tcp_log_mem *log_entry; 1550 1551 INP_WLOCK_ASSERT(tp->t_inpcb); 1552 switch(state) { 1553 case TCP_LOG_STATE_CLEAR: 1554 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 1555 tcp_log_remove_log_head(tp, log_entry); 1556 /* Fall through */ 1557 1558 case TCP_LOG_STATE_OFF: 1559 tp->t_logstate = TCP_LOG_STATE_OFF; 1560 break; 1561 1562 case TCP_LOG_STATE_TAIL: 1563 case TCP_LOG_STATE_HEAD: 1564 case TCP_LOG_STATE_CONTINUAL: 1565 case TCP_LOG_STATE_HEAD_AUTO: 1566 case TCP_LOG_STATE_TAIL_AUTO: 1567 tp->t_logstate = state; 1568 break; 1569 1570 default: 1571 return (EINVAL); 1572 } 1573 1574 tp->t_flags2 &= ~(TF2_LOG_AUTO); 1575 1576 return (0); 1577 } 1578 1579 /* If tcp_drain() is called, flush half the log entries. */ 1580 void 1581 tcp_log_drain(struct tcpcb *tp) 1582 { 1583 struct tcp_log_mem *log_entry, *next; 1584 int target, skip; 1585 1586 INP_WLOCK_ASSERT(tp->t_inpcb); 1587 if ((target = tp->t_lognum / 2) == 0) 1588 return; 1589 1590 /* 1591 * If we are logging the "head" packets, we want to discard 1592 * from the tail of the queue. Otherwise, we want to discard 1593 * from the head. 1594 */ 1595 if (tp->t_logstate == TCP_LOG_STATE_HEAD || 1596 tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO) { 1597 skip = tp->t_lognum - target; 1598 STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue) 1599 if (!--skip) 1600 break; 1601 KASSERT(log_entry != NULL, 1602 ("%s: skipped through all entries!", __func__)); 1603 if (log_entry == NULL) 1604 return; 1605 while ((next = STAILQ_NEXT(log_entry, tlm_queue)) != NULL) { 1606 STAILQ_REMOVE_AFTER(&tp->t_logs, log_entry, tlm_queue); 1607 tcp_log_entry_refcnt_rem(next); 1608 tcp_log_remove_log_cleanup(tp, next); 1609 #ifdef INVARIANTS 1610 target--; 1611 #endif 1612 } 1613 KASSERT(target == 0, 1614 ("%s: After removing from tail, target was %d", __func__, 1615 target)); 1616 } else if (tp->t_logstate == TCP_LOG_STATE_CONTINUAL) { 1617 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1618 M_NOWAIT, false); 1619 } else { 1620 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL && 1621 target--) 1622 tcp_log_remove_log_head(tp, log_entry); 1623 KASSERT(target <= 0, 1624 ("%s: After removing from head, target was %d", __func__, 1625 target)); 1626 KASSERT(tp->t_lognum > 0, 1627 ("%s: After removing from head, tp->t_lognum was %d", 1628 __func__, target)); 1629 KASSERT(log_entry != NULL, 1630 ("%s: After removing from head, the tailq was empty", 1631 __func__)); 1632 } 1633 } 1634 1635 static inline int 1636 tcp_log_copyout(struct sockopt *sopt, void *src, void *dst, size_t len) 1637 { 1638 1639 if (sopt->sopt_td != NULL) 1640 return (copyout(src, dst, len)); 1641 bcopy(src, dst, len); 1642 return (0); 1643 } 1644 1645 static int 1646 tcp_log_logs_to_buf(struct sockopt *sopt, struct tcp_log_stailq *log_tailqp, 1647 struct tcp_log_buffer **end, int count) 1648 { 1649 struct tcp_log_buffer *out_entry; 1650 struct tcp_log_mem *log_entry; 1651 size_t entrysize; 1652 int error; 1653 #ifdef INVARIANTS 1654 int orig_count = count; 1655 #endif 1656 1657 /* Copy the data out. 
*/ 1658 error = 0; 1659 out_entry = (struct tcp_log_buffer *) sopt->sopt_val; 1660 STAILQ_FOREACH(log_entry, log_tailqp, tlm_queue) { 1661 count--; 1662 KASSERT(count >= 0, 1663 ("%s:%d: Exceeded expected count (%d) processing list %p", 1664 __func__, __LINE__, orig_count, log_tailqp)); 1665 1666 #ifdef TCPLOG_DEBUG_COUNTERS 1667 counter_u64_add(tcp_log_que_copyout, 1); 1668 #endif 1669 1670 /* 1671 * Skip copying out the header if it isn't present. 1672 * Instead, copy out zeros (to ensure we don't leak info). 1673 * TODO: Make sure we truly do zero everything we don't 1674 * explicitly set. 1675 */ 1676 if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR) 1677 entrysize = sizeof(struct tcp_log_buffer); 1678 else 1679 entrysize = offsetof(struct tcp_log_buffer, tlb_th); 1680 error = tcp_log_copyout(sopt, &log_entry->tlm_buf, out_entry, 1681 entrysize); 1682 if (error) 1683 break; 1684 if (!(log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)) { 1685 error = tcp_log_copyout(sopt, zerobuf, 1686 ((uint8_t *)out_entry) + entrysize, 1687 sizeof(struct tcp_log_buffer) - entrysize); 1688 } 1689 1690 /* 1691 * Copy out the verbose bit, if needed. Either way, 1692 * increment the output pointer the correct amount. 1693 */ 1694 if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) { 1695 error = tcp_log_copyout(sopt, &log_entry->tlm_v, 1696 out_entry->tlb_verbose, 1697 sizeof(struct tcp_log_verbose)); 1698 if (error) 1699 break; 1700 out_entry = (struct tcp_log_buffer *) 1701 (((uint8_t *) (out_entry + 1)) + 1702 sizeof(struct tcp_log_verbose)); 1703 } else 1704 out_entry++; 1705 } 1706 *end = out_entry; 1707 KASSERT(error || count == 0, 1708 ("%s:%d: Less than expected count (%d) processing list %p" 1709 " (%d remain)", __func__, __LINE__, orig_count, 1710 log_tailqp, count)); 1711 1712 return (error); 1713 } 1714 1715 /* 1716 * Copy out the buffer. Note that we do incremental copying, so 1717 * sooptcopyout() won't work. However, the goal is to produce the same 1718 * end result as if we copied in the entire user buffer, updated it, 1719 * and then used sooptcopyout() to copy it out. 1720 * 1721 * NOTE: This should be called with a write lock on the PCB; however, 1722 * the function will drop it after it extracts the data from the TCPCB. 1723 */ 1724 int 1725 tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp) 1726 { 1727 struct tcp_log_stailq log_tailq; 1728 struct tcp_log_mem *log_entry, *log_next; 1729 struct tcp_log_buffer *out_entry; 1730 struct inpcb *inp; 1731 size_t outsize, entrysize; 1732 int error, outnum; 1733 1734 INP_WLOCK_ASSERT(tp->t_inpcb); 1735 inp = tp->t_inpcb; 1736 1737 /* 1738 * Determine which log entries will fit in the buffer. As an 1739 * optimization, skip this if all the entries will clearly fit 1740 * in the buffer. (However, get an exact size if we are using 1741 * INVARIANTS.) 
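 *
 * The quick check below treats every entry as worst-case sized, i.e. it
 * assumes each entry needs
 *
 *	sizeof(struct tcp_log_buffer) + sizeof(struct tcp_log_verbose)
 *
 * bytes of user buffer; only when that estimate says the entries might
 * not all fit do we walk the list and total up exact per-entry sizes
 * (verbose data is only counted when TLB_FLAG_VERBOSE is set).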
     */
#ifndef INVARIANTS
    if (sopt->sopt_valsize / (sizeof(struct tcp_log_buffer) +
        sizeof(struct tcp_log_verbose)) >= tp->t_lognum) {
        log_entry = STAILQ_LAST(&tp->t_logs, tcp_log_mem, tlm_queue);
        log_next = NULL;
        outsize = 0;
        outnum = tp->t_lognum;
    } else {
#endif
        outsize = outnum = 0;
        log_entry = NULL;
        STAILQ_FOREACH(log_next, &tp->t_logs, tlm_queue) {
            entrysize = sizeof(struct tcp_log_buffer);
            if (log_next->tlm_buf.tlb_eventflags &
                TLB_FLAG_VERBOSE)
                entrysize += sizeof(struct tcp_log_verbose);
            if ((sopt->sopt_valsize - outsize) < entrysize)
                break;
            outsize += entrysize;
            outnum++;
            log_entry = log_next;
        }
        KASSERT(outsize <= sopt->sopt_valsize,
            ("%s: calculated output size (%zu) greater than "
            "available space (%zu)", __func__, outsize,
            sopt->sopt_valsize));
#ifndef INVARIANTS
    }
#endif

    /*
     * Copy traditional sooptcopyout() behavior: if sopt->sopt_val
     * is NULL, silently skip the copy. However, in this case, we
     * will leave the list alone and return. Functionally, this
     * gives userspace a way to poll for an approximate buffer
     * size they will need to get the log entries.
     */
    if (sopt->sopt_val == NULL) {
        INP_WUNLOCK(inp);
        if (outsize == 0) {
            outsize = outnum * (sizeof(struct tcp_log_buffer) +
                sizeof(struct tcp_log_verbose));
        }
        if (sopt->sopt_valsize > outsize)
            sopt->sopt_valsize = outsize;
        return (0);
    }

    /*
     * Break apart the list. We'll save the ones we want to copy
     * out locally and remove them from the TCPCB list. We can
     * then drop the INPCB lock while we do the copyout.
     *
     * There are roughly three cases:
     * 1. There was nothing to copy out. That's easy: drop the
     *    lock and return.
     * 2. We are copying out the entire list. Again, that's easy:
     *    move the whole list.
     * 3. We are copying out a partial list. That's harder. We
     *    need to update the list book-keeping entries.
     */
    if (log_entry != NULL && log_next == NULL) {
        /* Move entire list. */
        KASSERT(outnum == tp->t_lognum,
            ("%s:%d: outnum (%d) should match tp->t_lognum (%d)",
            __func__, __LINE__, outnum, tp->t_lognum));
        log_tailq = tp->t_logs;
        tp->t_lognum = 0;
        STAILQ_INIT(&tp->t_logs);
    } else if (log_entry != NULL) {
        /* Move partial list. */
        KASSERT(outnum < tp->t_lognum,
            ("%s:%d: outnum (%d) not less than tp->t_lognum (%d)",
            __func__, __LINE__, outnum, tp->t_lognum));
        STAILQ_FIRST(&log_tailq) = STAILQ_FIRST(&tp->t_logs);
        STAILQ_FIRST(&tp->t_logs) = STAILQ_NEXT(log_entry, tlm_queue);
        KASSERT(STAILQ_NEXT(log_entry, tlm_queue) != NULL,
            ("%s:%d: tp->t_logs is unexpectedly shorter than expected"
            " (tp: %p, log_tailq: %p, outnum: %d, tp->t_lognum: %d)",
            __func__, __LINE__, tp, &log_tailq, outnum, tp->t_lognum));
        STAILQ_NEXT(log_entry, tlm_queue) = NULL;
        log_tailq.stqh_last = &STAILQ_NEXT(log_entry, tlm_queue);
        tp->t_lognum -= outnum;
    } else
        STAILQ_INIT(&log_tailq);

    /* Drop the PCB lock. */
    INP_WUNLOCK(inp);

    /* Copy the data out. */
    error = tcp_log_logs_to_buf(sopt, &log_tailq, &out_entry, outnum);

    if (error) {
        /* Restore list */
        INP_WLOCK(inp);
        if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0) {
            tp = intotcpcb(inp);

            /* Merge the two lists. 

static void
tcp_log_free_queue(struct tcp_log_dev_queue *param)
{
	struct tcp_log_dev_log_queue *entry;

	KASSERT(param != NULL, ("%s: called with NULL param", __func__));
	if (param == NULL)
		return;

	entry = (struct tcp_log_dev_log_queue *)param;

	/* Free the entries. */
	tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count);

	/* Free the buffer, if it is allocated. */
	if (entry->tldl_common.tldq_buf != NULL)
		free(entry->tldl_common.tldq_buf, M_TCPLOGDEV);

	/* Free the queue entry. */
	free(entry, M_TCPLOGDEV);
}

static struct tcp_log_common_header *
tcp_log_expandlogbuf(struct tcp_log_dev_queue *param)
{
	struct tcp_log_dev_log_queue *entry;
	struct tcp_log_header *hdr;
	uint8_t *end;
	struct sockopt sopt;
	int error;

	entry = (struct tcp_log_dev_log_queue *)param;

	/* Take a worst-case guess at space needs. */
	sopt.sopt_valsize = sizeof(struct tcp_log_header) +
	    entry->tldl_count * (sizeof(struct tcp_log_buffer) +
	    sizeof(struct tcp_log_verbose));
	hdr = malloc(sopt.sopt_valsize, M_TCPLOGDEV, M_NOWAIT);
	if (hdr == NULL) {
#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_fail5, entry->tldl_count);
#endif
		return (NULL);
	}
	sopt.sopt_val = hdr + 1;
	sopt.sopt_valsize -= sizeof(struct tcp_log_header);
	sopt.sopt_td = NULL;

	error = tcp_log_logs_to_buf(&sopt, &entry->tldl_entries,
	    (struct tcp_log_buffer **)&end, entry->tldl_count);
	if (error) {
		free(hdr, M_TCPLOGDEV);
		return (NULL);
	}

	/* Free the entries. */
	tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count);
	entry->tldl_count = 0;

	memset(hdr, 0, sizeof(struct tcp_log_header));
	hdr->tlh_version = TCP_LOG_BUF_VER;
	hdr->tlh_type = TCP_LOG_DEV_TYPE_BBR;
	hdr->tlh_length = end - (uint8_t *)hdr;
	hdr->tlh_ie = entry->tldl_ie;
	hdr->tlh_af = entry->tldl_af;
	getboottime(&hdr->tlh_offset);
	strlcpy(hdr->tlh_id, entry->tldl_id, TCP_LOG_ID_LEN);
	strlcpy(hdr->tlh_reason, entry->tldl_reason, TCP_LOG_REASON_LEN);
	return ((struct tcp_log_common_header *)hdr);
}

/*
 * Queue the tcpcb's log buffer for transmission via the log buffer facility.
 *
 * NOTE: This should be called with a write lock on the PCB.
 *
 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
 * and reacquire the INP lock if it needs to do so.
 *
 * If force is false, this will only dump auto-logged sessions if
 * tcp_log_auto_all is true or if there is a log ID defined for the session.
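 *
 * Illustrative call sketch (hypothetical caller, not taken from this file),
 * showing the expected locking around the M_NOWAIT case:
 *
 *	INP_WLOCK(inp);
 *	tp = intotcpcb(inp);
 *	error = tcp_log_dump_tp_logbuf(tp, "example reason", M_NOWAIT, false);
 *	INP_WUNLOCK(inp);
 *
 * With M_NOWAIT the INP lock is held across the call; with M_WAITOK the
 * caller must assume the lock may have been dropped and reacquired.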
 */
int
tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force)
{
	struct tcp_log_dev_log_queue *entry;
	struct inpcb *inp;
#ifdef TCPLOG_DEBUG_COUNTERS
	int num_entries;
#endif

	inp = tp->t_inpcb;
	INP_WLOCK_ASSERT(inp);

	/* If there are no log entries, there is nothing to do. */
	if (tp->t_lognum == 0)
		return (0);

	/* Check for a log ID. */
	if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) &&
	    !tcp_log_auto_all && !force) {
		struct tcp_log_mem *log_entry;

		/*
		 * We needed a log ID and none was found. Free the log entries
		 * and return success. Also, cancel further logging. If the
		 * session doesn't have a log ID by now, we'll assume it isn't
		 * going to get one.
		 */
		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
			tcp_log_remove_log_head(tp, log_entry);
		KASSERT(tp->t_lognum == 0,
		    ("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
		    __func__, tp->t_lognum));
		tp->t_logstate = TCP_LOG_STATE_OFF;
		return (0);
	}

	/*
	 * Allocate memory. If we must wait, we'll need to drop the locks
	 * and reacquire them (and do all the related business that goes
	 * along with that).
	 */
	entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
	    M_NOWAIT);
	if (entry == NULL && (how & M_NOWAIT)) {
#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_fail3, 1);
#endif
		return (ENOBUFS);
	}
	if (entry == NULL) {
		INP_WUNLOCK(inp);
		entry = malloc(sizeof(struct tcp_log_dev_log_queue),
		    M_TCPLOGDEV, M_WAITOK);
		INP_WLOCK(inp);
		/*
		 * Note that this check is slightly overly restrictive in
		 * that the TCB can survive either of these events.
		 * However, there is currently not a good way to ensure
		 * that is the case. So, if we hit this M_WAITOK path, we
		 * may end up dropping some entries. That seems like a
		 * small price to pay for safety.
		 */
		if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
			free(entry, M_TCPLOGDEV);
#ifdef TCPLOG_DEBUG_COUNTERS
			counter_u64_add(tcp_log_que_fail2, 1);
#endif
			return (ECONNRESET);
		}
		tp = intotcpcb(inp);
		if (tp->t_lognum == 0) {
			free(entry, M_TCPLOGDEV);
			return (0);
		}
	}

	/* Fill in the unique parts of the queue entry. */
	if (tp->t_lib != NULL)
		strlcpy(entry->tldl_id, tp->t_lib->tlb_id, TCP_LOG_ID_LEN);
	else
		strlcpy(entry->tldl_id, "UNKNOWN", TCP_LOG_ID_LEN);
	if (reason != NULL)
		strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
	else
		strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
	entry->tldl_ie = inp->inp_inc.inc_ie;
	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		entry->tldl_af = AF_INET6;
	else
		entry->tldl_af = AF_INET;
	entry->tldl_entries = tp->t_logs;
	entry->tldl_count = tp->t_lognum;

	/* Fill in the common parts of the queue entry. */
	entry->tldl_common.tldq_buf = NULL;
	entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
	entry->tldl_common.tldq_dtor = tcp_log_free_queue;

	/* Clear the log data from the TCPCB. */
#ifdef TCPLOG_DEBUG_COUNTERS
	num_entries = tp->t_lognum;
#endif
	tp->t_lognum = 0;
	STAILQ_INIT(&tp->t_logs);

	/* Add the entry. If no one is listening, free the entry.
 */
	if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) {
		tcp_log_free_queue((struct tcp_log_dev_queue *)entry);
#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_fail1, num_entries);
	} else {
		counter_u64_add(tcp_log_queued, num_entries);
#endif
	}
	return (0);
}

/*
 * Queue the log_id_node's log buffers for transmission via the log buffer
 * facility.
 *
 * NOTE: This should be called with the bucket locked and referenced.
 *
 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
 * and reacquire the bucket lock if it needs to do so. (The caller must
 * ensure that the tln is no longer on any lists so no one else will mess
 * with this while the lock is dropped!)
 */
static int
tcp_log_dump_node_logbuf(struct tcp_log_id_node *tln, char *reason, int how)
{
	struct tcp_log_dev_log_queue *entry;
	struct tcp_log_id_bucket *tlb;

	tlb = tln->tln_bucket;
	TCPID_BUCKET_LOCK_ASSERT(tlb);
	KASSERT(tlb->tlb_refcnt > 0,
	    ("%s:%d: Called with unreferenced bucket (tln=%p, tlb=%p)",
	    __func__, __LINE__, tln, tlb));
	KASSERT(tln->tln_closed,
	    ("%s:%d: Called for node with tln_closed==false (tln=%p)",
	    __func__, __LINE__, tln));

	/* If there are no log entries, there is nothing to do. */
	if (tln->tln_count == 0)
		return (0);

	/*
	 * Allocate memory. If we must wait, we'll need to drop the locks
	 * and reacquire them (and do all the related business that goes
	 * along with that).
	 */
	entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
	    M_NOWAIT);
	if (entry == NULL && (how & M_NOWAIT))
		return (ENOBUFS);
	if (entry == NULL) {
		TCPID_BUCKET_UNLOCK(tlb);
		entry = malloc(sizeof(struct tcp_log_dev_log_queue),
		    M_TCPLOGDEV, M_WAITOK);
		TCPID_BUCKET_LOCK(tlb);
	}

	/* Fill in the common parts of the queue entry. */
	entry->tldl_common.tldq_buf = NULL;
	entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
	entry->tldl_common.tldq_dtor = tcp_log_free_queue;

	/* Fill in the unique parts of the queue entry. */
	strlcpy(entry->tldl_id, tlb->tlb_id, TCP_LOG_ID_LEN);
	if (reason != NULL)
		strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
	else
		strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
	entry->tldl_ie = tln->tln_ie;
	entry->tldl_entries = tln->tln_entries;
	entry->tldl_count = tln->tln_count;
	entry->tldl_af = tln->tln_af;

	/* Add the entry. If no one is listening, free the entry. */
	if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry))
		tcp_log_free_queue((struct tcp_log_dev_queue *)entry);

	return (0);
}

/*
 * Queue the log buffers for all sessions in a bucket for transmission via
 * the log buffer facility.
 *
 * NOTE: This should be called with a locked bucket; however, the function
 * will drop the lock.
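 *
 * Illustrative call sketch (hypothetical caller, not taken from this file):
 *
 *	TCPID_BUCKET_LOCK(tlb);
 *	tcp_log_dumpbucketlogs(tlb, "example reason");
 *
 * The bucket lock has been dropped by the time the function returns.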
 */
#define	LOCAL_SAVE 10
static void
tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason)
{
	struct tcp_log_id_node local_entries[LOCAL_SAVE];
	struct inpcb *inp;
	struct tcpcb *tp;
	struct tcp_log_id_node *cur_tln, *prev_tln, *tmp_tln;
	int i, num_local_entries, tree_locked;
	bool expireq_locked;

	TCPID_BUCKET_LOCK_ASSERT(tlb);

	/*
	 * Take a reference on the bucket to keep it from disappearing until
	 * we are done.
	 */
	TCPID_BUCKET_REF(tlb);

	/*
	 * We'll try to queue each node's log buffers without dropping locks.
	 * However, we might very well need to drop locks to get memory. If
	 * that's the case, we'll save up to LOCAL_SAVE of them on the stack,
	 * and sacrifice the rest. (Otherwise, we would need to worry about
	 * finding our place again in a potentially changed list. It just
	 * doesn't seem worth the trouble to do that.)
	 */
	expireq_locked = false;
	num_local_entries = 0;
	prev_tln = NULL;
	tree_locked = TREE_UNLOCKED;
	SLIST_FOREACH_SAFE(cur_tln, &tlb->tlb_head, tln_list, tmp_tln) {
		/*
		 * If this isn't associated with a TCPCB, we can pull it off
		 * the list now. We need to be careful that the expire timer
		 * hasn't already taken ownership (tln_expiretime == SBT_MAX).
		 * If so, we let the expire timer code free the data.
		 */
		if (cur_tln->tln_closed) {
no_inp:
			/*
			 * Get the expireq lock so we can get a consistent
			 * read of tln_expiretime and so we can remove this
			 * from the expireq.
			 */
			if (!expireq_locked) {
				TCPLOG_EXPIREQ_LOCK();
				expireq_locked = true;
			}

			/*
			 * We ignore entries with tln_expiretime == SBT_MAX.
			 * The expire timer code already owns those.
			 */
			KASSERT(cur_tln->tln_expiretime > (sbintime_t) 0,
			    ("%s:%d: node on the expire queue without positive "
			    "expire time", __func__, __LINE__));
			if (cur_tln->tln_expiretime == SBT_MAX) {
				prev_tln = cur_tln;
				continue;
			}

			/* Remove the entry from the expireq. */
			STAILQ_REMOVE(&tcp_log_expireq_head, cur_tln,
			    tcp_log_id_node, tln_expireq);

			/* Remove the entry from the bucket. */
			if (prev_tln != NULL)
				SLIST_REMOVE_AFTER(prev_tln, tln_list);
			else
				SLIST_REMOVE_HEAD(&tlb->tlb_head, tln_list);

			/*
			 * Drop the INP and bucket reference counts. Due to
			 * lock-ordering rules, we need to drop the expire
			 * queue lock.
			 */
			TCPLOG_EXPIREQ_UNLOCK();
			expireq_locked = false;

			/* Drop the INP reference. */
			INP_WLOCK(cur_tln->tln_inp);
			if (!in_pcbrele_wlocked(cur_tln->tln_inp))
				INP_WUNLOCK(cur_tln->tln_inp);

			if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
#ifdef INVARIANTS
				panic("%s: Bucket refcount unexpectedly 0.",
				    __func__);
#endif
				/*
				 * Recover as best we can: free the entry we
				 * own.
				 */
				tcp_log_free_entries(&cur_tln->tln_entries,
				    &cur_tln->tln_count);
				uma_zfree(tcp_log_node_zone, cur_tln);
				goto done;
			}

			if (tcp_log_dump_node_logbuf(cur_tln, reason,
			    M_NOWAIT)) {
				/*
				 * If we have space, save the entries locally.
				 * Otherwise, free them.
				 */
				if (num_local_entries < LOCAL_SAVE) {
					local_entries[num_local_entries] =
					    *cur_tln;
					num_local_entries++;
				} else {
					tcp_log_free_entries(
					    &cur_tln->tln_entries,
					    &cur_tln->tln_count);
				}
			}

			/* No matter what, we are done with the node now. */
			uma_zfree(tcp_log_node_zone, cur_tln);

			/*
			 * Because we removed this entry from the list, prev_tln
			 * (which tracks the previous entry still on the tlb
			 * list) remains unchanged.
			 */
			continue;
		}

		/*
		 * If we get to this point, the session data is still held in
		 * the TCPCB. So, we need to pull the data out of that.
		 *
		 * We will need to drop the expireq lock so we can lock the INP.
		 * We can then try to extract the data the "easy" way. If that
		 * fails, we'll save the log entries for later.
		 */
		if (expireq_locked) {
			TCPLOG_EXPIREQ_UNLOCK();
			expireq_locked = false;
		}

		/* Lock the INP and then re-check the state. */
		inp = cur_tln->tln_inp;
		INP_WLOCK(inp);
		/*
		 * If we caught this while it was transitioning, the data
		 * might have moved from the TCPCB to the tln (signified by
		 * tln_closed being set to true). If so, treat this like an
		 * inactive connection.
		 */
		if (cur_tln->tln_closed) {
			/*
			 * It looks like we may have caught this connection
			 * while it was transitioning from active to inactive.
			 * Treat this like an inactive connection.
			 */
			INP_WUNLOCK(inp);
			goto no_inp;
		}

		/*
		 * Try to dump the data from the tp without dropping the lock.
		 * If this fails, try to save off the data locally.
		 */
		tp = cur_tln->tln_tp;
		if (tcp_log_dump_tp_logbuf(tp, reason, M_NOWAIT, true) &&
		    num_local_entries < LOCAL_SAVE) {
			tcp_log_move_tp_to_node(tp,
			    &local_entries[num_local_entries]);
			local_entries[num_local_entries].tln_closed = 1;
			KASSERT(local_entries[num_local_entries].tln_bucket ==
			    tlb, ("%s: %d: bucket mismatch for node %p",
			    __func__, __LINE__, cur_tln));
			num_local_entries++;
		}

		INP_WUNLOCK(inp);

		/*
		 * We are going to leave the current tln on the list. It will
		 * become the previous tln.
		 */
		prev_tln = cur_tln;
	}

	/* Drop our locks, if any. */
	KASSERT(tree_locked == TREE_UNLOCKED,
	    ("%s: %d: tree unexpectedly locked", __func__, __LINE__));
	switch (tree_locked) {
	case TREE_WLOCKED:
		TCPID_TREE_WUNLOCK();
		tree_locked = TREE_UNLOCKED;
		break;
	case TREE_RLOCKED:
		TCPID_TREE_RUNLOCK();
		tree_locked = TREE_UNLOCKED;
		break;
	}
	if (expireq_locked) {
		TCPLOG_EXPIREQ_UNLOCK();
		expireq_locked = false;
	}

	/*
	 * Try again for any saved entries. tcp_log_dump_node_logbuf() is
	 * guaranteed to free the log entries within the node. And, since
	 * the node itself is on our stack, we don't need to free it.
	 */
	for (i = 0; i < num_local_entries; i++)
		tcp_log_dump_node_logbuf(&local_entries[i], reason, M_WAITOK);

	/* Drop our reference. */
	if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
		TCPID_BUCKET_UNLOCK(tlb);

done:
	/* Drop our locks, if any.
	 */
	switch (tree_locked) {
	case TREE_WLOCKED:
		TCPID_TREE_WUNLOCK();
		break;
	case TREE_RLOCKED:
		TCPID_TREE_RUNLOCK();
		break;
	}
	if (expireq_locked)
		TCPLOG_EXPIREQ_UNLOCK();
}
#undef LOCAL_SAVE

/*
 * Queue the log buffers for all sessions in the tcpcb's bucket for
 * transmission via the log buffer facility.
 *
 * NOTE: This should be called with a locked INP; however, the function
 * will drop the lock.
 */
void
tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason)
{
	struct tcp_log_id_bucket *tlb;
	int tree_locked;

	/* Figure out our bucket and lock it. */
	INP_WLOCK_ASSERT(tp->t_inpcb);
	tlb = tp->t_lib;
	if (tlb == NULL) {
		/*
		 * No bucket; treat this like a request to dump a single
		 * session's traces.
		 */
		(void)tcp_log_dump_tp_logbuf(tp, reason, M_WAITOK, true);
		INP_WUNLOCK(tp->t_inpcb);
		return;
	}
	TCPID_BUCKET_REF(tlb);
	INP_WUNLOCK(tp->t_inpcb);
	TCPID_BUCKET_LOCK(tlb);

	/* If we are the last reference, we have nothing more to do here. */
	tree_locked = TREE_UNLOCKED;
	if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
		switch (tree_locked) {
		case TREE_WLOCKED:
			TCPID_TREE_WUNLOCK();
			break;
		case TREE_RLOCKED:
			TCPID_TREE_RUNLOCK();
			break;
		}
		return;
	}

	/* Turn this over to tcp_log_dumpbucketlogs() to finish the work. */
	tcp_log_dumpbucketlogs(tlb, reason);
}

/*
 * Mark the end of a flow with the current stack. A stack can add
 * stack-specific info to this trace event by overriding this
 * function (see bbr_log_flowend() for example).
 */
void
tcp_log_flowend(struct tcpcb *tp)
{
	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
		struct socket *so = tp->t_inpcb->inp_socket;

		TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd,
		    TCP_LOG_FLOWEND, 0, 0, NULL, false);
	}
}
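
/*
 * Illustrative sketch (hypothetical stack code, not taken from this file):
 * a stack-specific event is logged with the same pattern used by
 * tcp_log_flowend() above; TCP_LOG_USERSEND is just one example of an
 * event type from tcp_log_buf.h.
 *
 *	if (tp->t_logstate != TCP_LOG_STATE_OFF) {
 *		struct socket *so = tp->t_inpcb->inp_socket;
 *
 *		TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd,
 *		    TCP_LOG_USERSEND, 0, 0, NULL, false);
 *	}
 */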