1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2016-2018 Netflix, Inc. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_inet.h" 33 #include <sys/param.h> 34 #include <sys/arb.h> 35 #include <sys/hash.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/qmath.h> 41 #include <sys/queue.h> 42 #include <sys/refcount.h> 43 #include <sys/rwlock.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/tree.h> 48 #include <sys/stats.h> /* Must come after qmath.h and tree.h */ 49 #include <sys/counter.h> 50 #include <dev/tcp_log/tcp_log_dev.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/vnet.h> 55 56 #include <netinet/in.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_var.h> 59 #include <netinet/tcp_var.h> 60 #include <netinet/tcp_log_buf.h> 61 #include <netinet/tcp_seq.h> 62 #include <netinet/tcp_hpts.h> 63 64 /* Default expiry time */ 65 #define TCP_LOG_EXPIRE_TIME ((sbintime_t)60 * SBT_1S) 66 67 /* Max interval at which to run the expiry timer */ 68 #define TCP_LOG_EXPIRE_INTVL ((sbintime_t)5 * SBT_1S) 69 70 bool tcp_log_verbose; 71 static uma_zone_t tcp_log_id_bucket_zone, tcp_log_id_node_zone, tcp_log_zone; 72 static int tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT; 73 static uint32_t tcp_log_version = TCP_LOG_BUF_VER; 74 RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket); 75 static struct tcp_log_id_tree tcp_log_id_head; 76 static STAILQ_HEAD(, tcp_log_id_node) tcp_log_expireq_head = 77 STAILQ_HEAD_INITIALIZER(tcp_log_expireq_head); 78 static struct mtx tcp_log_expireq_mtx; 79 static struct callout tcp_log_expireq_callout; 80 static u_long tcp_log_auto_ratio = 0; 81 static volatile u_long tcp_log_auto_ratio_cur = 0; 82 static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL; 83 static bool tcp_log_auto_all = false; 84 static uint32_t tcp_disable_all_bb_logs = 0; 85 86 RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) 87 88 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 89 "TCP Black Box controls"); 90 91 SYSCTL_NODE(_net_inet_tcp_bb, 
    OID_AUTO, tp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "TCP Black Box Trace Point controls");

SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose,
    0, "Force verbose logging for TCP traces");

SYSCTL_INT(_net_inet_tcp_bb, OID_AUTO, log_session_limit,
    CTLFLAG_RW, &tcp_log_session_limit, 0,
    "Maximum number of events maintained for each TCP session");

uint32_t tcp_trace_point_config = 0;
SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, number, CTLFLAG_RW,
    &tcp_trace_point_config, TCP_LOG_STATE_HEAD_AUTO,
    "Trace point number to activate (0 = none, 0xffffffff = all)");

uint32_t tcp_trace_point_bb_mode = TCP_LOG_STATE_CONTINUAL;
SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, bbmode, CTLFLAG_RW,
    &tcp_trace_point_bb_mode, TCP_LOG_STATE_HEAD_AUTO,
    "BB logging mode that is activated when the trace point is hit");

int32_t tcp_trace_point_count = 0;
SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, count, CTLFLAG_RW,
    &tcp_trace_point_count, TCP_LOG_STATE_HEAD_AUTO,
    "How many connections that hit the trace point will have BB logging turned on");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW,
    &tcp_log_zone, "Maximum number of events maintained for all TCP sessions");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD,
    &tcp_log_zone, "Current number of events maintained for all TCP sessions");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW,
    &tcp_log_id_bucket_zone, "Maximum number of log IDs");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD,
    &tcp_log_id_bucket_zone, "Current number of log IDs");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW,
    &tcp_log_id_node_zone, "Maximum number of tcpcbs with log IDs");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD,
    &tcp_log_id_node_zone, "Current number of tcpcbs with log IDs");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version,
    0, "Version of log formats exported");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW,
    &tcp_disable_all_bb_logs, 0,
    "Disable all BB logging for all connections");

SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW,
    &tcp_log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_auto_mode, CTLFLAG_RW,
    &tcp_log_auto_mode, 0,
    "Logging mode for auto-selected sessions (default is TCP_LOG_STATE_TAIL)");

SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_auto_all, CTLFLAG_RW,
    &tcp_log_auto_all, 0,
    "Auto-select from all sessions (rather than just those with IDs)");

#ifdef TCPLOG_DEBUG_COUNTERS
counter_u64_t tcp_log_queued;
counter_u64_t tcp_log_que_fail1;
counter_u64_t tcp_log_que_fail2;
counter_u64_t tcp_log_que_fail3;
counter_u64_t tcp_log_que_fail4;
counter_u64_t tcp_log_que_fail5;
counter_u64_t tcp_log_que_copyout;
counter_u64_t tcp_log_que_read;
counter_u64_t tcp_log_que_freed;

SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, queued, CTLFLAG_RD,
    &tcp_log_queued, "Number of entries queued");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail1, CTLFLAG_RD,
    &tcp_log_que_fail1, "Number of entries queued but fail 1");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail2, CTLFLAG_RD,
    &tcp_log_que_fail2,
"Number of entries queued but fail 2"); 171 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail3, CTLFLAG_RD, 172 &tcp_log_que_fail3, "Number of entries queued but fail 3"); 173 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail4, CTLFLAG_RD, 174 &tcp_log_que_fail4, "Number of entries queued but fail 4"); 175 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail5, CTLFLAG_RD, 176 &tcp_log_que_fail5, "Number of entries queued but fail 4"); 177 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, copyout, CTLFLAG_RD, 178 &tcp_log_que_copyout, "Number of entries copied out"); 179 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, read, CTLFLAG_RD, 180 &tcp_log_que_read, "Number of entries read from the queue"); 181 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, CTLFLAG_RD, 182 &tcp_log_que_freed, "Number of entries freed after reading"); 183 #endif 184 185 #ifdef INVARIANTS 186 #define TCPLOG_DEBUG_RINGBUF 187 #endif 188 /* Number of requests to consider a PBCID "active". */ 189 #define ACTIVE_REQUEST_COUNT 10 190 191 /* Statistic tracking for "active" PBCIDs. */ 192 static counter_u64_t tcp_log_pcb_ids_cur; 193 static counter_u64_t tcp_log_pcb_ids_tot; 194 195 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD, 196 &tcp_log_pcb_ids_cur, "Number of pcb IDs allocated in the system"); 197 SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD, 198 &tcp_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated"); 199 200 struct tcp_log_mem 201 { 202 STAILQ_ENTRY(tcp_log_mem) tlm_queue; 203 struct tcp_log_buffer tlm_buf; 204 struct tcp_log_verbose tlm_v; 205 #ifdef TCPLOG_DEBUG_RINGBUF 206 volatile int tlm_refcnt; 207 #endif 208 }; 209 210 /* 60 bytes for the header, + 16 bytes for padding */ 211 static uint8_t zerobuf[76]; 212 213 /* 214 * Lock order: 215 * 1. TCPID_TREE 216 * 2. TCPID_BUCKET 217 * 3. INP 218 * 219 * Rules: 220 * A. You need a lock on the Tree to add/remove buckets. 221 * B. You need a lock on the bucket to add/remove nodes from the bucket. 222 * C. To change information in a node, you need the INP lock if the tln_closed 223 * field is false. Otherwise, you need the bucket lock. (Note that the 224 * tln_closed field can change at any point, so you need to recheck the 225 * entry after acquiring the INP lock.) 226 * D. To remove a node from the bucket, you must have that entry locked, 227 * according to the criteria of Rule C. Also, the node must not be on 228 * the expiry queue. 229 * E. The exception to C is the expiry queue fields, which are locked by 230 * the TCPLOG_EXPIREQ lock. 231 * 232 * Buckets have a reference count. Each node is a reference. Further, 233 * other callers may add reference counts to keep a bucket from disappearing. 234 * You can add a reference as long as you own a lock sufficient to keep the 235 * bucket from disappearing. For example, a common use is: 236 * a. Have a locked INP, but need to lock the TCPID_BUCKET. 237 * b. Add a refcount on the bucket. (Safe because the INP lock prevents 238 * the TCPID_BUCKET from going away.) 239 * c. Drop the INP lock. 240 * d. Acquire a lock on the TCPID_BUCKET. 241 * e. Acquire a lock on the INP. 242 * f. Drop the refcount on the bucket. 243 * (At this point, the bucket may disappear.) 244 * 245 * Expire queue lock: 246 * You can acquire this with either the bucket or INP lock. Don't reverse it. 247 * When the expire code has committed to freeing a node, it resets the expiry 248 * time to SBT_MAX. 
That is the signal to everyone else that they should 249 * leave that node alone. 250 */ 251 static struct rwlock tcp_id_tree_lock; 252 #define TCPID_TREE_WLOCK() rw_wlock(&tcp_id_tree_lock) 253 #define TCPID_TREE_RLOCK() rw_rlock(&tcp_id_tree_lock) 254 #define TCPID_TREE_UPGRADE() rw_try_upgrade(&tcp_id_tree_lock) 255 #define TCPID_TREE_WUNLOCK() rw_wunlock(&tcp_id_tree_lock) 256 #define TCPID_TREE_RUNLOCK() rw_runlock(&tcp_id_tree_lock) 257 #define TCPID_TREE_WLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_WLOCKED) 258 #define TCPID_TREE_RLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_RLOCKED) 259 #define TCPID_TREE_UNLOCK_ASSERT() rw_assert(&tcp_id_tree_lock, RA_UNLOCKED) 260 261 #define TCPID_BUCKET_LOCK_INIT(tlb) mtx_init(&((tlb)->tlb_mtx), "tcp log id bucket", NULL, MTX_DEF) 262 #define TCPID_BUCKET_LOCK_DESTROY(tlb) mtx_destroy(&((tlb)->tlb_mtx)) 263 #define TCPID_BUCKET_LOCK(tlb) mtx_lock(&((tlb)->tlb_mtx)) 264 #define TCPID_BUCKET_UNLOCK(tlb) mtx_unlock(&((tlb)->tlb_mtx)) 265 #define TCPID_BUCKET_LOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_OWNED) 266 #define TCPID_BUCKET_UNLOCK_ASSERT(tlb) mtx_assert(&((tlb)->tlb_mtx), MA_NOTOWNED) 267 268 #define TCPID_BUCKET_REF(tlb) refcount_acquire(&((tlb)->tlb_refcnt)) 269 #define TCPID_BUCKET_UNREF(tlb) refcount_release(&((tlb)->tlb_refcnt)) 270 271 #define TCPLOG_EXPIREQ_LOCK() mtx_lock(&tcp_log_expireq_mtx) 272 #define TCPLOG_EXPIREQ_UNLOCK() mtx_unlock(&tcp_log_expireq_mtx) 273 274 SLIST_HEAD(tcp_log_id_head, tcp_log_id_node); 275 276 struct tcp_log_id_bucket 277 { 278 /* 279 * tlb_id must be first. This lets us use strcmp on 280 * (struct tcp_log_id_bucket *) and (char *) interchangeably. 281 */ 282 char tlb_id[TCP_LOG_ID_LEN]; 283 char tlb_tag[TCP_LOG_TAG_LEN]; 284 RB_ENTRY(tcp_log_id_bucket) tlb_rb; 285 struct tcp_log_id_head tlb_head; 286 struct mtx tlb_mtx; 287 volatile u_int tlb_refcnt; 288 volatile u_int tlb_reqcnt; 289 uint32_t tlb_loglimit; 290 int8_t tlb_logstate; 291 }; 292 293 struct tcp_log_id_node 294 { 295 SLIST_ENTRY(tcp_log_id_node) tln_list; 296 STAILQ_ENTRY(tcp_log_id_node) tln_expireq; /* Locked by the expireq lock */ 297 sbintime_t tln_expiretime; /* Locked by the expireq lock */ 298 299 /* 300 * If INP is NULL, that means the connection has closed. We've 301 * saved the connection endpoint information and the log entries 302 * in the tln_ie and tln_entries members. We've also saved a pointer 303 * to the enclosing bucket here. If INP is not NULL, the information is 304 * in the PCB and not here. 305 */ 306 struct inpcb *tln_inp; 307 struct tcpcb *tln_tp; 308 struct tcp_log_id_bucket *tln_bucket; 309 struct in_endpoints tln_ie; 310 struct tcp_log_stailq tln_entries; 311 int tln_count; 312 volatile int tln_closed; 313 uint8_t tln_af; 314 }; 315 316 enum tree_lock_state { 317 TREE_UNLOCKED = 0, 318 TREE_RLOCKED, 319 TREE_WLOCKED, 320 }; 321 322 /* Do we want to select this session for auto-logging? */ 323 static __inline bool 324 tcp_log_selectauto(void) 325 { 326 327 /* 328 * If we are doing auto-capturing, figure out whether we will capture 329 * this session. 
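 *
 * Illustrative arithmetic (numbers chosen for this example only): with
 * net.inet.tcp.bb.log_auto_ratio set to 100, the shared counter
 * tcp_log_auto_ratio_cur is bumped once per candidate session, and a
 * session is selected whenever the pre-increment counter value is a
 * multiple of 100 (0, 100, 200, ...), i.e. roughly 1 out of every 100
 * candidates.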
330 */ 331 if (tcp_log_auto_ratio && 332 (tcp_disable_all_bb_logs == 0) && 333 (atomic_fetchadd_long(&tcp_log_auto_ratio_cur, 1) % 334 tcp_log_auto_ratio) == 0) 335 return (true); 336 return (false); 337 } 338 339 static __inline int 340 tcp_log_id_cmp(struct tcp_log_id_bucket *a, struct tcp_log_id_bucket *b) 341 { 342 KASSERT(a != NULL, ("tcp_log_id_cmp: argument a is unexpectedly NULL")); 343 KASSERT(b != NULL, ("tcp_log_id_cmp: argument b is unexpectedly NULL")); 344 return strncmp(a->tlb_id, b->tlb_id, TCP_LOG_ID_LEN); 345 } 346 347 RB_GENERATE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) 348 349 static __inline void 350 tcp_log_id_validate_tree_lock(int tree_locked) 351 { 352 353 #ifdef INVARIANTS 354 switch (tree_locked) { 355 case TREE_WLOCKED: 356 TCPID_TREE_WLOCK_ASSERT(); 357 break; 358 case TREE_RLOCKED: 359 TCPID_TREE_RLOCK_ASSERT(); 360 break; 361 case TREE_UNLOCKED: 362 TCPID_TREE_UNLOCK_ASSERT(); 363 break; 364 default: 365 kassert_panic("%s:%d: unknown tree lock state", __func__, 366 __LINE__); 367 } 368 #endif 369 } 370 371 static __inline void 372 tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb) 373 { 374 375 TCPID_TREE_WLOCK_ASSERT(); 376 KASSERT(SLIST_EMPTY(&tlb->tlb_head), 377 ("%s: Attempt to remove non-empty bucket", __func__)); 378 if (RB_REMOVE(tcp_log_id_tree, &tcp_log_id_head, tlb) == NULL) { 379 #ifdef INVARIANTS 380 kassert_panic("%s:%d: error removing element from tree", 381 __func__, __LINE__); 382 #endif 383 } 384 TCPID_BUCKET_LOCK_DESTROY(tlb); 385 counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); 386 uma_zfree(tcp_log_id_bucket_zone, tlb); 387 } 388 389 /* 390 * Call with a referenced and locked bucket. 391 * Will return true if the bucket was freed; otherwise, false. 392 * tlb: The bucket to unreference. 393 * tree_locked: A pointer to the state of the tree lock. If the tree lock 394 * state changes, the function will update it. 395 * inp: If not NULL and the function needs to drop the inp lock to relock the 396 * tree, it will do so. (The caller must ensure inp will not become invalid, 397 * probably by holding a reference to it.) 398 */ 399 static bool 400 tcp_log_unref_bucket(struct tcp_log_id_bucket *tlb, int *tree_locked, 401 struct inpcb *inp) 402 { 403 404 KASSERT(tlb != NULL, ("%s: called with NULL tlb", __func__)); 405 KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", 406 __func__)); 407 408 tcp_log_id_validate_tree_lock(*tree_locked); 409 410 /* 411 * Did we hold the last reference on the tlb? If so, we may need 412 * to free it. (Note that we can realistically only execute the 413 * loop twice: once without a write lock and once with a write 414 * lock.) 415 */ 416 while (TCPID_BUCKET_UNREF(tlb)) { 417 /* 418 * We need a write lock on the tree to free this. 419 * If we can upgrade the tree lock, this is "easy". If we 420 * can't upgrade the tree lock, we need to do this the 421 * "hard" way: unwind all our locks and relock everything. 422 * In the meantime, anything could have changed. We even 423 * need to validate that we still need to free the bucket. 
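 *
 * The temporary TCPID_BUCKET_REF() taken on the "hard" path below
 * follows the refcount rule described in the lock-order comment near
 * the top of this file: we are about to drop every lock that keeps the
 * bucket from disappearing, so we pin it with a reference first,
 * reacquire the locks in TREE -> BUCKET -> INP order, and then loop
 * back to drop that extra reference and re-test whether it was the
 * last one.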
424 */ 425 if (*tree_locked == TREE_RLOCKED && TCPID_TREE_UPGRADE()) 426 *tree_locked = TREE_WLOCKED; 427 else if (*tree_locked != TREE_WLOCKED) { 428 TCPID_BUCKET_REF(tlb); 429 if (inp != NULL) 430 INP_WUNLOCK(inp); 431 TCPID_BUCKET_UNLOCK(tlb); 432 if (*tree_locked == TREE_RLOCKED) 433 TCPID_TREE_RUNLOCK(); 434 TCPID_TREE_WLOCK(); 435 *tree_locked = TREE_WLOCKED; 436 TCPID_BUCKET_LOCK(tlb); 437 if (inp != NULL) 438 INP_WLOCK(inp); 439 continue; 440 } 441 442 /* 443 * We have an empty bucket and a write lock on the tree. 444 * Remove the empty bucket. 445 */ 446 tcp_log_remove_bucket(tlb); 447 return (true); 448 } 449 return (false); 450 } 451 452 /* 453 * Call with a locked bucket. This function will release the lock on the 454 * bucket before returning. 455 * 456 * The caller is responsible for freeing the tp->t_lin/tln node! 457 * 458 * Note: one of tp or both tlb and tln must be supplied. 459 * 460 * inp: A pointer to the inp. If the function needs to drop the inp lock to 461 * acquire the tree write lock, it will do so. (The caller must ensure inp 462 * will not become invalid, probably by holding a reference to it.) 463 * tp: A pointer to the tcpcb. (optional; if specified, tlb and tln are ignored) 464 * tlb: A pointer to the bucket. (optional; ignored if tp is specified) 465 * tln: A pointer to the node. (optional; ignored if tp is specified) 466 * tree_locked: A pointer to the state of the tree lock. If the tree lock 467 * state changes, the function will update it. 468 * 469 * Will return true if the INP lock was reacquired; otherwise, false. 470 */ 471 static bool 472 tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp, 473 struct tcp_log_id_bucket *tlb, struct tcp_log_id_node *tln, 474 int *tree_locked) 475 { 476 int orig_tree_locked; 477 478 KASSERT(tp != NULL || (tlb != NULL && tln != NULL), 479 ("%s: called with tp=%p, tlb=%p, tln=%p", __func__, 480 tp, tlb, tln)); 481 KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked", 482 __func__)); 483 484 if (tp != NULL) { 485 tlb = tp->t_lib; 486 tln = tp->t_lin; 487 KASSERT(tlb != NULL, ("%s: unexpectedly NULL tlb", __func__)); 488 KASSERT(tln != NULL, ("%s: unexpectedly NULL tln", __func__)); 489 } 490 491 tcp_log_id_validate_tree_lock(*tree_locked); 492 TCPID_BUCKET_LOCK_ASSERT(tlb); 493 494 /* 495 * Remove the node, clear the log bucket and node from the TCPCB, and 496 * decrement the bucket refcount. In the process, if this is the 497 * last reference, the bucket will be freed. 
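 *
 * A rough caller sketch, mirroring the expiry path later in this file
 * (it passes a NULL inp and uses the return value to learn whether the
 * call had to take, and leave held, the tree lock):
 *
 *	TCPID_BUCKET_LOCK(tlb);
 *	if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked))
 *		release whichever tree lock tree_locked now reports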
498 */ 499 SLIST_REMOVE(&tlb->tlb_head, tln, tcp_log_id_node, tln_list); 500 if (tp != NULL) { 501 tp->t_lib = NULL; 502 tp->t_lin = NULL; 503 } 504 orig_tree_locked = *tree_locked; 505 if (!tcp_log_unref_bucket(tlb, tree_locked, inp)) 506 TCPID_BUCKET_UNLOCK(tlb); 507 return (*tree_locked != orig_tree_locked); 508 } 509 510 #define RECHECK_INP_CLEAN(cleanup) do { \ 511 if (inp->inp_flags & INP_DROPPED) { \ 512 rv = ECONNRESET; \ 513 cleanup; \ 514 goto done; \ 515 } \ 516 tp = intotcpcb(inp); \ 517 } while (0) 518 519 #define RECHECK_INP() RECHECK_INP_CLEAN(/* noop */) 520 521 static void 522 tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp) 523 { 524 525 INP_WLOCK_ASSERT(tptoinpcb(tp)); 526 527 #ifdef STATS 528 if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL) 529 (void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id)); 530 #endif 531 } 532 533 static void 534 tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb) 535 { 536 537 atomic_fetchadd_int(&tlb->tlb_reqcnt, 1); 538 } 539 540 int 541 tcp_log_apply_ratio(struct tcpcb *tp, int ratio) 542 { 543 struct tcp_log_id_bucket *tlb; 544 struct inpcb *inp = tptoinpcb(tp); 545 uint32_t hash, ratio_hash_thresh; 546 int rv, tree_locked; 547 548 rv = 0; 549 tree_locked = TREE_UNLOCKED; 550 tlb = tp->t_lib; 551 552 INP_WLOCK_ASSERT(inp); 553 if (tlb == NULL) { 554 INP_WUNLOCK(inp); 555 return (EOPNOTSUPP); 556 } 557 if (ratio) 558 ratio_hash_thresh = max(1, UINT32_MAX / ratio); 559 else 560 ratio_hash_thresh = 0; 561 TCPID_BUCKET_REF(tlb); 562 INP_WUNLOCK(inp); 563 TCPID_BUCKET_LOCK(tlb); 564 565 hash = hash32_buf(tlb->tlb_id, strlen(tlb->tlb_id), 0); 566 if (hash > ratio_hash_thresh && tp->_t_logstate == TCP_LOG_STATE_OFF && 567 tlb->tlb_logstate == TCP_LOG_STATE_OFF) { 568 /* 569 * Ratio decision not to log this log ID (and this connection by 570 * way of association). We only apply a log ratio log disable 571 * decision if it would not interfere with a log enable decision 572 * made elsewhere e.g. tcp_log_selectauto() or setsockopt(). 573 */ 574 tlb->tlb_logstate = TCP_LOG_STATE_RATIO_OFF; 575 INP_WLOCK(inp); 576 RECHECK_INP(); 577 (void)tcp_log_state_change(tp, TCP_LOG_STATE_OFF); 578 done: 579 INP_WUNLOCK(inp); 580 } 581 582 INP_UNLOCK_ASSERT(inp); 583 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 584 TCPID_BUCKET_UNLOCK(tlb); 585 586 if (tree_locked == TREE_WLOCKED) { 587 TCPID_TREE_WLOCK_ASSERT(); 588 TCPID_TREE_WUNLOCK(); 589 } else if (tree_locked == TREE_RLOCKED) { 590 TCPID_TREE_RLOCK_ASSERT(); 591 TCPID_TREE_RUNLOCK(); 592 } else 593 TCPID_TREE_UNLOCK_ASSERT(); 594 595 return (rv); 596 } 597 598 /* 599 * Associate the specified tag with a particular TCP log ID. 600 * Called with INPCB locked. Returns with it unlocked. 601 * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID. 
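 *
 * Hypothetical usage sketch (the tag string is made up for
 * illustration); the caller enters with the inpcb write-locked:
 *
 *	error = tcp_log_set_tag(intotcpcb(inp), "example-tag");
 *
 * and must not rely on the inpcb lock afterwards, since it is released
 * on both the success and the EOPNOTSUPP path.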
602 */ 603 int 604 tcp_log_set_tag(struct tcpcb *tp, char *tag) 605 { 606 struct inpcb *inp = tptoinpcb(tp); 607 struct tcp_log_id_bucket *tlb; 608 int tree_locked; 609 610 INP_WLOCK_ASSERT(inp); 611 612 tree_locked = TREE_UNLOCKED; 613 tlb = tp->t_lib; 614 if (tlb == NULL) { 615 INP_WUNLOCK(inp); 616 return (EOPNOTSUPP); 617 } 618 619 TCPID_BUCKET_REF(tlb); 620 INP_WUNLOCK(inp); 621 TCPID_BUCKET_LOCK(tlb); 622 strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN); 623 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 624 TCPID_BUCKET_UNLOCK(tlb); 625 626 if (tree_locked == TREE_WLOCKED) { 627 TCPID_TREE_WLOCK_ASSERT(); 628 TCPID_TREE_WUNLOCK(); 629 } else if (tree_locked == TREE_RLOCKED) { 630 TCPID_TREE_RLOCK_ASSERT(); 631 TCPID_TREE_RUNLOCK(); 632 } else 633 TCPID_TREE_UNLOCK_ASSERT(); 634 635 return (0); 636 } 637 638 /* 639 * Set the TCP log ID for a TCPCB. 640 * Called with INPCB locked. Returns with it unlocked. 641 */ 642 int 643 tcp_log_set_id(struct tcpcb *tp, char *id) 644 { 645 struct tcp_log_id_bucket *tlb, *tmp_tlb; 646 struct tcp_log_id_node *tln; 647 struct inpcb *inp = tptoinpcb(tp); 648 int tree_locked, rv; 649 bool bucket_locked, same; 650 651 tlb = NULL; 652 tln = NULL; 653 tree_locked = TREE_UNLOCKED; 654 bucket_locked = false; 655 656 restart: 657 INP_WLOCK_ASSERT(inp); 658 /* See if the ID is unchanged. */ 659 same = ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) || 660 (tp->t_lib == NULL && *id == 0)); 661 if (tp->_t_logstate && STAILQ_FIRST(&tp->t_logs) && !same) { 662 /* 663 * There are residual logs left we may 664 * be changing id's so dump what we can. 665 */ 666 switch(tp->_t_logstate) { 667 case TCP_LOG_STATE_HEAD_AUTO: 668 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at id switch", 669 M_NOWAIT, false); 670 break; 671 case TCP_LOG_STATE_TAIL_AUTO: 672 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at id switch", 673 M_NOWAIT, false); 674 break; 675 case TCP_LOG_STATE_CONTINUAL: 676 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual at id switch", 677 M_NOWAIT, false); 678 break; 679 case TCP_LOG_VIA_BBPOINTS: 680 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints at id switch", 681 M_NOWAIT, false); 682 break; 683 } 684 } 685 if (same) { 686 if (tp->t_lib != NULL) { 687 tcp_log_increment_reqcnt(tp->t_lib); 688 if ((tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) && 689 (tp->t_log_state_set == 0)) { 690 /* Clone in any logging */ 691 692 tp->_t_logstate = tp->t_lib->tlb_logstate; 693 } 694 if ((tp->t_lib->tlb_loglimit) && 695 (tp->t_log_state_set == 0)) { 696 /* We also have a limit set */ 697 698 tp->t_loglimit = tp->t_lib->tlb_loglimit; 699 } 700 } 701 rv = 0; 702 goto done; 703 } 704 705 /* 706 * If the TCPCB had a previous ID, we need to extricate it from 707 * the previous list. 708 * 709 * Drop the TCPCB lock and lock the tree and the bucket. 710 * Because this is called in the socket context, we (theoretically) 711 * don't need to worry about the INPCB completely going away 712 * while we are gone. 713 */ 714 if (tp->t_lib != NULL) { 715 tlb = tp->t_lib; 716 TCPID_BUCKET_REF(tlb); 717 INP_WUNLOCK(inp); 718 719 if (tree_locked == TREE_UNLOCKED) { 720 TCPID_TREE_RLOCK(); 721 tree_locked = TREE_RLOCKED; 722 } 723 TCPID_BUCKET_LOCK(tlb); 724 bucket_locked = true; 725 INP_WLOCK(inp); 726 727 /* 728 * Unreference the bucket. If our bucket went away, it is no 729 * longer locked or valid. 
730 */ 731 if (tcp_log_unref_bucket(tlb, &tree_locked, inp)) { 732 bucket_locked = false; 733 tlb = NULL; 734 } 735 736 /* Validate the INP. */ 737 RECHECK_INP(); 738 739 /* 740 * Evaluate whether the bucket changed while we were unlocked. 741 * 742 * Possible scenarios here: 743 * 1. Bucket is unchanged and the same one we started with. 744 * 2. The TCPCB no longer has a bucket and our bucket was 745 * freed. 746 * 3. The TCPCB has a new bucket, whether ours was freed. 747 * 4. The TCPCB no longer has a bucket and our bucket was 748 * not freed. 749 * 750 * In cases 2-4, we will start over. In case 1, we will 751 * proceed here to remove the bucket. 752 */ 753 if (tlb == NULL || tp->t_lib != tlb) { 754 KASSERT(bucket_locked || tlb == NULL, 755 ("%s: bucket_locked (%d) and tlb (%p) are " 756 "inconsistent", __func__, bucket_locked, tlb)); 757 758 if (bucket_locked) { 759 TCPID_BUCKET_UNLOCK(tlb); 760 bucket_locked = false; 761 tlb = NULL; 762 } 763 goto restart; 764 } 765 766 /* 767 * Store the (struct tcp_log_id_node) for reuse. Then, remove 768 * it from the bucket. In the process, we may end up relocking. 769 * If so, we need to validate that the INP is still valid, and 770 * the TCPCB entries match we expect. 771 * 772 * We will clear tlb and change the bucket_locked state just 773 * before calling tcp_log_remove_id_node(), since that function 774 * will unlock the bucket. 775 */ 776 if (tln != NULL) 777 uma_zfree(tcp_log_id_node_zone, tln); 778 tln = tp->t_lin; 779 tlb = NULL; 780 bucket_locked = false; 781 if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) { 782 RECHECK_INP(); 783 784 /* 785 * If the TCPCB moved to a new bucket while we had 786 * dropped the lock, restart. 787 */ 788 if (tp->t_lib != NULL || tp->t_lin != NULL) 789 goto restart; 790 } 791 792 /* 793 * Yay! We successfully removed the TCPCB from its old 794 * bucket. Phew! 795 * 796 * On to bigger and better things... 797 */ 798 } 799 800 /* At this point, the TCPCB should not be in any bucket. */ 801 KASSERT(tp->t_lib == NULL, ("%s: tp->t_lib is not NULL", __func__)); 802 803 /* 804 * If the new ID is not empty, we need to now assign this TCPCB to a 805 * new bucket. 806 */ 807 if (*id) { 808 /* Get a new tln, if we don't already have one to reuse. */ 809 if (tln == NULL) { 810 tln = uma_zalloc(tcp_log_id_node_zone, 811 M_NOWAIT | M_ZERO); 812 if (tln == NULL) { 813 rv = ENOBUFS; 814 goto done; 815 } 816 tln->tln_inp = inp; 817 tln->tln_tp = tp; 818 } 819 820 /* 821 * Drop the INP lock for a bit. We don't need it, and dropping 822 * it prevents lock order reversals. 823 */ 824 INP_WUNLOCK(inp); 825 826 /* Make sure we have at least a read lock on the tree. */ 827 tcp_log_id_validate_tree_lock(tree_locked); 828 if (tree_locked == TREE_UNLOCKED) { 829 TCPID_TREE_RLOCK(); 830 tree_locked = TREE_RLOCKED; 831 } 832 833 refind: 834 /* 835 * Remember that we constructed (struct tcp_log_id_node) so 836 * we can safely cast the id to it for the purposes of finding. 837 */ 838 KASSERT(tlb == NULL, ("%s:%d tlb unexpectedly non-NULL", 839 __func__, __LINE__)); 840 tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head, 841 (struct tcp_log_id_bucket *) id); 842 843 /* 844 * If we didn't find a matching bucket, we need to add a new 845 * one. This requires a write lock. But, of course, we will 846 * need to recheck some things when we re-acquire the lock. 
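 *
 * The code below is the usual read-to-write upgrade dance for an
 * rwlock, sketched here in generic form (the real code uses the
 * TCPID_TREE_* wrappers defined above):
 *
 *	if (!rw_try_upgrade(&lock)) {
 *		rw_runlock(&lock);
 *		rw_wlock(&lock);
 *		redo the lookup, since the tree may have changed
 *	}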
847 */ 848 if (tmp_tlb == NULL && tree_locked != TREE_WLOCKED) { 849 tree_locked = TREE_WLOCKED; 850 if (!TCPID_TREE_UPGRADE()) { 851 TCPID_TREE_RUNLOCK(); 852 TCPID_TREE_WLOCK(); 853 854 /* 855 * The tree may have changed while we were 856 * unlocked. 857 */ 858 goto refind; 859 } 860 } 861 862 /* If we need to add a new bucket, do it now. */ 863 if (tmp_tlb == NULL) { 864 /* Allocate new bucket. */ 865 tlb = uma_zalloc(tcp_log_id_bucket_zone, M_NOWAIT); 866 if (tlb == NULL) { 867 rv = ENOBUFS; 868 goto done_noinp; 869 } 870 counter_u64_add(tcp_log_pcb_ids_cur, 1); 871 counter_u64_add(tcp_log_pcb_ids_tot, 1); 872 873 if ((tcp_log_auto_all == false) && 874 tcp_log_auto_mode && 875 tcp_log_selectauto()) { 876 /* Save off the log state */ 877 tlb->tlb_logstate = tcp_log_auto_mode; 878 } else 879 tlb->tlb_logstate = TCP_LOG_STATE_OFF; 880 tlb->tlb_loglimit = 0; 881 tlb->tlb_tag[0] = '\0'; /* Default to an empty tag. */ 882 883 /* 884 * Copy the ID to the bucket. 885 * NB: Don't use strlcpy() unless you are sure 886 * we've always validated NULL termination. 887 * 888 * TODO: When I'm done writing this, see if we 889 * we have correctly validated NULL termination and 890 * can use strlcpy(). :-) 891 */ 892 strncpy(tlb->tlb_id, id, TCP_LOG_ID_LEN - 1); 893 tlb->tlb_id[TCP_LOG_ID_LEN - 1] = '\0'; 894 895 /* 896 * Take the refcount for the first node and go ahead 897 * and lock this. Note that we zero the tlb_mtx 898 * structure, since 0xdeadc0de flips the right bits 899 * for the code to think that this mutex has already 900 * been initialized. :-( 901 */ 902 SLIST_INIT(&tlb->tlb_head); 903 refcount_init(&tlb->tlb_refcnt, 1); 904 tlb->tlb_reqcnt = 1; 905 memset(&tlb->tlb_mtx, 0, sizeof(struct mtx)); 906 TCPID_BUCKET_LOCK_INIT(tlb); 907 TCPID_BUCKET_LOCK(tlb); 908 bucket_locked = true; 909 910 #define FREE_NEW_TLB() do { \ 911 TCPID_BUCKET_LOCK_DESTROY(tlb); \ 912 uma_zfree(tcp_log_id_bucket_zone, tlb); \ 913 counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); \ 914 counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1); \ 915 bucket_locked = false; \ 916 tlb = NULL; \ 917 } while (0) 918 /* 919 * Relock the INP and make sure we are still 920 * unassigned. 921 */ 922 INP_WLOCK(inp); 923 RECHECK_INP_CLEAN(FREE_NEW_TLB()); 924 if (tp->t_lib != NULL) { 925 FREE_NEW_TLB(); 926 goto restart; 927 } 928 929 /* Add the new bucket to the tree. */ 930 tmp_tlb = RB_INSERT(tcp_log_id_tree, &tcp_log_id_head, 931 tlb); 932 KASSERT(tmp_tlb == NULL, 933 ("%s: Unexpected conflicting bucket (%p) while " 934 "adding new bucket (%p)", __func__, tmp_tlb, tlb)); 935 936 /* 937 * If we found a conflicting bucket, free the new 938 * one we made and fall through to use the existing 939 * bucket. 940 */ 941 if (tmp_tlb != NULL) { 942 FREE_NEW_TLB(); 943 INP_WUNLOCK(inp); 944 } 945 #undef FREE_NEW_TLB 946 } 947 948 /* If we found an existing bucket, use it. */ 949 if (tmp_tlb != NULL) { 950 tlb = tmp_tlb; 951 TCPID_BUCKET_LOCK(tlb); 952 bucket_locked = true; 953 954 /* 955 * Relock the INP and make sure we are still 956 * unassigned. 957 */ 958 INP_UNLOCK_ASSERT(inp); 959 INP_WLOCK(inp); 960 RECHECK_INP(); 961 if (tp->t_lib != NULL) { 962 TCPID_BUCKET_UNLOCK(tlb); 963 bucket_locked = false; 964 tlb = NULL; 965 goto restart; 966 } 967 968 /* Take a reference on the bucket. */ 969 TCPID_BUCKET_REF(tlb); 970 971 /* Record the request. */ 972 tcp_log_increment_reqcnt(tlb); 973 } 974 975 tcp_log_grow_tlb(tlb->tlb_id, tp); 976 977 /* Add the new node to the list. 
*/ 978 SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list); 979 tp->t_lib = tlb; 980 tp->t_lin = tln; 981 if (tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) { 982 /* Clone in any logging */ 983 984 tp->_t_logstate = tp->t_lib->tlb_logstate; 985 } 986 if (tp->t_lib->tlb_loglimit) { 987 /* The loglimit too */ 988 989 tp->t_loglimit = tp->t_lib->tlb_loglimit; 990 } 991 tln = NULL; 992 } 993 994 rv = 0; 995 996 done: 997 /* Unlock things, as needed, and return. */ 998 INP_WUNLOCK(inp); 999 done_noinp: 1000 INP_UNLOCK_ASSERT(inp); 1001 if (bucket_locked) { 1002 TCPID_BUCKET_LOCK_ASSERT(tlb); 1003 TCPID_BUCKET_UNLOCK(tlb); 1004 } else if (tlb != NULL) 1005 TCPID_BUCKET_UNLOCK_ASSERT(tlb); 1006 if (tree_locked == TREE_WLOCKED) { 1007 TCPID_TREE_WLOCK_ASSERT(); 1008 TCPID_TREE_WUNLOCK(); 1009 } else if (tree_locked == TREE_RLOCKED) { 1010 TCPID_TREE_RLOCK_ASSERT(); 1011 TCPID_TREE_RUNLOCK(); 1012 } else 1013 TCPID_TREE_UNLOCK_ASSERT(); 1014 if (tln != NULL) 1015 uma_zfree(tcp_log_id_node_zone, tln); 1016 return (rv); 1017 } 1018 1019 /* 1020 * Get the TCP log ID for a TCPCB. 1021 * Called with INPCB locked. 1022 * 'buf' must point to a buffer that is at least TCP_LOG_ID_LEN bytes long. 1023 * Returns number of bytes copied. 1024 */ 1025 size_t 1026 tcp_log_get_id(struct tcpcb *tp, char *buf) 1027 { 1028 size_t len; 1029 1030 INP_LOCK_ASSERT(tptoinpcb(tp)); 1031 if (tp->t_lib != NULL) { 1032 len = strlcpy(buf, tp->t_lib->tlb_id, TCP_LOG_ID_LEN); 1033 KASSERT(len < TCP_LOG_ID_LEN, 1034 ("%s:%d: tp->t_lib->tlb_id too long (%zu)", 1035 __func__, __LINE__, len)); 1036 } else { 1037 *buf = '\0'; 1038 len = 0; 1039 } 1040 return (len); 1041 } 1042 1043 /* 1044 * Get the tag associated with the TCPCB's log ID. 1045 * Called with INPCB locked. Returns with it unlocked. 1046 * 'buf' must point to a buffer that is at least TCP_LOG_TAG_LEN bytes long. 1047 * Returns number of bytes copied. 1048 */ 1049 size_t 1050 tcp_log_get_tag(struct tcpcb *tp, char *buf) 1051 { 1052 struct inpcb *inp = tptoinpcb(tp); 1053 struct tcp_log_id_bucket *tlb; 1054 size_t len; 1055 int tree_locked; 1056 1057 INP_WLOCK_ASSERT(inp); 1058 1059 tree_locked = TREE_UNLOCKED; 1060 tlb = tp->t_lib; 1061 1062 if (tlb != NULL) { 1063 TCPID_BUCKET_REF(tlb); 1064 INP_WUNLOCK(inp); 1065 TCPID_BUCKET_LOCK(tlb); 1066 len = strlcpy(buf, tlb->tlb_tag, TCP_LOG_TAG_LEN); 1067 KASSERT(len < TCP_LOG_TAG_LEN, 1068 ("%s:%d: tp->t_lib->tlb_tag too long (%zu)", 1069 __func__, __LINE__, len)); 1070 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 1071 TCPID_BUCKET_UNLOCK(tlb); 1072 1073 if (tree_locked == TREE_WLOCKED) { 1074 TCPID_TREE_WLOCK_ASSERT(); 1075 TCPID_TREE_WUNLOCK(); 1076 } else if (tree_locked == TREE_RLOCKED) { 1077 TCPID_TREE_RLOCK_ASSERT(); 1078 TCPID_TREE_RUNLOCK(); 1079 } else 1080 TCPID_TREE_UNLOCK_ASSERT(); 1081 } else { 1082 INP_WUNLOCK(inp); 1083 *buf = '\0'; 1084 len = 0; 1085 } 1086 1087 return (len); 1088 } 1089 1090 /* 1091 * Get number of connections with the same log ID. 1092 * Log ID is taken from given TCPCB. 1093 * Called with INPCB locked. 1094 */ 1095 u_int 1096 tcp_log_get_id_cnt(struct tcpcb *tp) 1097 { 1098 1099 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1100 return ((tp->t_lib == NULL) ? 0 : tp->t_lib->tlb_refcnt); 1101 } 1102 1103 #ifdef TCPLOG_DEBUG_RINGBUF 1104 /* 1105 * Functions/macros to increment/decrement reference count for a log 1106 * entry. This should catch when we do a double-free/double-remove or 1107 * a double-add. 
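 *
 * The counter only ever toggles between 0 (the entry is free or not on
 * any queue) and 1 (the entry is on a queue): the add path panics if
 * it sees anything other than 0, and the remove path panics if it sees
 * anything other than 1, which is what catches double add, remove, and
 * free bugs.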
1108 */ 1109 static inline void 1110 _tcp_log_entry_refcnt_add(struct tcp_log_mem *log_entry, const char *func, 1111 int line) 1112 { 1113 int refcnt; 1114 1115 refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, 1); 1116 if (refcnt != 0) 1117 panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 0)", 1118 func, line, log_entry, refcnt); 1119 } 1120 #define tcp_log_entry_refcnt_add(l) \ 1121 _tcp_log_entry_refcnt_add((l), __func__, __LINE__) 1122 1123 static inline void 1124 _tcp_log_entry_refcnt_rem(struct tcp_log_mem *log_entry, const char *func, 1125 int line) 1126 { 1127 int refcnt; 1128 1129 refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, -1); 1130 if (refcnt != 1) 1131 panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 1)", 1132 func, line, log_entry, refcnt); 1133 } 1134 #define tcp_log_entry_refcnt_rem(l) \ 1135 _tcp_log_entry_refcnt_rem((l), __func__, __LINE__) 1136 1137 #else /* !TCPLOG_DEBUG_RINGBUF */ 1138 1139 #define tcp_log_entry_refcnt_add(l) 1140 #define tcp_log_entry_refcnt_rem(l) 1141 1142 #endif 1143 1144 /* 1145 * Cleanup after removing a log entry, but only decrement the count if we 1146 * are running INVARIANTS. 1147 */ 1148 static inline void 1149 tcp_log_free_log_common(struct tcp_log_mem *log_entry, int *count __unused) 1150 { 1151 1152 uma_zfree(tcp_log_zone, log_entry); 1153 #ifdef INVARIANTS 1154 (*count)--; 1155 KASSERT(*count >= 0, 1156 ("%s: count unexpectedly negative", __func__)); 1157 #endif 1158 } 1159 1160 static void 1161 tcp_log_free_entries(struct tcp_log_stailq *head, int *count) 1162 { 1163 struct tcp_log_mem *log_entry; 1164 1165 /* Free the entries. */ 1166 while ((log_entry = STAILQ_FIRST(head)) != NULL) { 1167 STAILQ_REMOVE_HEAD(head, tlm_queue); 1168 tcp_log_entry_refcnt_rem(log_entry); 1169 tcp_log_free_log_common(log_entry, count); 1170 } 1171 } 1172 1173 /* Cleanup after removing a log entry. */ 1174 static inline void 1175 tcp_log_remove_log_cleanup(struct tcpcb *tp, struct tcp_log_mem *log_entry) 1176 { 1177 uma_zfree(tcp_log_zone, log_entry); 1178 tp->t_lognum--; 1179 KASSERT(tp->t_lognum >= 0, 1180 ("%s: tp->t_lognum unexpectedly negative", __func__)); 1181 } 1182 1183 /* Remove a log entry from the head of a list. */ 1184 static inline void 1185 tcp_log_remove_log_head(struct tcpcb *tp, struct tcp_log_mem *log_entry) 1186 { 1187 1188 KASSERT(log_entry == STAILQ_FIRST(&tp->t_logs), 1189 ("%s: attempt to remove non-HEAD log entry", __func__)); 1190 STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); 1191 tcp_log_entry_refcnt_rem(log_entry); 1192 tcp_log_remove_log_cleanup(tp, log_entry); 1193 } 1194 1195 #ifdef TCPLOG_DEBUG_RINGBUF 1196 /* 1197 * Initialize the log entry's reference count, which we want to 1198 * survive allocations. 1199 */ 1200 static int 1201 tcp_log_zone_init(void *mem, int size, int flags __unused) 1202 { 1203 struct tcp_log_mem *tlm; 1204 1205 KASSERT(size >= sizeof(struct tcp_log_mem), 1206 ("%s: unexpectedly short (%d) allocation", __func__, size)); 1207 tlm = (struct tcp_log_mem *)mem; 1208 tlm->tlm_refcnt = 0; 1209 return (0); 1210 } 1211 1212 /* 1213 * Double check that the refcnt is zero on allocation and return. 
1214 */ 1215 static int 1216 tcp_log_zone_ctor(void *mem, int size, void *args __unused, int flags __unused) 1217 { 1218 struct tcp_log_mem *tlm; 1219 1220 KASSERT(size >= sizeof(struct tcp_log_mem), 1221 ("%s: unexpectedly short (%d) allocation", __func__, size)); 1222 tlm = (struct tcp_log_mem *)mem; 1223 if (tlm->tlm_refcnt != 0) 1224 panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", 1225 __func__, __LINE__, tlm, tlm->tlm_refcnt); 1226 return (0); 1227 } 1228 1229 static void 1230 tcp_log_zone_dtor(void *mem, int size, void *args __unused) 1231 { 1232 struct tcp_log_mem *tlm; 1233 1234 KASSERT(size >= sizeof(struct tcp_log_mem), 1235 ("%s: unexpectedly short (%d) allocation", __func__, size)); 1236 tlm = (struct tcp_log_mem *)mem; 1237 if (tlm->tlm_refcnt != 0) 1238 panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)", 1239 __func__, __LINE__, tlm, tlm->tlm_refcnt); 1240 } 1241 #endif /* TCPLOG_DEBUG_RINGBUF */ 1242 1243 /* Do global initialization. */ 1244 void 1245 tcp_log_init(void) 1246 { 1247 1248 tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem), 1249 #ifdef TCPLOG_DEBUG_RINGBUF 1250 tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init, 1251 #else 1252 NULL, NULL, NULL, 1253 #endif 1254 NULL, UMA_ALIGN_PTR, 0); 1255 (void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT); 1256 tcp_log_id_bucket_zone = uma_zcreate("tcp_log_id_bucket", 1257 sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL, 1258 UMA_ALIGN_PTR, 0); 1259 tcp_log_id_node_zone = uma_zcreate("tcp_log_id_node", 1260 sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL, 1261 UMA_ALIGN_PTR, 0); 1262 #ifdef TCPLOG_DEBUG_COUNTERS 1263 tcp_log_queued = counter_u64_alloc(M_WAITOK); 1264 tcp_log_que_fail1 = counter_u64_alloc(M_WAITOK); 1265 tcp_log_que_fail2 = counter_u64_alloc(M_WAITOK); 1266 tcp_log_que_fail3 = counter_u64_alloc(M_WAITOK); 1267 tcp_log_que_fail4 = counter_u64_alloc(M_WAITOK); 1268 tcp_log_que_fail5 = counter_u64_alloc(M_WAITOK); 1269 tcp_log_que_copyout = counter_u64_alloc(M_WAITOK); 1270 tcp_log_que_read = counter_u64_alloc(M_WAITOK); 1271 tcp_log_que_freed = counter_u64_alloc(M_WAITOK); 1272 #endif 1273 tcp_log_pcb_ids_cur = counter_u64_alloc(M_WAITOK); 1274 tcp_log_pcb_ids_tot = counter_u64_alloc(M_WAITOK); 1275 1276 rw_init_flags(&tcp_id_tree_lock, "TCP ID tree", RW_NEW); 1277 mtx_init(&tcp_log_expireq_mtx, "TCP log expireq", NULL, MTX_DEF); 1278 callout_init(&tcp_log_expireq_callout, 1); 1279 } 1280 1281 /* Do per-TCPCB initialization. */ 1282 void 1283 tcp_log_tcpcbinit(struct tcpcb *tp) 1284 { 1285 1286 /* A new TCPCB should start out zero-initialized. */ 1287 STAILQ_INIT(&tp->t_logs); 1288 1289 /* 1290 * If we are doing auto-capturing, figure out whether we will capture 1291 * this session. 1292 */ 1293 tp->t_loglimit = tcp_log_session_limit; 1294 if ((tcp_log_auto_all == true) && 1295 tcp_log_auto_mode && 1296 tcp_log_selectauto()) { 1297 tp->_t_logstate = tcp_log_auto_mode; 1298 tp->t_flags2 |= TF2_LOG_AUTO; 1299 } 1300 } 1301 1302 /* Remove entries */ 1303 static void 1304 tcp_log_expire(void *unused __unused) 1305 { 1306 struct tcp_log_id_bucket *tlb; 1307 struct tcp_log_id_node *tln; 1308 sbintime_t expiry_limit; 1309 int tree_locked; 1310 1311 TCPLOG_EXPIREQ_LOCK(); 1312 if (callout_pending(&tcp_log_expireq_callout)) { 1313 /* Callout was reset. */ 1314 TCPLOG_EXPIREQ_UNLOCK(); 1315 return; 1316 } 1317 1318 /* 1319 * Process entries until we reach one that expires too far in the 1320 * future. Look one second in the future. 
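 *
 * Worked example (times are illustrative): if the callout fires at
 * uptime T, every queued node with tln_expiretime at or before T plus
 * one second is reaped in this pass. If the earliest remaining node
 * expires at T plus 40 seconds, the callout is re-armed for
 * max(T + TCP_LOG_EXPIRE_INTVL, that expiry), i.e. T plus 40 seconds;
 * the max() is what keeps the callout from running more often than
 * every TCP_LOG_EXPIRE_INTVL.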
1321 */ 1322 expiry_limit = getsbinuptime() + SBT_1S; 1323 tree_locked = TREE_UNLOCKED; 1324 1325 while ((tln = STAILQ_FIRST(&tcp_log_expireq_head)) != NULL && 1326 tln->tln_expiretime <= expiry_limit) { 1327 if (!callout_active(&tcp_log_expireq_callout)) { 1328 /* 1329 * Callout was stopped. I guess we should 1330 * just quit at this point. 1331 */ 1332 TCPLOG_EXPIREQ_UNLOCK(); 1333 return; 1334 } 1335 1336 /* 1337 * Remove the node from the head of the list and unlock 1338 * the list. Change the expiry time to SBT_MAX as a signal 1339 * to other threads that we now own this. 1340 */ 1341 STAILQ_REMOVE_HEAD(&tcp_log_expireq_head, tln_expireq); 1342 tln->tln_expiretime = SBT_MAX; 1343 TCPLOG_EXPIREQ_UNLOCK(); 1344 1345 /* 1346 * Remove the node from the bucket. 1347 */ 1348 tlb = tln->tln_bucket; 1349 TCPID_BUCKET_LOCK(tlb); 1350 if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked)) { 1351 tcp_log_id_validate_tree_lock(tree_locked); 1352 if (tree_locked == TREE_WLOCKED) 1353 TCPID_TREE_WUNLOCK(); 1354 else 1355 TCPID_TREE_RUNLOCK(); 1356 tree_locked = TREE_UNLOCKED; 1357 } 1358 1359 /* Drop the INP reference. */ 1360 INP_WLOCK(tln->tln_inp); 1361 if (!in_pcbrele_wlocked(tln->tln_inp)) 1362 INP_WUNLOCK(tln->tln_inp); 1363 1364 /* Free the log records. */ 1365 tcp_log_free_entries(&tln->tln_entries, &tln->tln_count); 1366 1367 /* Free the node. */ 1368 uma_zfree(tcp_log_id_node_zone, tln); 1369 1370 /* Relock the expiry queue. */ 1371 TCPLOG_EXPIREQ_LOCK(); 1372 } 1373 1374 /* 1375 * We've expired all the entries we can. Do we need to reschedule 1376 * ourselves? 1377 */ 1378 callout_deactivate(&tcp_log_expireq_callout); 1379 if (tln != NULL) { 1380 /* 1381 * Get max(now + TCP_LOG_EXPIRE_INTVL, tln->tln_expiretime) and 1382 * set the next callout to that. (This helps ensure we generally 1383 * run the callout no more often than desired.) 1384 */ 1385 expiry_limit = getsbinuptime() + TCP_LOG_EXPIRE_INTVL; 1386 if (expiry_limit < tln->tln_expiretime) 1387 expiry_limit = tln->tln_expiretime; 1388 callout_reset_sbt(&tcp_log_expireq_callout, expiry_limit, 1389 SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE); 1390 } 1391 1392 /* We're done. */ 1393 TCPLOG_EXPIREQ_UNLOCK(); 1394 return; 1395 } 1396 1397 /* 1398 * Move log data from the TCPCB to a new node. This will reset the TCPCB log 1399 * entries and log count; however, it will not touch other things from the 1400 * TCPCB (e.g. t_lin, t_lib). 1401 * 1402 * NOTE: Must hold a lock on the INP. 1403 */ 1404 static void 1405 tcp_log_move_tp_to_node(struct tcpcb *tp, struct tcp_log_id_node *tln) 1406 { 1407 struct inpcb *inp = tptoinpcb(tp); 1408 1409 INP_WLOCK_ASSERT(inp); 1410 1411 tln->tln_ie = inp->inp_inc.inc_ie; 1412 if (inp->inp_inc.inc_flags & INC_ISIPV6) 1413 tln->tln_af = AF_INET6; 1414 else 1415 tln->tln_af = AF_INET; 1416 tln->tln_entries = tp->t_logs; 1417 tln->tln_count = tp->t_lognum; 1418 tln->tln_bucket = tp->t_lib; 1419 1420 /* Clear information from the PCB. 
*/ 1421 STAILQ_INIT(&tp->t_logs); 1422 tp->t_lognum = 0; 1423 } 1424 1425 /* Do per-TCPCB cleanup */ 1426 void 1427 tcp_log_tcpcbfini(struct tcpcb *tp) 1428 { 1429 struct tcp_log_id_node *tln, *tln_first; 1430 struct tcp_log_mem *log_entry; 1431 sbintime_t callouttime; 1432 1433 1434 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1435 if (tp->_t_logstate) { 1436 union tcp_log_stackspecific log; 1437 struct timeval tv; 1438 #ifdef TCP_ACCOUNTING 1439 struct tcp_log_buffer *lgb; 1440 int i; 1441 1442 memset(&log, 0, sizeof(log)); 1443 if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { 1444 for (i = 0; i < TCP_NUM_CNT_COUNTERS; i++) { 1445 log.u_raw.u64_flex[i] = tp->tcp_cnt_counters[i]; 1446 } 1447 lgb = tcp_log_event(tp, NULL, 1448 NULL, 1449 NULL, 1450 TCP_LOG_ACCOUNTING, 0, 1451 0, &log, false, NULL, NULL, 0, &tv); 1452 if (lgb != NULL) { 1453 lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; 1454 lgb->tlb_flex2 = 1; 1455 } else 1456 goto skip_out; 1457 for (i = 0; i<TCP_NUM_CNT_COUNTERS; i++) { 1458 log.u_raw.u64_flex[i] = tp->tcp_proc_time[i]; 1459 } 1460 lgb = tcp_log_event(tp, NULL, 1461 NULL, 1462 NULL, 1463 TCP_LOG_ACCOUNTING, 0, 1464 0, &log, false, NULL, NULL, 0, &tv); 1465 if (lgb != NULL) { 1466 lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; 1467 lgb->tlb_flex2 = 2; 1468 } 1469 } 1470 skip_out: 1471 #endif 1472 log.u_bbr.timeStamp = tcp_get_usecs(&tv); 1473 log.u_bbr.cur_del_rate = tp->t_end_info; 1474 (void)tcp_log_event(tp, NULL, 1475 NULL, 1476 NULL, 1477 TCP_LOG_CONNEND, 0, 1478 0, &log, false, NULL, NULL, 0, &tv); 1479 } 1480 /* 1481 * If we were gathering packets to be automatically dumped, try to do 1482 * it now. If this succeeds, the log information in the TCPCB will be 1483 * cleared. Otherwise, we'll handle the log information as we do 1484 * for other states. 1485 */ 1486 switch(tp->_t_logstate) { 1487 case TCP_LOG_STATE_HEAD_AUTO: 1488 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", 1489 M_NOWAIT, false); 1490 break; 1491 case TCP_LOG_STATE_TAIL_AUTO: 1492 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail", 1493 M_NOWAIT, false); 1494 break; 1495 case TCP_LOG_VIA_BBPOINTS: 1496 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", 1497 M_NOWAIT, false); 1498 break; 1499 case TCP_LOG_STATE_CONTINUAL: 1500 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1501 M_NOWAIT, false); 1502 break; 1503 } 1504 1505 /* 1506 * There are two ways we could keep logs: per-socket or per-ID. If 1507 * we are tracking logs with an ID, then the logs survive the 1508 * destruction of the TCPCB. 1509 * 1510 * If the TCPCB is associated with an ID node, move the logs from the 1511 * TCPCB to the ID node. In theory, this is safe, for reasons which I 1512 * will now explain for my own benefit when I next need to figure out 1513 * this code. :-) 1514 * 1515 * We own the INP lock. Therefore, no one else can change the contents 1516 * of this node (Rule C). Further, no one can remove this node from 1517 * the bucket while we hold the lock (Rule D). Basically, no one can 1518 * mess with this node. That leaves two states in which we could be: 1519 * 1520 * 1. Another thread is currently waiting to acquire the INP lock, with 1521 * plans to do something with this node. When we drop the INP lock, 1522 * they will have a chance to do that. They will recheck the 1523 * tln_closed field (see note to Rule C) and then acquire the 1524 * bucket lock before proceeding further. 1525 * 1526 * 2. Another thread will try to acquire a lock at some point in the 1527 * future. 
If they try to acquire a lock before we set the 1528 * tln_closed field, they will follow state #1. If they try to 1529 * acquire a lock after we set the tln_closed field, they will be 1530 * able to make changes to the node, at will, following Rule C. 1531 * 1532 * Therefore, we currently own this node and can make any changes 1533 * we want. But, as soon as we set the tln_closed field to true, we 1534 * have effectively dropped our lock on the node. (For this reason, we 1535 * also need to make sure our writes are ordered correctly. An atomic 1536 * operation with "release" semantics should be sufficient.) 1537 */ 1538 1539 if (tp->t_lin != NULL) { 1540 struct inpcb *inp = tptoinpcb(tp); 1541 1542 /* Copy the relevant information to the log entry. */ 1543 tln = tp->t_lin; 1544 KASSERT(tln->tln_inp == inp, 1545 ("%s: Mismatched inp (tln->tln_inp=%p, tp inpcb=%p)", 1546 __func__, tln->tln_inp, inp)); 1547 tcp_log_move_tp_to_node(tp, tln); 1548 1549 /* Clear information from the PCB. */ 1550 tp->t_lin = NULL; 1551 tp->t_lib = NULL; 1552 1553 /* 1554 * Take a reference on the INP. This ensures that the INP 1555 * remains valid while the node is on the expiry queue. This 1556 * ensures the INP is valid for other threads that may be 1557 * racing to lock this node when we move it to the expire 1558 * queue. 1559 */ 1560 in_pcbref(inp); 1561 1562 /* 1563 * Store the entry on the expiry list. The exact behavior 1564 * depends on whether we have entries to keep. If so, we 1565 * put the entry at the tail of the list and expire in 1566 * TCP_LOG_EXPIRE_TIME. Otherwise, we expire "now" and put 1567 * the entry at the head of the list. (Handling the cleanup 1568 * via the expiry timer lets us avoid locking messy-ness here.) 1569 */ 1570 tln->tln_expiretime = getsbinuptime(); 1571 TCPLOG_EXPIREQ_LOCK(); 1572 if (tln->tln_count) { 1573 tln->tln_expiretime += TCP_LOG_EXPIRE_TIME; 1574 if (STAILQ_EMPTY(&tcp_log_expireq_head) && 1575 !callout_active(&tcp_log_expireq_callout)) { 1576 /* 1577 * We are adding the first entry and a callout 1578 * is not currently scheduled; therefore, we 1579 * need to schedule one. 1580 */ 1581 callout_reset_sbt(&tcp_log_expireq_callout, 1582 tln->tln_expiretime, SBT_1S, tcp_log_expire, 1583 NULL, C_ABSOLUTE); 1584 } 1585 STAILQ_INSERT_TAIL(&tcp_log_expireq_head, tln, 1586 tln_expireq); 1587 } else { 1588 callouttime = tln->tln_expiretime + 1589 TCP_LOG_EXPIRE_INTVL; 1590 tln_first = STAILQ_FIRST(&tcp_log_expireq_head); 1591 1592 if ((tln_first == NULL || 1593 callouttime < tln_first->tln_expiretime) && 1594 (callout_pending(&tcp_log_expireq_callout) || 1595 !callout_active(&tcp_log_expireq_callout))) { 1596 /* 1597 * The list is empty, or we want to run the 1598 * expire code before the first entry's timer 1599 * fires. Also, we are in a case where a callout 1600 * is not actively running. We want to reset 1601 * the callout to occur sooner. 1602 */ 1603 callout_reset_sbt(&tcp_log_expireq_callout, 1604 callouttime, SBT_1S, tcp_log_expire, NULL, 1605 C_ABSOLUTE); 1606 } 1607 1608 /* 1609 * Insert to the head, or just after the head, as 1610 * appropriate. (This might result in small 1611 * mis-orderings as a bunch of "expire now" entries 1612 * gather at the start of the list, but that should 1613 * not produce big problems, since the expire timer 1614 * will walk through all of them.) 
1615 */ 1616 if (tln_first == NULL || 1617 tln->tln_expiretime < tln_first->tln_expiretime) 1618 STAILQ_INSERT_HEAD(&tcp_log_expireq_head, tln, 1619 tln_expireq); 1620 else 1621 STAILQ_INSERT_AFTER(&tcp_log_expireq_head, 1622 tln_first, tln, tln_expireq); 1623 } 1624 TCPLOG_EXPIREQ_UNLOCK(); 1625 1626 /* 1627 * We are done messing with the tln. After this point, we 1628 * can't touch it. (Note that the "release" semantics should 1629 * be included with the TCPLOG_EXPIREQ_UNLOCK() call above. 1630 * Therefore, they should be unnecessary here. However, it 1631 * seems like a good idea to include them anyway, since we 1632 * really are releasing a lock here.) 1633 */ 1634 atomic_store_rel_int(&tln->tln_closed, 1); 1635 } else { 1636 /* Remove log entries. */ 1637 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 1638 tcp_log_remove_log_head(tp, log_entry); 1639 KASSERT(tp->t_lognum == 0, 1640 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 1641 __func__, tp->t_lognum)); 1642 } 1643 1644 /* 1645 * Change the log state to off (just in case anything tries to sneak 1646 * in a last-minute log). 1647 */ 1648 tp->_t_logstate = TCP_LOG_STATE_OFF; 1649 } 1650 1651 static void 1652 tcp_log_purge_tp_logbuf(struct tcpcb *tp) 1653 { 1654 struct tcp_log_mem *log_entry; 1655 1656 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1657 if (tp->t_lognum == 0) 1658 return; 1659 1660 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 1661 tcp_log_remove_log_head(tp, log_entry); 1662 KASSERT(tp->t_lognum == 0, 1663 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 1664 __func__, tp->t_lognum)); 1665 tp->_t_logstate = TCP_LOG_STATE_OFF; 1666 } 1667 1668 /* 1669 * This logs an event for a TCP socket. Normally, this is called via 1670 * TCP_LOG_EVENT or TCP_LOG_EVENT_VERBOSE. See the documentation for 1671 * TCP_LOG_EVENT(). 1672 */ 1673 1674 struct tcp_log_buffer * 1675 tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, 1676 struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, 1677 union tcp_log_stackspecific *stackinfo, int th_hostorder, 1678 const char *output_caller, const char *func, int line, const struct timeval *itv) 1679 { 1680 struct tcp_log_mem *log_entry; 1681 struct tcp_log_buffer *log_buf; 1682 int attempt_count = 0; 1683 struct tcp_log_verbose *log_verbose; 1684 uint32_t logsn; 1685 1686 KASSERT((func == NULL && line == 0) || (func != NULL && line > 0), 1687 ("%s called with inconsistent func (%p) and line (%d) arguments", 1688 __func__, func, line)); 1689 1690 INP_WLOCK_ASSERT(tptoinpcb(tp)); 1691 if (tcp_disable_all_bb_logs) { 1692 /* 1693 * The global shutdown logging 1694 * switch has been thrown. Call 1695 * the purge function that frees 1696 * purges out the logs and 1697 * turns off logging. 1698 */ 1699 tcp_log_purge_tp_logbuf(tp); 1700 return (NULL); 1701 } 1702 KASSERT(tp->_t_logstate == TCP_LOG_STATE_HEAD || 1703 tp->_t_logstate == TCP_LOG_STATE_TAIL || 1704 tp->_t_logstate == TCP_LOG_STATE_CONTINUAL || 1705 tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO || 1706 tp->_t_logstate == TCP_LOG_VIA_BBPOINTS || 1707 tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO, 1708 ("%s called with unexpected tp->_t_logstate (%d)", __func__, 1709 tp->_t_logstate)); 1710 1711 /* 1712 * Get the serial number. We do this early so it will 1713 * increment even if we end up skipping the log entry for some 1714 * reason. 1715 */ 1716 logsn = tp->t_logsn++; 1717 1718 /* 1719 * Can we get a new log entry? If so, increment the lognum counter 1720 * here. 
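 *
 * Rough summary of the strategy implemented below: while the
 * connection is under its t_loglimit, grab a fresh entry from
 * tcp_log_zone with M_NOWAIT; failing that, either dump the existing
 * buffer (the HEAD_AUTO, CONTINUAL and BBPOINTS states), stop logging
 * altogether (HEAD), or recycle the oldest entry at the head of
 * tp->t_logs in place (the TAIL and TAIL_AUTO states).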
1721 */ 1722 retry: 1723 if (tp->t_lognum < tp->t_loglimit) { 1724 if ((log_entry = uma_zalloc(tcp_log_zone, M_NOWAIT)) != NULL) 1725 tp->t_lognum++; 1726 } else 1727 log_entry = NULL; 1728 1729 /* Do we need to try to reuse? */ 1730 if (log_entry == NULL) { 1731 /* 1732 * Sacrifice auto-logged sessions without a log ID if 1733 * tcp_log_auto_all is false. (If they don't have a log 1734 * ID by now, it is probable that either they won't get one 1735 * or we are resource-constrained.) 1736 */ 1737 if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && 1738 !tcp_log_auto_all) { 1739 if (tcp_log_state_change(tp, TCP_LOG_STATE_CLEAR)) { 1740 #ifdef INVARIANTS 1741 panic("%s:%d: tcp_log_state_change() failed " 1742 "to set tp %p to TCP_LOG_STATE_CLEAR", 1743 __func__, __LINE__, tp); 1744 #endif 1745 tp->_t_logstate = TCP_LOG_STATE_OFF; 1746 } 1747 return (NULL); 1748 } 1749 /* 1750 * If we are in TCP_LOG_STATE_HEAD_AUTO state, try to dump 1751 * the buffers. If successful, deactivate tracing. Otherwise, 1752 * leave it active so we will retry. 1753 */ 1754 if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO && 1755 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", 1756 M_NOWAIT, false)) { 1757 tp->_t_logstate = TCP_LOG_STATE_OFF; 1758 return(NULL); 1759 } else if ((tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) && 1760 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 1761 M_NOWAIT, false)) { 1762 if (attempt_count == 0) { 1763 attempt_count++; 1764 goto retry; 1765 } 1766 #ifdef TCPLOG_DEBUG_COUNTERS 1767 counter_u64_add(tcp_log_que_fail4, 1); 1768 #endif 1769 return(NULL); 1770 1771 } else if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) && 1772 !tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", 1773 M_NOWAIT, false)) { 1774 if (attempt_count == 0) { 1775 attempt_count++; 1776 goto retry; 1777 } 1778 #ifdef TCPLOG_DEBUG_COUNTERS 1779 counter_u64_add(tcp_log_que_fail4, 1); 1780 #endif 1781 return(NULL); 1782 } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) 1783 return(NULL); 1784 1785 /* If in HEAD state, just deactivate the tracing and return. */ 1786 if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { 1787 tp->_t_logstate = TCP_LOG_STATE_OFF; 1788 return(NULL); 1789 } 1790 /* 1791 * Get a buffer to reuse. If that fails, just give up. 1792 * (We can't log anything without a buffer in which to 1793 * put it.) 1794 * 1795 * Note that we don't change the t_lognum counter 1796 * here. Because we are re-using the buffer, the total 1797 * number won't change. 1798 */ 1799 if ((log_entry = STAILQ_FIRST(&tp->t_logs)) == NULL) 1800 return(NULL); 1801 STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue); 1802 tcp_log_entry_refcnt_rem(log_entry); 1803 } 1804 1805 KASSERT(log_entry != NULL, 1806 ("%s: log_entry unexpectedly NULL", __func__)); 1807 1808 /* Extract the log buffer and verbose buffer pointers. */ 1809 log_buf = &log_entry->tlm_buf; 1810 log_verbose = &log_entry->tlm_v; 1811 1812 /* Basic entries. 
*/ 1813 if (itv == NULL) 1814 microuptime(&log_buf->tlb_tv); 1815 else 1816 memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval)); 1817 log_buf->tlb_ticks = ticks; 1818 log_buf->tlb_sn = logsn; 1819 log_buf->tlb_stackid = tp->t_fb->tfb_id; 1820 log_buf->tlb_eventid = eventid; 1821 log_buf->tlb_eventflags = 0; 1822 log_buf->tlb_errno = errornum; 1823 1824 /* Socket buffers */ 1825 if (rxbuf != NULL) { 1826 log_buf->tlb_eventflags |= TLB_FLAG_RXBUF; 1827 log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc; 1828 log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc; 1829 log_buf->tlb_rxbuf.tls_sb_spare = 0; 1830 } else { 1831 log_buf->tlb_rxbuf.tls_sb_acc = 0; 1832 log_buf->tlb_rxbuf.tls_sb_ccc = 0; 1833 } 1834 if (txbuf != NULL) { 1835 log_buf->tlb_eventflags |= TLB_FLAG_TXBUF; 1836 log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc; 1837 log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc; 1838 log_buf->tlb_txbuf.tls_sb_spare = 0; 1839 } else { 1840 log_buf->tlb_txbuf.tls_sb_acc = 0; 1841 log_buf->tlb_txbuf.tls_sb_ccc = 0; 1842 } 1843 /* Copy values from tp to the log entry. */ 1844 #define COPY_STAT(f) log_buf->tlb_ ## f = tp->f 1845 #define COPY_STAT_T(f) log_buf->tlb_ ## f = tp->t_ ## f 1846 COPY_STAT_T(state); 1847 COPY_STAT_T(starttime); 1848 COPY_STAT(iss); 1849 COPY_STAT_T(flags); 1850 COPY_STAT(snd_una); 1851 COPY_STAT(snd_max); 1852 COPY_STAT(snd_cwnd); 1853 COPY_STAT(snd_nxt); 1854 COPY_STAT(snd_recover); 1855 COPY_STAT(snd_wnd); 1856 COPY_STAT(snd_ssthresh); 1857 COPY_STAT_T(srtt); 1858 COPY_STAT_T(rttvar); 1859 COPY_STAT(rcv_up); 1860 COPY_STAT(rcv_adv); 1861 COPY_STAT(rcv_nxt); 1862 COPY_STAT(rcv_wnd); 1863 COPY_STAT_T(dupacks); 1864 COPY_STAT_T(segqlen); 1865 COPY_STAT(snd_numholes); 1866 COPY_STAT(snd_scale); 1867 COPY_STAT(rcv_scale); 1868 COPY_STAT_T(flags2); 1869 COPY_STAT_T(fbyte_in); 1870 COPY_STAT_T(fbyte_out); 1871 #undef COPY_STAT 1872 #undef COPY_STAT_T 1873 /* Copy stack-specific info. */ 1874 if (stackinfo != NULL) { 1875 memcpy(&log_buf->tlb_stackinfo, stackinfo, 1876 sizeof(log_buf->tlb_stackinfo)); 1877 log_buf->tlb_eventflags |= TLB_FLAG_STACKINFO; 1878 } 1879 1880 /* The packet */ 1881 log_buf->tlb_len = len; 1882 if (th) { 1883 int optlen; 1884 1885 log_buf->tlb_eventflags |= TLB_FLAG_HDR; 1886 log_buf->tlb_th = *th; 1887 if (th_hostorder) 1888 tcp_fields_to_net(&log_buf->tlb_th); 1889 optlen = (th->th_off << 2) - sizeof (struct tcphdr); 1890 if (optlen > 0) 1891 memcpy(log_buf->tlb_opts, th + 1, optlen); 1892 } else { 1893 memset(&log_buf->tlb_th, 0, sizeof(*th)); 1894 } 1895 1896 /* Verbose information */ 1897 if (func != NULL) { 1898 log_buf->tlb_eventflags |= TLB_FLAG_VERBOSE; 1899 if (output_caller != NULL) 1900 strlcpy(log_verbose->tlv_snd_frm, output_caller, 1901 TCP_FUNC_LEN); 1902 else 1903 *log_verbose->tlv_snd_frm = 0; 1904 strlcpy(log_verbose->tlv_trace_func, func, TCP_FUNC_LEN); 1905 log_verbose->tlv_trace_line = line; 1906 } 1907 1908 /* Insert the new log at the tail. */ 1909 STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue); 1910 tcp_log_entry_refcnt_add(log_entry); 1911 return (log_buf); 1912 } 1913 1914 /* 1915 * Change the logging state for a TCPCB. Returns 0 on success or an 1916 * error code on failure. 
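 *
 * A minimal usage sketch (illustrative only; the surrounding option
 * handling is assumed rather than taken from this file): a caller that
 * already holds the INP write lock could enable tail-mode logging with
 *
 *	error = tcp_log_state_change(tp, TCP_LOG_STATE_TAIL);
 *	if (error != 0)
 *		return (error);
 *
 * where EBUSY means all BB logging is administratively disabled and
 * ECANCELED means this connection's log ID bucket was probabilistically
 * opted out of logging.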
 */
int
tcp_log_state_change(struct tcpcb *tp, int state)
{
	struct tcp_log_mem *log_entry;
	int rv;

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	rv = 0;
	switch (state) {
	case TCP_LOG_STATE_CLEAR:
		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
			tcp_log_remove_log_head(tp, log_entry);
		/* Fall through */

	case TCP_LOG_STATE_OFF:
		tp->_t_logstate = TCP_LOG_STATE_OFF;
		break;

	case TCP_LOG_STATE_TAIL:
	case TCP_LOG_STATE_HEAD:
	case TCP_LOG_STATE_CONTINUAL:
	case TCP_LOG_VIA_BBPOINTS:
	case TCP_LOG_STATE_HEAD_AUTO:
	case TCP_LOG_STATE_TAIL_AUTO:
		/*
		 * When the RATIO_OFF state is set for the bucket, the log ID
		 * this tp is associated with has been probabilistically opted
		 * out of logging per tcp_log_apply_ratio().
		 */
		if (tp->t_lib == NULL ||
		    tp->t_lib->tlb_logstate != TCP_LOG_STATE_RATIO_OFF) {
			tp->_t_logstate = state;
		} else {
			rv = ECANCELED;
			tp->_t_logstate = TCP_LOG_STATE_OFF;
		}
		break;

	default:
		return (EINVAL);
	}
	if (tcp_disable_all_bb_logs) {
		/* We are prohibited from doing any logging. */
		tp->_t_logstate = TCP_LOG_STATE_OFF;
		rv = EBUSY;
	}
	tp->t_flags2 &= ~(TF2_LOG_AUTO);

	return (rv);
}

/* If tcp_drain() is called, flush half the log entries. */
void
tcp_log_drain(struct tcpcb *tp)
{
	struct tcp_log_mem *log_entry, *next;
	int target, skip;

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	if ((target = tp->t_lognum / 2) == 0)
		return;

	/*
	 * XXXRRS: I don't think it is wise to do this at all. A drain
	 * call only means that we are hitting one of the system mbuf
	 * limits. BB logging, or freeing BB logs, will not create any
	 * more mbufs and really has nothing to do with the system
	 * running out of mbufs. For now, I am changing this to free any
	 * "AUTO" sessions by dumping them out. But this should either be
	 * changed so that it gets called when we hit the BB limit, or it
	 * should not get called at all, since I don't think the
	 * mbuf <-> BB log cleanup is the right thing to do here.
	 */
	/*
	 * If we are logging the "head" packets, we want to discard
	 * from the tail of the queue. Otherwise, we want to discard
	 * from the head.
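	 *
	 * A worked example of the logic below (illustrative numbers
	 * only): with t_lognum == 10, target is 5. In HEAD mode we keep
	 * the five oldest entries and free everything queued after them;
	 * in the modes below that free entries directly, we instead drop
	 * the five oldest entries from the front of the queue.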
1998 */ 1999 if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { 2000 skip = tp->t_lognum - target; 2001 STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue) 2002 if (!--skip) 2003 break; 2004 KASSERT(log_entry != NULL, 2005 ("%s: skipped through all entries!", __func__)); 2006 if (log_entry == NULL) 2007 return; 2008 while ((next = STAILQ_NEXT(log_entry, tlm_queue)) != NULL) { 2009 STAILQ_REMOVE_AFTER(&tp->t_logs, log_entry, tlm_queue); 2010 tcp_log_entry_refcnt_rem(next); 2011 tcp_log_remove_log_cleanup(tp, next); 2012 #ifdef INVARIANTS 2013 target--; 2014 #endif 2015 } 2016 KASSERT(target == 0, 2017 ("%s: After removing from tail, target was %d", __func__, 2018 target)); 2019 } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) { 2020 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at drain", 2021 M_NOWAIT, false); 2022 } else if (tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO) { 2023 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at drain", 2024 M_NOWAIT, false); 2025 } else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) { 2026 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", 2027 M_NOWAIT, false); 2028 } else if (tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) { 2029 (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", 2030 M_NOWAIT, false); 2031 } else { 2032 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL && 2033 target--) 2034 tcp_log_remove_log_head(tp, log_entry); 2035 KASSERT(target <= 0, 2036 ("%s: After removing from head, target was %d", __func__, 2037 target)); 2038 KASSERT(tp->t_lognum > 0, 2039 ("%s: After removing from head, tp->t_lognum was %d", 2040 __func__, target)); 2041 KASSERT(log_entry != NULL, 2042 ("%s: After removing from head, the tailq was empty", 2043 __func__)); 2044 } 2045 } 2046 2047 static inline int 2048 tcp_log_copyout(struct sockopt *sopt, void *src, void *dst, size_t len) 2049 { 2050 2051 if (sopt->sopt_td != NULL) 2052 return (copyout(src, dst, len)); 2053 bcopy(src, dst, len); 2054 return (0); 2055 } 2056 2057 static int 2058 tcp_log_logs_to_buf(struct sockopt *sopt, struct tcp_log_stailq *log_tailqp, 2059 struct tcp_log_buffer **end, int count) 2060 { 2061 struct tcp_log_buffer *out_entry; 2062 struct tcp_log_mem *log_entry; 2063 size_t entrysize; 2064 int error; 2065 #ifdef INVARIANTS 2066 int orig_count = count; 2067 #endif 2068 2069 /* Copy the data out. */ 2070 error = 0; 2071 out_entry = (struct tcp_log_buffer *) sopt->sopt_val; 2072 STAILQ_FOREACH(log_entry, log_tailqp, tlm_queue) { 2073 count--; 2074 KASSERT(count >= 0, 2075 ("%s:%d: Exceeded expected count (%d) processing list %p", 2076 __func__, __LINE__, orig_count, log_tailqp)); 2077 2078 #ifdef TCPLOG_DEBUG_COUNTERS 2079 counter_u64_add(tcp_log_que_copyout, 1); 2080 #endif 2081 2082 /* 2083 * Skip copying out the header if it isn't present. 2084 * Instead, copy out zeros (to ensure we don't leak info). 2085 * TODO: Make sure we truly do zero everything we don't 2086 * explicitly set. 2087 */ 2088 if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR) 2089 entrysize = sizeof(struct tcp_log_buffer); 2090 else 2091 entrysize = offsetof(struct tcp_log_buffer, tlb_th); 2092 error = tcp_log_copyout(sopt, &log_entry->tlm_buf, out_entry, 2093 entrysize); 2094 if (error) 2095 break; 2096 if (!(log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)) { 2097 error = tcp_log_copyout(sopt, zerobuf, 2098 ((uint8_t *)out_entry) + entrysize, 2099 sizeof(struct tcp_log_buffer) - entrysize); 2100 } 2101 2102 /* 2103 * Copy out the verbose bit, if needed. 
Either way, 2104 * increment the output pointer the correct amount. 2105 */ 2106 if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) { 2107 error = tcp_log_copyout(sopt, &log_entry->tlm_v, 2108 out_entry->tlb_verbose, 2109 sizeof(struct tcp_log_verbose)); 2110 if (error) 2111 break; 2112 out_entry = (struct tcp_log_buffer *) 2113 (((uint8_t *) (out_entry + 1)) + 2114 sizeof(struct tcp_log_verbose)); 2115 } else 2116 out_entry++; 2117 } 2118 *end = out_entry; 2119 KASSERT(error || count == 0, 2120 ("%s:%d: Less than expected count (%d) processing list %p" 2121 " (%d remain)", __func__, __LINE__, orig_count, 2122 log_tailqp, count)); 2123 2124 return (error); 2125 } 2126 2127 /* 2128 * Copy out the buffer. Note that we do incremental copying, so 2129 * sooptcopyout() won't work. However, the goal is to produce the same 2130 * end result as if we copied in the entire user buffer, updated it, 2131 * and then used sooptcopyout() to copy it out. 2132 * 2133 * NOTE: This should be called with a write lock on the PCB; however, 2134 * the function will drop it after it extracts the data from the TCPCB. 2135 */ 2136 int 2137 tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp) 2138 { 2139 struct tcp_log_stailq log_tailq; 2140 struct tcp_log_mem *log_entry, *log_next; 2141 struct tcp_log_buffer *out_entry; 2142 struct inpcb *inp = tptoinpcb(tp); 2143 size_t outsize, entrysize; 2144 int error, outnum; 2145 2146 INP_WLOCK_ASSERT(inp); 2147 2148 /* 2149 * Determine which log entries will fit in the buffer. As an 2150 * optimization, skip this if all the entries will clearly fit 2151 * in the buffer. (However, get an exact size if we are using 2152 * INVARIANTS.) 2153 */ 2154 #ifndef INVARIANTS 2155 if (sopt->sopt_valsize / (sizeof(struct tcp_log_buffer) + 2156 sizeof(struct tcp_log_verbose)) >= tp->t_lognum) { 2157 log_entry = STAILQ_LAST(&tp->t_logs, tcp_log_mem, tlm_queue); 2158 log_next = NULL; 2159 outsize = 0; 2160 outnum = tp->t_lognum; 2161 } else { 2162 #endif 2163 outsize = outnum = 0; 2164 log_entry = NULL; 2165 STAILQ_FOREACH(log_next, &tp->t_logs, tlm_queue) { 2166 entrysize = sizeof(struct tcp_log_buffer); 2167 if (log_next->tlm_buf.tlb_eventflags & 2168 TLB_FLAG_VERBOSE) 2169 entrysize += sizeof(struct tcp_log_verbose); 2170 if ((sopt->sopt_valsize - outsize) < entrysize) 2171 break; 2172 outsize += entrysize; 2173 outnum++; 2174 log_entry = log_next; 2175 } 2176 KASSERT(outsize <= sopt->sopt_valsize, 2177 ("%s: calculated output size (%zu) greater than available" 2178 "space (%zu)", __func__, outsize, sopt->sopt_valsize)); 2179 #ifndef INVARIANTS 2180 } 2181 #endif 2182 2183 /* 2184 * Copy traditional sooptcopyout() behavior: if sopt->sopt_val 2185 * is NULL, silently skip the copy. However, in this case, we 2186 * will leave the list alone and return. Functionally, this 2187 * gives userspace a way to poll for an approximate buffer 2188 * size they will need to get the log entries. 2189 */ 2190 if (sopt->sopt_val == NULL) { 2191 INP_WUNLOCK(inp); 2192 if (outsize == 0) { 2193 outsize = outnum * (sizeof(struct tcp_log_buffer) + 2194 sizeof(struct tcp_log_verbose)); 2195 } 2196 if (sopt->sopt_valsize > outsize) 2197 sopt->sopt_valsize = outsize; 2198 return (0); 2199 } 2200 2201 /* 2202 * Break apart the list. We'll save the ones we want to copy 2203 * out locally and remove them from the TCPCB list. We can 2204 * then drop the INPCB lock while we do the copyout. 2205 * 2206 * There are roughly three cases: 2207 * 1. There was nothing to copy out. 
That's easy: drop the 2208 * lock and return. 2209 * 2. We are copying out the entire list. Again, that's easy: 2210 * move the whole list. 2211 * 3. We are copying out a partial list. That's harder. We 2212 * need to update the list book-keeping entries. 2213 */ 2214 if (log_entry != NULL && log_next == NULL) { 2215 /* Move entire list. */ 2216 KASSERT(outnum == tp->t_lognum, 2217 ("%s:%d: outnum (%d) should match tp->t_lognum (%d)", 2218 __func__, __LINE__, outnum, tp->t_lognum)); 2219 log_tailq = tp->t_logs; 2220 tp->t_lognum = 0; 2221 STAILQ_INIT(&tp->t_logs); 2222 } else if (log_entry != NULL) { 2223 /* Move partial list. */ 2224 KASSERT(outnum < tp->t_lognum, 2225 ("%s:%d: outnum (%d) not less than tp->t_lognum (%d)", 2226 __func__, __LINE__, outnum, tp->t_lognum)); 2227 STAILQ_FIRST(&log_tailq) = STAILQ_FIRST(&tp->t_logs); 2228 STAILQ_FIRST(&tp->t_logs) = STAILQ_NEXT(log_entry, tlm_queue); 2229 KASSERT(STAILQ_NEXT(log_entry, tlm_queue) != NULL, 2230 ("%s:%d: tp->t_logs is unexpectedly shorter than expected" 2231 "(tp: %p, log_tailq: %p, outnum: %d, tp->t_lognum: %d)", 2232 __func__, __LINE__, tp, &log_tailq, outnum, tp->t_lognum)); 2233 STAILQ_NEXT(log_entry, tlm_queue) = NULL; 2234 log_tailq.stqh_last = &STAILQ_NEXT(log_entry, tlm_queue); 2235 tp->t_lognum -= outnum; 2236 } else 2237 STAILQ_INIT(&log_tailq); 2238 2239 /* Drop the PCB lock. */ 2240 INP_WUNLOCK(inp); 2241 2242 /* Copy the data out. */ 2243 error = tcp_log_logs_to_buf(sopt, &log_tailq, &out_entry, outnum); 2244 2245 if (error) { 2246 /* Restore list */ 2247 INP_WLOCK(inp); 2248 if ((inp->inp_flags & INP_DROPPED) == 0) { 2249 tp = intotcpcb(inp); 2250 2251 /* Merge the two lists. */ 2252 STAILQ_CONCAT(&log_tailq, &tp->t_logs); 2253 tp->t_logs = log_tailq; 2254 tp->t_lognum += outnum; 2255 } 2256 INP_WUNLOCK(inp); 2257 } else { 2258 /* Sanity check entries */ 2259 KASSERT(((caddr_t)out_entry - (caddr_t)sopt->sopt_val) == 2260 outsize, ("%s: Actual output size (%zu) != " 2261 "calculated output size (%zu)", __func__, 2262 (size_t)((caddr_t)out_entry - (caddr_t)sopt->sopt_val), 2263 outsize)); 2264 2265 /* Free the entries we just copied out. */ 2266 STAILQ_FOREACH_SAFE(log_entry, &log_tailq, tlm_queue, log_next) { 2267 tcp_log_entry_refcnt_rem(log_entry); 2268 uma_zfree(tcp_log_zone, log_entry); 2269 } 2270 } 2271 2272 sopt->sopt_valsize = (size_t)((caddr_t)out_entry - 2273 (caddr_t)sopt->sopt_val); 2274 return (error); 2275 } 2276 2277 static void 2278 tcp_log_free_queue(struct tcp_log_dev_queue *param) 2279 { 2280 struct tcp_log_dev_log_queue *entry; 2281 2282 KASSERT(param != NULL, ("%s: called with NULL param", __func__)); 2283 if (param == NULL) 2284 return; 2285 2286 entry = (struct tcp_log_dev_log_queue *)param; 2287 2288 /* Free the entries. */ 2289 tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count); 2290 2291 /* Free the buffer, if it is allocated. */ 2292 if (entry->tldl_common.tldq_buf != NULL) 2293 free(entry->tldl_common.tldq_buf, M_TCPLOGDEV); 2294 2295 /* Free the queue entry. */ 2296 free(entry, M_TCPLOGDEV); 2297 } 2298 2299 static struct tcp_log_common_header * 2300 tcp_log_expandlogbuf(struct tcp_log_dev_queue *param) 2301 { 2302 struct tcp_log_dev_log_queue *entry; 2303 struct tcp_log_header *hdr; 2304 uint8_t *end; 2305 struct sockopt sopt; 2306 int error; 2307 2308 entry = (struct tcp_log_dev_log_queue *)param; 2309 2310 /* Take a worst-case guess at space needs. 
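	 * That is, one struct tcp_log_header plus, for each of the
	 * tldl_count entries, room for both a struct tcp_log_buffer and
	 * a struct tcp_log_verbose. Not every entry carries a verbose
	 * block, which is why this is an upper bound rather than an
	 * exact size.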
*/ 2311 sopt.sopt_valsize = sizeof(struct tcp_log_header) + 2312 entry->tldl_count * (sizeof(struct tcp_log_buffer) + 2313 sizeof(struct tcp_log_verbose)); 2314 hdr = malloc(sopt.sopt_valsize, M_TCPLOGDEV, M_NOWAIT); 2315 if (hdr == NULL) { 2316 #ifdef TCPLOG_DEBUG_COUNTERS 2317 counter_u64_add(tcp_log_que_fail5, entry->tldl_count); 2318 #endif 2319 return (NULL); 2320 } 2321 sopt.sopt_val = hdr + 1; 2322 sopt.sopt_valsize -= sizeof(struct tcp_log_header); 2323 sopt.sopt_td = NULL; 2324 2325 error = tcp_log_logs_to_buf(&sopt, &entry->tldl_entries, 2326 (struct tcp_log_buffer **)&end, entry->tldl_count); 2327 if (error) { 2328 free(hdr, M_TCPLOGDEV); 2329 return (NULL); 2330 } 2331 2332 /* Free the entries. */ 2333 tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count); 2334 entry->tldl_count = 0; 2335 2336 memset(hdr, 0, sizeof(struct tcp_log_header)); 2337 hdr->tlh_version = TCP_LOG_BUF_VER; 2338 hdr->tlh_type = TCP_LOG_DEV_TYPE_BBR; 2339 hdr->tlh_length = end - (uint8_t *)hdr; 2340 hdr->tlh_ie = entry->tldl_ie; 2341 hdr->tlh_af = entry->tldl_af; 2342 getboottime(&hdr->tlh_offset); 2343 strlcpy(hdr->tlh_id, entry->tldl_id, TCP_LOG_ID_LEN); 2344 strlcpy(hdr->tlh_tag, entry->tldl_tag, TCP_LOG_TAG_LEN); 2345 strlcpy(hdr->tlh_reason, entry->tldl_reason, TCP_LOG_REASON_LEN); 2346 return ((struct tcp_log_common_header *)hdr); 2347 } 2348 2349 /* 2350 * Queue the tcpcb's log buffer for transmission via the log buffer facility. 2351 * 2352 * NOTE: This should be called with a write lock on the PCB. 2353 * 2354 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop 2355 * and reacquire the INP lock if it needs to do so. 2356 * 2357 * If force is false, this will only dump auto-logged sessions if 2358 * tcp_log_auto_all is true or if there is a log ID defined for the session. 2359 */ 2360 int 2361 tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force) 2362 { 2363 struct tcp_log_dev_log_queue *entry; 2364 struct inpcb *inp = tptoinpcb(tp); 2365 #ifdef TCPLOG_DEBUG_COUNTERS 2366 int num_entries; 2367 #endif 2368 2369 INP_WLOCK_ASSERT(inp); 2370 2371 /* If there are no log entries, there is nothing to do. */ 2372 if (tp->t_lognum == 0) 2373 return (0); 2374 2375 /* Check for a log ID. */ 2376 if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) && 2377 !tcp_log_auto_all && !force) { 2378 struct tcp_log_mem *log_entry; 2379 2380 /* 2381 * We needed a log ID and none was found. Free the log entries 2382 * and return success. Also, cancel further logging. If the 2383 * session doesn't have a log ID by now, we'll assume it isn't 2384 * going to get one. 2385 */ 2386 while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) 2387 tcp_log_remove_log_head(tp, log_entry); 2388 KASSERT(tp->t_lognum == 0, 2389 ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", 2390 __func__, tp->t_lognum)); 2391 tp->_t_logstate = TCP_LOG_STATE_OFF; 2392 return (0); 2393 } 2394 2395 /* 2396 * Allocate memory. If we must wait, we'll need to drop the locks 2397 * and reacquire them (and do all the related business that goes 2398 * along with that). 
2399 */ 2400 entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV, 2401 M_NOWAIT); 2402 if (entry == NULL && (how & M_NOWAIT)) { 2403 #ifdef TCPLOG_DEBUG_COUNTERS 2404 counter_u64_add(tcp_log_que_fail3, 1); 2405 #endif 2406 return (ENOBUFS); 2407 } 2408 if (entry == NULL) { 2409 INP_WUNLOCK(inp); 2410 entry = malloc(sizeof(struct tcp_log_dev_log_queue), 2411 M_TCPLOGDEV, M_WAITOK); 2412 INP_WLOCK(inp); 2413 /* 2414 * Note that this check is slightly overly-restrictive in 2415 * that the TCB can survive either of these events. 2416 * However, there is currently not a good way to ensure 2417 * that is the case. So, if we hit this M_WAIT path, we 2418 * may end up dropping some entries. That seems like a 2419 * small price to pay for safety. 2420 */ 2421 if (inp->inp_flags & INP_DROPPED) { 2422 free(entry, M_TCPLOGDEV); 2423 #ifdef TCPLOG_DEBUG_COUNTERS 2424 counter_u64_add(tcp_log_que_fail2, 1); 2425 #endif 2426 return (ECONNRESET); 2427 } 2428 tp = intotcpcb(inp); 2429 if (tp->t_lognum == 0) { 2430 free(entry, M_TCPLOGDEV); 2431 return (0); 2432 } 2433 } 2434 2435 /* Fill in the unique parts of the queue entry. */ 2436 if (tp->t_lib != NULL) { 2437 strlcpy(entry->tldl_id, tp->t_lib->tlb_id, TCP_LOG_ID_LEN); 2438 strlcpy(entry->tldl_tag, tp->t_lib->tlb_tag, TCP_LOG_TAG_LEN); 2439 } else { 2440 strlcpy(entry->tldl_id, "UNKNOWN", TCP_LOG_ID_LEN); 2441 strlcpy(entry->tldl_tag, "UNKNOWN", TCP_LOG_TAG_LEN); 2442 } 2443 if (reason != NULL) 2444 strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN); 2445 else 2446 strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_ID_LEN); 2447 entry->tldl_ie = inp->inp_inc.inc_ie; 2448 if (inp->inp_inc.inc_flags & INC_ISIPV6) 2449 entry->tldl_af = AF_INET6; 2450 else 2451 entry->tldl_af = AF_INET; 2452 entry->tldl_entries = tp->t_logs; 2453 entry->tldl_count = tp->t_lognum; 2454 2455 /* Fill in the common parts of the queue entry. */ 2456 entry->tldl_common.tldq_buf = NULL; 2457 entry->tldl_common.tldq_xform = tcp_log_expandlogbuf; 2458 entry->tldl_common.tldq_dtor = tcp_log_free_queue; 2459 2460 /* Clear the log data from the TCPCB. */ 2461 #ifdef TCPLOG_DEBUG_COUNTERS 2462 num_entries = tp->t_lognum; 2463 #endif 2464 tp->t_lognum = 0; 2465 STAILQ_INIT(&tp->t_logs); 2466 2467 /* Add the entry. If no one is listening, free the entry. */ 2468 if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) { 2469 tcp_log_free_queue((struct tcp_log_dev_queue *)entry); 2470 #ifdef TCPLOG_DEBUG_COUNTERS 2471 counter_u64_add(tcp_log_que_fail1, num_entries); 2472 } else { 2473 counter_u64_add(tcp_log_queued, num_entries); 2474 #endif 2475 } 2476 return (0); 2477 } 2478 2479 /* 2480 * Queue the log_id_node's log buffers for transmission via the log buffer 2481 * facility. 2482 * 2483 * NOTE: This should be called with the bucket locked and referenced. 2484 * 2485 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop 2486 * and reacquire the bucket lock if it needs to do so. (The caller must 2487 * ensure that the tln is no longer on any lists so no one else will mess 2488 * with this while the lock is dropped!) 
2489 */ 2490 static int 2491 tcp_log_dump_node_logbuf(struct tcp_log_id_node *tln, char *reason, int how) 2492 { 2493 struct tcp_log_dev_log_queue *entry; 2494 struct tcp_log_id_bucket *tlb; 2495 2496 tlb = tln->tln_bucket; 2497 TCPID_BUCKET_LOCK_ASSERT(tlb); 2498 KASSERT(tlb->tlb_refcnt > 0, 2499 ("%s:%d: Called with unreferenced bucket (tln=%p, tlb=%p)", 2500 __func__, __LINE__, tln, tlb)); 2501 KASSERT(tln->tln_closed, 2502 ("%s:%d: Called for node with tln_closed==false (tln=%p)", 2503 __func__, __LINE__, tln)); 2504 2505 /* If there are no log entries, there is nothing to do. */ 2506 if (tln->tln_count == 0) 2507 return (0); 2508 2509 /* 2510 * Allocate memory. If we must wait, we'll need to drop the locks 2511 * and reacquire them (and do all the related business that goes 2512 * along with that). 2513 */ 2514 entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV, 2515 M_NOWAIT); 2516 if (entry == NULL && (how & M_NOWAIT)) 2517 return (ENOBUFS); 2518 if (entry == NULL) { 2519 TCPID_BUCKET_UNLOCK(tlb); 2520 entry = malloc(sizeof(struct tcp_log_dev_log_queue), 2521 M_TCPLOGDEV, M_WAITOK); 2522 TCPID_BUCKET_LOCK(tlb); 2523 } 2524 2525 /* Fill in the common parts of the queue entry.. */ 2526 entry->tldl_common.tldq_buf = NULL; 2527 entry->tldl_common.tldq_xform = tcp_log_expandlogbuf; 2528 entry->tldl_common.tldq_dtor = tcp_log_free_queue; 2529 2530 /* Fill in the unique parts of the queue entry. */ 2531 strlcpy(entry->tldl_id, tlb->tlb_id, TCP_LOG_ID_LEN); 2532 strlcpy(entry->tldl_tag, tlb->tlb_tag, TCP_LOG_TAG_LEN); 2533 if (reason != NULL) 2534 strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN); 2535 else 2536 strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_ID_LEN); 2537 entry->tldl_ie = tln->tln_ie; 2538 entry->tldl_entries = tln->tln_entries; 2539 entry->tldl_count = tln->tln_count; 2540 entry->tldl_af = tln->tln_af; 2541 2542 /* Add the entry. If no one is listening, free the entry. */ 2543 if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) 2544 tcp_log_free_queue((struct tcp_log_dev_queue *)entry); 2545 2546 return (0); 2547 } 2548 2549 /* 2550 * Queue the log buffers for all sessions in a bucket for transmissions via 2551 * the log buffer facility. 2552 * 2553 * NOTE: This should be called with a locked bucket; however, the function 2554 * will drop the lock. 2555 */ 2556 #define LOCAL_SAVE 10 2557 static void 2558 tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason) 2559 { 2560 struct tcp_log_id_node local_entries[LOCAL_SAVE]; 2561 struct inpcb *inp; 2562 struct tcpcb *tp; 2563 struct tcp_log_id_node *cur_tln, *prev_tln, *tmp_tln; 2564 int i, num_local_entries, tree_locked; 2565 bool expireq_locked; 2566 2567 TCPID_BUCKET_LOCK_ASSERT(tlb); 2568 2569 /* 2570 * Take a reference on the bucket to keep it from disappearing until 2571 * we are done. 2572 */ 2573 TCPID_BUCKET_REF(tlb); 2574 2575 /* 2576 * We'll try to create these without dropping locks. However, we 2577 * might very well need to drop locks to get memory. If that's the 2578 * case, we'll save up to 10 on the stack, and sacrifice the rest. 2579 * (Otherwise, we need to worry about finding our place again in a 2580 * potentially changed list. It just doesn't seem worth the trouble 2581 * to do that. 2582 */ 2583 expireq_locked = false; 2584 num_local_entries = 0; 2585 prev_tln = NULL; 2586 tree_locked = TREE_UNLOCKED; 2587 SLIST_FOREACH_SAFE(cur_tln, &tlb->tlb_head, tln_list, tmp_tln) { 2588 /* 2589 * If this isn't associated with a TCPCB, we can pull it off 2590 * the list now. 
We need to be careful that the expire timer 2591 * hasn't already taken ownership (tln_expiretime == SBT_MAX). 2592 * If so, we let the expire timer code free the data. 2593 */ 2594 if (cur_tln->tln_closed) { 2595 no_inp: 2596 /* 2597 * Get the expireq lock so we can get a consistent 2598 * read of tln_expiretime and so we can remove this 2599 * from the expireq. 2600 */ 2601 if (!expireq_locked) { 2602 TCPLOG_EXPIREQ_LOCK(); 2603 expireq_locked = true; 2604 } 2605 2606 /* 2607 * We ignore entries with tln_expiretime == SBT_MAX. 2608 * The expire timer code already owns those. 2609 */ 2610 KASSERT(cur_tln->tln_expiretime > (sbintime_t) 0, 2611 ("%s:%d: node on the expire queue without positive " 2612 "expire time", __func__, __LINE__)); 2613 if (cur_tln->tln_expiretime == SBT_MAX) { 2614 prev_tln = cur_tln; 2615 continue; 2616 } 2617 2618 /* Remove the entry from the expireq. */ 2619 STAILQ_REMOVE(&tcp_log_expireq_head, cur_tln, 2620 tcp_log_id_node, tln_expireq); 2621 2622 /* Remove the entry from the bucket. */ 2623 if (prev_tln != NULL) 2624 SLIST_REMOVE_AFTER(prev_tln, tln_list); 2625 else 2626 SLIST_REMOVE_HEAD(&tlb->tlb_head, tln_list); 2627 2628 /* 2629 * Drop the INP and bucket reference counts. Due to 2630 * lock-ordering rules, we need to drop the expire 2631 * queue lock. 2632 */ 2633 TCPLOG_EXPIREQ_UNLOCK(); 2634 expireq_locked = false; 2635 2636 /* Drop the INP reference. */ 2637 INP_WLOCK(cur_tln->tln_inp); 2638 if (!in_pcbrele_wlocked(cur_tln->tln_inp)) 2639 INP_WUNLOCK(cur_tln->tln_inp); 2640 2641 if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) { 2642 #ifdef INVARIANTS 2643 panic("%s: Bucket refcount unexpectedly 0.", 2644 __func__); 2645 #endif 2646 /* 2647 * Recover as best we can: free the entry we 2648 * own. 2649 */ 2650 tcp_log_free_entries(&cur_tln->tln_entries, 2651 &cur_tln->tln_count); 2652 uma_zfree(tcp_log_id_node_zone, cur_tln); 2653 goto done; 2654 } 2655 2656 if (tcp_log_dump_node_logbuf(cur_tln, reason, 2657 M_NOWAIT)) { 2658 /* 2659 * If we have sapce, save the entries locally. 2660 * Otherwise, free them. 2661 */ 2662 if (num_local_entries < LOCAL_SAVE) { 2663 local_entries[num_local_entries] = 2664 *cur_tln; 2665 num_local_entries++; 2666 } else { 2667 tcp_log_free_entries( 2668 &cur_tln->tln_entries, 2669 &cur_tln->tln_count); 2670 } 2671 } 2672 2673 /* No matter what, we are done with the node now. */ 2674 uma_zfree(tcp_log_id_node_zone, cur_tln); 2675 2676 /* 2677 * Because we removed this entry from the list, prev_tln 2678 * (which tracks the previous entry still on the tlb 2679 * list) remains unchanged. 2680 */ 2681 continue; 2682 } 2683 2684 /* 2685 * If we get to this point, the session data is still held in 2686 * the TCPCB. So, we need to pull the data out of that. 2687 * 2688 * We will need to drop the expireq lock so we can lock the INP. 2689 * We can then try to extract the data the "easy" way. If that 2690 * fails, we'll save the log entries for later. 2691 */ 2692 if (expireq_locked) { 2693 TCPLOG_EXPIREQ_UNLOCK(); 2694 expireq_locked = false; 2695 } 2696 2697 /* Lock the INP and then re-check the state. */ 2698 inp = cur_tln->tln_inp; 2699 INP_WLOCK(inp); 2700 /* 2701 * If we caught this while it was transitioning, the data 2702 * might have moved from the TCPCB to the tln (signified by 2703 * setting tln_closed to true. If so, treat this like an 2704 * inactive connection. 2705 */ 2706 if (cur_tln->tln_closed) { 2707 /* 2708 * It looks like we may have caught this connection 2709 * while it was transitioning from active to inactive. 
2710 * Treat this like an inactive connection. 2711 */ 2712 INP_WUNLOCK(inp); 2713 goto no_inp; 2714 } 2715 2716 /* 2717 * Try to dump the data from the tp without dropping the lock. 2718 * If this fails, try to save off the data locally. 2719 */ 2720 tp = cur_tln->tln_tp; 2721 if (tcp_log_dump_tp_logbuf(tp, reason, M_NOWAIT, true) && 2722 num_local_entries < LOCAL_SAVE) { 2723 tcp_log_move_tp_to_node(tp, 2724 &local_entries[num_local_entries]); 2725 local_entries[num_local_entries].tln_closed = 1; 2726 KASSERT(local_entries[num_local_entries].tln_bucket == 2727 tlb, ("%s: %d: bucket mismatch for node %p", 2728 __func__, __LINE__, cur_tln)); 2729 num_local_entries++; 2730 } 2731 2732 INP_WUNLOCK(inp); 2733 2734 /* 2735 * We are goint to leave the current tln on the list. It will 2736 * become the previous tln. 2737 */ 2738 prev_tln = cur_tln; 2739 } 2740 2741 /* Drop our locks, if any. */ 2742 KASSERT(tree_locked == TREE_UNLOCKED, 2743 ("%s: %d: tree unexpectedly locked", __func__, __LINE__)); 2744 switch (tree_locked) { 2745 case TREE_WLOCKED: 2746 TCPID_TREE_WUNLOCK(); 2747 tree_locked = TREE_UNLOCKED; 2748 break; 2749 case TREE_RLOCKED: 2750 TCPID_TREE_RUNLOCK(); 2751 tree_locked = TREE_UNLOCKED; 2752 break; 2753 } 2754 if (expireq_locked) { 2755 TCPLOG_EXPIREQ_UNLOCK(); 2756 expireq_locked = false; 2757 } 2758 2759 /* 2760 * Try again for any saved entries. tcp_log_dump_node_logbuf() is 2761 * guaranteed to free the log entries within the node. And, since 2762 * the node itself is on our stack, we don't need to free it. 2763 */ 2764 for (i = 0; i < num_local_entries; i++) 2765 tcp_log_dump_node_logbuf(&local_entries[i], reason, M_WAITOK); 2766 2767 /* Drop our reference. */ 2768 if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) 2769 TCPID_BUCKET_UNLOCK(tlb); 2770 2771 done: 2772 /* Drop our locks, if any. */ 2773 switch (tree_locked) { 2774 case TREE_WLOCKED: 2775 TCPID_TREE_WUNLOCK(); 2776 break; 2777 case TREE_RLOCKED: 2778 TCPID_TREE_RUNLOCK(); 2779 break; 2780 } 2781 if (expireq_locked) 2782 TCPLOG_EXPIREQ_UNLOCK(); 2783 } 2784 #undef LOCAL_SAVE 2785 2786 /* 2787 * Queue the log buffers for all sessions in a bucket for transmissions via 2788 * the log buffer facility. 2789 * 2790 * NOTE: This should be called with a locked INP; however, the function 2791 * will drop the lock. 2792 */ 2793 void 2794 tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason) 2795 { 2796 struct inpcb *inp = tptoinpcb(tp); 2797 struct tcp_log_id_bucket *tlb; 2798 int tree_locked; 2799 2800 /* Figure out our bucket and lock it. */ 2801 INP_WLOCK_ASSERT(inp); 2802 tlb = tp->t_lib; 2803 if (tlb == NULL) { 2804 /* 2805 * No bucket; treat this like a request to dump a single 2806 * session's traces. 2807 */ 2808 (void)tcp_log_dump_tp_logbuf(tp, reason, M_WAITOK, true); 2809 INP_WUNLOCK(inp); 2810 return; 2811 } 2812 TCPID_BUCKET_REF(tlb); 2813 INP_WUNLOCK(inp); 2814 TCPID_BUCKET_LOCK(tlb); 2815 2816 /* If we are the last reference, we have nothing more to do here. */ 2817 tree_locked = TREE_UNLOCKED; 2818 if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) { 2819 switch (tree_locked) { 2820 case TREE_WLOCKED: 2821 TCPID_TREE_WUNLOCK(); 2822 break; 2823 case TREE_RLOCKED: 2824 TCPID_TREE_RUNLOCK(); 2825 break; 2826 } 2827 return; 2828 } 2829 2830 /* Turn this over to tcp_log_dumpbucketlogs() to finish the work. */ 2831 tcp_log_dumpbucketlogs(tlb, reason); 2832 } 2833 2834 /* 2835 * Mark the end of a flow with the current stack. 
 * A stack can add
 * stack-specific info to this trace event by overriding this
 * function (see bbr_log_flowend() for example).
 */
void
tcp_log_flowend(struct tcpcb *tp)
{
	if (tp->_t_logstate != TCP_LOG_STATE_OFF) {
		struct socket *so = tptosocket(tp);
		TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd,
				TCP_LOG_FLOWEND, 0, 0, NULL, false);
	}
}

void
tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags)
{
	struct inpcb *inp;
	struct tcpcb *tp;
#ifdef TCP_REQUEST_TRK
	struct http_sendfile_track *ent;
	int i, fnd;
#endif

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_log_sendfile: inp == NULL"));

	/* Quick check to see if logging is enabled for this connection. */
	tp = intotcpcb(inp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    (tp->_t_logstate == TCP_LOG_STATE_OFF)) {
		return;
	}

	INP_WLOCK(inp);
	/* Double-check the log state now that we have the lock. */
	if (inp->inp_flags & INP_DROPPED)
		goto done;
	if (tp->_t_logstate != TCP_LOG_STATE_OFF) {
		struct timeval tv;
		tcp_log_eventspecific_t log;

		microuptime(&tv);
		log.u_sf.offset = offset;
		log.u_sf.length = nbytes;
		log.u_sf.flags = flags;

		TCP_LOG_EVENTP(tp, NULL,
		    &tptosocket(tp)->so_rcv,
		    &tptosocket(tp)->so_snd,
		    TCP_LOG_SENDFILE, 0, 0, &log, false, &tv);
	}
#ifdef TCP_REQUEST_TRK
	if (tp->t_http_req == 0) {
		/* No HTTP requests to track. */
		goto done;
	}
	fnd = 0;
	if (tp->t_http_closed == 0) {
		/* No closed-end requests to track. */
		goto skip_closed_req;
	}
	for (i = 0; i < MAX_TCP_HTTP_REQ; i++) {
		/* Let's see if this one can be found. */
		ent = &tp->t_http_info[i];
		if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) {
			/* Not used. */
			continue;
		}
		if (ent->flags & TCP_HTTP_TRACK_FLG_OPEN) {
			/* This pass does not consider open requests. */
			continue;
		}
		if (ent->flags & TCP_HTTP_TRACK_FLG_COMP) {
			/* Don't look at what we have completed. */
			continue;
		}
		/* If we reach here, it is an allocated closed-end request. */
		if ((ent->start == offset) ||
		    ((offset > ent->start) && (offset < ent->end))) {
			/* The offset falls within this request. */
			fnd = 1;
		}
		if (fnd) {
			/*
			 * It is at or past the end, so it is complete.
			 */
			ent->flags |= TCP_HTTP_TRACK_FLG_SEQV;
			/*
			 * When an entry completes, we can take (snd_una +
			 * sb_ccc) and know where the end of the range really
			 * is. Note that this works since two requests must be
			 * sequential and sendfile is now complete for *this*
			 * request. We must use sb_ccc since the data may still
			 * be in-flight in TLS.
			 *
			 * We always cautiously move the end_seq only if our
			 * calculations show it happened (just in case sendfile
			 * calls here at the wrong place). When we go COMP we
			 * will stop coming here and hopefully be left with the
			 * correct end_seq.
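			 *
			 * Worked example (illustrative numbers only): if
			 * snd_una is 1000 and sb_ccc is 500 when this
			 * closed-end request is touched, the candidate end is
			 * 1500, and end_seq is advanced to 1500 only if that
			 * is greater than the value already recorded.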
			 */
			if (SEQ_GT((tp->snd_una + so->so_snd.sb_ccc),
			    ent->end_seq))
				ent->end_seq = tp->snd_una + so->so_snd.sb_ccc;
			if ((offset + nbytes) >= ent->end) {
				ent->flags |= TCP_HTTP_TRACK_FLG_COMP;
				tcp_http_log_req_info(tp, ent, i,
				    TCP_HTTP_REQ_LOG_COMPLETE, offset, nbytes);
			} else {
				tcp_http_log_req_info(tp, ent, i,
				    TCP_HTTP_REQ_LOG_MOREYET, offset, nbytes);
			}
			/* We assume that sendfile never sends overlapping requests. */
			goto done;
		}
	}
skip_closed_req:
	if (!fnd) {
		/* OK, now let's look for open requests. */
		for (i = 0; i < MAX_TCP_HTTP_REQ; i++) {
			ent = &tp->t_http_info[i];
			if (ent->flags == TCP_HTTP_TRACK_FLG_EMPTY) {
				/* Not used. */
				continue;
			}
			if ((ent->flags & TCP_HTTP_TRACK_FLG_OPEN) == 0)
				continue;
			/* If we reach here, it is an allocated open request. */
			if (ent->start == offset) {
				/* It begins this request. */
				ent->start_seq = tp->snd_una +
				    tptosocket(tp)->so_snd.sb_ccc;
				ent->flags |= TCP_HTTP_TRACK_FLG_SEQV;
				break;
			} else if (offset > ent->start) {
				ent->flags |= TCP_HTTP_TRACK_FLG_SEQV;
				break;
			}
		}
	}
#endif
done:
	INP_WUNLOCK(inp);
}
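
/*
 * Example (a sketch only, not built as part of this file): the comment
 * above tcp_log_flowend() notes that a stack can override it to attach
 * stack-specific data to the flow-end trace event. A hypothetical
 * stack-specific variant could look roughly like the code below, which
 * mirrors the TCP_LOG_EVENT() usage in tcp_log_flowend(); the example_*
 * name is illustrative and not part of this file.
 */
#if 0
static void
example_stack_log_flowend(struct tcpcb *tp)
{
	union tcp_log_stackspecific log;
	struct socket *so;

	if (tp->_t_logstate == TCP_LOG_STATE_OFF)
		return;

	so = tptosocket(tp);
	/* Zero the blob; a real stack would fill in its own fields here. */
	memset(&log, 0, sizeof(log));
	TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd,
	    TCP_LOG_FLOWEND, 0, 0, &log, false);
}
#endif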