1 /*- 2 * Copyright (c) 2016-2018 Netflix, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * Author: Lawrence Stewart <lstewart@netflix.com> 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/arb.h> 36 #include <sys/errno.h> 37 #include <sys/malloc.h> 38 #include <sys/qmath.h> 39 #include <sys/queue.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 #include <sys/sysctl.h> 43 #ifdef _KERNEL 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/rmlock.h> 47 #include <sys/systm.h> 48 #endif 49 #include <sys/stats.h> 50 51 #include <net/vnet.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_pcb.h> 55 #include <netinet/tcp.h> 56 #include <netinet/tcp_var.h> 57 58 #include <netinet/cc/cc.h> 59 60 VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1; 61 62 #ifndef _KERNEL 63 #define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable) 64 #define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl) 65 #else /* _KERNEL */ 66 67 VNET_DEFINE(int, tcp_perconn_stats_enable) = 2; 68 VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates); 69 VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0; 70 #define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates) 71 #define V_tcp_stats_nrates VNET(tcp_stats_nrates) 72 73 static struct rmlock tcp_stats_tpl_sampling_lock; 74 static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action, 75 struct stats_tpl_sample_rate **rates, int *nrates, void *ctx); 76 77 SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable, 78 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0, 79 "Enable per-connection TCP stats gathering; 1 enables for all connections, " 80 "2 enables random sampling across log id connection groups"); 81 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates, 82 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT, tcp_stats_tpl_sr_cb, 83 sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A", 84 "TCP stats per template random sampling rates, in CSV tpl_spec=percent " 85 "key-value pairs (see stats(9) for template spec details)"); 86 #endif /* _KERNEL */ 87 88 #ifdef _KERNEL 89 int 90 #else 91 static int 92 /* Ensure all templates are also added to the userland template list. */ 93 __attribute__ ((constructor)) 94 #endif 95 tcp_stats_init() 96 { 97 int err, lasterr; 98 99 err = lasterr = 0; 100 101 V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0); 102 if (V_tcp_perconn_stats_dflt_tpl < 0) 103 return (-V_tcp_perconn_stats_dflt_tpl); 104 105 struct voistatspec vss_sum[] = { 106 STATS_VSS_SUM(), 107 }; 108 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 109 VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64, 110 NVSS(vss_sum), vss_sum, 0); 111 lasterr = err ? err : lasterr; 112 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 113 VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32, 114 NVSS(vss_sum), vss_sum, 0); 115 lasterr = err ? err : lasterr; 116 117 struct voistatspec vss_max[] = { 118 STATS_VSS_MAX(), 119 }; 120 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 121 VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG, 122 NVSS(vss_max), vss_max, 0); 123 lasterr = err ? err : lasterr; 124 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 125 VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG, 126 NVSS(vss_max), vss_max, 0); 127 lasterr = err ? err : lasterr; 128 129 struct voistatspec vss_rtt[] = { 130 STATS_VSS_MAX(), 131 STATS_VSS_MIN(), 132 STATS_VSS_TDGSTCLUST32(20, 4), 133 }; 134 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 135 VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32, 136 NVSS(vss_rtt), vss_rtt, 0); 137 lasterr = err ? err : lasterr; 138 139 struct voistatspec vss_congsig[] = { 140 STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO), 141 DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0) 142 }; 143 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 144 VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32, 145 NVSS(vss_congsig), vss_congsig, 0); 146 lasterr = err ? err : lasterr; 147 148 struct voistatspec vss_gput[] = { 149 STATS_VSS_MAX(), 150 STATS_VSS_TDGSTCLUST32(20, 4), 151 }; 152 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 153 VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32, 154 NVSS(vss_gput), vss_gput, 0); 155 lasterr = err ? err : lasterr; 156 157 struct voistatspec vss_gput_nd[] = { 158 STATS_VSS_TDGSTCLUST32(10, 4), 159 }; 160 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 161 VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32, 162 NVSS(vss_gput_nd), vss_gput_nd, 0); 163 lasterr = err ? err : lasterr; 164 165 struct voistatspec vss_windiff[] = { 166 STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF) 167 }; 168 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 169 VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32, 170 NVSS(vss_windiff), vss_windiff, 0); 171 lasterr = err ? err : lasterr; 172 173 struct voistatspec vss_acklen[] = { 174 STATS_VSS_MAX(), 175 STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF) 176 }; 177 err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl, 178 VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32, 179 NVSS(vss_acklen), vss_acklen, 0); 180 lasterr = err ? err : lasterr; 181 182 return (lasterr); 183 } 184 185 #ifdef _KERNEL 186 int 187 tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes, 188 size_t seed_len) 189 { 190 struct rm_priotracker tracker; 191 int tpl; 192 193 tpl = -1; 194 195 if (V_tcp_stats_nrates > 0) { 196 rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker); 197 tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates, 198 V_tcp_stats_nrates, seed_bytes, seed_len); 199 rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker); 200 201 if (tpl >= 0) { 202 INP_WLOCK_ASSERT(tptoinpcb(tp)); 203 if (tp->t_stats != NULL) 204 stats_blob_destroy(tp->t_stats); 205 tp->t_stats = stats_blob_alloc(tpl, 0); 206 if (tp->t_stats == NULL) 207 tpl = -ENOMEM; 208 } 209 } 210 211 return (tpl); 212 } 213 214 /* 215 * Callback function for stats_tpl_sample_rates() to interact with the TCP 216 * subsystem's stats template sample rates list. 217 */ 218 int 219 tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action, 220 struct stats_tpl_sample_rate **rates, int *nrates, void *ctx) 221 { 222 struct stats_tpl_sample_rate *old_rates; 223 int old_nrates; 224 225 if (ctx == NULL) 226 return (ENOMEM); 227 228 switch (action) { 229 case TPL_SR_RLOCKED_GET: 230 /* 231 * Return with rlock held i.e. this call must be paired with a 232 * "action == TPL_SR_RUNLOCK" call. 233 */ 234 rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED); 235 rm_rlock(&tcp_stats_tpl_sampling_lock, 236 (struct rm_priotracker *)ctx); 237 /* FALLTHROUGH */ 238 case TPL_SR_UNLOCKED_GET: 239 if (rates != NULL) 240 *rates = V_tcp_perconn_stats_sample_rates; 241 if (nrates != NULL) 242 *nrates = V_tcp_stats_nrates; 243 break; 244 case TPL_SR_RUNLOCK: 245 rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED); 246 rm_runlock(&tcp_stats_tpl_sampling_lock, 247 (struct rm_priotracker *)ctx); 248 break; 249 case TPL_SR_PUT: 250 KASSERT(rates != NULL && nrates != NULL, 251 ("%s: PUT without new rates", __func__)); 252 rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED); 253 if (rates == NULL || nrates == NULL) 254 return (EINVAL); 255 rm_wlock(&tcp_stats_tpl_sampling_lock); 256 old_rates = V_tcp_perconn_stats_sample_rates; 257 old_nrates = V_tcp_stats_nrates; 258 V_tcp_perconn_stats_sample_rates = *rates; 259 V_tcp_stats_nrates = *nrates; 260 rm_wunlock(&tcp_stats_tpl_sampling_lock); 261 *rates = old_rates; 262 *nrates = old_nrates; 263 break; 264 default: 265 return (EINVAL); 266 break; 267 } 268 269 return (0); 270 } 271 272 RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock, 273 "tcp_stats_tpl_sampling_lock"); 274 #endif /* _KERNEL */ 275