xref: /freebsd/sys/netinet/tcp_stats.c (revision edf8578117e8844e02c0121147f45e4609b30680)
1 /*-
2  * Copyright (c) 2016-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * Author: Lawrence Stewart <lstewart@netflix.com>
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/arb.h>
34 #include <sys/errno.h>
35 #include <sys/malloc.h>
36 #include <sys/qmath.h>
37 #include <sys/queue.h>
38 #include <sys/socket.h>
39 #include <sys/socketvar.h>
40 #include <sys/sysctl.h>
41 #ifdef _KERNEL
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/rmlock.h>
45 #include <sys/systm.h>
46 #endif
47 #include <sys/stats.h>
48 
49 #include <net/vnet.h>
50 
51 #include <netinet/in.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/tcp.h>
54 #include <netinet/tcp_var.h>
55 
56 #include <netinet/cc/cc.h>
57 
/*
 * Id of the default per-connection stats template. Starts out as -1 and is
 * assigned the return value of stats_tpl_alloc() by tcp_stats_init().
 */
VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;

#ifndef _KERNEL
/* Non-kernel builds: provide the V_ accessor macros for the variables. */
#define	V_tcp_perconn_stats_enable	VNET(tcp_perconn_stats_enable)
#define	V_tcp_perconn_stats_dflt_tpl	VNET(tcp_perconn_stats_dflt_tpl)
#else /* _KERNEL */

/*
 * Per-connection stats gathering mode, exported read/write through the
 * net.inet.tcp.perconn_stats_enable sysctl declared below (see its
 * description string for the meaning of the values).
 */
VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
/*
 * Current template sample rates list and its element count. Both are read and
 * replaced under tcp_stats_tpl_sampling_lock by the functions in this file.
 */
VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
#define	V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
#define	V_tcp_stats_nrates		VNET(tcp_stats_nrates)

/* Read-mostly lock protecting the sample rates list and count above. */
static struct rmlock tcp_stats_tpl_sampling_lock;
static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
    struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);

SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
    "Enable per-connection TCP stats gathering; 1 enables for all connections, "
    "2 enables random sampling across log id connection groups");
/*
 * The handler is stats_tpl_sample_rates(); it is given tcp_stats_tpl_sr_cb as
 * arg1 and sizeof(struct rm_priotracker) as arg2. The callback treats its
 * "ctx" argument as a struct rm_priotracker, so arg2 is presumably the size of
 * the context buffer the handler provides -- confirm against stats(9).
 */
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT, tcp_stats_tpl_sr_cb,
    sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
    "TCP stats per template random sampling rates, in CSV tpl_spec=percent "
    "key-value pairs (see stats(9) for template spec details)");
#endif /* _KERNEL */
85 
86 #ifdef _KERNEL
87 int
88 #else
89 static int
90 /* Ensure all templates are also added to the userland template list. */
91 __attribute__ ((constructor))
92 #endif
93 tcp_stats_init(void)
94 {
95 	int err, lasterr;
96 
97 	err = lasterr = 0;
98 
99 	V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
100 	if (V_tcp_perconn_stats_dflt_tpl < 0)
101 		return (-V_tcp_perconn_stats_dflt_tpl);
102 
103 	struct voistatspec vss_sum[] = {
104 		STATS_VSS_SUM(),
105 	};
106 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
107 	    VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
108 	    NVSS(vss_sum), vss_sum, 0);
109 	lasterr = err ? err : lasterr;
110 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
111 	    VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
112 	    NVSS(vss_sum), vss_sum, 0);
113 	lasterr = err ? err : lasterr;
114 
115 	struct voistatspec vss_max[] = {
116 		STATS_VSS_MAX(),
117 	};
118 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
119 	    VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
120 	    NVSS(vss_max), vss_max, 0);
121 	lasterr = err ? err : lasterr;
122 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
123 	    VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
124 	    NVSS(vss_max), vss_max, 0);
125 	lasterr = err ? err : lasterr;
126 
127 	struct voistatspec vss_rtt[] = {
128 		STATS_VSS_MAX(),
129 		STATS_VSS_MIN(),
130 		STATS_VSS_TDGSTCLUST32(20, 4),
131 	};
132 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
133 	    VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
134 	    NVSS(vss_rtt), vss_rtt, 0);
135 	lasterr = err ? err : lasterr;
136 
137 	struct voistatspec vss_congsig[] = {
138 		STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
139 		    DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
140 	};
141 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
142 	    VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
143 	    NVSS(vss_congsig), vss_congsig, 0);
144 	lasterr = err ? err : lasterr;
145 
146 	struct voistatspec vss_gput[] = {
147 		STATS_VSS_MAX(),
148 		STATS_VSS_TDGSTCLUST32(20, 4),
149 	};
150 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
151 	    VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
152 	    NVSS(vss_gput), vss_gput, 0);
153 	lasterr = err ? err : lasterr;
154 
155 	struct voistatspec vss_gput_nd[] = {
156 		STATS_VSS_TDGSTCLUST32(10, 4),
157 	};
158 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
159 	    VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
160 	    NVSS(vss_gput_nd), vss_gput_nd, 0);
161 	lasterr = err ? err : lasterr;
162 
163 	struct voistatspec vss_windiff[] = {
164 		STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
165 	};
166 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
167 	    VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
168 	    NVSS(vss_windiff), vss_windiff, 0);
169 	lasterr = err ? err : lasterr;
170 
171 	struct voistatspec vss_acklen[] = {
172 		STATS_VSS_MAX(),
173 		STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
174 	};
175 	err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
176 	    VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
177 	    NVSS(vss_acklen), vss_acklen, 0);
178 	lasterr = err ? err : lasterr;
179 
180 	return (lasterr);
181 }
182 
183 #ifdef _KERNEL
184 int
185 tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
186     size_t seed_len)
187 {
188 	struct rm_priotracker tracker;
189 	int tpl;
190 
191 	tpl = -1;
192 
193 	if (V_tcp_stats_nrates > 0) {
194 		rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker);
195 		tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates,
196 		    V_tcp_stats_nrates, seed_bytes, seed_len);
197 		rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker);
198 
199 		if (tpl >= 0) {
200 			INP_WLOCK_ASSERT(tptoinpcb(tp));
201 			if (tp->t_stats != NULL)
202 				stats_blob_destroy(tp->t_stats);
203 			tp->t_stats = stats_blob_alloc(tpl, 0);
204 			if (tp->t_stats == NULL)
205 				tpl = -ENOMEM;
206 		}
207 	}
208 
209 	return (tpl);
210 }
211 
212 /*
213  * Callback function for stats_tpl_sample_rates() to interact with the TCP
214  * subsystem's stats template sample rates list.
215  */
216 int
217 tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
218     struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
219 {
220 	struct stats_tpl_sample_rate *old_rates;
221 	int old_nrates;
222 
223 	if (ctx == NULL)
224 		return (ENOMEM);
225 
226 	switch (action) {
227 	case TPL_SR_RLOCKED_GET:
228 		/*
229 		 * Return with rlock held i.e. this call must be paired with a
230 		 * "action == TPL_SR_RUNLOCK" call.
231 		 */
232 		rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
233 		rm_rlock(&tcp_stats_tpl_sampling_lock,
234 		    (struct rm_priotracker *)ctx);
235 		/* FALLTHROUGH */
236 	case TPL_SR_UNLOCKED_GET:
237 		if (rates != NULL)
238 			*rates = V_tcp_perconn_stats_sample_rates;
239 		if (nrates != NULL)
240 			*nrates = V_tcp_stats_nrates;
241 		break;
242 	case TPL_SR_RUNLOCK:
243 		rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
244 		rm_runlock(&tcp_stats_tpl_sampling_lock,
245 		    (struct rm_priotracker *)ctx);
246 		break;
247 	case TPL_SR_PUT:
248 		KASSERT(rates != NULL && nrates != NULL,
249 		    ("%s: PUT without new rates", __func__));
250 		rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
251 		if (rates == NULL || nrates == NULL)
252 			return (EINVAL);
253 		rm_wlock(&tcp_stats_tpl_sampling_lock);
254 		old_rates = V_tcp_perconn_stats_sample_rates;
255 		old_nrates = V_tcp_stats_nrates;
256 		V_tcp_perconn_stats_sample_rates = *rates;
257 		V_tcp_stats_nrates = *nrates;
258 		rm_wunlock(&tcp_stats_tpl_sampling_lock);
259 		*rates = old_rates;
260 		*nrates = old_nrates;
261 		break;
262 	default:
263 		return (EINVAL);
264 		break;
265 	}
266 
267 	return (0);
268 }
269 
/*
 * Register tcp_stats_tpl_sampling_lock with RM_SYSINIT so the lock used by
 * tcp_stats_sample_rollthedice() and tcp_stats_tpl_sr_cb() is set up without
 * an explicit init call in this file; the string is the lock's name.
 */
RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
    "tcp_stats_tpl_sampling_lock");
272 #endif /* _KERNEL */
273