xref: /freebsd/sys/netinet/tcp_ratelimit.h (revision 62ff619dcc3540659a319be71c9a489f1659e14a)
1 /*-
2  *
3  * SPDX-License-Identifier: BSD-3-Clause
4  *
5  * Copyright (c) 2018-2020
6  *	Netflix Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  * __FBSDID("$FreeBSD$");
29  *
30  */
31 /**
32  * Author: Randall Stewart <rrs@netflix.com>
33  */
34 #ifndef __tcp_ratelimit_h__
35 #define __tcp_ratelimit_h__
36 
/*
 * Forward declarations for the structures this header refers to only
 * through pointers.  Without them, a prototype that names one of these
 * structures before its defining header has been included would declare
 * a new type scoped to that prototype alone.
 */
struct ifnet;
struct inpcb;
struct m_snd_tag;
struct tcpcb;
38 
/*
 * Flags on an individual rate.
 * NOTE(review): presumably these are the values kept in the flags member
 * of struct tcp_hwrate_limit_table below; the meaning of each bit is not
 * visible in this header -- confirm against tcp_ratelimit.c.
 */
#define HDWRPACE_INITED 	0x0001
#define HDWRPACE_TAGPRESENT	0x0002
#define HDWRPACE_IFPDEPARTED	0x0004
/*
 * One entry of a rate set's rate table.  Each entry points back at the
 * rate set that owns it (ptbl); tcp_hw_highest_rate() follows that pointer
 * to read another entry of the same table.
 */
struct tcp_hwrate_limit_table {
	const struct tcp_rate_set *ptbl;	/* Pointer to parent table */
	struct m_snd_tag *tag;	/* Send tag if needed (chelsio) */
	long	 rate;		/* Rate we get in Bytes per second (Bps) */
	long	 using;		/* How many flows are using this hdwr rate. */
	/*
	 * NOTE(review): presumably a count of ENOBUFS errors seen while
	 * sending at this rate (see tcp_rl_log_enobuf()) -- confirm.
	 */
	long	 rs_num_enobufs;
	uint32_t time_between;	/* Time-Gap between packets at this rate */
	uint32_t flags;		/* Presumably HDWRPACE_* bits -- confirm */
};
52 
/*
 * Rateset flags.  The values are individual bits; 0x0008 and 0x0080 are
 * not assigned.
 */
#define RS_IS_DEFF      0x0001	/* It's a lagg, do a double lookup */
#define RS_IS_INTF      0x0002	/* It's a plain interface */
#define RS_NO_PRE       0x0004	/* The interface has set rates */
#define RS_INT_TBL      0x0010	/*
				 * The table is the internal version
				 * which has special setup requirements.
				 */
#define RS_IS_DEAD      0x0020	/* The RS is on the dead list */
#define RS_FUNERAL_SCHD 0x0040  /* Is an epoch call scheduled to bury this guy? */
#define RS_INTF_NO_SUP  0x0100 	/* The interface does not support rate limiting */
64 
/*
 * A set of pacing rates: a table of struct tcp_hwrate_limit_table entries
 * (rs_rlt) plus bookkeeping, tied to the interface in rs_ifp.  Sets are
 * linked together through the next member on a head_tcp_rate_set list.
 */
struct tcp_rate_set {
	/* NOTE(review): presumably owns this set's sysctl nodes -- confirm. */
	struct sysctl_ctx_list sysctl_ctx;
	/* Linkage on a head_tcp_rate_set CK list. */
	CK_LIST_ENTRY(tcp_rate_set) next;
	struct ifnet *rs_ifp;
	/* Rate table; tcp_hw_highest_rate() indexes it by rs_highest_valid. */
	struct tcp_hwrate_limit_table *rs_rlt;
	/*
	 * NOTE(review): presumably the number of flows currently using this
	 * set and the maximum allowed -- confirm.
	 */
	uint64_t rs_flows_using;
	uint64_t rs_flow_limit;
	/* NOTE(review): presumably the interface's unit number -- confirm. */
	uint32_t rs_if_dunit;
	/* NOTE(review): presumably the number of entries in rs_rlt -- confirm. */
	int rs_rate_cnt;
	int rs_min_seg;
	/* Index into rs_rlt of the entry tcp_hw_highest_rate() reports. */
	int rs_highest_valid;
	/* NOTE(review): presumably the index of the lowest usable entry -- confirm. */
	int rs_lowest_valid;
	int rs_disable;
	/* NOTE(review): presumably the RS_* rateset flags -- confirm. */
	int rs_flags;
	/*
	 * NOTE(review): presumably used for the deferred epoch call that
	 * RS_FUNERAL_SCHD refers to -- confirm.
	 */
	struct epoch_context rs_epoch_ctx;
};

/* Declares struct head_tcp_rate_set, the list head type for rate sets. */
CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set);
83 
/*
 * Request flags.
 * NOTE(review): presumably passed in the flags argument of
 * tcp_set_pacing_rate() and tcp_chg_pacing_rate() -- confirm.
 */
#define RS_PACING_EXACT_MATCH	0x0001	/* Need an exact match for rate */
#define RS_PACING_GT		0x0002	/* Greater than requested */
#define RS_PACING_GEQ		0x0004	/* Greater than or equal to */
#define RS_PACING_LT		0x0008	/* Less than requested rate */
#define RS_PACING_SUB_OK	0x0010	/* If a rate can't be found get the
					 * next best rate (highest or lowest). */
91 #ifdef _KERNEL
/*
 * Fallback value, used only when nothing included earlier has already
 * defined ETHERNET_SEGMENT_SIZE.
 * NOTE(review): 1514 is presumably a 1500-byte MTU plus the 14-byte
 * Ethernet header -- confirm.
 */
#ifndef ETHERNET_SEGMENT_SIZE
#define ETHERNET_SEGMENT_SIZE 1514
#endif
95 #ifdef RATELIMIT
#define DETAILED_RATELIMIT_SYSCTL 1	/*
					 * Undefine this if you don't want
					 * detailed rates to appear in
					 * net.inet.tcp.rl.
					 * With the definition, each rate
					 * shows up in your sysctl tree;
					 * this can be big.
					 */
104 uint64_t inline
105 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle)
106 {
107 	return (rle->ptbl->rs_rlt[rle->ptbl->rs_highest_valid].rate);
108 }
109 
/*
 * Variant of tcp_hw_highest_rate() that takes an interface and an inpcb
 * instead of a rate table entry.  When RATELIMIT is not configured an
 * inline stub of the same name returns 0.
 * NOTE(review): presumably returns the highest hardware pacing rate the
 * interface offers, in bytes per second -- confirm against the
 * implementation.
 */
uint64_t
tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp);
112 
/*
 * Returns a pointer to a rate table entry, or NULL.  When RATELIMIT is not
 * configured an inline stub of the same name always returns NULL and, if
 * error is non-NULL, stores EOPNOTSUPP in *error.
 * NOTE(review): presumably selects a hardware pacing rate near
 * bytes_per_sec for connection tp on interface ifp, with flags taking the
 * RS_PACING_* request flags and lower_rate optionally receiving a lower
 * available rate -- confirm against the implementation.
 */
const struct tcp_hwrate_limit_table *
tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate);
116 
/*
 * Same arguments as tcp_set_pacing_rate() with one addition: crte, a rate
 * table entry.  Returns a pointer to a rate table entry, or NULL.  When
 * RATELIMIT is not configured an inline stub of the same name always
 * returns NULL and, if error is non-NULL, stores EOPNOTSUPP in *error.
 * NOTE(review): presumably crte is the entry the connection currently
 * holds and the return value is the entry for the new rate -- confirm
 * against the implementation.
 */
const struct tcp_hwrate_limit_table *
tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate);
/*
 * Takes a rate table entry and a connection; returns nothing.  When
 * RATELIMIT is not configured an inline stub of the same name does
 * nothing.
 * NOTE(review): presumably drops tp's use of the entry crte obtained from
 * tcp_set_pacing_rate() or tcp_chg_pacing_rate() -- confirm against the
 * implementation.
 */
void
tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp);
124 #else
/*
 * Stub for kernels built without RATELIMIT.  No rate entry is ever handed
 * out: the result is always NULL, and when the caller passes an error
 * pointer it is set to EOPNOTSUPP.  All other arguments are ignored.
 */
static inline const struct tcp_hwrate_limit_table *
tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate)
{
	if (error != NULL) {
		*error = EOPNOTSUPP;
	}
	return (NULL);
}
133 
/*
 * Stub for kernels built without RATELIMIT.  A rate can never be changed:
 * the result is always NULL, and when the caller passes an error pointer
 * it is set to EOPNOTSUPP.  All other arguments are ignored.
 */
static inline const struct tcp_hwrate_limit_table *
tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp, struct ifnet *ifp,
    uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate)
{
	if (error != NULL) {
		*error = EOPNOTSUPP;
	}
	return (NULL);
}
143 
/*
 * Stub for kernels built without RATELIMIT.  Both arguments are ignored
 * and nothing is done.
 */
static inline void
tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte,
    struct tcpcb *tp)
{
	/* Intentionally empty. */
}
150 
/*
 * Stub for kernels built without RATELIMIT: always reports a highest rate
 * of 0.  'rle' is ignored.
 *
 * The specifiers are written "static inline uint64_t", matching the other
 * stubs in this header; placing "inline" after the type is accepted by
 * compilers but draws old-style-declaration warnings.
 */
static inline uint64_t
tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle)
{
	return (0);
}
156 
/*
 * Stub for kernels built without RATELIMIT: always reports a highest rate
 * of 0.  Both 'ifp' and 'inp' are ignored.
 *
 * The specifiers are written "static inline uint64_t", matching the other
 * stubs in this header; placing "inline" after the type is accepted by
 * compilers but draws old-style-declaration warnings.
 */
static inline uint64_t
tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp)
{
	return (0);
}
162 
163 
164 #endif
/*
 * Given a bandwidth (bw), a segment size (segsiz) and an optional
 * hardware rate limit entry (te), return the ideal size to burst
 * out at once.  The can_use_1mss parameter says whether the
 * transport will tolerate a 1-MSS limit; if it will not, the result
 * bottoms out at 2 MSS (think delayed ACK).
 *
 * NOTE(review): err is presumably an output error indication, and the
 * units of bw are not stated in this header -- confirm against the
 * implementation.
 */
uint32_t
tcp_get_pacing_burst_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss,
   const struct tcp_hwrate_limit_table *te, int *err);
176 
177 
/*
 * Takes a rate table entry and returns nothing.
 * NOTE(review): presumably records an ENOBUFS error against rte (struct
 * tcp_hwrate_limit_table has an rs_num_enobufs member) -- confirm against
 * the implementation.
 */
void
tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte);
180 
181 #endif
182 #endif
183