1 /*- 2 * 3 * SPDX-License-Identifier: BSD-3-Clause 4 * 5 * Copyright (c) 2018-2020 6 * Netflix Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /** 31 * Author: Randall Stewart <rrs@netflix.com> 32 */ 33 #ifndef __tcp_ratelimit_h__ 34 #define __tcp_ratelimit_h__ 35 36 struct m_snd_tag; 37 38 #define RL_MIN_DIVISOR 50 39 #define RL_DEFAULT_DIVISOR 1000 40 41 /* Flags on an individual rate */ 42 #define HDWRPACE_INITED 0x0001 43 #define HDWRPACE_TAGPRESENT 0x0002 44 #define HDWRPACE_IFPDEPARTED 0x0004 45 struct tcp_hwrate_limit_table { 46 const struct tcp_rate_set *ptbl; /* Pointer to parent table */ 47 struct m_snd_tag *tag; /* Send tag if needed (chelsio) */ 48 long rate; /* Rate we get in Bytes per second (Bps) */ 49 long using; /* How many flows are using this hdwr rate. */ 50 long rs_num_enobufs; 51 uint32_t time_between; /* Time-Gap between packets at this rate */ 52 uint32_t flags; 53 }; 54 55 /* Rateset flags */ 56 #define RS_IS_DEFF 0x0001 /* Its a lagg, do a double lookup */ 57 #define RS_IS_INTF 0x0002 /* Its a plain interface */ 58 #define RS_NO_PRE 0x0004 /* The interfacd has set rates */ 59 #define RS_INT_TBL 0x0010 /* 60 * The table is the internal version 61 * which has special setup requirements. 62 */ 63 #define RS_IS_DEAD 0x0020 /* The RS is dead list */ 64 #define RS_FUNERAL_SCHD 0x0040 /* Is a epoch call scheduled to bury this guy?*/ 65 #define RS_INTF_NO_SUP 0x0100 /* The interface does not support the ratelimiting */ 66 67 struct tcp_rate_set { 68 struct sysctl_ctx_list sysctl_ctx; 69 CK_LIST_ENTRY(tcp_rate_set) next; 70 struct ifnet *rs_ifp; 71 struct tcp_hwrate_limit_table *rs_rlt; 72 uint64_t rs_flows_using; 73 uint64_t rs_flow_limit; 74 uint32_t rs_if_dunit; 75 int rs_rate_cnt; 76 int rs_min_seg; 77 int rs_highest_valid; 78 int rs_lowest_valid; 79 int rs_disable; 80 int rs_flags; 81 struct epoch_context rs_epoch_ctx; 82 }; 83 84 CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set); 85 86 /* Request flags */ 87 #define RS_PACING_EXACT_MATCH 0x0001 /* Need an exact match for rate */ 88 #define RS_PACING_GT 0x0002 /* Greater than requested */ 89 #define RS_PACING_GEQ 0x0004 /* Greater than or equal too */ 90 #define RS_PACING_LT 0x0008 /* Less than requested rate */ 91 #define RS_PACING_SUB_OK 0x0010 /* If a rate can't be found get the 92 * next best rate (highest or lowest). */ 93 #ifdef _KERNEL 94 #ifndef ETHERNET_SEGMENT_SIZE 95 #define ETHERNET_SEGMENT_SIZE 1514 96 #endif 97 struct tcpcb; 98 99 #ifdef RATELIMIT 100 #define DETAILED_RATELIMIT_SYSCTL 1 /* 101 * Undefine this if you don't want 102 * detailed rates to appear in 103 * net.inet.tcp.rl. 104 * With the defintion each rate 105 * shows up in your sysctl tree 106 * this can be big. 107 */ 108 uint64_t inline 109 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle) 110 { 111 return (rle->ptbl->rs_rlt[rle->ptbl->rs_highest_valid].rate); 112 } 113 114 uint64_t 115 tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp); 116 117 const struct tcp_hwrate_limit_table * 118 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, 119 uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate); 120 121 const struct tcp_hwrate_limit_table * 122 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, 123 struct tcpcb *tp, struct ifnet *ifp, 124 uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate); 125 void 126 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, 127 struct tcpcb *tp); 128 129 uint32_t 130 tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, 131 const struct tcp_hwrate_limit_table *te, int *err, int divisor); 132 133 void 134 tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte); 135 136 void 137 tcp_rl_release_ifnet(struct ifnet *ifp); 138 139 #else 140 static inline const struct tcp_hwrate_limit_table * 141 tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *ifp, 142 uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate) 143 { 144 if (error) 145 *error = EOPNOTSUPP; 146 return (NULL); 147 } 148 149 static inline const struct tcp_hwrate_limit_table * 150 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table *crte, 151 struct tcpcb *tp, struct ifnet *ifp, 152 uint64_t bytes_per_sec, int flags, int *error, uint64_t *lower_rate) 153 { 154 if (error) 155 *error = EOPNOTSUPP; 156 return (NULL); 157 } 158 159 static inline void 160 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table *crte, 161 struct tcpcb *tp) 162 { 163 return; 164 } 165 166 static uint64_t inline 167 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table *rle) 168 { 169 return (0); 170 } 171 172 static uint64_t inline 173 tcp_hw_highest_rate_ifp(struct ifnet *ifp, struct inpcb *inp) 174 { 175 return (0); 176 } 177 178 static inline uint32_t 179 tcp_get_pacing_burst_size_w_divisor(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, 180 const struct tcp_hwrate_limit_table *te, int *err, int divisor) 181 { 182 /* 183 * We use the google formula to calculate the 184 * TSO size. I.E. 185 * bw < 24Meg 186 * tso = 2mss 187 * else 188 * tso = min(bw/(div=1000), 64k) 189 * 190 * Note for these calculations we ignore the 191 * packet overhead (enet hdr, ip hdr and tcp hdr). 192 * We only get the google formula when we have 193 * divisor = 1000, which is the default for now. 194 */ 195 uint64_t bytes; 196 uint32_t new_tso, min_tso_segs; 197 198 /* It can't be zero */ 199 if ((divisor == 0) || 200 (divisor < RL_MIN_DIVISOR)) { 201 bytes = bw / RL_DEFAULT_DIVISOR; 202 } else 203 bytes = bw / divisor; 204 /* We can't ever send more than 65k in a TSO */ 205 if (bytes > 0xffff) { 206 bytes = 0xffff; 207 } 208 /* Round up */ 209 new_tso = (bytes + segsiz - 1) / segsiz; 210 if (can_use_1mss) 211 min_tso_segs = 1; 212 else 213 min_tso_segs = 2; 214 if (new_tso < min_tso_segs) 215 new_tso = min_tso_segs; 216 new_tso *= segsiz; 217 return (new_tso); 218 } 219 220 /* Do nothing if RATELIMIT is not defined */ 221 static inline void 222 tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte) 223 { 224 } 225 226 static inline void 227 tcp_rl_release_ifnet(struct ifnet *ifp) 228 { 229 } 230 #endif 231 232 /* 233 * Given a b/w and a segsiz, and optional hardware 234 * rate limit, return the ideal size to burst 235 * out at once. Note the parameter can_use_1mss 236 * dictates if the transport will tolerate a 1mss 237 * limit, if not it will bottom out at 2mss (think 238 * delayed ack). 239 */ 240 static inline uint32_t 241 tcp_get_pacing_burst_size(struct tcpcb *tp, uint64_t bw, uint32_t segsiz, int can_use_1mss, 242 const struct tcp_hwrate_limit_table *te, int *err) 243 { 244 245 return (tcp_get_pacing_burst_size_w_divisor(tp, bw, segsiz, 246 can_use_1mss, 247 te, err, 0)); 248 } 249 250 #endif 251 #endif 252