1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2317a76f9SStephen Hemminger /*
3b92022f3SFabian Frederick * Pluggable TCP congestion control support and newReno
4317a76f9SStephen Hemminger * congestion control.
502582e9bSMasanari Iida * Based on ideas from I/O scheduler support and Web100.
6317a76f9SStephen Hemminger *
7317a76f9SStephen Hemminger * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
8317a76f9SStephen Hemminger */
9317a76f9SStephen Hemminger
10afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt
11afd46503SJoe Perches
12317a76f9SStephen Hemminger #include <linux/module.h>
13317a76f9SStephen Hemminger #include <linux/mm.h>
14317a76f9SStephen Hemminger #include <linux/types.h>
15317a76f9SStephen Hemminger #include <linux/list.h>
165a0e3ad6STejun Heo #include <linux/gfp.h>
17c5c6a8abSDaniel Borkmann #include <linux/jhash.h>
18317a76f9SStephen Hemminger #include <net/tcp.h>
1915fcdf6aSPing Gan #include <trace/events/tcp.h>
20317a76f9SStephen Hemminger
21317a76f9SStephen Hemminger static DEFINE_SPINLOCK(tcp_cong_list_lock);
22317a76f9SStephen Hemminger static LIST_HEAD(tcp_cong_list);
23317a76f9SStephen Hemminger
24317a76f9SStephen Hemminger /* Simple linear search, don't expect many entries! */
tcp_ca_find(const char * name)250baf26b0SMartin KaFai Lau struct tcp_congestion_ops *tcp_ca_find(const char *name)
26317a76f9SStephen Hemminger {
27317a76f9SStephen Hemminger struct tcp_congestion_ops *e;
28317a76f9SStephen Hemminger
295f8ef48dSStephen Hemminger list_for_each_entry_rcu(e, &tcp_cong_list, list) {
30317a76f9SStephen Hemminger if (strcmp(e->name, name) == 0)
31317a76f9SStephen Hemminger return e;
32317a76f9SStephen Hemminger }
33317a76f9SStephen Hemminger
34317a76f9SStephen Hemminger return NULL;
35317a76f9SStephen Hemminger }
36317a76f9SStephen Hemminger
tcp_set_ca_state(struct sock * sk,const u8 ca_state)3715fcdf6aSPing Gan void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
3815fcdf6aSPing Gan {
3915fcdf6aSPing Gan struct inet_connection_sock *icsk = inet_csk(sk);
4015fcdf6aSPing Gan
4115fcdf6aSPing Gan trace_tcp_cong_state_set(sk, ca_state);
4215fcdf6aSPing Gan
4315fcdf6aSPing Gan if (icsk->icsk_ca_ops->set_state)
4415fcdf6aSPing Gan icsk->icsk_ca_ops->set_state(sk, ca_state);
4515fcdf6aSPing Gan icsk->icsk_ca_state = ca_state;
4615fcdf6aSPing Gan }
4715fcdf6aSPing Gan
/* Must be called with rcu lock held */
static struct tcp_congestion_ops *tcp_ca_find_autoload(const char *name)
{
	struct tcp_congestion_ops *ca = tcp_ca_find(name);

#ifdef CONFIG_MODULES
	/* Not registered yet: try to load the "tcp_<name>" module and look
	 * again.  request_module() can sleep, so the RCU read section is
	 * dropped around it and re-entered before the second lookup.
	 * Module loading is restricted to CAP_NET_ADMIN.
	 */
	if (!ca && capable(CAP_NET_ADMIN)) {
		rcu_read_unlock();
		request_module("tcp_%s", name);
		rcu_read_lock();
		ca = tcp_ca_find(name);
	}
#endif
	return ca;
}
63c5c6a8abSDaniel Borkmann
64c5c6a8abSDaniel Borkmann /* Simple linear search, not much in here. */
tcp_ca_find_key(u32 key)65c5c6a8abSDaniel Borkmann struct tcp_congestion_ops *tcp_ca_find_key(u32 key)
66c5c6a8abSDaniel Borkmann {
67c5c6a8abSDaniel Borkmann struct tcp_congestion_ops *e;
68c5c6a8abSDaniel Borkmann
69c5c6a8abSDaniel Borkmann list_for_each_entry_rcu(e, &tcp_cong_list, list) {
70c5c6a8abSDaniel Borkmann if (e->key == key)
71c5c6a8abSDaniel Borkmann return e;
72c5c6a8abSDaniel Borkmann }
73c5c6a8abSDaniel Borkmann
74c5c6a8abSDaniel Borkmann return NULL;
75c5c6a8abSDaniel Borkmann }
76c5c6a8abSDaniel Borkmann
tcp_validate_congestion_control(struct tcp_congestion_ops * ca)778fb1a76aSKui-Feng Lee int tcp_validate_congestion_control(struct tcp_congestion_ops *ca)
78317a76f9SStephen Hemminger {
79e9799183SFlorian Westphal /* all algorithms must implement these */
80e9799183SFlorian Westphal if (!ca->ssthresh || !ca->undo_cwnd ||
81e9799183SFlorian Westphal !(ca->cong_avoid || ca->cong_control)) {
82afd46503SJoe Perches pr_err("%s does not implement required ops\n", ca->name);
83317a76f9SStephen Hemminger return -EINVAL;
84317a76f9SStephen Hemminger }
85317a76f9SStephen Hemminger
868fb1a76aSKui-Feng Lee return 0;
878fb1a76aSKui-Feng Lee }
888fb1a76aSKui-Feng Lee
/* Attach new congestion control algorithm to the list
 * of available options.
 * Returns 0 on success, -EINVAL for missing mandatory ops, or
 * -EEXIST when the name (key) is already registered.
 */
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
	int ret;

	ret = tcp_validate_congestion_control(ca);
	if (ret)
		return ret;

	/* Key is a hash over the (fixed-size) name array; it identifies
	 * the algorithm where storing the full name is impractical.
	 */
	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));

	/* List mutations are serialized by the spinlock; readers walk the
	 * list under RCU.
	 */
	spin_lock(&tcp_cong_list_lock);
	if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) {
		pr_notice("%s already registered or non-unique key\n",
			  ca->name);
		ret = -EEXIST;
	} else {
		list_add_tail_rcu(&ca->list, &tcp_cong_list);
		pr_debug("%s registered\n", ca->name);
	}
	spin_unlock(&tcp_cong_list_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);
116317a76f9SStephen Hemminger
/*
 * Remove congestion control algorithm, called from
 * the module's remove function. Module ref counts are used
 * to ensure that this can't be done till all sockets using
 * that method are closed.
 */
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
{
	/* Unlink under the writer lock; concurrent lookups keep working
	 * under RCU until the grace period below elapses.
	 */
	spin_lock(&tcp_cong_list_lock);
	list_del_rcu(&ca->list);
	spin_unlock(&tcp_cong_list_lock);

	/* Wait for outstanding readers to complete before the
	 * module gets removed entirely.
	 *
	 * A try_module_get() should fail by now as our module is
	 * in "going" state since no refs are held anymore and
	 * module_exit() handler being called.
	 */
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
139317a76f9SStephen Hemminger
/* Replace a registered old ca with a new one.
 *
 * The new ca must have the same name as the old one, that has been
 * registered.
 * Returns 0 on success, -EINVAL when @old_ca is not the currently
 * registered implementation of that name.
 */
int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
{
	struct tcp_congestion_ops *existing;
	int ret = 0;

	/* Key is derived from the name, so a same-named replacement hashes
	 * to the same key as the entry it replaces.
	 */
	ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));

	spin_lock(&tcp_cong_list_lock);
	existing = tcp_ca_find_key(old_ca->key);
	if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) {
		pr_notice("%s not registered or non-unique key\n",
			  ca->name);
		ret = -EINVAL;
	} else if (existing != old_ca) {
		pr_notice("invalid old congestion control algorithm to replace\n");
		ret = -EINVAL;
	} else {
		/* Add the new one before removing the old one to keep
		 * one implementation available all the time.
		 */
		list_add_tail_rcu(&ca->list, &tcp_cong_list);
		list_del_rcu(&existing->list);
		pr_debug("%s updated\n", ca->name);
	}
	spin_unlock(&tcp_cong_list_lock);

	/* Wait for outstanding readers to complete before the
	 * module or struct_ops gets removed entirely.
	 */
	if (!ret)
		synchronize_rcu();

	return ret;
}
1798fb1a76aSKui-Feng Lee
/* Map a congestion control name to its key (jhash of the name),
 * autoloading the module when permitted.  Returns TCP_CA_UNSPEC if the
 * algorithm cannot be found; *ecn_ca is only written on success.
 */
u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
{
	const struct tcp_congestion_ops *ca;
	u32 key = TCP_CA_UNSPEC;

	/* tcp_ca_find_autoload() may sleep in request_module() */
	might_sleep();

	rcu_read_lock();
	ca = tcp_ca_find_autoload(name);
	if (ca) {
		key = ca->key;
		*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
	}
	rcu_read_unlock();

	return key;
}
197c5c6a8abSDaniel Borkmann
/* Reverse lookup: copy the name of the algorithm identified by @key
 * into @buffer (at most TCP_CA_NAME_MAX bytes, NUL-terminated).
 * Returns @buffer on success, NULL when no algorithm has that key.
 */
char *tcp_ca_get_name_by_key(u32 key, char *buffer)
{
	const struct tcp_congestion_ops *ca;
	char *name = NULL;

	rcu_read_lock();
	ca = tcp_ca_find_key(key);
	if (ca) {
		strscpy(buffer, ca->name, TCP_CA_NAME_MAX);
		name = buffer;
	}
	rcu_read_unlock();

	return name;
}
213c5c6a8abSDaniel Borkmann
/* Assign choice of congestion control. */
void tcp_assign_congestion_control(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_congestion_ops *ca;

	rcu_read_lock();
	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
	/* Fall back to reno when the netns default's module reference
	 * cannot be taken (e.g. the module is being unloaded).
	 */
	if (unlikely(!bpf_try_module_get(ca, ca->owner)))
		ca = &tcp_reno;
	icsk->icsk_ca_ops = ca;
	rcu_read_unlock();

	/* Algorithm-private scratch area starts zeroed; the algorithm's
	 * init runs later via tcp_init_congestion_control().
	 */
	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
	if (ca->flags & TCP_CONG_NEEDS_ECN)
		INET_ECN_xmit(sk);
	else
		INET_ECN_dontxmit(sk);
}
234317a76f9SStephen Hemminger
/* Initialize the congestion control algorithm already attached to the
 * socket: reset undo state, run the algorithm's optional init hook and
 * mirror its ECN requirement into the socket.
 */
void tcp_init_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_sk(sk)->prior_ssthresh = 0;
	if (icsk->icsk_ca_ops->init)
		icsk->icsk_ca_ops->init(sk);
	if (tcp_ca_needs_ecn(sk))
		INET_ECN_xmit(sk);
	else
		INET_ECN_dontxmit(sk);
	icsk->icsk_ca_initialized = 1;
}
248317a76f9SStephen Hemminger
/* Switch the socket to a different congestion control algorithm:
 * release the old one, install @ca, and (re)initialize it unless the
 * socket is in CLOSE/LISTEN state, where init is deferred.
 */
static void tcp_reinit_congestion_control(struct sock *sk,
					  const struct tcp_congestion_ops *ca)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_cleanup_congestion_control(sk);
	icsk->icsk_ca_ops = ca;
	/* Record that the algorithm was chosen explicitly via setsockopt
	 * (NOTE(review): presumably consulted elsewhere to avoid
	 * overriding a user choice — confirm against callers).
	 */
	icsk->icsk_ca_setsockopt = 1;
	/* New algorithm starts with a zeroed private area. */
	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));

	if (ca->flags & TCP_CONG_NEEDS_ECN)
		INET_ECN_xmit(sk);
	else
		INET_ECN_dontxmit(sk);

	if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		tcp_init_congestion_control(sk);
}
26729ba4fffSDaniel Borkmann
268317a76f9SStephen Hemminger /* Manage refcounts on socket close. */
tcp_cleanup_congestion_control(struct sock * sk)2696687e988SArnaldo Carvalho de Melo void tcp_cleanup_congestion_control(struct sock *sk)
270317a76f9SStephen Hemminger {
2716687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk);
2726687e988SArnaldo Carvalho de Melo
2736687e988SArnaldo Carvalho de Melo if (icsk->icsk_ca_ops->release)
2746687e988SArnaldo Carvalho de Melo icsk->icsk_ca_ops->release(sk);
2750baf26b0SMartin KaFai Lau bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner);
276317a76f9SStephen Hemminger }
277317a76f9SStephen Hemminger
/* Used by sysctl to change default congestion control */
int tcp_set_default_congestion_control(struct net *net, const char *name)
{
	struct tcp_congestion_ops *ca;
	const struct tcp_congestion_ops *prev;
	int ret;

	rcu_read_lock();
	ca = tcp_ca_find_autoload(name);
	if (!ca) {
		ret = -ENOENT;
	} else if (!bpf_try_module_get(ca, ca->owner)) {
		ret = -EBUSY;
	} else if (!net_eq(net, &init_net) &&
		   !(ca->flags & TCP_CONG_NON_RESTRICTED)) {
		/* Only init netns can set default to a restricted algorithm */
		ret = -EPERM;
	} else {
		/* Swap in the new default and drop the reference the
		 * previous default was holding.
		 */
		prev = xchg(&net->ipv4.tcp_congestion_control, ca);
		if (prev)
			bpf_module_put(prev, prev->owner);

		/* Becoming a default unrestricts the algorithm for all. */
		ca->flags |= TCP_CONG_NON_RESTRICTED;
		ret = 0;
	}
	rcu_read_unlock();

	return ret;
}
307317a76f9SStephen Hemminger
/* Set default value from kernel configuration at bootup */
static int __init tcp_congestion_default(void)
{
	/* Applies the Kconfig-chosen algorithm name to the initial netns. */
	return tcp_set_default_congestion_control(&init_net,
						  CONFIG_DEFAULT_TCP_CONG);
}
late_initcall(tcp_congestion_default);
315b1736a71SStephen Hemminger
3163ff825b2SStephen Hemminger /* Build string with list of available congestion control values */
tcp_get_available_congestion_control(char * buf,size_t maxlen)3173ff825b2SStephen Hemminger void tcp_get_available_congestion_control(char *buf, size_t maxlen)
3183ff825b2SStephen Hemminger {
3193ff825b2SStephen Hemminger struct tcp_congestion_ops *ca;
3203ff825b2SStephen Hemminger size_t offs = 0;
3213ff825b2SStephen Hemminger
3223ff825b2SStephen Hemminger rcu_read_lock();
3233ff825b2SStephen Hemminger list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
3243ff825b2SStephen Hemminger offs += snprintf(buf + offs, maxlen - offs,
3253ff825b2SStephen Hemminger "%s%s",
3263ff825b2SStephen Hemminger offs == 0 ? "" : " ", ca->name);
3279bb59a21SHangbin Liu
3289bb59a21SHangbin Liu if (WARN_ON_ONCE(offs >= maxlen))
3299bb59a21SHangbin Liu break;
3303ff825b2SStephen Hemminger }
3313ff825b2SStephen Hemminger rcu_read_unlock();
3323ff825b2SStephen Hemminger }
3333ff825b2SStephen Hemminger
/* Get current default congestion control */
void tcp_get_default_congestion_control(struct net *net, char *name)
{
	const struct tcp_congestion_ops *ca;

	rcu_read_lock();
	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
	/* Copy at most TCP_CA_NAME_MAX bytes, always NUL-terminated. */
	strscpy(name, ca->name, TCP_CA_NAME_MAX);
	rcu_read_unlock();
}
344317a76f9SStephen Hemminger
/* Build list of non-restricted congestion control values */
void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
{
	struct tcp_congestion_ops *ca;
	size_t offs = 0;

	*buf = '\0';
	rcu_read_lock();
	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
		/* Only algorithms flagged non-restricted are listed. */
		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
			continue;
		/* Space-separate entries after the first one. */
		offs += snprintf(buf + offs, maxlen - offs,
				 "%s%s",
				 offs == 0 ? "" : " ", ca->name);

		if (WARN_ON_ONCE(offs >= maxlen))
			break;
	}
	rcu_read_unlock();
}
365ce7bc3bfSStephen Hemminger
/* Change list of non-restricted congestion control.
 * @val is a space-separated list of algorithm names; all must already
 * be registered or the whole update is rejected with -ENOENT.
 */
int tcp_set_allowed_congestion_control(char *val)
{
	struct tcp_congestion_ops *ca;
	char *saved_clone, *clone, *name;
	int ret = 0;

	saved_clone = clone = kstrdup(val, GFP_USER);
	if (!clone)
		return -ENOMEM;

	spin_lock(&tcp_cong_list_lock);
	/* pass 1 check for bad entries */
	while ((name = strsep(&clone, " ")) && *name) {
		ca = tcp_ca_find(name);
		if (!ca) {
			ret = -ENOENT;
			goto out;
		}
	}

	/* pass 2 clear old values */
	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
		ca->flags &= ~TCP_CONG_NON_RESTRICTED;

	/* pass 3 mark as allowed; this re-parses the caller's original
	 * string (@val), since strsep() consumed the clone in pass 1.
	 */
	while ((name = strsep(&val, " ")) && *name) {
		ca = tcp_ca_find(name);
		WARN_ON(!ca);
		if (ca)
			ca->flags |= TCP_CONG_NON_RESTRICTED;
	}
out:
	spin_unlock(&tcp_cong_list_lock);
	kfree(saved_clone);

	return ret;
}
404ce7bc3bfSStephen Hemminger
/* Change congestion control for socket. If load is false, then it is the
 * responsibility of the caller to call tcp_init_congestion_control or
 * tcp_reinit_congestion_control (if the current congestion control was
 * already initialized).
 * Returns 0 on success; -EPERM when the choice is locked or restricted,
 * -ENOENT when unknown, -EBUSY when the module reference cannot be taken.
 */
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
			       bool cap_net_admin)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_congestion_ops *ca;
	int err = 0;

	/* An algorithm pinned via the destination (icsk_ca_dst_locked)
	 * may not be overridden.
	 */
	if (icsk->icsk_ca_dst_locked)
		return -EPERM;

	rcu_read_lock();
	/* Without load, only already-registered algorithms qualify. */
	if (!load)
		ca = tcp_ca_find(name);
	else
		ca = tcp_ca_find_autoload(name);

	/* No change asking for existing value */
	if (ca == icsk->icsk_ca_ops) {
		icsk->icsk_ca_setsockopt = 1;
		goto out;
	}

	if (!ca)
		err = -ENOENT;
	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin))
		err = -EPERM;
	else if (!bpf_try_module_get(ca, ca->owner))
		err = -EBUSY;
	else
		tcp_reinit_congestion_control(sk, ca);
out:
	rcu_read_unlock();
	return err;
}
4445f8ef48dSStephen Hemminger
4459f9843a7SYuchung Cheng /* Slow start is used when congestion window is no greater than the slow start
4469f9843a7SYuchung Cheng * threshold. We base on RFC2581 and also handle stretch ACKs properly.
4479f9843a7SYuchung Cheng * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
4489f9843a7SYuchung Cheng * something better;) a packet is only considered (s)acked in its entirety to
4499f9843a7SYuchung Cheng * defend the ACK attacks described in the RFC. Slow start processes a stretch
4509f9843a7SYuchung Cheng * ACK of degree N as if N acks of degree 1 are received back to back except
4519f9843a7SYuchung Cheng * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
4529f9843a7SYuchung Cheng * returns the leftover acks to adjust cwnd in congestion avoidance mode.
45340efc6faSStephen Hemminger */
tcp_slow_start(struct tcp_sock * tp,u32 acked)454400031e0SDavid Vernet __bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
45540efc6faSStephen Hemminger {
45640570375SEric Dumazet u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
457973ec449SEric Dumazet
45840570375SEric Dumazet acked -= cwnd - tcp_snd_cwnd(tp);
45940570375SEric Dumazet tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
460e73ebb08SNeal Cardwell
461e73ebb08SNeal Cardwell return acked;
462886236c1SJohn Heffner }
46340efc6faSStephen Hemminger EXPORT_SYMBOL_GPL(tcp_slow_start);
46440efc6faSStephen Hemminger
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
 * for every packet that was ACKed.
 */
__bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
	/* If credits accumulated at a higher w, apply them gently now. */
	if (tp->snd_cwnd_cnt >= w) {
		tp->snd_cwnd_cnt = 0;
		tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
	}

	tp->snd_cwnd_cnt += acked;
	if (tp->snd_cwnd_cnt >= w) {
		/* Stretch ACKs may earn more than one full window's worth
		 * of credit at once.
		 */
		u32 delta = tp->snd_cwnd_cnt / w;

		tp->snd_cwnd_cnt -= delta * w;
		tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
	}
	/* Never let cwnd exceed the clamp. */
	tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
486758ce5c8SIlpo Järvinen
/*
 * TCP Reno congestion control
 * This is special case used for fallback as well.
 */
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
__bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Don't grow cwnd when the sender isn't using what it has. */
	if (!tcp_is_cwnd_limited(sk))
		return;

	/* In "safe" area, increase. */
	if (tcp_in_slow_start(tp)) {
		acked = tcp_slow_start(tp, acked);
		/* All acks consumed by slow start; nothing left for AI. */
		if (!acked)
			return;
	}
	/* In dangerous area, increase slowly. */
	tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
511317a76f9SStephen Hemminger
512317a76f9SStephen Hemminger /* Slow start threshold is half the congestion window (min 2) */
tcp_reno_ssthresh(struct sock * sk)513400031e0SDavid Vernet __bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk)
514317a76f9SStephen Hemminger {
5156687e988SArnaldo Carvalho de Melo const struct tcp_sock *tp = tcp_sk(sk);
516688d1945Sstephen hemminger
51740570375SEric Dumazet return max(tcp_snd_cwnd(tp) >> 1U, 2U);
518317a76f9SStephen Hemminger }
519317a76f9SStephen Hemminger EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
520317a76f9SStephen Hemminger
tcp_reno_undo_cwnd(struct sock * sk)521400031e0SDavid Vernet __bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk)
522e9799183SFlorian Westphal {
523e9799183SFlorian Westphal const struct tcp_sock *tp = tcp_sk(sk);
524e9799183SFlorian Westphal
52540570375SEric Dumazet return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
526e9799183SFlorian Westphal }
527e9799183SFlorian Westphal EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
528e9799183SFlorian Westphal
/* Reno ops table: the built-in fallback algorithm (see
 * tcp_assign_congestion_control()), usable by all (NON_RESTRICTED).
 */
struct tcp_congestion_ops tcp_reno = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "reno",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.undo_cwnd	= tcp_reno_undo_cwnd,
};
537