1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2017 by Delphix. All rights reserved.
24 */
25
26 /*
27 * The TCP congestion control algorithm extracted from the pre-framework
28 * implementation of TCP congestion control.
29 */
30
31 #include <sys/errno.h>
32 #include <inet/tcp.h>
33 #include <inet/tcp_impl.h>
34 #include <inet/cc.h>
35 #include <inet/cc/cc_module.h>
36
/*
 * Forward declarations of the hook implementations that are installed in
 * sunreno_cc_algo further down in this file.
 */
static void	sunreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	sunreno_after_idle(struct cc_var *ccv);
static void	sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	sunreno_post_recovery(struct cc_var *ccv);
41
/* Algorithm name placed in the .name member of sunreno_cc_algo. */
#define	CC_SUNRENO_ALGO_NAME	"sunreno"
43
44 static struct modlmisc cc_sunreno_modlmisc = {
45 &mod_miscops,
46 "SUNReno Congestion Control"
47 };
48
49 static struct modlinkage cc_sunreno_modlinkage = {
50 MODREV_1,
51 &cc_sunreno_modlmisc,
52 NULL
53 };
54
55 struct cc_algo sunreno_cc_algo = {
56 .name = CC_SUNRENO_ALGO_NAME,
57 .ack_received = sunreno_ack_received,
58 .after_idle = sunreno_after_idle,
59 .cong_signal = sunreno_cong_signal,
60 .post_recovery = sunreno_post_recovery,
61 };
62
63 int
_init(void)64 _init(void)
65 {
66 int err;
67
68 if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
69 if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
70 (void) cc_deregister_algo(&sunreno_cc_algo);
71 }
72 return (err);
73 }
74
/*
 * Module unload entry point.
 *
 * Unconditionally returns EBUSY and never deregisters the algorithm.
 * NOTE(review): presumably this is what keeps the module from being
 * unloaded once it has been installed - confirm against the module
 * framework.
 */
int
_fini(void)
{
	return (EBUSY);
}
80
81 int
_info(struct modinfo * modinfop)82 _info(struct modinfo *modinfop)
83 {
84 return (mod_info(&cc_sunreno_modlinkage, modinfop));
85 }
86
87 static void
sunreno_ack_received(struct cc_var * ccv,uint16_t type)88 sunreno_ack_received(struct cc_var *ccv, uint16_t type)
89 {
90 uint32_t add;
91 uint32_t cwnd;
92 int mss;
93
94 if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
95 mss = CCV(ccv, tcp_mss);
96 cwnd = CCV(ccv, tcp_cwnd);
97 add = mss;
98
99 if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
100 /*
101 * This is to prevent an increase of less than 1 MSS of
102 * tcp_cwnd. With partial increase, tcp_wput_data()
103 * may send out tinygrams in order to preserve mblk
104 * boundaries.
105 *
106 * By initializing tcp_cwnd_cnt to new tcp_cwnd and
107 * decrementing it by 1 MSS for every ACKs, tcp_cwnd is
108 * increased by 1 MSS for every RTTs.
109 */
110 if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
111 CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
112 } else {
113 CCV(ccv, tcp_cwnd_cnt) -= add;
114 add = 0;
115 }
116 }
117 CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
118 }
119 }
120
121 static void
sunreno_after_idle(struct cc_var * ccv)122 sunreno_after_idle(struct cc_var *ccv)
123 {
124 int32_t num_sack_blk = 0;
125 int mss;
126
127 if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
128 int32_t opt_len;
129
130 num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
131 CCV(ccv, tcp_num_sack_blk));
132 opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
133 2 + TCPOPT_HEADER_LEN;
134 mss = CCV(ccv, tcp_mss) - opt_len;
135 } else {
136 mss = CCV(ccv, tcp_mss);
137 }
138
139 TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
140 CCSV(ccv, tcps_slow_start_after_idle));
141 }
142
143 /*
144 * Perform any necessary tasks before we enter congestion recovery.
145 */
146 static void
sunreno_cong_signal(struct cc_var * ccv,uint32_t type)147 sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
148 {
149 int npkt;
150 int mss;
151
152 /* Catch algos which mistakenly leak private signal types. */
153 ASSERT((type & CC_SIGPRIVMASK) == 0);
154
155 mss = CCV(ccv, tcp_mss);
156 npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;
157
158 switch (type) {
159 case CC_NDUPACK:
160 if (!IN_FASTRECOVERY(ccv->flags)) {
161 if (!IN_CONGRECOVERY(ccv->flags)) {
162 CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
163 mss;
164 CCV(ccv, tcp_cwnd) = (npkt +
165 CCV(ccv, tcp_dupack_cnt)) * mss;
166 }
167 ENTER_RECOVERY(ccv->flags);
168 }
169 break;
170 case CC_ECN:
171 if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
172 CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
173 CCV(ccv, tcp_cwnd) = npkt * mss;
174 if (CCV(ccv, tcp_cwnd) == 0) {
175 /*
176 * This makes sure that when the ACK comes
177 * back, we will increase tcp_cwnd by 1 MSS.
178 */
179 CCV(ccv, tcp_cwnd_cnt) = 0;
180 }
181 ENTER_CONGRECOVERY(ccv->flags);
182 }
183 break;
184 case CC_RTO:
185 /*
186 * After retransmission, we need to do slow start. Set the
187 * ssthresh to one half of current effective window and cwnd to
188 * one MSS. Also reset tcp_cwnd_cnt.
189 *
190 * Note that if tcp_ssthresh is reduced because of ECN, do not
191 * reduce it again unless it is already one window of data away
192 * (tcp_cwr should then be cleared) or this is a timeout for a
193 * retransmitted segment.
194 */
195 if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
196 if (CCV(ccv, tcp_timer_backoff) != 0)
197 npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
198 CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
199 }
200 CCV(ccv, tcp_cwnd) = mss;
201 CCV(ccv, tcp_cwnd_cnt) = 0;
202 break;
203 }
204 }
205
206 /*
207 * Perform any necessary tasks before we exit congestion recovery.
208 */
209 static void
sunreno_post_recovery(struct cc_var * ccv)210 sunreno_post_recovery(struct cc_var *ccv)
211 {
212 /*
213 * Restore the congestion window back to ssthresh as per RFC 5681
214 * section 3.2.
215 */
216 if (IN_FASTRECOVERY(ccv->flags)) {
217 if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
218 CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
219 }
220 }
221 CCV(ccv, tcp_cwnd_cnt) = 0;
222 }
223