xref: /freebsd/sys/netinet/cc/cc_dctcp.c (revision 4644fda3f7a455e47f45a51a2e986d6b1fd6d0f9)
164807b30SHiren Panchasara /*-
264807b30SHiren Panchasara  * Copyright (c) 2007-2008
364807b30SHiren Panchasara  *	Swinburne University of Technology, Melbourne, Australia
464807b30SHiren Panchasara  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
564807b30SHiren Panchasara  * Copyright (c) 2014 Midori Kato <katoon@sfc.wide.ad.jp>
664807b30SHiren Panchasara  * Copyright (c) 2014 The FreeBSD Foundation
764807b30SHiren Panchasara  * All rights reserved.
864807b30SHiren Panchasara  *
964807b30SHiren Panchasara  * Redistribution and use in source and binary forms, with or without
1064807b30SHiren Panchasara  * modification, are permitted provided that the following conditions
1164807b30SHiren Panchasara  * are met:
1264807b30SHiren Panchasara  * 1. Redistributions of source code must retain the above copyright
1364807b30SHiren Panchasara  *    notice, this list of conditions and the following disclaimer.
1464807b30SHiren Panchasara  * 2. Redistributions in binary form must reproduce the above copyright
1564807b30SHiren Panchasara  *    notice, this list of conditions and the following disclaimer in the
1664807b30SHiren Panchasara  *    documentation and/or other materials provided with the distribution.
1764807b30SHiren Panchasara  *
1864807b30SHiren Panchasara  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1964807b30SHiren Panchasara  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2064807b30SHiren Panchasara  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2164807b30SHiren Panchasara  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2264807b30SHiren Panchasara  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2364807b30SHiren Panchasara  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2464807b30SHiren Panchasara  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2564807b30SHiren Panchasara  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2664807b30SHiren Panchasara  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2764807b30SHiren Panchasara  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2864807b30SHiren Panchasara  * SUCH DAMAGE.
2964807b30SHiren Panchasara  */
3064807b30SHiren Panchasara 
3164807b30SHiren Panchasara /*
3264807b30SHiren Panchasara  * An implementation of the DCTCP algorithm for FreeBSD, based on
3364807b30SHiren Panchasara  * "Data Center TCP (DCTCP)" by M. Alizadeh, A. Greenberg, D. A. Maltz,
3464807b30SHiren Panchasara  * J. Padhye, P. Patel, B. Prabhakar, S. Sengupta, and M. Sridharan.,
3564807b30SHiren Panchasara  * in ACM Conference on SIGCOMM 2010, New York, USA,
3664807b30SHiren Panchasara  * Originally released as the contribution of Microsoft Research project.
3764807b30SHiren Panchasara  */
3864807b30SHiren Panchasara 
3964807b30SHiren Panchasara #include <sys/cdefs.h>
4064807b30SHiren Panchasara __FBSDID("$FreeBSD$");
4164807b30SHiren Panchasara 
4264807b30SHiren Panchasara #include <sys/param.h>
4364807b30SHiren Panchasara #include <sys/kernel.h>
4464807b30SHiren Panchasara #include <sys/malloc.h>
4564807b30SHiren Panchasara #include <sys/module.h>
4664807b30SHiren Panchasara #include <sys/socket.h>
4764807b30SHiren Panchasara #include <sys/socketvar.h>
4864807b30SHiren Panchasara #include <sys/sysctl.h>
4964807b30SHiren Panchasara #include <sys/systm.h>
5064807b30SHiren Panchasara 
5164807b30SHiren Panchasara #include <net/vnet.h>
5264807b30SHiren Panchasara 
532de3e790SGleb Smirnoff #include <netinet/tcp.h>
5464807b30SHiren Panchasara #include <netinet/tcp_seq.h>
5564807b30SHiren Panchasara #include <netinet/tcp_var.h>
56*4644fda3SGleb Smirnoff #include <netinet/cc/cc.h>
5764807b30SHiren Panchasara #include <netinet/cc/cc_module.h>
5864807b30SHiren Panchasara 
5964807b30SHiren Panchasara #define	CAST_PTR_INT(X)	(*((int*)(X)))
6064807b30SHiren Panchasara 
6164807b30SHiren Panchasara #define MAX_ALPHA_VALUE 1024
6264807b30SHiren Panchasara static VNET_DEFINE(uint32_t, dctcp_alpha) = 0;
6364807b30SHiren Panchasara #define V_dctcp_alpha	    VNET(dctcp_alpha)
6464807b30SHiren Panchasara static VNET_DEFINE(uint32_t, dctcp_shift_g) = 4;
6564807b30SHiren Panchasara #define	V_dctcp_shift_g	    VNET(dctcp_shift_g)
6664807b30SHiren Panchasara static VNET_DEFINE(uint32_t, dctcp_slowstart) = 0;
6764807b30SHiren Panchasara #define	V_dctcp_slowstart   VNET(dctcp_slowstart)
6864807b30SHiren Panchasara 
/*
 * Per-connection DCTCP state, allocated in dctcp_cb_init() and hung off
 * ccv->cc_data.
 */
struct dctcp {
	int     bytes_ecn;	/* # of marked bytes during a RTT */
	int     bytes_total;	/* # of acked bytes during a RTT */
	int     alpha;		/* the fraction of marked bytes, scaled so that
				 * MAX_ALPHA_VALUE is the upper bound */
	int     ce_prev;	/* CE state of the last segment */
	/*
	 * End sequence number of the current window.  This is a TCP sequence
	 * number (it is loaded from snd_nxt and compared with SEQ_GT), so it
	 * is kept as an unsigned 32-bit value: wraparound is then well
	 * defined instead of relying on signed conversion/overflow.
	 */
	uint32_t save_sndnxt;
	int	ece_curr;	/* ECE flag in this segment */
	int	ece_prev;	/* ECE flag in the last segment */
	uint32_t    num_cong_events; /* # of congestion events */
};
7964807b30SHiren Panchasara 
8064807b30SHiren Panchasara static MALLOC_DEFINE(M_dctcp, "dctcp data",
8164807b30SHiren Panchasara     "Per connection data required for the dctcp algorithm");
8264807b30SHiren Panchasara 
8364807b30SHiren Panchasara static void	dctcp_ack_received(struct cc_var *ccv, uint16_t type);
8464807b30SHiren Panchasara static void	dctcp_after_idle(struct cc_var *ccv);
8564807b30SHiren Panchasara static void	dctcp_cb_destroy(struct cc_var *ccv);
8664807b30SHiren Panchasara static int	dctcp_cb_init(struct cc_var *ccv);
8764807b30SHiren Panchasara static void	dctcp_cong_signal(struct cc_var *ccv, uint32_t type);
8864807b30SHiren Panchasara static void	dctcp_conn_init(struct cc_var *ccv);
8964807b30SHiren Panchasara static void	dctcp_post_recovery(struct cc_var *ccv);
9064807b30SHiren Panchasara static void	dctcp_ecnpkt_handler(struct cc_var *ccv);
9164807b30SHiren Panchasara static void	dctcp_update_alpha(struct cc_var *ccv);
9264807b30SHiren Panchasara 
9364807b30SHiren Panchasara struct cc_algo dctcp_cc_algo = {
9464807b30SHiren Panchasara 	.name = "dctcp",
9564807b30SHiren Panchasara 	.ack_received = dctcp_ack_received,
9664807b30SHiren Panchasara 	.cb_destroy = dctcp_cb_destroy,
9764807b30SHiren Panchasara 	.cb_init = dctcp_cb_init,
9864807b30SHiren Panchasara 	.cong_signal = dctcp_cong_signal,
9964807b30SHiren Panchasara 	.conn_init = dctcp_conn_init,
10064807b30SHiren Panchasara 	.post_recovery = dctcp_post_recovery,
10164807b30SHiren Panchasara 	.ecnpkt_handler = dctcp_ecnpkt_handler,
10264807b30SHiren Panchasara 	.after_idle = dctcp_after_idle,
10364807b30SHiren Panchasara };
10464807b30SHiren Panchasara 
10564807b30SHiren Panchasara static void
10664807b30SHiren Panchasara dctcp_ack_received(struct cc_var *ccv, uint16_t type)
10764807b30SHiren Panchasara {
10864807b30SHiren Panchasara 	struct dctcp *dctcp_data;
10964807b30SHiren Panchasara 	int bytes_acked = 0;
11064807b30SHiren Panchasara 
11164807b30SHiren Panchasara 	dctcp_data = ccv->cc_data;
11264807b30SHiren Panchasara 
11364807b30SHiren Panchasara 	if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
11464807b30SHiren Panchasara 		/*
11564807b30SHiren Panchasara 		 * DCTCP doesn't treat receipt of ECN marked packet as a
11664807b30SHiren Panchasara 		 * congestion event. Thus, DCTCP always executes the ACK
11764807b30SHiren Panchasara 		 * processing out of congestion recovery.
11864807b30SHiren Panchasara 		 */
11964807b30SHiren Panchasara 		if (IN_CONGRECOVERY(CCV(ccv, t_flags))) {
12064807b30SHiren Panchasara 			EXIT_CONGRECOVERY(CCV(ccv, t_flags));
12164807b30SHiren Panchasara 			newreno_cc_algo.ack_received(ccv, type);
12264807b30SHiren Panchasara 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
12364807b30SHiren Panchasara 		} else
12464807b30SHiren Panchasara 			newreno_cc_algo.ack_received(ccv, type);
12564807b30SHiren Panchasara 
12664807b30SHiren Panchasara 		if (type == CC_DUPACK)
12764807b30SHiren Panchasara 			bytes_acked = CCV(ccv, t_maxseg);
12864807b30SHiren Panchasara 
12964807b30SHiren Panchasara 		if (type == CC_ACK)
13064807b30SHiren Panchasara 			bytes_acked = ccv->bytes_this_ack;
13164807b30SHiren Panchasara 
13264807b30SHiren Panchasara 		/* Update total bytes. */
13364807b30SHiren Panchasara 		dctcp_data->bytes_total += bytes_acked;
13464807b30SHiren Panchasara 
13564807b30SHiren Panchasara 		/* Update total marked bytes. */
13664807b30SHiren Panchasara 		if (dctcp_data->ece_curr) {
13764807b30SHiren Panchasara 			if (!dctcp_data->ece_prev
13864807b30SHiren Panchasara 			    && bytes_acked > CCV(ccv, t_maxseg)) {
13964807b30SHiren Panchasara 				dctcp_data->bytes_ecn +=
14064807b30SHiren Panchasara 				    (bytes_acked - CCV(ccv, t_maxseg));
14164807b30SHiren Panchasara 			} else
14264807b30SHiren Panchasara 				dctcp_data->bytes_ecn += bytes_acked;
14364807b30SHiren Panchasara 			dctcp_data->ece_prev = 1;
14464807b30SHiren Panchasara 		} else {
14564807b30SHiren Panchasara 			if (dctcp_data->ece_prev
14664807b30SHiren Panchasara 			    && bytes_acked > CCV(ccv, t_maxseg))
14764807b30SHiren Panchasara 				dctcp_data->bytes_ecn += CCV(ccv, t_maxseg);
14864807b30SHiren Panchasara 			dctcp_data->ece_prev = 0;
14964807b30SHiren Panchasara 		}
15064807b30SHiren Panchasara 		dctcp_data->ece_curr = 0;
15164807b30SHiren Panchasara 
15264807b30SHiren Panchasara 		/*
15364807b30SHiren Panchasara 		 * Update the fraction of marked bytes at the end of
15464807b30SHiren Panchasara 		 * current window size.
15564807b30SHiren Panchasara 		 */
15664807b30SHiren Panchasara 		if ((IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
15764807b30SHiren Panchasara 		    SEQ_GEQ(ccv->curack, CCV(ccv, snd_recover))) ||
15864807b30SHiren Panchasara 		    (!IN_FASTRECOVERY(CCV(ccv, t_flags)) &&
15964807b30SHiren Panchasara 		    SEQ_GT(ccv->curack, dctcp_data->save_sndnxt)))
16064807b30SHiren Panchasara 			dctcp_update_alpha(ccv);
16164807b30SHiren Panchasara 	} else
16264807b30SHiren Panchasara 		newreno_cc_algo.ack_received(ccv, type);
16364807b30SHiren Panchasara }
16464807b30SHiren Panchasara 
16564807b30SHiren Panchasara static void
16664807b30SHiren Panchasara dctcp_after_idle(struct cc_var *ccv)
16764807b30SHiren Panchasara {
16864807b30SHiren Panchasara 	struct dctcp *dctcp_data;
16964807b30SHiren Panchasara 
17064807b30SHiren Panchasara 	dctcp_data = ccv->cc_data;
17164807b30SHiren Panchasara 
17264807b30SHiren Panchasara 	/* Initialize internal parameters after idle time */
17364807b30SHiren Panchasara 	dctcp_data->bytes_ecn = 0;
17464807b30SHiren Panchasara 	dctcp_data->bytes_total = 0;
17564807b30SHiren Panchasara 	dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
17664807b30SHiren Panchasara 	dctcp_data->alpha = V_dctcp_alpha;
17764807b30SHiren Panchasara 	dctcp_data->ece_curr = 0;
17864807b30SHiren Panchasara 	dctcp_data->ece_prev = 0;
17964807b30SHiren Panchasara 	dctcp_data->num_cong_events = 0;
18064807b30SHiren Panchasara 
18164807b30SHiren Panchasara 	dctcp_cc_algo.after_idle = newreno_cc_algo.after_idle;
18264807b30SHiren Panchasara }
18364807b30SHiren Panchasara 
18464807b30SHiren Panchasara static void
18564807b30SHiren Panchasara dctcp_cb_destroy(struct cc_var *ccv)
18664807b30SHiren Panchasara {
18764807b30SHiren Panchasara 	if (ccv->cc_data != NULL)
18864807b30SHiren Panchasara 		free(ccv->cc_data, M_dctcp);
18964807b30SHiren Panchasara }
19064807b30SHiren Panchasara 
19164807b30SHiren Panchasara static int
19264807b30SHiren Panchasara dctcp_cb_init(struct cc_var *ccv)
19364807b30SHiren Panchasara {
19464807b30SHiren Panchasara 	struct dctcp *dctcp_data;
19564807b30SHiren Panchasara 
19664807b30SHiren Panchasara 	dctcp_data = malloc(sizeof(struct dctcp), M_dctcp, M_NOWAIT|M_ZERO);
19764807b30SHiren Panchasara 
19864807b30SHiren Panchasara 	if (dctcp_data == NULL)
19964807b30SHiren Panchasara 		return (ENOMEM);
20064807b30SHiren Panchasara 
20164807b30SHiren Panchasara 	/* Initialize some key variables with sensible defaults. */
20264807b30SHiren Panchasara 	dctcp_data->bytes_ecn = 0;
20364807b30SHiren Panchasara 	dctcp_data->bytes_total = 0;
20464807b30SHiren Panchasara 	/*
20564807b30SHiren Panchasara 	 * When alpha is set to 0 in the beggining, DCTCP sender transfers as
20664807b30SHiren Panchasara 	 * much data as possible until the value converges which may expand the
20764807b30SHiren Panchasara 	 * queueing delay at the switch. When alpha is set to 1, queueing delay
20864807b30SHiren Panchasara 	 * is kept small.
20964807b30SHiren Panchasara 	 * Throughput-sensitive applications should have alpha = 0
21064807b30SHiren Panchasara 	 * Latency-sensitive applications should have alpha = 1
21164807b30SHiren Panchasara 	 *
21264807b30SHiren Panchasara 	 * Note: DCTCP draft suggests initial alpha to be 1 but we've decided to
21364807b30SHiren Panchasara 	 * keep it 0 as default.
21464807b30SHiren Panchasara 	 */
21564807b30SHiren Panchasara 	dctcp_data->alpha = V_dctcp_alpha;
21664807b30SHiren Panchasara 	dctcp_data->save_sndnxt = 0;
21764807b30SHiren Panchasara 	dctcp_data->ce_prev = 0;
21864807b30SHiren Panchasara 	dctcp_data->ece_curr = 0;
21964807b30SHiren Panchasara 	dctcp_data->ece_prev = 0;
22064807b30SHiren Panchasara 	dctcp_data->num_cong_events = 0;
22164807b30SHiren Panchasara 
22264807b30SHiren Panchasara 	ccv->cc_data = dctcp_data;
22364807b30SHiren Panchasara 	return (0);
22464807b30SHiren Panchasara }
22564807b30SHiren Panchasara 
22664807b30SHiren Panchasara /*
22764807b30SHiren Panchasara  * Perform any necessary tasks before we enter congestion recovery.
22864807b30SHiren Panchasara  */
22964807b30SHiren Panchasara static void
23064807b30SHiren Panchasara dctcp_cong_signal(struct cc_var *ccv, uint32_t type)
23164807b30SHiren Panchasara {
23264807b30SHiren Panchasara 	struct dctcp *dctcp_data;
23364807b30SHiren Panchasara 	u_int win, mss;
23464807b30SHiren Panchasara 
23564807b30SHiren Panchasara 	dctcp_data = ccv->cc_data;
23664807b30SHiren Panchasara 	win = CCV(ccv, snd_cwnd);
23764807b30SHiren Panchasara 	mss = CCV(ccv, t_maxseg);
23864807b30SHiren Panchasara 
23964807b30SHiren Panchasara 	switch (type) {
24064807b30SHiren Panchasara 	case CC_NDUPACK:
24164807b30SHiren Panchasara 		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
24264807b30SHiren Panchasara 			if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
24364807b30SHiren Panchasara 				CCV(ccv, snd_ssthresh) = mss *
24464807b30SHiren Panchasara 				    max(win / 2 / mss, 2);
24564807b30SHiren Panchasara 				dctcp_data->num_cong_events++;
24664807b30SHiren Panchasara 			} else {
24764807b30SHiren Panchasara 				/* cwnd has already updated as congestion
24864807b30SHiren Panchasara 				 * recovery. Reverse cwnd value using
24964807b30SHiren Panchasara 				 * snd_cwnd_prev and recalculate snd_ssthresh
25064807b30SHiren Panchasara 				 */
25164807b30SHiren Panchasara 				win = CCV(ccv, snd_cwnd_prev);
25264807b30SHiren Panchasara 				CCV(ccv, snd_ssthresh) =
25364807b30SHiren Panchasara 				    max(win / 2 / mss, 2) * mss;
25464807b30SHiren Panchasara 			}
25564807b30SHiren Panchasara 			ENTER_RECOVERY(CCV(ccv, t_flags));
25664807b30SHiren Panchasara 		}
25764807b30SHiren Panchasara 		break;
25864807b30SHiren Panchasara 	case CC_ECN:
25964807b30SHiren Panchasara 		/*
26064807b30SHiren Panchasara 		 * Save current snd_cwnd when the host encounters both
26164807b30SHiren Panchasara 		 * congestion recovery and fast recovery.
26264807b30SHiren Panchasara 		 */
26364807b30SHiren Panchasara 		CCV(ccv, snd_cwnd_prev) = win;
26464807b30SHiren Panchasara 		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
26564807b30SHiren Panchasara 			if (V_dctcp_slowstart &&
26664807b30SHiren Panchasara 			    dctcp_data->num_cong_events++ == 0) {
26764807b30SHiren Panchasara 				CCV(ccv, snd_ssthresh) =
26864807b30SHiren Panchasara 				    mss * max(win / 2 / mss, 2);
26964807b30SHiren Panchasara 				dctcp_data->alpha = MAX_ALPHA_VALUE;
27064807b30SHiren Panchasara 				dctcp_data->bytes_ecn = 0;
27164807b30SHiren Panchasara 				dctcp_data->bytes_total = 0;
27264807b30SHiren Panchasara 				dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
27364807b30SHiren Panchasara 			} else
27464807b30SHiren Panchasara 				CCV(ccv, snd_ssthresh) = max((win - ((win *
27564807b30SHiren Panchasara 				    dctcp_data->alpha) >> 11)) / mss, 2) * mss;
27664807b30SHiren Panchasara 			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
27764807b30SHiren Panchasara 			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
27864807b30SHiren Panchasara 		}
27964807b30SHiren Panchasara 		dctcp_data->ece_curr = 1;
28064807b30SHiren Panchasara 		break;
28164807b30SHiren Panchasara 	case CC_RTO:
28264807b30SHiren Panchasara 		if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
28364807b30SHiren Panchasara 			CCV(ccv, t_flags) |= TF_ECN_SND_CWR;
28464807b30SHiren Panchasara 			dctcp_update_alpha(ccv);
28564807b30SHiren Panchasara 			dctcp_data->save_sndnxt += CCV(ccv, t_maxseg);
28664807b30SHiren Panchasara 			dctcp_data->num_cong_events++;
28764807b30SHiren Panchasara 		}
28864807b30SHiren Panchasara 		break;
28964807b30SHiren Panchasara 	}
29064807b30SHiren Panchasara }
29164807b30SHiren Panchasara 
29264807b30SHiren Panchasara static void
29364807b30SHiren Panchasara dctcp_conn_init(struct cc_var *ccv)
29464807b30SHiren Panchasara {
29564807b30SHiren Panchasara 	struct dctcp *dctcp_data;
29664807b30SHiren Panchasara 
29764807b30SHiren Panchasara 	dctcp_data = ccv->cc_data;
29864807b30SHiren Panchasara 
29964807b30SHiren Panchasara 	if (CCV(ccv, t_flags) & TF_ECN_PERMIT)
30064807b30SHiren Panchasara 		dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
30164807b30SHiren Panchasara }
30264807b30SHiren Panchasara 
30364807b30SHiren Panchasara /*
30464807b30SHiren Panchasara  * Perform any necessary tasks before we exit congestion recovery.
30564807b30SHiren Panchasara  */
30664807b30SHiren Panchasara static void
30764807b30SHiren Panchasara dctcp_post_recovery(struct cc_var *ccv)
30864807b30SHiren Panchasara {
30964807b30SHiren Panchasara 	dctcp_cc_algo.post_recovery = newreno_cc_algo.post_recovery;
31064807b30SHiren Panchasara 
31164807b30SHiren Panchasara 	if (CCV(ccv, t_flags) & TF_ECN_PERMIT)
31264807b30SHiren Panchasara 		dctcp_update_alpha(ccv);
31364807b30SHiren Panchasara }
31464807b30SHiren Panchasara 
31564807b30SHiren Panchasara /*
31664807b30SHiren Panchasara  * Execute an additional ECN processing using ECN field in IP header and the CWR
31764807b30SHiren Panchasara  * bit in TCP header.
31864807b30SHiren Panchasara  *
31964807b30SHiren Panchasara  * delay_ack == 0 - Delayed ACK disabled
32064807b30SHiren Panchasara  * delay_ack == 1 - Delayed ACK enabled
32164807b30SHiren Panchasara  */
32264807b30SHiren Panchasara 
32364807b30SHiren Panchasara static void
32464807b30SHiren Panchasara dctcp_ecnpkt_handler(struct cc_var *ccv)
32564807b30SHiren Panchasara {
32664807b30SHiren Panchasara 	struct dctcp *dctcp_data;
32764807b30SHiren Panchasara 	uint32_t ccflag;
32864807b30SHiren Panchasara 	int delay_ack;
32964807b30SHiren Panchasara 
33064807b30SHiren Panchasara 	dctcp_data = ccv->cc_data;
33164807b30SHiren Panchasara 	ccflag = ccv->flags;
33264807b30SHiren Panchasara 	delay_ack = 1;
33364807b30SHiren Panchasara 
33464807b30SHiren Panchasara 	/*
33564807b30SHiren Panchasara 	 * DCTCP responses an ACK immediately when the CE state
33664807b30SHiren Panchasara 	 * in between this segment and the last segment is not same.
33764807b30SHiren Panchasara 	 */
33864807b30SHiren Panchasara 	if (ccflag & CCF_IPHDR_CE) {
33964807b30SHiren Panchasara 		if (!dctcp_data->ce_prev && (ccflag & CCF_DELACK))
34064807b30SHiren Panchasara 			delay_ack = 0;
34164807b30SHiren Panchasara 		dctcp_data->ce_prev = 1;
34264807b30SHiren Panchasara 		CCV(ccv, t_flags) |= TF_ECN_SND_ECE;
34364807b30SHiren Panchasara 	} else {
34464807b30SHiren Panchasara 		if (dctcp_data->ce_prev && (ccflag & CCF_DELACK))
34564807b30SHiren Panchasara 			delay_ack = 0;
34664807b30SHiren Panchasara 		dctcp_data->ce_prev = 0;
34764807b30SHiren Panchasara 		CCV(ccv, t_flags) &= ~TF_ECN_SND_ECE;
34864807b30SHiren Panchasara 	}
34964807b30SHiren Panchasara 
35064807b30SHiren Panchasara 	/* DCTCP sets delayed ack when this segment sets the CWR flag. */
35164807b30SHiren Panchasara 	if ((ccflag & CCF_DELACK) && (ccflag & CCF_TCPHDR_CWR))
35264807b30SHiren Panchasara 		delay_ack = 1;
35364807b30SHiren Panchasara 
35464807b30SHiren Panchasara 	if (delay_ack == 0)
35564807b30SHiren Panchasara 		ccv->flags |= CCF_ACKNOW;
35664807b30SHiren Panchasara 	else
35764807b30SHiren Panchasara 		ccv->flags &= ~CCF_ACKNOW;
35864807b30SHiren Panchasara }
35964807b30SHiren Panchasara 
36064807b30SHiren Panchasara /*
36164807b30SHiren Panchasara  * Update the fraction of marked bytes represented as 'alpha'.
36264807b30SHiren Panchasara  * Also initialize several internal parameters at the end of this function.
36364807b30SHiren Panchasara  */
36464807b30SHiren Panchasara static void
36564807b30SHiren Panchasara dctcp_update_alpha(struct cc_var *ccv)
36664807b30SHiren Panchasara {
36764807b30SHiren Panchasara 	struct dctcp *dctcp_data;
36864807b30SHiren Panchasara 	int alpha_prev;
36964807b30SHiren Panchasara 
37064807b30SHiren Panchasara 	dctcp_data = ccv->cc_data;
37164807b30SHiren Panchasara 	alpha_prev = dctcp_data->alpha;
37264807b30SHiren Panchasara 	dctcp_data->bytes_total = max(dctcp_data->bytes_total, 1);
37364807b30SHiren Panchasara 
37464807b30SHiren Panchasara 	/*
37564807b30SHiren Panchasara 	 * Update alpha: alpha = (1 - g) * alpha + g * F.
37664807b30SHiren Panchasara 	 * Here:
37764807b30SHiren Panchasara 	 * g is weight factor
37864807b30SHiren Panchasara 	 *	recommaded to be set to 1/16
37964807b30SHiren Panchasara 	 *	small g = slow convergence between competitive DCTCP flows
38064807b30SHiren Panchasara 	 *	large g = impacts low utilization of bandwidth at switches
38164807b30SHiren Panchasara 	 * F is fraction of marked segments in last RTT
38264807b30SHiren Panchasara 	 *	updated every RTT
38364807b30SHiren Panchasara 	 * Alpha must be round to 0 - MAX_ALPHA_VALUE.
38464807b30SHiren Panchasara 	 */
38564807b30SHiren Panchasara 	dctcp_data->alpha = min(alpha_prev - (alpha_prev >> V_dctcp_shift_g) +
38664807b30SHiren Panchasara 	    (dctcp_data->bytes_ecn << (10 - V_dctcp_shift_g)) /
38764807b30SHiren Panchasara 	    dctcp_data->bytes_total, MAX_ALPHA_VALUE);
38864807b30SHiren Panchasara 
38964807b30SHiren Panchasara 	/* Initialize internal parameters for next alpha calculation */
39064807b30SHiren Panchasara 	dctcp_data->bytes_ecn = 0;
39164807b30SHiren Panchasara 	dctcp_data->bytes_total = 0;
39264807b30SHiren Panchasara 	dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
39364807b30SHiren Panchasara }
39464807b30SHiren Panchasara 
39564807b30SHiren Panchasara static int
39664807b30SHiren Panchasara dctcp_alpha_handler(SYSCTL_HANDLER_ARGS)
39764807b30SHiren Panchasara {
39864807b30SHiren Panchasara 	uint32_t new;
39964807b30SHiren Panchasara 	int error;
40064807b30SHiren Panchasara 
40164807b30SHiren Panchasara 	new = V_dctcp_alpha;
40264807b30SHiren Panchasara 	error = sysctl_handle_int(oidp, &new, 0, req);
40364807b30SHiren Panchasara 	if (error == 0 && req->newptr != NULL) {
40464807b30SHiren Panchasara 		if (CAST_PTR_INT(req->newptr) > 1)
40564807b30SHiren Panchasara 			error = EINVAL;
40664807b30SHiren Panchasara 		else {
40764807b30SHiren Panchasara 			if (new > MAX_ALPHA_VALUE)
40864807b30SHiren Panchasara 				V_dctcp_alpha = MAX_ALPHA_VALUE;
40964807b30SHiren Panchasara 			else
41064807b30SHiren Panchasara 				V_dctcp_alpha = new;
41164807b30SHiren Panchasara 		}
41264807b30SHiren Panchasara 	}
41364807b30SHiren Panchasara 
41464807b30SHiren Panchasara 	return (error);
41564807b30SHiren Panchasara }
41664807b30SHiren Panchasara 
41764807b30SHiren Panchasara static int
41864807b30SHiren Panchasara dctcp_shift_g_handler(SYSCTL_HANDLER_ARGS)
41964807b30SHiren Panchasara {
42064807b30SHiren Panchasara 	uint32_t new;
42164807b30SHiren Panchasara 	int error;
42264807b30SHiren Panchasara 
42364807b30SHiren Panchasara 	new = V_dctcp_shift_g;
42464807b30SHiren Panchasara 	error = sysctl_handle_int(oidp, &new, 0, req);
42564807b30SHiren Panchasara 	if (error == 0 && req->newptr != NULL) {
42664807b30SHiren Panchasara 		if (CAST_PTR_INT(req->newptr) > 1)
42764807b30SHiren Panchasara 			error = EINVAL;
42864807b30SHiren Panchasara 		else
42964807b30SHiren Panchasara 			V_dctcp_shift_g = new;
43064807b30SHiren Panchasara 	}
43164807b30SHiren Panchasara 
43264807b30SHiren Panchasara 	return (error);
43364807b30SHiren Panchasara }
43464807b30SHiren Panchasara 
43564807b30SHiren Panchasara static int
43664807b30SHiren Panchasara dctcp_slowstart_handler(SYSCTL_HANDLER_ARGS)
43764807b30SHiren Panchasara {
43864807b30SHiren Panchasara 	uint32_t new;
43964807b30SHiren Panchasara 	int error;
44064807b30SHiren Panchasara 
44164807b30SHiren Panchasara 	new = V_dctcp_slowstart;
44264807b30SHiren Panchasara 	error = sysctl_handle_int(oidp, &new, 0, req);
44364807b30SHiren Panchasara 	if (error == 0 && req->newptr != NULL) {
44464807b30SHiren Panchasara 		if (CAST_PTR_INT(req->newptr) > 1)
44564807b30SHiren Panchasara 			error = EINVAL;
44664807b30SHiren Panchasara 		else
44764807b30SHiren Panchasara 			V_dctcp_slowstart = new;
44864807b30SHiren Panchasara 	}
44964807b30SHiren Panchasara 
45064807b30SHiren Panchasara 	return (error);
45164807b30SHiren Panchasara }
45264807b30SHiren Panchasara 
45364807b30SHiren Panchasara SYSCTL_DECL(_net_inet_tcp_cc_dctcp);
45464807b30SHiren Panchasara SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, dctcp, CTLFLAG_RW, NULL,
45564807b30SHiren Panchasara     "dctcp congestion control related settings");
45664807b30SHiren Panchasara 
45764807b30SHiren Panchasara SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, alpha,
45864807b30SHiren Panchasara     CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(dctcp_alpha), 0,
45964807b30SHiren Panchasara     &dctcp_alpha_handler,
46064807b30SHiren Panchasara     "IU", "dctcp alpha parameter");
46164807b30SHiren Panchasara 
46264807b30SHiren Panchasara SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, shift_g,
46364807b30SHiren Panchasara     CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(dctcp_shift_g), 4,
46464807b30SHiren Panchasara     &dctcp_shift_g_handler,
46564807b30SHiren Panchasara     "IU", "dctcp shift parameter");
46664807b30SHiren Panchasara 
46764807b30SHiren Panchasara SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, slowstart,
46864807b30SHiren Panchasara     CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(dctcp_slowstart), 0,
46964807b30SHiren Panchasara     &dctcp_slowstart_handler,
47064807b30SHiren Panchasara     "IU", "half CWND reduction after the first slow start");
47164807b30SHiren Panchasara 
47264807b30SHiren Panchasara DECLARE_CC_MODULE(dctcp, &dctcp_cc_algo);
473