xref: /linux/include/net/codel_impl.h (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
1d068ca2aSMichal Kazior #ifndef __NET_SCHED_CODEL_IMPL_H
2d068ca2aSMichal Kazior #define __NET_SCHED_CODEL_IMPL_H
3d068ca2aSMichal Kazior 
4d068ca2aSMichal Kazior /*
5d068ca2aSMichal Kazior  * Codel - The Controlled-Delay Active Queue Management algorithm
6d068ca2aSMichal Kazior  *
7d068ca2aSMichal Kazior  *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
8d068ca2aSMichal Kazior  *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
9d068ca2aSMichal Kazior  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
10d068ca2aSMichal Kazior  *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
11d068ca2aSMichal Kazior  *
12d068ca2aSMichal Kazior  * Redistribution and use in source and binary forms, with or without
13d068ca2aSMichal Kazior  * modification, are permitted provided that the following conditions
14d068ca2aSMichal Kazior  * are met:
15d068ca2aSMichal Kazior  * 1. Redistributions of source code must retain the above copyright
16d068ca2aSMichal Kazior  *    notice, this list of conditions, and the following disclaimer,
17d068ca2aSMichal Kazior  *    without modification.
18d068ca2aSMichal Kazior  * 2. Redistributions in binary form must reproduce the above copyright
19d068ca2aSMichal Kazior  *    notice, this list of conditions and the following disclaimer in the
20d068ca2aSMichal Kazior  *    documentation and/or other materials provided with the distribution.
21d068ca2aSMichal Kazior  * 3. The names of the authors may not be used to endorse or promote products
22d068ca2aSMichal Kazior  *    derived from this software without specific prior written permission.
23d068ca2aSMichal Kazior  *
24d068ca2aSMichal Kazior  * Alternatively, provided that this notice is retained in full, this
25d068ca2aSMichal Kazior  * software may be distributed under the terms of the GNU General
26d068ca2aSMichal Kazior  * Public License ("GPL") version 2, in which case the provisions of the
27d068ca2aSMichal Kazior  * GPL apply INSTEAD OF those given above.
28d068ca2aSMichal Kazior  *
29d068ca2aSMichal Kazior  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30d068ca2aSMichal Kazior  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31d068ca2aSMichal Kazior  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32d068ca2aSMichal Kazior  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33d068ca2aSMichal Kazior  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34d068ca2aSMichal Kazior  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35d068ca2aSMichal Kazior  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36d068ca2aSMichal Kazior  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37d068ca2aSMichal Kazior  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38d068ca2aSMichal Kazior  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39d068ca2aSMichal Kazior  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
40d068ca2aSMichal Kazior  * DAMAGE.
41d068ca2aSMichal Kazior  *
42d068ca2aSMichal Kazior  */
43d068ca2aSMichal Kazior 
44d068ca2aSMichal Kazior /* Controlling Queue Delay (CoDel) algorithm
45d068ca2aSMichal Kazior  * =========================================
46d068ca2aSMichal Kazior  * Source : Kathleen Nichols and Van Jacobson
47d068ca2aSMichal Kazior  * http://queue.acm.org/detail.cfm?id=2209336
48d068ca2aSMichal Kazior  *
49d068ca2aSMichal Kazior  * Implemented on linux by Dave Taht and Eric Dumazet
50d068ca2aSMichal Kazior  */
51d068ca2aSMichal Kazior 
52*15fcb103SJakub Kicinski #include <net/inet_ecn.h>
53*15fcb103SJakub Kicinski 
codel_params_init(struct codel_params * params)54d068ca2aSMichal Kazior static void codel_params_init(struct codel_params *params)
55d068ca2aSMichal Kazior {
56d068ca2aSMichal Kazior 	params->interval = MS2TIME(100);
57d068ca2aSMichal Kazior 	params->target = MS2TIME(5);
58d068ca2aSMichal Kazior 	params->ce_threshold = CODEL_DISABLED_THRESHOLD;
59dfcb63ceSToke Høiland-Jørgensen 	params->ce_threshold_mask = 0;
60dfcb63ceSToke Høiland-Jørgensen 	params->ce_threshold_selector = 0;
61d068ca2aSMichal Kazior 	params->ecn = false;
62d068ca2aSMichal Kazior }
63d068ca2aSMichal Kazior 
codel_vars_init(struct codel_vars * vars)64d068ca2aSMichal Kazior static void codel_vars_init(struct codel_vars *vars)
65d068ca2aSMichal Kazior {
66d068ca2aSMichal Kazior 	memset(vars, 0, sizeof(*vars));
67d068ca2aSMichal Kazior }
68d068ca2aSMichal Kazior 
codel_stats_init(struct codel_stats * stats)69d068ca2aSMichal Kazior static void codel_stats_init(struct codel_stats *stats)
70d068ca2aSMichal Kazior {
71d068ca2aSMichal Kazior 	stats->maxpacket = 0;
72d068ca2aSMichal Kazior }
73d068ca2aSMichal Kazior 
74d068ca2aSMichal Kazior /*
75d068ca2aSMichal Kazior  * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
76d068ca2aSMichal Kazior  * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
77d068ca2aSMichal Kazior  *
78d068ca2aSMichal Kazior  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
79d068ca2aSMichal Kazior  */
codel_Newton_step(struct codel_vars * vars)80d068ca2aSMichal Kazior static void codel_Newton_step(struct codel_vars *vars)
81d068ca2aSMichal Kazior {
82d068ca2aSMichal Kazior 	u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
83d068ca2aSMichal Kazior 	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
84d068ca2aSMichal Kazior 	u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2);
85d068ca2aSMichal Kazior 
86d068ca2aSMichal Kazior 	val >>= 2; /* avoid overflow in following multiply */
87d068ca2aSMichal Kazior 	val = (val * invsqrt) >> (32 - 2 + 1);
88d068ca2aSMichal Kazior 
89d068ca2aSMichal Kazior 	vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
90d068ca2aSMichal Kazior }
91d068ca2aSMichal Kazior 
92d068ca2aSMichal Kazior /*
93d068ca2aSMichal Kazior  * CoDel control_law is t + interval/sqrt(count)
94d068ca2aSMichal Kazior  * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
95d068ca2aSMichal Kazior  * both sqrt() and divide operation.
96d068ca2aSMichal Kazior  */
codel_control_law(codel_time_t t,codel_time_t interval,u32 rec_inv_sqrt)97d068ca2aSMichal Kazior static codel_time_t codel_control_law(codel_time_t t,
98d068ca2aSMichal Kazior 				      codel_time_t interval,
99d068ca2aSMichal Kazior 				      u32 rec_inv_sqrt)
100d068ca2aSMichal Kazior {
101d068ca2aSMichal Kazior 	return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT);
102d068ca2aSMichal Kazior }
103d068ca2aSMichal Kazior 
codel_should_drop(const struct sk_buff * skb,void * ctx,struct codel_vars * vars,struct codel_params * params,struct codel_stats * stats,codel_skb_len_t skb_len_func,codel_skb_time_t skb_time_func,u32 * backlog,codel_time_t now)104d068ca2aSMichal Kazior static bool codel_should_drop(const struct sk_buff *skb,
105d068ca2aSMichal Kazior 			      void *ctx,
106d068ca2aSMichal Kazior 			      struct codel_vars *vars,
107d068ca2aSMichal Kazior 			      struct codel_params *params,
108d068ca2aSMichal Kazior 			      struct codel_stats *stats,
109d068ca2aSMichal Kazior 			      codel_skb_len_t skb_len_func,
110d068ca2aSMichal Kazior 			      codel_skb_time_t skb_time_func,
111d068ca2aSMichal Kazior 			      u32 *backlog,
112d068ca2aSMichal Kazior 			      codel_time_t now)
113d068ca2aSMichal Kazior {
114d068ca2aSMichal Kazior 	bool ok_to_drop;
115d068ca2aSMichal Kazior 	u32 skb_len;
116d068ca2aSMichal Kazior 
117d068ca2aSMichal Kazior 	if (!skb) {
118d068ca2aSMichal Kazior 		vars->first_above_time = 0;
119d068ca2aSMichal Kazior 		return false;
120d068ca2aSMichal Kazior 	}
121d068ca2aSMichal Kazior 
122d068ca2aSMichal Kazior 	skb_len = skb_len_func(skb);
123d068ca2aSMichal Kazior 	vars->ldelay = now - skb_time_func(skb);
124d068ca2aSMichal Kazior 
125d068ca2aSMichal Kazior 	if (unlikely(skb_len > stats->maxpacket))
126d068ca2aSMichal Kazior 		stats->maxpacket = skb_len;
127d068ca2aSMichal Kazior 
128d068ca2aSMichal Kazior 	if (codel_time_before(vars->ldelay, params->target) ||
129d068ca2aSMichal Kazior 	    *backlog <= params->mtu) {
130d068ca2aSMichal Kazior 		/* went below - stay below for at least interval */
131d068ca2aSMichal Kazior 		vars->first_above_time = 0;
132d068ca2aSMichal Kazior 		return false;
133d068ca2aSMichal Kazior 	}
134d068ca2aSMichal Kazior 	ok_to_drop = false;
135d068ca2aSMichal Kazior 	if (vars->first_above_time == 0) {
136d068ca2aSMichal Kazior 		/* just went above from below. If we stay above
137d068ca2aSMichal Kazior 		 * for at least interval we'll say it's ok to drop
138d068ca2aSMichal Kazior 		 */
139d068ca2aSMichal Kazior 		vars->first_above_time = now + params->interval;
140d068ca2aSMichal Kazior 	} else if (codel_time_after(now, vars->first_above_time)) {
141d068ca2aSMichal Kazior 		ok_to_drop = true;
142d068ca2aSMichal Kazior 	}
143d068ca2aSMichal Kazior 	return ok_to_drop;
144d068ca2aSMichal Kazior }
145d068ca2aSMichal Kazior 
codel_dequeue(void * ctx,u32 * backlog,struct codel_params * params,struct codel_vars * vars,struct codel_stats * stats,codel_skb_len_t skb_len_func,codel_skb_time_t skb_time_func,codel_skb_drop_t drop_func,codel_skb_dequeue_t dequeue_func)146d068ca2aSMichal Kazior static struct sk_buff *codel_dequeue(void *ctx,
147d068ca2aSMichal Kazior 				     u32 *backlog,
148d068ca2aSMichal Kazior 				     struct codel_params *params,
149d068ca2aSMichal Kazior 				     struct codel_vars *vars,
150d068ca2aSMichal Kazior 				     struct codel_stats *stats,
151d068ca2aSMichal Kazior 				     codel_skb_len_t skb_len_func,
152d068ca2aSMichal Kazior 				     codel_skb_time_t skb_time_func,
153d068ca2aSMichal Kazior 				     codel_skb_drop_t drop_func,
154d068ca2aSMichal Kazior 				     codel_skb_dequeue_t dequeue_func)
155d068ca2aSMichal Kazior {
156d068ca2aSMichal Kazior 	struct sk_buff *skb = dequeue_func(vars, ctx);
157d068ca2aSMichal Kazior 	codel_time_t now;
158d068ca2aSMichal Kazior 	bool drop;
159d068ca2aSMichal Kazior 
160d068ca2aSMichal Kazior 	if (!skb) {
161d068ca2aSMichal Kazior 		vars->dropping = false;
162d068ca2aSMichal Kazior 		return skb;
163d068ca2aSMichal Kazior 	}
164d068ca2aSMichal Kazior 	now = codel_get_time();
165d068ca2aSMichal Kazior 	drop = codel_should_drop(skb, ctx, vars, params, stats,
166d068ca2aSMichal Kazior 				 skb_len_func, skb_time_func, backlog, now);
167d068ca2aSMichal Kazior 	if (vars->dropping) {
168d068ca2aSMichal Kazior 		if (!drop) {
169d068ca2aSMichal Kazior 			/* sojourn time below target - leave dropping state */
170d068ca2aSMichal Kazior 			vars->dropping = false;
171d068ca2aSMichal Kazior 		} else if (codel_time_after_eq(now, vars->drop_next)) {
172d068ca2aSMichal Kazior 			/* It's time for the next drop. Drop the current
173d068ca2aSMichal Kazior 			 * packet and dequeue the next. The dequeue might
174d068ca2aSMichal Kazior 			 * take us out of dropping state.
175d068ca2aSMichal Kazior 			 * If not, schedule the next drop.
176d068ca2aSMichal Kazior 			 * A large backlog might result in drop rates so high
177d068ca2aSMichal Kazior 			 * that the next drop should happen now,
178d068ca2aSMichal Kazior 			 * hence the while loop.
179d068ca2aSMichal Kazior 			 */
180d068ca2aSMichal Kazior 			while (vars->dropping &&
181d068ca2aSMichal Kazior 			       codel_time_after_eq(now, vars->drop_next)) {
182d068ca2aSMichal Kazior 				vars->count++; /* dont care of possible wrap
183d068ca2aSMichal Kazior 						* since there is no more divide
184d068ca2aSMichal Kazior 						*/
185d068ca2aSMichal Kazior 				codel_Newton_step(vars);
186d068ca2aSMichal Kazior 				if (params->ecn && INET_ECN_set_ce(skb)) {
187d068ca2aSMichal Kazior 					stats->ecn_mark++;
188d068ca2aSMichal Kazior 					vars->drop_next =
189d068ca2aSMichal Kazior 						codel_control_law(vars->drop_next,
190d068ca2aSMichal Kazior 								  params->interval,
191d068ca2aSMichal Kazior 								  vars->rec_inv_sqrt);
192d068ca2aSMichal Kazior 					goto end;
193d068ca2aSMichal Kazior 				}
194d068ca2aSMichal Kazior 				stats->drop_len += skb_len_func(skb);
195d068ca2aSMichal Kazior 				drop_func(skb, ctx);
196d068ca2aSMichal Kazior 				stats->drop_count++;
197d068ca2aSMichal Kazior 				skb = dequeue_func(vars, ctx);
198d068ca2aSMichal Kazior 				if (!codel_should_drop(skb, ctx,
199d068ca2aSMichal Kazior 						       vars, params, stats,
200d068ca2aSMichal Kazior 						       skb_len_func,
201d068ca2aSMichal Kazior 						       skb_time_func,
202d068ca2aSMichal Kazior 						       backlog, now)) {
203d068ca2aSMichal Kazior 					/* leave dropping state */
204d068ca2aSMichal Kazior 					vars->dropping = false;
205d068ca2aSMichal Kazior 				} else {
206d068ca2aSMichal Kazior 					/* and schedule the next drop */
207d068ca2aSMichal Kazior 					vars->drop_next =
208d068ca2aSMichal Kazior 						codel_control_law(vars->drop_next,
209d068ca2aSMichal Kazior 								  params->interval,
210d068ca2aSMichal Kazior 								  vars->rec_inv_sqrt);
211d068ca2aSMichal Kazior 				}
212d068ca2aSMichal Kazior 			}
213d068ca2aSMichal Kazior 		}
214d068ca2aSMichal Kazior 	} else if (drop) {
215d068ca2aSMichal Kazior 		u32 delta;
216d068ca2aSMichal Kazior 
217d068ca2aSMichal Kazior 		if (params->ecn && INET_ECN_set_ce(skb)) {
218d068ca2aSMichal Kazior 			stats->ecn_mark++;
219d068ca2aSMichal Kazior 		} else {
220d068ca2aSMichal Kazior 			stats->drop_len += skb_len_func(skb);
221d068ca2aSMichal Kazior 			drop_func(skb, ctx);
222d068ca2aSMichal Kazior 			stats->drop_count++;
223d068ca2aSMichal Kazior 
224d068ca2aSMichal Kazior 			skb = dequeue_func(vars, ctx);
225d068ca2aSMichal Kazior 			drop = codel_should_drop(skb, ctx, vars, params,
226d068ca2aSMichal Kazior 						 stats, skb_len_func,
227d068ca2aSMichal Kazior 						 skb_time_func, backlog, now);
228d068ca2aSMichal Kazior 		}
229d068ca2aSMichal Kazior 		vars->dropping = true;
230d068ca2aSMichal Kazior 		/* if min went above target close to when we last went below it
231d068ca2aSMichal Kazior 		 * assume that the drop rate that controlled the queue on the
232d068ca2aSMichal Kazior 		 * last cycle is a good starting point to control it now.
233d068ca2aSMichal Kazior 		 */
234d068ca2aSMichal Kazior 		delta = vars->count - vars->lastcount;
235d068ca2aSMichal Kazior 		if (delta > 1 &&
236d068ca2aSMichal Kazior 		    codel_time_before(now - vars->drop_next,
237d068ca2aSMichal Kazior 				      16 * params->interval)) {
238d068ca2aSMichal Kazior 			vars->count = delta;
239d068ca2aSMichal Kazior 			/* we dont care if rec_inv_sqrt approximation
240d068ca2aSMichal Kazior 			 * is not very precise :
241d068ca2aSMichal Kazior 			 * Next Newton steps will correct it quadratically.
242d068ca2aSMichal Kazior 			 */
243d068ca2aSMichal Kazior 			codel_Newton_step(vars);
244d068ca2aSMichal Kazior 		} else {
245d068ca2aSMichal Kazior 			vars->count = 1;
246d068ca2aSMichal Kazior 			vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
247d068ca2aSMichal Kazior 		}
248d068ca2aSMichal Kazior 		vars->lastcount = vars->count;
249d068ca2aSMichal Kazior 		vars->drop_next = codel_control_law(now, params->interval,
250d068ca2aSMichal Kazior 						    vars->rec_inv_sqrt);
251d068ca2aSMichal Kazior 	}
252d068ca2aSMichal Kazior end:
253e72aeb9eSEric Dumazet 	if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
254e72aeb9eSEric Dumazet 		bool set_ce = true;
255e72aeb9eSEric Dumazet 
256dfcb63ceSToke Høiland-Jørgensen 		if (params->ce_threshold_mask) {
257dfcb63ceSToke Høiland-Jørgensen 			int dsfield = skb_get_dsfield(skb);
258e72aeb9eSEric Dumazet 
259dfcb63ceSToke Høiland-Jørgensen 			set_ce = (dsfield >= 0 &&
260dfcb63ceSToke Høiland-Jørgensen 				  (((u8)dsfield & params->ce_threshold_mask) ==
261dfcb63ceSToke Høiland-Jørgensen 				   params->ce_threshold_selector));
262e72aeb9eSEric Dumazet 		}
263e72aeb9eSEric Dumazet 		if (set_ce && INET_ECN_set_ce(skb))
264d068ca2aSMichal Kazior 			stats->ce_mark++;
265e72aeb9eSEric Dumazet 	}
266d068ca2aSMichal Kazior 	return skb;
267d068ca2aSMichal Kazior }
268d068ca2aSMichal Kazior 
269d068ca2aSMichal Kazior #endif
270