/*
 * FQ_PIE - The FlowQueue-PIE scheduler/AQM
 *
 * $FreeBSD$
 *
 * Copyright (C) 2016 Centre for Advanced Internet Architectures,
 *  Swinburne University of Technology, Melbourne, Australia.
 * Portions of this code were made possible in part by a gift from
 *  The Comcast Innovation Fund.
 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* Important note:
 * As there is no official document for the FQ-PIE specification, we used
 * the FQ-CoDel algorithm with some modifications to implement FQ-PIE.
 * This FQ-PIE implementation is a beta version and has not been tested
 * extensively. Our FQ-PIE uses a stand-alone PIE AQM per sub-queue. By
 * default, a timestamp is used to calculate queue delay instead of the
 * departure rate estimation method. Although departure rate estimation is
 * available as a testing option, the results could be incorrect. Moreover,
 * an option to turn PIE on and off is available, but it does not work
 * properly in this version.
 */
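
/*
 * Illustrative usage sketch (not part of the original sources, and the
 * exact ipfw(8) syntax may vary between FreeBSD versions): attaching this
 * scheduler to a dummynet pipe from userland could look like
 *
 *   ipfw pipe 1 config bw 10Mbit/s
 *   ipfw sched 1 config pipe 1 type fq_pie target 15ms tupdate 15ms
 *   ipfw queue 1 config sched 1
 *   ipfw add queue 1 ip from any to any
 */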

#ifdef _KERNEL
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <net/if.h>	/* IFNAMSIZ */
#include <netinet/in.h>
#include <netinet/ip_var.h>	/* ipfw_rule_ref */
#include <netinet/ip_fw.h>	/* flow_id */
#include <netinet/ip_dummynet.h>

#include <sys/proc.h>
#include <sys/rwlock.h>

#include <netpfil/ipfw/ip_fw_private.h>
#include <sys/sysctl.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <sys/queue.h>
#include <sys/hash.h>

#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>

#include <netpfil/ipfw/dn_aqm.h>
#include <netpfil/ipfw/dn_aqm_pie.h>
#include <netpfil/ipfw/dn_sched.h>

#else
#include <dn_test.h>
#endif

#define DN_SCHED_FQ_PIE 7

/* list of queues */
STAILQ_HEAD(fq_pie_list, fq_pie_flow);

/* FQ_PIE parameters including PIE */
struct dn_sch_fq_pie_parms {
	struct dn_aqm_pie_parms	pcfg;	/* PIE configuration parameters */
	/* FQ_PIE parameters */
	uint32_t flows_cnt;	/* number of flows */
	uint32_t limit;		/* hard limit of FQ_PIE queue size */
	uint32_t quantum;
};

/* flow (sub-queue) stats */
struct flow_stats {
	uint64_t tot_pkts;	/* statistics counters */
	uint64_t tot_bytes;
	uint32_t length;	/* queue length, in packets */
	uint32_t len_bytes;	/* queue length, in bytes */
	uint32_t drops;
};

/* A flow of packets (sub-queue) */
struct fq_pie_flow {
	struct mq	mq;	/* list of packets */
	struct flow_stats stats;	/* statistics */
	int deficit;
	int active;		/* 1: flow is active (in a list) */
	struct pie_status pst;	/* PIE status variables */
	struct fq_pie_si *psi;	/* parent scheduler instance */
	STAILQ_ENTRY(fq_pie_flow) flowchain;
};

/* extra fq_pie scheduler configurations */
struct fq_pie_schk {
	struct dn_sch_fq_pie_parms cfg;
};

/* fq_pie scheduler instance */
struct fq_pie_si {
	struct dn_sch_inst _si;	/* standard scheduler instance */
	struct dn_queue main_q;	/* main queue is after si directly */
	uint32_t nr_active_q;
	struct fq_pie_flow *flows;	/* array of flows (queues) */
	uint32_t perturbation;	/* random value */
	struct fq_pie_list newflows;	/* list of new queues */
	struct fq_pie_list oldflows;	/* list of old queues */
};

struct mem_to_free {
	void *mem_flows;
	void *mem_callout;
};
static struct mtx freemem_mtx;
static struct dn_alg fq_pie_desc;

/* Default FQ-PIE parameters including PIE */
/* PIE defaults:
 * target=15ms, max_burst=150ms, max_ecnth=0.1,
 * alpha=0.125, beta=1.25, tupdate=15ms
 * FQ-PIE defaults:
 * flows=1024, limit=10240, quantum=1514
 */
struct dn_sch_fq_pie_parms
 fq_pie_sysctl = {{15000 * AQM_TIME_1US, 15000 * AQM_TIME_1US,
	150000 * AQM_TIME_1US, PIE_SCALE * 0.1, PIE_SCALE * 0.125,
	PIE_SCALE * 1.25, PIE_CAPDROP_ENABLED | PIE_DERAND_ENABLED},
	1024, 10240, 1514};
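
/*
 * Note (added for clarity): the positional initializer above fills pcfg in
 * declaration order which, per the defaults comment, corresponds to
 * qdelay_ref (target), tupdate, max_burst, max_ecnth, alpha, beta and
 * flags, followed by flows_cnt, limit and quantum.
 */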

static int
fqpie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
{
	int error;
	long value;

	if (!strcmp(oidp->oid_name, "alpha"))
		value = fq_pie_sysctl.pcfg.alpha;
	else
		value = fq_pie_sysctl.pcfg.beta;

	value = value * 1000 / PIE_SCALE;
	error = sysctl_handle_long(oidp, &value, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (value < 1 || value > 7 * PIE_SCALE)
		return (EINVAL);
	value = (value * PIE_SCALE) / 1000;
	if (!strcmp(oidp->oid_name, "alpha"))
		fq_pie_sysctl.pcfg.alpha = value;
	else
		fq_pie_sysctl.pcfg.beta = value;
	return (0);
}

static int
fqpie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
{
	int error;
	long value;

	if (!strcmp(oidp->oid_name, "target"))
		value = fq_pie_sysctl.pcfg.qdelay_ref;
	else if (!strcmp(oidp->oid_name, "tupdate"))
		value = fq_pie_sysctl.pcfg.tupdate;
	else
		value = fq_pie_sysctl.pcfg.max_burst;

	value = value / AQM_TIME_1US;
	error = sysctl_handle_long(oidp, &value, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (value < 1 || value > 10 * AQM_TIME_1S)
		return (EINVAL);
	value = value * AQM_TIME_1US;

	if (!strcmp(oidp->oid_name, "target"))
		fq_pie_sysctl.pcfg.qdelay_ref = value;
	else if (!strcmp(oidp->oid_name, "tupdate"))
		fq_pie_sysctl.pcfg.tupdate = value;
	else
		fq_pie_sysctl.pcfg.max_burst = value;
	return (0);
}

static int
fqpie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
{
	int error;
	long value;

	value = fq_pie_sysctl.pcfg.max_ecnth;
	value = value * 1000 / PIE_SCALE;
	error = sysctl_handle_long(oidp, &value, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (value < 1 || value > PIE_SCALE)
		return (EINVAL);
	value = (value * PIE_SCALE) / 1000;
	fq_pie_sysctl.pcfg.max_ecnth = value;
	return (0);
}

/* define FQ-PIE sysctl variables */
SYSBEGIN(f4)
SYSCTL_DECL(_net_inet);
SYSCTL_DECL(_net_inet_ip);
SYSCTL_DECL(_net_inet_ip_dummynet);
static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqpie,
	CTLFLAG_RW, 0, "FQ_PIE");

#ifdef SYSCTL_NODE

SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, target,
	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
	fqpie_sysctl_target_tupdate_maxb_handler, "L",
	"queue target in microseconds");

SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, tupdate,
	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
	fqpie_sysctl_target_tupdate_maxb_handler, "L",
	"the frequency of drop probability calculation in microseconds");

SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_burst,
	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
	fqpie_sysctl_target_tupdate_maxb_handler, "L",
	"burst allowance interval in microseconds");

SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_ecnth,
	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
	fqpie_sysctl_max_ecnth_handler, "L",
	"ECN safeguard threshold scaled by 1000");

SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, alpha,
	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
	fqpie_sysctl_alpha_beta_handler, "L", "PIE alpha scaled by 1000");

SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, beta,
	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
	fqpie_sysctl_alpha_beta_handler, "L", "PIE beta scaled by 1000");

SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, quantum,
	CTLFLAG_RW, &fq_pie_sysctl.quantum, 1514, "quantum for FQ_PIE");
SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, flows,
	CTLFLAG_RW, &fq_pie_sysctl.flows_cnt, 1024, "number of queues for FQ_PIE");
SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, limit,
	CTLFLAG_RW, &fq_pie_sysctl.limit, 10240, "limit for FQ_PIE");
#endif
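
/*
 * Illustrative example (assumption, not from this file): the defaults can
 * be inspected or changed at runtime through the nodes above, e.g.
 *
 *   sysctl net.inet.ip.dummynet.fqpie.target=15000
 *   sysctl net.inet.ip.dummynet.fqpie.alpha=125
 *
 * target, tupdate and max_burst are expressed in microseconds; alpha,
 * beta and max_ecnth are scaled by 1000 by the handlers above.
 */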

/* Helper function to update queue, main-queue and scheduler statistics.
 * negative len & drop -> drop
 * negative len -> dequeue
 * positive len -> enqueue
 * positive len + drop -> drop during enqueue
 */
__inline static void
fq_update_stats(struct fq_pie_flow *q, struct fq_pie_si *si, int len,
	int drop)
{
	int inc = 0;

	if (len < 0)
		inc = -1;
	else if (len > 0)
		inc = 1;

	if (drop) {
		si->main_q.ni.drops++;
		q->stats.drops++;
		si->_si.ni.drops++;
		io_pkt_drop++;
	}

	if (!drop || (drop && len < 0)) {
		/* update stats for the main queue */
		si->main_q.ni.length += inc;
		si->main_q.ni.len_bytes += len;

		/* update sub-queue stats */
		q->stats.length += inc;
		q->stats.len_bytes += len;

		/* update scheduler instance stats */
		si->_si.ni.length += inc;
		si->_si.ni.len_bytes += len;
	}

	if (inc > 0) {
		si->main_q.ni.tot_bytes += len;
		si->main_q.ni.tot_pkts++;

		q->stats.tot_bytes += len;
		q->stats.tot_pkts++;

		si->_si.ni.tot_bytes += len;
		si->_si.ni.tot_pkts++;
	}
}

/*
 * Extract a packet from the head of sub-queue 'q'.
 * Return a packet or NULL if the queue is empty.
 * If getts is set, also extract the packet's timestamp from its mtag.
 */
__inline static struct mbuf *
fq_pie_extract_head(struct fq_pie_flow *q, aqm_time_t *pkt_ts,
	struct fq_pie_si *si, int getts)
{
	struct mbuf *m = q->mq.head;

	if (m == NULL)
		return m;
	q->mq.head = m->m_nextpkt;

	fq_update_stats(q, si, -m->m_pkthdr.len, 0);

	if (si->main_q.ni.length == 0) /* queue is now idle */
		si->main_q.q_time = dn_cfg.curr_time;

	if (getts) {
		/* extract packet timestamp */
		struct m_tag *mtag;
		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
		if (mtag == NULL) {
			D("PIE timestamp mtag not found!");
			*pkt_ts = 0;
		} else {
			*pkt_ts = *(aqm_time_t *)(mtag + 1);
			m_tag_delete(m, mtag);
		}
	}
	return m;
}

/*
 * Callout function for drop probability calculation.
 * This function is called every tupdate interval and takes a pointer to
 * the FQ-PIE flow as its argument.
 */
static void
fq_calculate_drop_prob(void *x)
{
	struct fq_pie_flow *q = (struct fq_pie_flow *) x;
	struct pie_status *pst = &q->pst;
	struct dn_aqm_pie_parms *pprms;
	int64_t p, prob, oldprob;
	aqm_time_t now;

	/* dealing with race condition */
	if (callout_pending(&pst->aqm_pie_callout)) {
		/* callout was reset */
		mtx_unlock(&pst->lock_mtx);
		return;
	}

	if (!callout_active(&pst->aqm_pie_callout)) {
		/* callout was stopped */
		mtx_unlock(&pst->lock_mtx);
		mtx_destroy(&pst->lock_mtx);
		q->psi->nr_active_q--;
		return;
	}
	callout_deactivate(&pst->aqm_pie_callout);

	now = AQM_UNOW;
	pprms = pst->parms;
	prob = pst->drop_prob;

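	/*
	 * With departure rate estimation, queue delay is estimated from the
	 * current backlog: avg_dq_time is the average time needed to drain
	 * PIE_DQ_THRESHOLD bytes (1 << PIE_DQ_THRESHOLD_BITS, as defined in
	 * dn_aqm_pie.h), so
	 *   qdelay ~= len_bytes * avg_dq_time / PIE_DQ_THRESHOLD,
	 * which is what the shift below computes.
	 */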
	/* calculate current qdelay */
	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
		pst->current_qdelay = ((uint64_t)q->stats.len_bytes * pst->avg_dq_time)
			>> PIE_DQ_THRESHOLD_BITS;
	}

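	/*
	 * The update below follows the PIE controller (RFC 8033):
	 *   p = alpha * (current_qdelay - qdelay_ref) +
	 *       beta  * (current_qdelay - qdelay_old)
	 * computed in fixed point, with alpha and beta pre-scaled by
	 * PIE_SCALE and delays in AQM_TIME_1US units.
	 */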
	/* calculate drop probability */
	p = (int64_t)pprms->alpha *
		((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref);
	p += (int64_t)pprms->beta *
		((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old);

	/* we shift PIE_MAX_PROB left by 12 bits to increase the division precision */
	p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;

	/* auto-tune drop probability */
	if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
		p >>= 11 + PIE_FIX_POINT_BITS + 12;
	else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
		p >>= 9 + PIE_FIX_POINT_BITS + 12;
	else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
		p >>= 7 + PIE_FIX_POINT_BITS + 12;
	else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
		p >>= 5 + PIE_FIX_POINT_BITS + 12;
	else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
		p >>= 3 + PIE_FIX_POINT_BITS + 12;
	else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
		p >>= 1 + PIE_FIX_POINT_BITS + 12;
	else
		p >>= PIE_FIX_POINT_BITS + 12;
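
	/*
	 * The staged shifts above implement PIE auto-tuning: the smaller the
	 * current drop probability, the more the adjustment p is attenuated
	 * (down to 1/2048 of its value below 0.000001, up to its full value
	 * at 0.1 and above), so the controller reacts gently at low
	 * probabilities and aggressively at high ones. The additional
	 * PIE_FIX_POINT_BITS + 12 undoes the fixed-point scaling applied
	 * earlier.
	 */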

	oldprob = prob;

	/* cap drop adjustment */
	if ((pprms->flags & PIE_CAPDROP_ENABLED) && prob >= PIE_MAX_PROB / 10
		&& p > PIE_MAX_PROB / 50)
		p = PIE_MAX_PROB / 50;

	prob = prob + p;

	/* decay the drop probability exponentially */
	if (pst->current_qdelay == 0 && pst->qdelay_old == 0)
		/* 0.98 ~= 1 - 1/64 */
		prob = prob - (prob >> 6);

	/* check for multiplication overflow/underflow */
	if (p > 0) {
		if (prob < oldprob) {
			D("overflow");
			prob = PIE_MAX_PROB;
		}
	} else {
		if (prob > oldprob) {
			prob = 0;
			D("underflow");
		}
	}

	/* clamp drop probability between 0 and PIE_MAX_PROB */
	if (prob < 0)
		prob = 0;
	else if (prob > PIE_MAX_PROB)
		prob = PIE_MAX_PROB;

	pst->drop_prob = prob;

	/* store current delay value */
	pst->qdelay_old = pst->current_qdelay;

	/* update burst allowance */
	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance) {
		if (pst->burst_allowance > pprms->tupdate)
			pst->burst_allowance -= pprms->tupdate;
		else
			pst->burst_allowance = 0;
	}

	if (pst->sflags & PIE_ACTIVE)
		callout_reset_sbt(&pst->aqm_pie_callout,
			(uint64_t)pprms->tupdate * SBT_1US,
			0, fq_calculate_drop_prob, q, 0);

	mtx_unlock(&pst->lock_mtx);
}

/*
 * Reset PIE variables & activate the queue
 */
__inline static void
fq_activate_pie(struct fq_pie_flow *q)
{
	struct pie_status *pst = &q->pst;
	struct dn_aqm_pie_parms *pprms;

	mtx_lock(&pst->lock_mtx);
	pprms = pst->parms;
	pst->drop_prob = 0;
	pst->qdelay_old = 0;
	pst->burst_allowance = pprms->max_burst;
	pst->accu_prob = 0;
	pst->dq_count = 0;
	pst->avg_dq_time = 0;
	pst->sflags = PIE_INMEASUREMENT | PIE_ACTIVE;
	pst->measurement_start = AQM_UNOW;

	callout_reset_sbt(&pst->aqm_pie_callout,
		(uint64_t)pprms->tupdate * SBT_1US,
		0, fq_calculate_drop_prob, q, 0);

	mtx_unlock(&pst->lock_mtx);
}

/*
 * Deactivate PIE and stop the probability update callout
 */
__inline static void
fq_deactivate_pie(struct pie_status *pst)
{
	mtx_lock(&pst->lock_mtx);
	pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
	callout_stop(&pst->aqm_pie_callout);
	//D("PIE Deactivated");
	mtx_unlock(&pst->lock_mtx);
}

/*
 * Initialize PIE for sub-queue 'q'
 */
static int
pie_init(struct fq_pie_flow *q)
{
	struct pie_status *pst = &q->pst;
	struct dn_aqm_pie_parms *pprms = pst->parms;
	struct fq_pie_schk *fqpie_schk;
	int err = 0;

	fqpie_schk = (struct fq_pie_schk *)(q->psi->_si.sched + 1);

	if (!pprms) {
		D("AQM_PIE is not configured");
		err = EINVAL;
	} else {
		q->psi->nr_active_q++;

		/* For speed optimization, we calculate 1/3 queue size once here */
		// XXX limit divided by number of queues divided by 3 ???
		pst->one_third_q_size = (fqpie_schk->cfg.limit /
			fqpie_schk->cfg.flows_cnt) / 3;

		mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
		callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
			CALLOUT_RETURNUNLOCKED);
	}

	return err;
}

/*
 * Clean up PIE status for sub-queue 'q':
 * stop the callout timer and destroy the mtx.
 */
static int
pie_cleanup(struct fq_pie_flow *q)
{
	struct pie_status *pst = &q->pst;

	mtx_lock(&pst->lock_mtx);
	if (callout_stop(&pst->aqm_pie_callout) || !(pst->sflags & PIE_ACTIVE)) {
		mtx_unlock(&pst->lock_mtx);
		mtx_destroy(&pst->lock_mtx);
		q->psi->nr_active_q--;
	} else {
		mtx_unlock(&pst->lock_mtx);
		return EBUSY;
	}
	return 0;
}

/*
 * Dequeue and return a packet from sub-queue 'q', or NULL if 'q' is empty.
 * Also calculate the departure time or queue delay using a timestamp.
 */
static struct mbuf *
pie_dequeue(struct fq_pie_flow *q, struct fq_pie_si *si)
{
	struct mbuf *m;
	struct dn_aqm_pie_parms *pprms;
	struct pie_status *pst;
	aqm_time_t now;
	aqm_time_t pkt_ts, dq_time;
	int32_t w;

	pst = &q->pst;
	pprms = q->pst.parms;

	/* we extract the packet ts only when Departure Rate Estimation is not used */
	m = fq_pie_extract_head(q, &pkt_ts, si,
		!(pprms->flags & PIE_DEPRATEEST_ENABLED));

	if (!m || !(pst->sflags & PIE_ACTIVE))
		return m;

	now = AQM_UNOW;
	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
		/* calculate average departure time */
		if (pst->sflags & PIE_INMEASUREMENT) {
			pst->dq_count += m->m_pkthdr.len;

			if (pst->dq_count >= PIE_DQ_THRESHOLD) {
				dq_time = now - pst->measurement_start;

				/*
				 * If we don't have an old avg dq_time, i.e. PIE is
				 * (re)initialized, don't use the weight to calculate
				 * the new avg_dq_time.
				 */
				if (pst->avg_dq_time == 0)
					pst->avg_dq_time = dq_time;
				else {
					/*
					 * weight = PIE_DQ_THRESHOLD/2^6, but we scaled
					 * the weight by 2^8. Thus, scaled
					 * weight = PIE_DQ_THRESHOLD/2^8
					 */
					w = PIE_DQ_THRESHOLD >> 8;
					pst->avg_dq_time = (dq_time * w
						+ (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
					pst->sflags &= ~PIE_INMEASUREMENT;
				}
			}
		}

		/*
		 * Start a new measurement cycle when the queue has
		 * PIE_DQ_THRESHOLD worth of bytes.
		 */
		if (!(pst->sflags & PIE_INMEASUREMENT) &&
			q->stats.len_bytes >= PIE_DQ_THRESHOLD) {
			pst->sflags |= PIE_INMEASUREMENT;
			pst->measurement_start = now;
			pst->dq_count = 0;
		}
	}
	/* Optionally, use the packet timestamp to estimate queue delay */
	else
		pst->current_qdelay = now - pkt_ts;

	return m;
}


/*
 * Enqueue a packet in q, subject to space and FQ-PIE queue management policy
 * (whose parameters are in q->fs).
 * Update stats for the queue and the scheduler.
 * Return 0 on success, 1 on drop. The packet is consumed anyway.
 */
static int
pie_enqueue(struct fq_pie_flow *q, struct mbuf *m, struct fq_pie_si *si)
{
	uint64_t len;
	struct pie_status *pst;
	struct dn_aqm_pie_parms *pprms;
	int t;

	len = m->m_pkthdr.len;
	pst = &q->pst;
	pprms = pst->parms;
	t = ENQUE;

	/* drop/mark the packet when PIE is active and burst time elapsed */
	if (pst->sflags & PIE_ACTIVE && pst->burst_allowance == 0
		&& drop_early(pst, q->stats.len_bytes) == DROP) {
		/*
		 * If drop_prob is over the ECN threshold, drop the packet;
		 * otherwise mark and enqueue it.
		 */
		if (pprms->flags & PIE_ECN_ENABLED && pst->drop_prob <
			(pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
			&& ecn_mark(m))
			t = ENQUE;
		else
			t = DROP;
	}
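
	/*
	 * Note on the comparison above: max_ecnth is kept in
	 * PIE_FIX_POINT_BITS fixed point while drop_prob uses PIE_PROB_BITS,
	 * so the shift aligns the two scales before comparing.
	 */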

	/* Turn PIE on when 1/3 of the queue is full */
	if (!(pst->sflags & PIE_ACTIVE) && q->stats.len_bytes >=
		pst->one_third_q_size) {
		fq_activate_pie(q);
	}

	/* reset burst tolerance and optionally turn PIE off */
	if (pst->drop_prob == 0 && pst->current_qdelay < (pprms->qdelay_ref >> 1)
		&& pst->qdelay_old < (pprms->qdelay_ref >> 1)) {

		pst->burst_allowance = pprms->max_burst;
		if (pprms->flags & PIE_ON_OFF_MODE_ENABLED && q->stats.len_bytes <= 0)
			fq_deactivate_pie(pst);
	}

	/* Use a timestamp if Departure Rate Estimation mode is disabled */
	if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
		/* Add TS to mbuf as a TAG */
		struct m_tag *mtag;
		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
		if (mtag == NULL)
			mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
				sizeof(aqm_time_t), M_NOWAIT);
		if (mtag == NULL) {
			/*
			 * Allocation failed: drop the packet. FREE_PKT()
			 * below frees it; dereferencing the NULL mtag or
			 * freeing 'm' a second time here would be a bug.
			 */
			t = DROP;
		} else {
			*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
			m_tag_prepend(m, mtag);
		}
	}

	if (t != DROP) {
		mq_append(&q->mq, m);
		fq_update_stats(q, si, len, 0);
		return 0;
	} else {
		fq_update_stats(q, si, len, 1);
		pst->accu_prob = 0;
		FREE_PKT(m);
		return 1;
	}
}

/* Drop a packet from the head of an FQ-PIE sub-queue */
static void
pie_drop_head(struct fq_pie_flow *q, struct fq_pie_si *si)
{
	struct mbuf *m = q->mq.head;

	if (m == NULL)
		return;
	q->mq.head = m->m_nextpkt;

	fq_update_stats(q, si, -m->m_pkthdr.len, 1);

	if (si->main_q.ni.length == 0) /* queue is now idle */
		si->main_q.q_time = dn_cfg.curr_time;
	/* reset accu_prob after packet drop */
	q->pst.accu_prob = 0;

	FREE_PKT(m);
}

/*
 * Classify a packet to a queue number using the Jenkins hash function.
 * Return: queue number.
 * The hash input is: protocol number, perturbation, src IP, dst IP,
 * src port and dst port.
 */
static inline int
fq_pie_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_pie_si *si)
{
	struct ip *ip;
	struct tcphdr *th;
	struct udphdr *uh;
	uint8_t tuple[41];
	uint16_t hash = 0;

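	/*
	 * Tuple layout (built below): tuple[0] = protocol,
	 * tuple[1..4] = perturbation, then src/dst addresses followed by
	 * dst/src ports: bytes 5..40 for IPv6 (41 bytes hashed) and
	 * bytes 5..16 for IPv4 (17 bytes hashed).
	 */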
//#ifdef INET6
	struct ip6_hdr *ip6;
	int isip6;
	isip6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;

	if (isip6) {
		ip6 = mtod(m, struct ip6_hdr *);
		*((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
		*((uint32_t *) &tuple[1]) = si->perturbation;
		memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
		memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);

		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			th = (struct tcphdr *)(ip6 + 1);
			*((uint16_t *) &tuple[37]) = th->th_dport;
			*((uint16_t *) &tuple[39]) = th->th_sport;
			break;

		case IPPROTO_UDP:
			uh = (struct udphdr *)(ip6 + 1);
			*((uint16_t *) &tuple[37]) = uh->uh_dport;
			*((uint16_t *) &tuple[39]) = uh->uh_sport;
			break;
		default:
			memset(&tuple[37], 0, 4);
		}

		hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
		return hash;
	}
//#endif

	/* IPv4 */
	ip = mtod(m, struct ip *);
	*((uint8_t *) &tuple[0]) = ip->ip_p;
	*((uint32_t *) &tuple[1]) = si->perturbation;
	*((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
	*((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;

	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)(ip + 1);
		*((uint16_t *) &tuple[13]) = th->th_dport;
		*((uint16_t *) &tuple[15]) = th->th_sport;
		break;

	case IPPROTO_UDP:
		uh = (struct udphdr *)(ip + 1);
		*((uint16_t *) &tuple[13]) = uh->uh_dport;
		*((uint16_t *) &tuple[15]) = uh->uh_sport;
		break;
	default:
		memset(&tuple[13], 0, 4);
	}
	hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;

	return hash;
}

/*
 * Enqueue a packet into the appropriate queue according to the
 * FQ-CoDel-style algorithm.
 */
static int
fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
	struct mbuf *m)
{
	struct fq_pie_si *si;
	struct fq_pie_schk *schk;
	struct dn_sch_fq_pie_parms *param;
	struct dn_queue *mainq;
	int idx, drop, i, maxidx;

	mainq = (struct dn_queue *)(_si + 1);
	si = (struct fq_pie_si *)_si;
	schk = (struct fq_pie_schk *)(si->_si.sched + 1);
	param = &schk->cfg;

	/* classify the packet to a queue number */
	idx = fq_pie_classify_flow(m, param->flows_cnt, si);

	/* enqueue the packet into the appropriate queue using PIE AQM.
	 * Note: pie_enqueue() returns 1 only when it is unable to add a
	 * timestamp to the packet (there is no limit check here). */
	drop = pie_enqueue(&si->flows[idx], m, si);

	/* PIE was unable to timestamp the packet */
	if (drop)
		return 1;

	/* If the flow (sub-queue) is not active, then add it to the tail of
	 * the new flows list, initialize it and activate it.
	 */
	if (!si->flows[idx].active) {
		STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
		si->flows[idx].deficit = param->quantum;
		fq_activate_pie(&si->flows[idx]);
		si->flows[idx].active = 1;
	}

	/* check the limit for all queues and remove a packet from the
	 * largest one
	 */
	if (mainq->ni.length > schk->cfg.limit) {
		/* find the first active flow */
		for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
			if (si->flows[maxidx].active)
				break;
		if (maxidx < schk->cfg.flows_cnt) {
			/* find the largest sub-queue */
			for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
				if (si->flows[i].active && si->flows[i].stats.length >
					si->flows[maxidx].stats.length)
					maxidx = i;
			pie_drop_head(&si->flows[maxidx], si);
			drop = 1;
		}
	}

	return drop;
}

/*
 * Dequeue a packet from the appropriate queue according to the
 * FQ-CoDel algorithm (deficit round-robin over the new/old flow lists).
 */
static struct mbuf *
fq_pie_dequeue(struct dn_sch_inst *_si)
{
	struct fq_pie_si *si;
	struct fq_pie_schk *schk;
	struct dn_sch_fq_pie_parms *param;
	struct fq_pie_flow *f;
	struct mbuf *mbuf;
	struct fq_pie_list *fq_pie_flowlist;

	si = (struct fq_pie_si *)_si;
	schk = (struct fq_pie_schk *)(si->_si.sched + 1);
	param = &schk->cfg;

	do {
		/* select a list to start with */
		if (STAILQ_EMPTY(&si->newflows))
			fq_pie_flowlist = &si->oldflows;
		else
			fq_pie_flowlist = &si->newflows;

		/* both the new and old flow lists are empty, return NULL */
		if (STAILQ_EMPTY(fq_pie_flowlist))
			return NULL;

		f = STAILQ_FIRST(fq_pie_flowlist);
		while (f != NULL) {
			/* if the flow (sub-queue) has no deficit left, increase
			 * its deficit by quantum, move the flow to the tail of
			 * the old flows list and try another flow.
			 * Otherwise, the flow will be used for dequeue.
			 */
			if (f->deficit < 0) {
				f->deficit += param->quantum;
				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
			} else
				break;

			f = STAILQ_FIRST(fq_pie_flowlist);
		}

		/* the new flows list is empty, try the old flows list */
		if (STAILQ_EMPTY(fq_pie_flowlist))
			continue;

		/* dequeue a packet from the selected flow */
		mbuf = pie_dequeue(f, si);

		/* PIE did not return a packet */
		if (!mbuf) {
			/* If the selected flow belongs to the new flows list,
			 * then move it to the tail of the old flows list.
			 * Otherwise, deactivate it and remove it from the old
			 * flows list.
			 */
			if (fq_pie_flowlist == &si->newflows) {
				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
			} else {
				f->active = 0;
				fq_deactivate_pie(&f->pst);
				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
			}
			/* start again */
			continue;
		}

		/* we have a packet to return: update the flow's deficit and
		 * return the packet */
		f->deficit -= mbuf->m_pkthdr.len;
		return mbuf;

	} while (1);

	/* unreachable point */
	return NULL;
}

/*
 * Initialize an fq_pie scheduler instance.
 * Also allocate memory for the flows array.
 */
static int
fq_pie_new_sched(struct dn_sch_inst *_si)
{
	struct fq_pie_si *si;
	struct dn_queue *q;
	struct fq_pie_schk *schk;
	int i;

	si = (struct fq_pie_si *)_si;
	schk = (struct fq_pie_schk *)(_si->sched + 1);

	if (si->flows) {
		D("si already configured!");
		return 0;
	}

	/* init the main queue */
	q = &si->main_q;
	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
	q->_si = _si;
	q->fs = _si->sched->fs;

	/* allocate memory for the flows array */
	si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow),
		 M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (si->flows == NULL) {
		D("cannot allocate memory for fq_pie configuration parameters");
		return ENOMEM;
	}

	/* init perturbation for this si */
	si->perturbation = random();
	si->nr_active_q = 0;

	/* init the old and new flows lists */
	STAILQ_INIT(&si->newflows);
	STAILQ_INIT(&si->oldflows);

	/* init the flows (sub-queues) */
	for (i = 0; i < schk->cfg.flows_cnt; i++) {
		si->flows[i].pst.parms = &schk->cfg.pcfg;
		si->flows[i].psi = si;
		pie_init(&si->flows[i]);
	}

	/* init the mtx lock used by the deferred free-memory callout */
	if (!fq_pie_desc.ref_count) {
		mtx_init(&freemem_mtx, "mtx_pie", NULL, MTX_DEF);
	}

	mtx_lock(&freemem_mtx);
	fq_pie_desc.ref_count++;
	mtx_unlock(&freemem_mtx);

	return 0;
}

/*
 * Callout function to free FQ-PIE flows memory.
 * It is scheduled when one or more flows are still active while the
 * scheduler is about to be destroyed, to prevent a memory leak.
 */
static void
free_flows(void *_mem)
{
	struct mem_to_free *mem = _mem;

	free(mem->mem_flows, M_DUMMYNET);
	free(mem->mem_callout, M_DUMMYNET);
	free(_mem, M_DUMMYNET);

	fq_pie_desc.ref_count--;
	if (!fq_pie_desc.ref_count) {
		mtx_unlock(&freemem_mtx);
		mtx_destroy(&freemem_mtx);
	} else
		mtx_unlock(&freemem_mtx);
	//D("mem freed ok!");
}

/*
 * Free an fq_pie scheduler instance.
 */
static int
fq_pie_free_sched(struct dn_sch_inst *_si)
{
	struct fq_pie_si *si;
	struct fq_pie_schk *schk;
	int i;

	si = (struct fq_pie_si *)_si;
	schk = (struct fq_pie_schk *)(_si->sched + 1);

	for (i = 0; i < schk->cfg.flows_cnt; i++) {
		pie_cleanup(&si->flows[i]);
	}

	/* If some queues still have a callout about to start, we cannot free
	 * the flows memory. If we did, a panic could occur because the
	 * drop-probability callout function uses the flows memory.
	 */
	if (!si->nr_active_q) {
		/* free the flows array */
		free(si->flows, M_DUMMYNET);
		si->flows = NULL;
		mtx_lock(&freemem_mtx);
		fq_pie_desc.ref_count--;
		if (!fq_pie_desc.ref_count) {
			mtx_unlock(&freemem_mtx);
			mtx_destroy(&freemem_mtx);
		} else
			mtx_unlock(&freemem_mtx);
		//D("ok!");
		return 0;
	} else {
		/* A memory leak would happen here, so we register a callout
		 * function to free the flows memory later.
		 */
		D("unable to stop all fq_pie sub-queues!");
		mtx_lock(&freemem_mtx);

		struct callout *mem_callout;
		struct mem_to_free *mem;

		mem = malloc(sizeof(*mem), M_DUMMYNET,
			M_NOWAIT | M_ZERO);
		mem_callout = malloc(sizeof(*mem_callout), M_DUMMYNET,
			M_NOWAIT | M_ZERO);

		callout_init_mtx(mem_callout, &freemem_mtx,
			CALLOUT_RETURNUNLOCKED);

		mem->mem_flows = si->flows;
		mem->mem_callout = mem_callout;
		callout_reset_sbt(mem_callout,
			(uint64_t)(si->flows[0].pst.parms->tupdate + 1000) * SBT_1US,
			0, free_flows, mem, 0);

		si->flows = NULL;
		mtx_unlock(&freemem_mtx);

		return EBUSY;
	}
}

/*
 * Configure the FQ-PIE scheduler.
 * The configuration for the scheduler is passed from ipfw userland.
 */
static int
fq_pie_config(struct dn_schk *_schk)
{
	struct fq_pie_schk *schk;
	struct dn_extra_parms *ep;
	struct dn_sch_fq_pie_parms *fqp_cfg;

	schk = (struct fq_pie_schk *)(_schk + 1);
	ep = (struct dn_extra_parms *) _schk->cfg;

	/* par array contains the fq_pie configuration as follows:
	 * PIE: 0- qdelay_ref, 1- tupdate, 2- max_burst,
	 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
	 * FQ_PIE: 7- quantum, 8- limit, 9- flows
	 */
	if (ep && ep->oid.len == sizeof(*ep) &&
		ep->oid.subtype == DN_SCH_PARAMS) {

		fqp_cfg = &schk->cfg;
		if (ep->par[0] < 0)
			fqp_cfg->pcfg.qdelay_ref = fq_pie_sysctl.pcfg.qdelay_ref;
		else
			fqp_cfg->pcfg.qdelay_ref = ep->par[0];
		if (ep->par[1] < 0)
			fqp_cfg->pcfg.tupdate = fq_pie_sysctl.pcfg.tupdate;
		else
			fqp_cfg->pcfg.tupdate = ep->par[1];
		if (ep->par[2] < 0)
			fqp_cfg->pcfg.max_burst = fq_pie_sysctl.pcfg.max_burst;
		else
			fqp_cfg->pcfg.max_burst = ep->par[2];
		if (ep->par[3] < 0)
			fqp_cfg->pcfg.max_ecnth = fq_pie_sysctl.pcfg.max_ecnth;
		else
			fqp_cfg->pcfg.max_ecnth = ep->par[3];
		if (ep->par[4] < 0)
			fqp_cfg->pcfg.alpha = fq_pie_sysctl.pcfg.alpha;
		else
			fqp_cfg->pcfg.alpha = ep->par[4];
		if (ep->par[5] < 0)
			fqp_cfg->pcfg.beta = fq_pie_sysctl.pcfg.beta;
		else
			fqp_cfg->pcfg.beta = ep->par[5];
		if (ep->par[6] < 0)
			fqp_cfg->pcfg.flags = 0;
		else
			fqp_cfg->pcfg.flags = ep->par[6];

		/* FQ configurations */
		if (ep->par[7] < 0)
			fqp_cfg->quantum = fq_pie_sysctl.quantum;
		else
			fqp_cfg->quantum = ep->par[7];
		if (ep->par[8] < 0)
			fqp_cfg->limit = fq_pie_sysctl.limit;
		else
			fqp_cfg->limit = ep->par[8];
		if (ep->par[9] < 0)
			fqp_cfg->flows_cnt = fq_pie_sysctl.flows_cnt;
		else
			fqp_cfg->flows_cnt = ep->par[9];

		/* bound the configurations */
		fqp_cfg->pcfg.qdelay_ref = BOUND_VAR(fqp_cfg->pcfg.qdelay_ref,
			1, 5 * AQM_TIME_1S);
		fqp_cfg->pcfg.tupdate = BOUND_VAR(fqp_cfg->pcfg.tupdate,
			1, 5 * AQM_TIME_1S);
		fqp_cfg->pcfg.max_burst = BOUND_VAR(fqp_cfg->pcfg.max_burst,
			0, 5 * AQM_TIME_1S);
		fqp_cfg->pcfg.max_ecnth = BOUND_VAR(fqp_cfg->pcfg.max_ecnth,
			0, PIE_SCALE);
		fqp_cfg->pcfg.alpha = BOUND_VAR(fqp_cfg->pcfg.alpha, 0, 7 * PIE_SCALE);
		fqp_cfg->pcfg.beta = BOUND_VAR(fqp_cfg->pcfg.beta, 0, 7 * PIE_SCALE);

		fqp_cfg->quantum = BOUND_VAR(fqp_cfg->quantum, 1, 9000);
		fqp_cfg->limit = BOUND_VAR(fqp_cfg->limit, 1, 20480);
		fqp_cfg->flows_cnt = BOUND_VAR(fqp_cfg->flows_cnt, 1, 65536);
	} else {
		D("Wrong parameters for fq_pie scheduler");
		return 1;
	}

	return 0;
}

/*
 * Return the FQ-PIE scheduler configuration.
 * The configuration for the scheduler is passed to userland.
 */
static int
fq_pie_getconfig(struct dn_schk *_schk, struct dn_extra_parms *ep)
{
	struct fq_pie_schk *schk = (struct fq_pie_schk *)(_schk + 1);
	struct dn_sch_fq_pie_parms *fqp_cfg;

	fqp_cfg = &schk->cfg;

	strcpy(ep->name, fq_pie_desc.name);
	ep->par[0] = fqp_cfg->pcfg.qdelay_ref;
	ep->par[1] = fqp_cfg->pcfg.tupdate;
	ep->par[2] = fqp_cfg->pcfg.max_burst;
	ep->par[3] = fqp_cfg->pcfg.max_ecnth;
	ep->par[4] = fqp_cfg->pcfg.alpha;
	ep->par[5] = fqp_cfg->pcfg.beta;
	ep->par[6] = fqp_cfg->pcfg.flags;

	ep->par[7] = fqp_cfg->quantum;
	ep->par[8] = fqp_cfg->limit;
	ep->par[9] = fqp_cfg->flows_cnt;

	return 0;
}

/*
 * FQ-PIE scheduler descriptor:
 * contains the type of the scheduler, its name, the size of the extra
 * data structures, and function pointers.
 */
static struct dn_alg fq_pie_desc = {
	_SI( .type = )  DN_SCHED_FQ_PIE,
	_SI( .name = ) "FQ_PIE",
	_SI( .flags = ) 0,

	_SI( .schk_datalen = ) sizeof(struct fq_pie_schk),
	_SI( .si_datalen = ) sizeof(struct fq_pie_si) - sizeof(struct dn_sch_inst),
	_SI( .q_datalen = ) 0,

	_SI( .enqueue = ) fq_pie_enqueue,
	_SI( .dequeue = ) fq_pie_dequeue,
	_SI( .config = ) fq_pie_config,	/* new sched, i.e. sched X config ... */
	_SI( .destroy = ) NULL,	/* sched X delete */
	_SI( .new_sched = ) fq_pie_new_sched,	/* new sched instance */
	_SI( .free_sched = ) fq_pie_free_sched,	/* delete sched instance */
	_SI( .new_fsk = ) NULL,
	_SI( .free_fsk = ) NULL,
	_SI( .new_queue = ) NULL,
	_SI( .free_queue = ) NULL,
	_SI( .getconfig = ) fq_pie_getconfig,
	_SI( .ref_count = ) 0
};

DECLARE_DNSCHED_MODULE(dn_fq_pie, &fq_pie_desc);
1263