1 /*
2 * PIE - Proportional Integral controller Enhanced AQM algorithm.
3 *
4 * Copyright (C) 2016 Centre for Advanced Internet Architectures,
5 * Swinburne University of Technology, Melbourne, Australia.
6 * Portions of this code were made possible in part by a gift from
7 * The Comcast Innovation Fund.
8 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #include "opt_inet6.h"
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/rwlock.h>
46 #include <sys/socket.h>
47 #include <sys/time.h>
48 #include <sys/sysctl.h>
49
50 #include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
51 #include <net/netisr.h>
52 #include <net/vnet.h>
53
54 #include <netinet/in.h>
55 #include <netinet/ip.h> /* ip_len, ip_off */
56 #include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
57 #include <netinet/ip_fw.h>
58 #include <netinet/ip_dummynet.h>
59 #include <netinet/if_ether.h> /* various ether_* routines */
60 #include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
61 #include <netinet6/ip6_var.h>
62 #include <netpfil/ipfw/dn_heap.h>
63
64 #ifdef NEW_AQM
65 #include <netpfil/ipfw/ip_fw_private.h>
66 #include <netpfil/ipfw/ip_dn_private.h>
67 #include <netpfil/ipfw/dn_aqm.h>
68 #include <netpfil/ipfw/dn_aqm_pie.h>
69 #include <netpfil/ipfw/dn_sched.h>
70
71 /* for debugging */
72 #include <sys/syslog.h>
73
74 static struct dn_aqm pie_desc;
75
76 /* PIE defaults
77 * target=15ms, tupdate=15ms, max_burst=150ms,
78 * max_ecnth=0.1, alpha=0.125, beta=1.25,
79 */
80 struct dn_aqm_pie_parms pie_sysctl =
81 { 15 * AQM_TIME_1MS, 15 * AQM_TIME_1MS, 150 * AQM_TIME_1MS,
82 PIE_SCALE/10 , PIE_SCALE * 0.125, PIE_SCALE * 1.25 ,
83 PIE_CAPDROP_ENABLED | PIE_DEPRATEEST_ENABLED | PIE_DERAND_ENABLED };
84
85 static int
pie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)86 pie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
87 {
88 int error;
89 long value;
90
91 if (!strcmp(oidp->oid_name,"alpha"))
92 value = pie_sysctl.alpha;
93 else
94 value = pie_sysctl.beta;
95
96 value = value * 1000 / PIE_SCALE;
97 error = sysctl_handle_long(oidp, &value, 0, req);
98 if (error != 0 || req->newptr == NULL)
99 return (error);
100 if (value < 1 || value > 7 * PIE_SCALE)
101 return (EINVAL);
102 value = (value * PIE_SCALE) / 1000;
103 if (!strcmp(oidp->oid_name,"alpha"))
104 pie_sysctl.alpha = value;
105 else
106 pie_sysctl.beta = value;
107 return (0);
108 }
109
110 static int
pie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)111 pie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
112 {
113 int error;
114 long value;
115
116 if (!strcmp(oidp->oid_name,"target"))
117 value = pie_sysctl.qdelay_ref;
118 else if (!strcmp(oidp->oid_name,"tupdate"))
119 value = pie_sysctl.tupdate;
120 else
121 value = pie_sysctl.max_burst;
122
123 value = value / AQM_TIME_1US;
124 error = sysctl_handle_long(oidp, &value, 0, req);
125 if (error != 0 || req->newptr == NULL)
126 return (error);
127 if (value < 1 || value > 10 * AQM_TIME_1S)
128 return (EINVAL);
129 value = value * AQM_TIME_1US;
130
131 if (!strcmp(oidp->oid_name,"target"))
132 pie_sysctl.qdelay_ref = value;
133 else if (!strcmp(oidp->oid_name,"tupdate"))
134 pie_sysctl.tupdate = value;
135 else
136 pie_sysctl.max_burst = value;
137 return (0);
138 }
139
140 static int
pie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)141 pie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
142 {
143 int error;
144 long value;
145
146 value = pie_sysctl.max_ecnth;
147 value = value * 1000 / PIE_SCALE;
148 error = sysctl_handle_long(oidp, &value, 0, req);
149 if (error != 0 || req->newptr == NULL)
150 return (error);
151 if (value < 1 || value > PIE_SCALE)
152 return (EINVAL);
153 value = (value * PIE_SCALE) / 1000;
154 pie_sysctl.max_ecnth = value;
155 return (0);
156 }
157
158 /* define PIE sysctl variables */
159 SYSBEGIN(f4)
160 SYSCTL_DECL(_net_inet);
161 SYSCTL_DECL(_net_inet_ip);
162 SYSCTL_DECL(_net_inet_ip_dummynet);
163 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, pie,
164 CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
165 "PIE");
166
167 #ifdef SYSCTL_NODE
168 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, target,
169 CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
170 pie_sysctl_target_tupdate_maxb_handler, "L",
171 "queue target in microsecond");
172 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, tupdate,
173 CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
174 pie_sysctl_target_tupdate_maxb_handler, "L",
175 "the frequency of drop probability calculation in microsecond");
176 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_burst,
177 CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
178 pie_sysctl_target_tupdate_maxb_handler, "L",
179 "Burst allowance interval in microsecond");
180
181 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_ecnth,
182 CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
183 pie_sysctl_max_ecnth_handler, "L",
184 "ECN safeguard threshold scaled by 1000");
185
186 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, alpha,
187 CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
188 pie_sysctl_alpha_beta_handler, "L",
189 "PIE alpha scaled by 1000");
190 SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, beta,
191 CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, NULL, 0,
192 pie_sysctl_alpha_beta_handler, "L",
193 "beta scaled by 1000");
194 #endif
195
196 /*
197 * Callout function for drop probability calculation
198 * This function is called over tupdate ms and takes pointer of PIE
199 * status variables as an argument
200 */
201 static void
calculate_drop_prob(void * x)202 calculate_drop_prob(void *x)
203 {
204 int64_t p, prob, oldprob;
205 struct dn_aqm_pie_parms *pprms;
206 struct pie_status *pst = (struct pie_status *) x;
207 int p_isneg;
208
209 pprms = pst->parms;
210 prob = pst->drop_prob;
211
212 /* calculate current qdelay using DRE method.
213 * If TS is used and no data in the queue, reset current_qdelay
214 * as it stays at last value during dequeue process.
215 */
216 if (pprms->flags & PIE_DEPRATEEST_ENABLED)
217 pst->current_qdelay = ((uint64_t)pst->pq->ni.len_bytes *
218 pst->avg_dq_time) >> PIE_DQ_THRESHOLD_BITS;
219 else
220 if (!pst->pq->ni.len_bytes)
221 pst->current_qdelay = 0;
222
223 /* calculate drop probability */
224 p = (int64_t)pprms->alpha *
225 ((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref);
226 p +=(int64_t) pprms->beta *
227 ((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old);
228
229 /* take absolute value so right shift result is well defined */
230 p_isneg = p < 0;
231 if (p_isneg) {
232 p = -p;
233 }
234
235 /* We PIE_MAX_PROB shift by 12-bits to increase the division precision */
236 p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
237
238 /* auto-tune drop probability */
239 if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
240 p >>= 11 + PIE_FIX_POINT_BITS + 12;
241 else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
242 p >>= 9 + PIE_FIX_POINT_BITS + 12;
243 else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
244 p >>= 7 + PIE_FIX_POINT_BITS + 12;
245 else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
246 p >>= 5 + PIE_FIX_POINT_BITS + 12;
247 else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
248 p >>= 3 + PIE_FIX_POINT_BITS + 12;
249 else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
250 p >>= 1 + PIE_FIX_POINT_BITS + 12;
251 else
252 p >>= PIE_FIX_POINT_BITS + 12;
253
254 oldprob = prob;
255
256 if (p_isneg) {
257 prob = prob - p;
258
259 /* check for multiplication underflow */
260 if (prob > oldprob) {
261 prob= 0;
262 D("underflow");
263 }
264 } else {
265 /* Cap Drop adjustment */
266 if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
267 prob >= PIE_MAX_PROB / 10 &&
268 p > PIE_MAX_PROB / 50 ) {
269 p = PIE_MAX_PROB / 50;
270 }
271
272 prob = prob + p;
273
274 /* check for multiplication overflow */
275 if (prob<oldprob) {
276 D("overflow");
277 prob= PIE_MAX_PROB;
278 }
279 }
280
281 /*
282 * decay the drop probability exponentially
283 * and restrict it to range 0 to PIE_MAX_PROB
284 */
285 if (prob < 0) {
286 prob = 0;
287 } else {
288 if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
289 /* 0.98 ~= 1- 1/64 */
290 prob = prob - (prob >> 6);
291 }
292
293 if (prob > PIE_MAX_PROB) {
294 prob = PIE_MAX_PROB;
295 }
296 }
297
298 pst->drop_prob = prob;
299
300 /* store current queue delay value in old queue delay*/
301 pst->qdelay_old = pst->current_qdelay;
302
303 /* update burst allowance */
304 if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance>0) {
305
306 if (pst->burst_allowance > pprms->tupdate )
307 pst->burst_allowance -= pprms->tupdate;
308 else
309 pst->burst_allowance = 0;
310 }
311
312 /* reschedule calculate_drop_prob function */
313 if (pst->sflags & PIE_ACTIVE)
314 callout_reset_sbt(&pst->aqm_pie_callout,
315 (uint64_t)pprms->tupdate * SBT_1US, 0, calculate_drop_prob, pst, 0);
316
317 mtx_unlock(&pst->lock_mtx);
318 }
319
320 /*
321 * Extract a packet from the head of queue 'q'
322 * Return a packet or NULL if the queue is empty.
323 * If getts is set, also extract packet's timestamp from mtag.
324 */
325 static struct mbuf *
pie_extract_head(struct dn_queue * q,aqm_time_t * pkt_ts,int getts)326 pie_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts, int getts)
327 {
328 struct m_tag *mtag;
329 struct mbuf *m;
330
331 next: m = q->mq.head;
332 if (m == NULL)
333 return m;
334 q->mq.head = m->m_nextpkt;
335
336 /* Update stats */
337 update_stats(q, -m->m_pkthdr.len, 0);
338
339 if (q->ni.length == 0) /* queue is now idle */
340 q->q_time = V_dn_cfg.curr_time;
341
342 if (getts) {
343 /* extract packet TS*/
344 mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
345 if (mtag == NULL) {
346 D("PIE timestamp mtag not found!");
347 *pkt_ts = 0;
348 } else {
349 *pkt_ts = *(aqm_time_t *)(mtag + 1);
350 m_tag_delete(m,mtag);
351 }
352 }
353 if (m->m_pkthdr.rcvif != NULL &&
354 __predict_false(m_rcvif_restore(m) == NULL)) {
355 m_freem(m);
356 goto next;
357 }
358 return m;
359 }
360
361 /*
362 * Initiate PIE variable and optionally activate it
363 */
364 __inline static void
init_activate_pie(struct pie_status * pst,int resettimer)365 init_activate_pie(struct pie_status *pst, int resettimer)
366 {
367 struct dn_aqm_pie_parms *pprms;
368
369 mtx_lock(&pst->lock_mtx);
370 pprms = pst->parms;
371 pst->drop_prob = 0;
372 pst->qdelay_old = 0;
373 pst->burst_allowance = pprms->max_burst;
374 pst->accu_prob = 0;
375 pst->dq_count = 0;
376 pst->avg_dq_time = 0;
377 pst->sflags = PIE_INMEASUREMENT;
378 pst->measurement_start = AQM_UNOW;
379
380 if (resettimer) {
381 pst->sflags |= PIE_ACTIVE;
382 callout_reset_sbt(&pst->aqm_pie_callout,
383 (uint64_t)pprms->tupdate * SBT_1US,
384 0, calculate_drop_prob, pst, 0);
385 }
386 //DX(2, "PIE Activated");
387 mtx_unlock(&pst->lock_mtx);
388 }
389
390 /*
391 * Deactivate PIE and stop probe update callout
392 */
393 __inline static void
deactivate_pie(struct pie_status * pst)394 deactivate_pie(struct pie_status *pst)
395 {
396 mtx_lock(&pst->lock_mtx);
397 pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
398 callout_stop(&pst->aqm_pie_callout);
399 //D("PIE Deactivated");
400 mtx_unlock(&pst->lock_mtx);
401 }
402
403 /*
404 * Dequeue and return a pcaket from queue 'q' or NULL if 'q' is empty.
405 * Also, caculate depature time or queue delay using timestamp
406 */
407 static struct mbuf *
aqm_pie_dequeue(struct dn_queue * q)408 aqm_pie_dequeue(struct dn_queue *q)
409 {
410 struct mbuf *m;
411 struct dn_aqm_pie_parms *pprms;
412 struct pie_status *pst;
413 aqm_time_t now;
414 aqm_time_t pkt_ts, dq_time;
415 int32_t w;
416
417 pst = q->aqm_status;
418 pprms = pst->parms;
419
420 /*we extarct packet ts only when Departure Rate Estimation dis not used*/
421 m = pie_extract_head(q, &pkt_ts, !(pprms->flags & PIE_DEPRATEEST_ENABLED));
422
423 if (!m || !(pst->sflags & PIE_ACTIVE))
424 return m;
425
426 now = AQM_UNOW;
427 if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
428 /* calculate average depature time */
429 if(pst->sflags & PIE_INMEASUREMENT) {
430 pst->dq_count += m->m_pkthdr.len;
431
432 if (pst->dq_count >= PIE_DQ_THRESHOLD) {
433 dq_time = now - pst->measurement_start;
434
435 /*
436 * if we don't have old avg dq_time i.e PIE is (re)initialized,
437 * don't use weight to calculate new avg_dq_time
438 */
439 if(pst->avg_dq_time == 0)
440 pst->avg_dq_time = dq_time;
441 else {
442 /*
443 * weight = PIE_DQ_THRESHOLD/2^6, but we scaled
444 * weight by 2^8. Thus, scaled
445 * weight = PIE_DQ_THRESHOLD /2^8
446 * */
447 w = PIE_DQ_THRESHOLD >> 8;
448 pst->avg_dq_time = (dq_time* w
449 + (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
450 pst->sflags &= ~PIE_INMEASUREMENT;
451 }
452 }
453 }
454
455 /*
456 * Start new measurement cycle when the queue has
457 * PIE_DQ_THRESHOLD worth of bytes.
458 */
459 if(!(pst->sflags & PIE_INMEASUREMENT) &&
460 q->ni.len_bytes >= PIE_DQ_THRESHOLD) {
461 pst->sflags |= PIE_INMEASUREMENT;
462 pst->measurement_start = now;
463 pst->dq_count = 0;
464 }
465 }
466 /* Optionally, use packet timestamp to estimate queue delay */
467 else
468 pst->current_qdelay = now - pkt_ts;
469
470 return m;
471 }
472
473 /*
474 * Enqueue a packet in q, subject to space and PIE queue management policy
475 * (whose parameters are in q->fs).
476 * Update stats for the queue and the scheduler.
477 * Return 0 on success, 1 on drop. The packet is consumed anyways.
478 */
479 static int
aqm_pie_enqueue(struct dn_queue * q,struct mbuf * m)480 aqm_pie_enqueue(struct dn_queue *q, struct mbuf* m)
481 {
482 struct dn_fs *f;
483 uint64_t len;
484 uint32_t qlen;
485 struct pie_status *pst;
486 struct dn_aqm_pie_parms *pprms;
487 int t;
488
489 len = m->m_pkthdr.len;
490 pst = q->aqm_status;
491 if(!pst) {
492 DX(2, "PIE queue is not initialized\n");
493 update_stats(q, 0, 1);
494 FREE_PKT(m);
495 return 1;
496 }
497
498 f = &(q->fs->fs);
499 pprms = pst->parms;
500 t = ENQUE;
501
502 /* get current queue length in bytes or packets*/
503 qlen = (f->flags & DN_QSIZE_BYTES) ?
504 q->ni.len_bytes : q->ni.length;
505
506 /* check for queue size and drop the tail if exceed queue limit*/
507 if (qlen >= f->qsize)
508 t = DROP;
509 /* drop/mark the packet when PIE is active and burst time elapsed */
510 else if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance==0
511 && drop_early(pst, q->ni.len_bytes) == DROP) {
512 /*
513 * if drop_prob over ECN threshold, drop the packet
514 * otherwise mark and enqueue it.
515 */
516 if ((pprms->flags & PIE_ECN_ENABLED) && pst->drop_prob <
517 (pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
518 && ecn_mark(m))
519 t = ENQUE;
520 else
521 t = DROP;
522 }
523
524 /* Turn PIE on when 1/3 of the queue is full */
525 if (!(pst->sflags & PIE_ACTIVE) && qlen >= pst->one_third_q_size) {
526 init_activate_pie(pst, 1);
527 }
528
529 /* Reset burst tolerance and optinally turn PIE off*/
530 if ((pst->sflags & PIE_ACTIVE) && pst->drop_prob == 0 &&
531 pst->current_qdelay < (pprms->qdelay_ref >> 1) &&
532 pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
533 pst->burst_allowance = pprms->max_burst;
534 if ((pprms->flags & PIE_ON_OFF_MODE_ENABLED) && qlen<=0)
535 deactivate_pie(pst);
536 }
537
538 /* Timestamp the packet if Departure Rate Estimation is disabled */
539 if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
540 /* Add TS to mbuf as a TAG */
541 struct m_tag *mtag;
542 mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
543 if (mtag == NULL)
544 mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
545 sizeof(aqm_time_t), M_NOWAIT);
546 if (mtag == NULL) {
547 t = DROP;
548 } else {
549 *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
550 m_tag_prepend(m, mtag);
551 }
552 }
553
554 if (t != DROP) {
555 mq_append(&q->mq, m);
556 update_stats(q, len, 0);
557 return (0);
558 } else {
559 update_stats(q, 0, 1);
560
561 /* reset accu_prob after packet drop */
562 pst->accu_prob = 0;
563 FREE_PKT(m);
564 return 1;
565 }
566 return 0;
567 }
568
569 /*
570 * initialize PIE for queue 'q'
571 * First allocate memory for PIE status.
572 */
573 static int
aqm_pie_init(struct dn_queue * q)574 aqm_pie_init(struct dn_queue *q)
575 {
576 struct pie_status *pst;
577 struct dn_aqm_pie_parms *pprms;
578 int err = 0;
579
580 pprms = q->fs->aqmcfg;
581
582 do { /* exit with break when error occurs*/
583 if (!pprms){
584 DX(2, "AQM_PIE is not configured");
585 err = EINVAL;
586 break;
587 }
588
589 q->aqm_status = malloc(sizeof(struct pie_status),
590 M_DUMMYNET, M_NOWAIT | M_ZERO);
591 if (q->aqm_status == NULL) {
592 D("cannot allocate PIE private data");
593 err = ENOMEM ;
594 break;
595 }
596
597 pst = q->aqm_status;
598 dummynet_sched_lock();
599 /* increase reference count for PIE module */
600 pie_desc.ref_count++;
601 dummynet_sched_unlock();
602
603 pst->pq = q;
604 pst->parms = pprms;
605
606 /* For speed optimization, we caculate 1/3 queue size once here */
607 // we can use x/3 = (x >>2) + (x >>4) + (x >>7)
608 pst->one_third_q_size = q->fs->fs.qsize/3;
609
610 mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
611 callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
612 CALLOUT_RETURNUNLOCKED);
613
614 pst->current_qdelay = 0;
615 init_activate_pie(pst, !(pprms->flags & PIE_ON_OFF_MODE_ENABLED));
616
617 //DX(2, "aqm_PIE_init");
618
619 } while(0);
620
621 return err;
622 }
623
624 /*
625 * Callout function to destroy pie mtx and free PIE status memory
626 */
627 static void
pie_callout_cleanup(void * x)628 pie_callout_cleanup(void *x)
629 {
630 struct pie_status *pst = (struct pie_status *) x;
631
632 mtx_unlock(&pst->lock_mtx);
633 mtx_destroy(&pst->lock_mtx);
634 free(x, M_DUMMYNET);
635 dummynet_sched_lock();
636 pie_desc.ref_count--;
637 dummynet_sched_unlock();
638 }
639
640 /*
641 * Clean up PIE status for queue 'q'
642 * Destroy memory allocated for PIE status.
643 */
644 static int
aqm_pie_cleanup(struct dn_queue * q)645 aqm_pie_cleanup(struct dn_queue *q)
646 {
647
648 if(!q) {
649 D("q is null");
650 return 0;
651 }
652 struct pie_status *pst = q->aqm_status;
653 if(!pst) {
654 //D("queue is already cleaned up");
655 return 0;
656 }
657 if(!q->fs || !q->fs->aqmcfg) {
658 D("fs is null or no cfg");
659 return 1;
660 }
661 if (q->fs->aqmfp && q->fs->aqmfp->type !=DN_AQM_PIE) {
662 D("Not PIE fs (%d)", q->fs->fs.fs_nr);
663 return 1;
664 }
665
666 /*
667 * Free PIE status allocated memory using pie_callout_cleanup() callout
668 * function to avoid any potential race.
669 * We reset aqm_pie_callout to call pie_callout_cleanup() in next 1um. This
670 * stops the scheduled calculate_drop_prob() callout and call pie_callout_cleanup()
671 * which does memory freeing.
672 */
673 mtx_lock(&pst->lock_mtx);
674 callout_reset_sbt(&pst->aqm_pie_callout,
675 SBT_1US, 0, pie_callout_cleanup, pst, 0);
676 q->aqm_status = NULL;
677 mtx_unlock(&pst->lock_mtx);
678
679 return 0;
680 }
681
682 /*
683 * Config PIE parameters
684 * also allocate memory for PIE configurations
685 */
686 static int
aqm_pie_config(struct dn_fsk * fs,struct dn_extra_parms * ep,int len)687 aqm_pie_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
688 {
689 struct dn_aqm_pie_parms *pcfg;
690
691 int l = sizeof(struct dn_extra_parms);
692 if (len < l) {
693 D("invalid sched parms length got %d need %d", len, l);
694 return EINVAL;
695 }
696 /* we free the old cfg because maybe the orignal allocation
697 * was used for diffirent AQM type.
698 */
699 if (fs->aqmcfg) {
700 free(fs->aqmcfg, M_DUMMYNET);
701 fs->aqmcfg = NULL;
702 }
703
704 fs->aqmcfg = malloc(sizeof(struct dn_aqm_pie_parms),
705 M_DUMMYNET, M_NOWAIT | M_ZERO);
706 if (fs->aqmcfg== NULL) {
707 D("cannot allocate PIE configuration parameters");
708 return ENOMEM;
709 }
710
711 /* par array contains pie configuration as follow
712 * 0- qdelay_ref,1- tupdate, 2- max_burst
713 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
714 */
715
716 /* configure PIE parameters */
717 pcfg = fs->aqmcfg;
718
719 if (ep->par[0] < 0)
720 pcfg->qdelay_ref = pie_sysctl.qdelay_ref * AQM_TIME_1US;
721 else
722 pcfg->qdelay_ref = ep->par[0];
723 if (ep->par[1] < 0)
724 pcfg->tupdate = pie_sysctl.tupdate * AQM_TIME_1US;
725 else
726 pcfg->tupdate = ep->par[1];
727 if (ep->par[2] < 0)
728 pcfg->max_burst = pie_sysctl.max_burst * AQM_TIME_1US;
729 else
730 pcfg->max_burst = ep->par[2];
731 if (ep->par[3] < 0)
732 pcfg->max_ecnth = pie_sysctl.max_ecnth;
733 else
734 pcfg->max_ecnth = ep->par[3];
735 if (ep->par[4] < 0)
736 pcfg->alpha = pie_sysctl.alpha;
737 else
738 pcfg->alpha = ep->par[4];
739 if (ep->par[5] < 0)
740 pcfg->beta = pie_sysctl.beta;
741 else
742 pcfg->beta = ep->par[5];
743 if (ep->par[6] < 0)
744 pcfg->flags = pie_sysctl.flags;
745 else
746 pcfg->flags = ep->par[6];
747
748 /* bound PIE configurations */
749 pcfg->qdelay_ref = BOUND_VAR(pcfg->qdelay_ref, 1, 10 * AQM_TIME_1S);
750 pcfg->tupdate = BOUND_VAR(pcfg->tupdate, 1, 10 * AQM_TIME_1S);
751 pcfg->max_burst = BOUND_VAR(pcfg->max_burst, 0, 10 * AQM_TIME_1S);
752 pcfg->max_ecnth = BOUND_VAR(pcfg->max_ecnth, 0, PIE_SCALE);
753 pcfg->alpha = BOUND_VAR(pcfg->alpha, 0, 7 * PIE_SCALE);
754 pcfg->beta = BOUND_VAR(pcfg->beta, 0 , 7 * PIE_SCALE);
755
756 pie_desc.cfg_ref_count++;
757 //D("pie cfg_ref_count=%d", pie_desc.cfg_ref_count);
758 return 0;
759 }
760
761 /*
762 * Deconfigure PIE and free memory allocation
763 */
764 static int
aqm_pie_deconfig(struct dn_fsk * fs)765 aqm_pie_deconfig(struct dn_fsk* fs)
766 {
767 if (fs && fs->aqmcfg) {
768 free(fs->aqmcfg, M_DUMMYNET);
769 fs->aqmcfg = NULL;
770 pie_desc.cfg_ref_count--;
771 }
772 return 0;
773 }
774
775 /*
776 * Retrieve PIE configuration parameters.
777 */
778 static int
aqm_pie_getconfig(struct dn_fsk * fs,struct dn_extra_parms * ep)779 aqm_pie_getconfig (struct dn_fsk *fs, struct dn_extra_parms * ep)
780 {
781 struct dn_aqm_pie_parms *pcfg;
782 if (fs->aqmcfg) {
783 strlcpy(ep->name, pie_desc.name, sizeof(ep->name));
784 pcfg = fs->aqmcfg;
785 ep->par[0] = pcfg->qdelay_ref / AQM_TIME_1US;
786 ep->par[1] = pcfg->tupdate / AQM_TIME_1US;
787 ep->par[2] = pcfg->max_burst / AQM_TIME_1US;
788 ep->par[3] = pcfg->max_ecnth;
789 ep->par[4] = pcfg->alpha;
790 ep->par[5] = pcfg->beta;
791 ep->par[6] = pcfg->flags;
792
793 return 0;
794 }
795 return 1;
796 }
797
798 static struct dn_aqm pie_desc = {
799 _SI( .type = ) DN_AQM_PIE,
800 _SI( .name = ) "PIE",
801 _SI( .ref_count = ) 0,
802 _SI( .cfg_ref_count = ) 0,
803 _SI( .enqueue = ) aqm_pie_enqueue,
804 _SI( .dequeue = ) aqm_pie_dequeue,
805 _SI( .config = ) aqm_pie_config,
806 _SI( .deconfig = ) aqm_pie_deconfig,
807 _SI( .getconfig = ) aqm_pie_getconfig,
808 _SI( .init = ) aqm_pie_init,
809 _SI( .cleanup = ) aqm_pie_cleanup,
810 };
811
812 DECLARE_DNAQM_MODULE(dn_aqm_pie, &pie_desc);
813 #endif
814