191336b40SDon Lewis /*
291336b40SDon Lewis * FQ_Codel - The FlowQueue-Codel scheduler/AQM
391336b40SDon Lewis *
491336b40SDon Lewis * Copyright (C) 2016 Centre for Advanced Internet Architectures,
591336b40SDon Lewis * Swinburne University of Technology, Melbourne, Australia.
691336b40SDon Lewis * Portions of this code were made possible in part by a gift from
791336b40SDon Lewis * The Comcast Innovation Fund.
891336b40SDon Lewis * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
991336b40SDon Lewis *
1091336b40SDon Lewis * Redistribution and use in source and binary forms, with or without
1191336b40SDon Lewis * modification, are permitted provided that the following conditions
1291336b40SDon Lewis * are met:
1391336b40SDon Lewis * 1. Redistributions of source code must retain the above copyright
1491336b40SDon Lewis * notice, this list of conditions and the following disclaimer.
1591336b40SDon Lewis * 2. Redistributions in binary form must reproduce the above copyright
1691336b40SDon Lewis * notice, this list of conditions and the following disclaimer in the
1791336b40SDon Lewis * documentation and/or other materials provided with the distribution.
1891336b40SDon Lewis *
1991336b40SDon Lewis * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2091336b40SDon Lewis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2191336b40SDon Lewis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2291336b40SDon Lewis * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2391336b40SDon Lewis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2491336b40SDon Lewis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2591336b40SDon Lewis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2691336b40SDon Lewis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2791336b40SDon Lewis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2891336b40SDon Lewis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2991336b40SDon Lewis * SUCH DAMAGE.
3091336b40SDon Lewis */
3191336b40SDon Lewis
3291336b40SDon Lewis #ifdef _KERNEL
3391336b40SDon Lewis #include <sys/malloc.h>
3491336b40SDon Lewis #include <sys/socket.h>
3591336b40SDon Lewis //#include <sys/socketvar.h>
3691336b40SDon Lewis #include <sys/kernel.h>
3791336b40SDon Lewis #include <sys/mbuf.h>
3891336b40SDon Lewis #include <sys/module.h>
3991336b40SDon Lewis #include <net/if.h> /* IFNAMSIZ */
4091336b40SDon Lewis #include <netinet/in.h>
4191336b40SDon Lewis #include <netinet/ip_var.h> /* ipfw_rule_ref */
4291336b40SDon Lewis #include <netinet/ip_fw.h> /* flow_id */
4391336b40SDon Lewis #include <netinet/ip_dummynet.h>
4491336b40SDon Lewis
453f289c3fSJeff Roberson #include <sys/lock.h>
4691336b40SDon Lewis #include <sys/proc.h>
4791336b40SDon Lewis #include <sys/rwlock.h>
4891336b40SDon Lewis
4991336b40SDon Lewis #include <netpfil/ipfw/ip_fw_private.h>
5091336b40SDon Lewis #include <sys/sysctl.h>
5191336b40SDon Lewis #include <netinet/ip.h>
5291336b40SDon Lewis #include <netinet/ip6.h>
5391336b40SDon Lewis #include <netinet/ip_icmp.h>
5491336b40SDon Lewis #include <netinet/tcp.h>
5591336b40SDon Lewis #include <netinet/udp.h>
5691336b40SDon Lewis #include <sys/queue.h>
5791336b40SDon Lewis #include <sys/hash.h>
5891336b40SDon Lewis
5991336b40SDon Lewis #include <netpfil/ipfw/dn_heap.h>
6091336b40SDon Lewis #include <netpfil/ipfw/ip_dn_private.h>
6191336b40SDon Lewis
6291336b40SDon Lewis #include <netpfil/ipfw/dn_aqm.h>
6391336b40SDon Lewis #include <netpfil/ipfw/dn_aqm_codel.h>
6491336b40SDon Lewis #include <netpfil/ipfw/dn_sched.h>
6591336b40SDon Lewis #include <netpfil/ipfw/dn_sched_fq_codel.h>
6691336b40SDon Lewis #include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
6791336b40SDon Lewis
6891336b40SDon Lewis #else
6991336b40SDon Lewis #include <dn_test.h>
7091336b40SDon Lewis #endif
7191336b40SDon Lewis
7291336b40SDon Lewis /* NOTE: In fq_codel module, we reimplements CoDel AQM functions
7391336b40SDon Lewis * because fq_codel use different flows (sub-queues) structure and
7491336b40SDon Lewis * dn_queue includes many variables not needed by a flow (sub-queue
7591336b40SDon Lewis * )i.e. avoid extra overhead (88 bytes vs 208 bytes).
7691336b40SDon Lewis * Also, CoDel functions manages stats of sub-queues as well as the main queue.
7791336b40SDon Lewis */
7891336b40SDon Lewis
7991336b40SDon Lewis #define DN_SCHED_FQ_CODEL 6
8091336b40SDon Lewis
8191336b40SDon Lewis static struct dn_alg fq_codel_desc;
8291336b40SDon Lewis
8391336b40SDon Lewis /* fq_codel default parameters including codel */
8491336b40SDon Lewis struct dn_sch_fq_codel_parms
8591336b40SDon Lewis fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
8691336b40SDon Lewis CODEL_ECN_ENABLED}, 1024, 10240, 1514};
8791336b40SDon Lewis
8891336b40SDon Lewis static int
fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)8991336b40SDon Lewis fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
9091336b40SDon Lewis {
9191336b40SDon Lewis int error;
9291336b40SDon Lewis long value;
9391336b40SDon Lewis
9491336b40SDon Lewis value = fq_codel_sysctl.ccfg.interval;
9591336b40SDon Lewis value /= AQM_TIME_1US;
9691336b40SDon Lewis error = sysctl_handle_long(oidp, &value, 0, req);
9791336b40SDon Lewis if (error != 0 || req->newptr == NULL)
9891336b40SDon Lewis return (error);
9991336b40SDon Lewis if (value < 1 || value > 100 * AQM_TIME_1S)
10091336b40SDon Lewis return (EINVAL);
10191336b40SDon Lewis fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ;
10291336b40SDon Lewis
10391336b40SDon Lewis return (0);
10491336b40SDon Lewis }
10591336b40SDon Lewis
10691336b40SDon Lewis static int
fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)10791336b40SDon Lewis fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
10891336b40SDon Lewis {
10991336b40SDon Lewis int error;
11091336b40SDon Lewis long value;
11191336b40SDon Lewis
11291336b40SDon Lewis value = fq_codel_sysctl.ccfg.target;
11391336b40SDon Lewis value /= AQM_TIME_1US;
11491336b40SDon Lewis error = sysctl_handle_long(oidp, &value, 0, req);
11591336b40SDon Lewis if (error != 0 || req->newptr == NULL)
11691336b40SDon Lewis return (error);
11791336b40SDon Lewis if (value < 1 || value > 5 * AQM_TIME_1S)
11891336b40SDon Lewis return (EINVAL);
11991336b40SDon Lewis fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ;
12091336b40SDon Lewis
12191336b40SDon Lewis return (0);
12291336b40SDon Lewis }
12391336b40SDon Lewis
12491336b40SDon Lewis SYSBEGIN(f4)
12591336b40SDon Lewis
12691336b40SDon Lewis SYSCTL_DECL(_net_inet);
12791336b40SDon Lewis SYSCTL_DECL(_net_inet_ip);
12891336b40SDon Lewis SYSCTL_DECL(_net_inet_ip_dummynet);
12991336b40SDon Lewis static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
1307029da5cSPawel Biernacki CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1317029da5cSPawel Biernacki "FQ_CODEL");
13291336b40SDon Lewis
13391336b40SDon Lewis #ifdef SYSCTL_NODE
13491336b40SDon Lewis
13591336b40SDon Lewis SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
1367029da5cSPawel Biernacki CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1377029da5cSPawel Biernacki NULL, 0, fqcodel_sysctl_target_handler, "L",
13891336b40SDon Lewis "FQ_CoDel target in microsecond");
13991336b40SDon Lewis SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
1407029da5cSPawel Biernacki CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1417029da5cSPawel Biernacki NULL, 0, fqcodel_sysctl_interval_handler, "L",
14291336b40SDon Lewis "FQ_CoDel interval in microsecond");
14391336b40SDon Lewis
14491336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
14591336b40SDon Lewis CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
14691336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
14791336b40SDon Lewis CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024,
14891336b40SDon Lewis "Number of queues for FQ_CoDel");
14991336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
15091336b40SDon Lewis CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
15191336b40SDon Lewis #endif
15291336b40SDon Lewis
15391336b40SDon Lewis /* Drop a packet form the head of codel queue */
15491336b40SDon Lewis static void
codel_drop_head(struct fq_codel_flow * q,struct fq_codel_si * si)15591336b40SDon Lewis codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
15691336b40SDon Lewis {
15791336b40SDon Lewis struct mbuf *m = q->mq.head;
15891336b40SDon Lewis
15991336b40SDon Lewis if (m == NULL)
16091336b40SDon Lewis return;
16191336b40SDon Lewis q->mq.head = m->m_nextpkt;
16291336b40SDon Lewis
16391336b40SDon Lewis fq_update_stats(q, si, -m->m_pkthdr.len, 1);
16491336b40SDon Lewis
16591336b40SDon Lewis if (si->main_q.ni.length == 0) /* queue is now idle */
166fe3bcfbdSTom Jones si->main_q.q_time = V_dn_cfg.curr_time;
16791336b40SDon Lewis
16891336b40SDon Lewis FREE_PKT(m);
16991336b40SDon Lewis }
17091336b40SDon Lewis
17191336b40SDon Lewis /* Enqueue a packet 'm' to a queue 'q' and add timestamp to that packet.
17291336b40SDon Lewis * Return 1 when unable to add timestamp, otherwise return 0
17391336b40SDon Lewis */
17491336b40SDon Lewis static int
codel_enqueue(struct fq_codel_flow * q,struct mbuf * m,struct fq_codel_si * si)17591336b40SDon Lewis codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
17691336b40SDon Lewis {
17791336b40SDon Lewis uint64_t len;
17891336b40SDon Lewis
17991336b40SDon Lewis len = m->m_pkthdr.len;
18091336b40SDon Lewis /* finding maximum packet size */
18191336b40SDon Lewis if (len > q->cst.maxpkt_size)
18291336b40SDon Lewis q->cst.maxpkt_size = len;
18391336b40SDon Lewis
18491336b40SDon Lewis /* Add timestamp to mbuf as MTAG */
18591336b40SDon Lewis struct m_tag *mtag;
18691336b40SDon Lewis mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
18791336b40SDon Lewis if (mtag == NULL)
18891336b40SDon Lewis mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
18991336b40SDon Lewis M_NOWAIT);
190c4a6258dSMark Johnston if (mtag == NULL)
19191336b40SDon Lewis goto drop;
19291336b40SDon Lewis *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
19391336b40SDon Lewis m_tag_prepend(m, mtag);
19491336b40SDon Lewis
195*26b9e1f0SKristof Provost if (m->m_pkthdr.rcvif != NULL)
196*26b9e1f0SKristof Provost m_rcvif_serialize(m);
197*26b9e1f0SKristof Provost
19891336b40SDon Lewis mq_append(&q->mq, m);
19991336b40SDon Lewis fq_update_stats(q, si, len, 0);
20091336b40SDon Lewis return 0;
20191336b40SDon Lewis
20291336b40SDon Lewis drop:
20391336b40SDon Lewis fq_update_stats(q, si, len, 1);
20491336b40SDon Lewis m_freem(m);
20591336b40SDon Lewis return 1;
20691336b40SDon Lewis }
20791336b40SDon Lewis
20891336b40SDon Lewis /*
20991336b40SDon Lewis * Classify a packet to queue number using Jenkins hash function.
21091336b40SDon Lewis * Return: queue number
21191336b40SDon Lewis * the input of the hash are protocol no, perturbation, src IP, dst IP,
21291336b40SDon Lewis * src port, dst port,
21391336b40SDon Lewis */
21491336b40SDon Lewis static inline int
fq_codel_classify_flow(struct mbuf * m,uint16_t fcount,struct fq_codel_si * si)21591336b40SDon Lewis fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
21691336b40SDon Lewis {
21791336b40SDon Lewis struct ip *ip;
21891336b40SDon Lewis struct tcphdr *th;
21991336b40SDon Lewis struct udphdr *uh;
22091336b40SDon Lewis uint8_t tuple[41];
22191336b40SDon Lewis uint16_t hash=0;
22291336b40SDon Lewis
2234001fcbeSDon Lewis ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
22491336b40SDon Lewis //#ifdef INET6
22591336b40SDon Lewis struct ip6_hdr *ip6;
22691336b40SDon Lewis int isip6;
2274001fcbeSDon Lewis isip6 = (ip->ip_v == 6);
22891336b40SDon Lewis
22991336b40SDon Lewis if(isip6) {
2304001fcbeSDon Lewis ip6 = (struct ip6_hdr *)ip;
23191336b40SDon Lewis *((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
23291336b40SDon Lewis *((uint32_t *) &tuple[1]) = si->perturbation;
23391336b40SDon Lewis memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
23491336b40SDon Lewis memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
23591336b40SDon Lewis
23691336b40SDon Lewis switch (ip6->ip6_nxt) {
23791336b40SDon Lewis case IPPROTO_TCP:
23891336b40SDon Lewis th = (struct tcphdr *)(ip6 + 1);
23991336b40SDon Lewis *((uint16_t *) &tuple[37]) = th->th_dport;
24091336b40SDon Lewis *((uint16_t *) &tuple[39]) = th->th_sport;
24191336b40SDon Lewis break;
24291336b40SDon Lewis
24391336b40SDon Lewis case IPPROTO_UDP:
24491336b40SDon Lewis uh = (struct udphdr *)(ip6 + 1);
24591336b40SDon Lewis *((uint16_t *) &tuple[37]) = uh->uh_dport;
24691336b40SDon Lewis *((uint16_t *) &tuple[39]) = uh->uh_sport;
24791336b40SDon Lewis break;
24891336b40SDon Lewis default:
24991336b40SDon Lewis memset(&tuple[37], 0, 4);
25091336b40SDon Lewis }
25191336b40SDon Lewis
25291336b40SDon Lewis hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
25391336b40SDon Lewis return hash;
25491336b40SDon Lewis }
25591336b40SDon Lewis //#endif
25691336b40SDon Lewis
25791336b40SDon Lewis /* IPv4 */
25891336b40SDon Lewis *((uint8_t *) &tuple[0]) = ip->ip_p;
25991336b40SDon Lewis *((uint32_t *) &tuple[1]) = si->perturbation;
26091336b40SDon Lewis *((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
26191336b40SDon Lewis *((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
26291336b40SDon Lewis
26391336b40SDon Lewis switch (ip->ip_p) {
26491336b40SDon Lewis case IPPROTO_TCP:
26591336b40SDon Lewis th = (struct tcphdr *)(ip + 1);
26691336b40SDon Lewis *((uint16_t *) &tuple[13]) = th->th_dport;
26791336b40SDon Lewis *((uint16_t *) &tuple[15]) = th->th_sport;
26891336b40SDon Lewis break;
26991336b40SDon Lewis
27091336b40SDon Lewis case IPPROTO_UDP:
27191336b40SDon Lewis uh = (struct udphdr *)(ip + 1);
27291336b40SDon Lewis *((uint16_t *) &tuple[13]) = uh->uh_dport;
27391336b40SDon Lewis *((uint16_t *) &tuple[15]) = uh->uh_sport;
27491336b40SDon Lewis break;
27591336b40SDon Lewis default:
27691336b40SDon Lewis memset(&tuple[13], 0, 4);
27791336b40SDon Lewis }
27891336b40SDon Lewis hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
27991336b40SDon Lewis
28091336b40SDon Lewis return hash;
28191336b40SDon Lewis }
28291336b40SDon Lewis
28391336b40SDon Lewis /*
28491336b40SDon Lewis * Enqueue a packet into an appropriate queue according to
28591336b40SDon Lewis * FQ_CODEL algorithm.
28691336b40SDon Lewis */
28791336b40SDon Lewis static int
fq_codel_enqueue(struct dn_sch_inst * _si,struct dn_queue * _q,struct mbuf * m)28891336b40SDon Lewis fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
28991336b40SDon Lewis struct mbuf *m)
29091336b40SDon Lewis {
29191336b40SDon Lewis struct fq_codel_si *si;
29291336b40SDon Lewis struct fq_codel_schk *schk;
29391336b40SDon Lewis struct dn_sch_fq_codel_parms *param;
29491336b40SDon Lewis struct dn_queue *mainq;
29591336b40SDon Lewis int idx, drop, i, maxidx;
29691336b40SDon Lewis
29791336b40SDon Lewis mainq = (struct dn_queue *)(_si + 1);
29891336b40SDon Lewis si = (struct fq_codel_si *)_si;
29991336b40SDon Lewis schk = (struct fq_codel_schk *)(si->_si.sched+1);
30091336b40SDon Lewis param = &schk->cfg;
30191336b40SDon Lewis
30291336b40SDon Lewis /* classify a packet to queue number*/
30391336b40SDon Lewis idx = fq_codel_classify_flow(m, param->flows_cnt, si);
30491336b40SDon Lewis /* enqueue packet into appropriate queue using CoDel AQM.
30591336b40SDon Lewis * Note: 'codel_enqueue' function returns 1 only when it unable to
30691336b40SDon Lewis * add timestamp to packet (no limit check)*/
30791336b40SDon Lewis drop = codel_enqueue(&si->flows[idx], m, si);
30891336b40SDon Lewis
30991336b40SDon Lewis /* codel unable to timestamp a packet */
31091336b40SDon Lewis if (drop)
31191336b40SDon Lewis return 1;
31291336b40SDon Lewis
31391336b40SDon Lewis /* If the flow (sub-queue) is not active ,then add it to the tail of
31491336b40SDon Lewis * new flows list, initialize and activate it.
31591336b40SDon Lewis */
31691336b40SDon Lewis if (!si->flows[idx].active ) {
31791336b40SDon Lewis STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
31891336b40SDon Lewis si->flows[idx].deficit = param->quantum;
31991336b40SDon Lewis si->flows[idx].cst.dropping = false;
32091336b40SDon Lewis si->flows[idx].cst.first_above_time = 0;
32191336b40SDon Lewis si->flows[idx].active = 1;
32291336b40SDon Lewis //D("activate %d",idx);
32391336b40SDon Lewis }
32491336b40SDon Lewis
32591336b40SDon Lewis /* check the limit for all queues and remove a packet from the
32691336b40SDon Lewis * largest one
32791336b40SDon Lewis */
32891336b40SDon Lewis if (mainq->ni.length > schk->cfg.limit) { D("over limit");
32991336b40SDon Lewis /* find first active flow */
33091336b40SDon Lewis for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
33191336b40SDon Lewis if (si->flows[maxidx].active)
33291336b40SDon Lewis break;
33391336b40SDon Lewis if (maxidx < schk->cfg.flows_cnt) {
33491336b40SDon Lewis /* find the largest sub- queue */
33591336b40SDon Lewis for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
33691336b40SDon Lewis if (si->flows[i].active && si->flows[i].stats.length >
33791336b40SDon Lewis si->flows[maxidx].stats.length)
33891336b40SDon Lewis maxidx = i;
33991336b40SDon Lewis codel_drop_head(&si->flows[maxidx], si);
34091336b40SDon Lewis D("maxidx = %d",maxidx);
34191336b40SDon Lewis drop = 1;
34291336b40SDon Lewis }
34391336b40SDon Lewis }
34491336b40SDon Lewis
34591336b40SDon Lewis return drop;
34691336b40SDon Lewis }
34791336b40SDon Lewis
34891336b40SDon Lewis /*
34991336b40SDon Lewis * Dequeue a packet from an appropriate queue according to
35091336b40SDon Lewis * FQ_CODEL algorithm.
35191336b40SDon Lewis */
35291336b40SDon Lewis static struct mbuf *
fq_codel_dequeue(struct dn_sch_inst * _si)35391336b40SDon Lewis fq_codel_dequeue(struct dn_sch_inst *_si)
35491336b40SDon Lewis {
35591336b40SDon Lewis struct fq_codel_si *si;
35691336b40SDon Lewis struct fq_codel_schk *schk;
35791336b40SDon Lewis struct dn_sch_fq_codel_parms *param;
35891336b40SDon Lewis struct fq_codel_flow *f;
35991336b40SDon Lewis struct mbuf *mbuf;
36091336b40SDon Lewis struct fq_codel_list *fq_codel_flowlist;
36191336b40SDon Lewis
36291336b40SDon Lewis si = (struct fq_codel_si *)_si;
36391336b40SDon Lewis schk = (struct fq_codel_schk *)(si->_si.sched+1);
36491336b40SDon Lewis param = &schk->cfg;
36591336b40SDon Lewis
36691336b40SDon Lewis do {
36791336b40SDon Lewis /* select a list to start with */
36891336b40SDon Lewis if (STAILQ_EMPTY(&si->newflows))
36991336b40SDon Lewis fq_codel_flowlist = &si->oldflows;
37091336b40SDon Lewis else
37191336b40SDon Lewis fq_codel_flowlist = &si->newflows;
37291336b40SDon Lewis
37391336b40SDon Lewis /* Both new and old queue lists are empty, return NULL */
37491336b40SDon Lewis if (STAILQ_EMPTY(fq_codel_flowlist))
37591336b40SDon Lewis return NULL;
37691336b40SDon Lewis
37791336b40SDon Lewis f = STAILQ_FIRST(fq_codel_flowlist);
37891336b40SDon Lewis while (f != NULL) {
37991336b40SDon Lewis /* if there is no flow(sub-queue) deficit, increase deficit
38091336b40SDon Lewis * by quantum, move the flow to the tail of old flows list
38191336b40SDon Lewis * and try another flow.
38291336b40SDon Lewis * Otherwise, the flow will be used for dequeue.
38391336b40SDon Lewis */
38491336b40SDon Lewis if (f->deficit < 0) {
38591336b40SDon Lewis f->deficit += param->quantum;
38691336b40SDon Lewis STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
38791336b40SDon Lewis STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
38891336b40SDon Lewis } else
38991336b40SDon Lewis break;
39091336b40SDon Lewis
39191336b40SDon Lewis f = STAILQ_FIRST(fq_codel_flowlist);
39291336b40SDon Lewis }
39391336b40SDon Lewis
39491336b40SDon Lewis /* the new flows list is empty, try old flows list */
39591336b40SDon Lewis if (STAILQ_EMPTY(fq_codel_flowlist))
39691336b40SDon Lewis continue;
39791336b40SDon Lewis
39891336b40SDon Lewis /* Dequeue a packet from the selected flow */
39991336b40SDon Lewis mbuf = fqc_codel_dequeue(f, si);
40091336b40SDon Lewis
40191336b40SDon Lewis /* Codel did not return a packet */
40291336b40SDon Lewis if (!mbuf) {
40391336b40SDon Lewis /* If the selected flow belongs to new flows list, then move
40491336b40SDon Lewis * it to the tail of old flows list. Otherwise, deactivate it and
40591336b40SDon Lewis * remove it from the old list and
40691336b40SDon Lewis */
40791336b40SDon Lewis if (fq_codel_flowlist == &si->newflows) {
40891336b40SDon Lewis STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
40991336b40SDon Lewis STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
41091336b40SDon Lewis } else {
41191336b40SDon Lewis f->active = 0;
41291336b40SDon Lewis STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
41391336b40SDon Lewis }
41491336b40SDon Lewis /* start again */
41591336b40SDon Lewis continue;
41691336b40SDon Lewis }
41791336b40SDon Lewis
41891336b40SDon Lewis /* we have a packet to return,
41991336b40SDon Lewis * update flow deficit and return the packet*/
42091336b40SDon Lewis f->deficit -= mbuf->m_pkthdr.len;
42191336b40SDon Lewis return mbuf;
42291336b40SDon Lewis
42391336b40SDon Lewis } while (1);
42491336b40SDon Lewis
42591336b40SDon Lewis /* unreachable point */
42691336b40SDon Lewis return NULL;
42791336b40SDon Lewis }
42891336b40SDon Lewis
42991336b40SDon Lewis /*
43091336b40SDon Lewis * Initialize fq_codel scheduler instance.
43191336b40SDon Lewis * also, allocate memory for flows array.
43291336b40SDon Lewis */
43391336b40SDon Lewis static int
fq_codel_new_sched(struct dn_sch_inst * _si)43491336b40SDon Lewis fq_codel_new_sched(struct dn_sch_inst *_si)
43591336b40SDon Lewis {
43691336b40SDon Lewis struct fq_codel_si *si;
43791336b40SDon Lewis struct dn_queue *q;
43891336b40SDon Lewis struct fq_codel_schk *schk;
43991336b40SDon Lewis int i;
44091336b40SDon Lewis
44191336b40SDon Lewis si = (struct fq_codel_si *)_si;
44291336b40SDon Lewis schk = (struct fq_codel_schk *)(_si->sched+1);
44391336b40SDon Lewis
44491336b40SDon Lewis if(si->flows) {
44591336b40SDon Lewis D("si already configured!");
44691336b40SDon Lewis return 0;
44791336b40SDon Lewis }
44891336b40SDon Lewis
44991336b40SDon Lewis /* init the main queue */
45091336b40SDon Lewis q = &si->main_q;
45191336b40SDon Lewis set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
45291336b40SDon Lewis q->_si = _si;
45391336b40SDon Lewis q->fs = _si->sched->fs;
45491336b40SDon Lewis
45591336b40SDon Lewis /* allocate memory for flows array */
456454529cdSPedro F. Giffuni si->flows = mallocarray(schk->cfg.flows_cnt,
457454529cdSPedro F. Giffuni sizeof(struct fq_codel_flow), M_DUMMYNET, M_NOWAIT | M_ZERO);
45891336b40SDon Lewis if (si->flows == NULL) {
45991336b40SDon Lewis D("cannot allocate memory for fq_codel configuration parameters");
46091336b40SDon Lewis return ENOMEM ;
46191336b40SDon Lewis }
46291336b40SDon Lewis
46391336b40SDon Lewis /* init perturbation for this si */
46491336b40SDon Lewis si->perturbation = random();
46591336b40SDon Lewis
46691336b40SDon Lewis /* init the old and new flows lists */
46791336b40SDon Lewis STAILQ_INIT(&si->newflows);
46891336b40SDon Lewis STAILQ_INIT(&si->oldflows);
46991336b40SDon Lewis
47091336b40SDon Lewis /* init the flows (sub-queues) */
47191336b40SDon Lewis for (i = 0; i < schk->cfg.flows_cnt; i++) {
47291336b40SDon Lewis /* init codel */
47391336b40SDon Lewis si->flows[i].cst.maxpkt_size = 500;
47491336b40SDon Lewis }
47591336b40SDon Lewis
47691336b40SDon Lewis fq_codel_desc.ref_count++;
47791336b40SDon Lewis return 0;
47891336b40SDon Lewis }
47991336b40SDon Lewis
48091336b40SDon Lewis /*
48191336b40SDon Lewis * Free fq_codel scheduler instance.
48291336b40SDon Lewis */
48391336b40SDon Lewis static int
fq_codel_free_sched(struct dn_sch_inst * _si)48491336b40SDon Lewis fq_codel_free_sched(struct dn_sch_inst *_si)
48591336b40SDon Lewis {
48691336b40SDon Lewis struct fq_codel_si *si = (struct fq_codel_si *)_si ;
48791336b40SDon Lewis
48891336b40SDon Lewis /* free the flows array */
48991336b40SDon Lewis free(si->flows , M_DUMMYNET);
49091336b40SDon Lewis si->flows = NULL;
49191336b40SDon Lewis fq_codel_desc.ref_count--;
49291336b40SDon Lewis
49391336b40SDon Lewis return 0;
49491336b40SDon Lewis }
49591336b40SDon Lewis
49691336b40SDon Lewis /*
49791336b40SDon Lewis * Configure fq_codel scheduler.
49891336b40SDon Lewis * the configurations for the scheduler is passed from userland.
49991336b40SDon Lewis */
50091336b40SDon Lewis static int
fq_codel_config(struct dn_schk * _schk)50191336b40SDon Lewis fq_codel_config(struct dn_schk *_schk)
50291336b40SDon Lewis {
50391336b40SDon Lewis struct fq_codel_schk *schk;
50491336b40SDon Lewis struct dn_extra_parms *ep;
50591336b40SDon Lewis struct dn_sch_fq_codel_parms *fqc_cfg;
50691336b40SDon Lewis
50791336b40SDon Lewis schk = (struct fq_codel_schk *)(_schk+1);
50891336b40SDon Lewis ep = (struct dn_extra_parms *) _schk->cfg;
50991336b40SDon Lewis
51091336b40SDon Lewis /* par array contains fq_codel configuration as follow
51191336b40SDon Lewis * Codel: 0- target,1- interval, 2- flags
51291336b40SDon Lewis * FQ_CODEL: 3- quantum, 4- limit, 5- flows
51391336b40SDon Lewis */
51491336b40SDon Lewis if (ep && ep->oid.len ==sizeof(*ep) &&
51591336b40SDon Lewis ep->oid.subtype == DN_SCH_PARAMS) {
51691336b40SDon Lewis fqc_cfg = &schk->cfg;
51791336b40SDon Lewis if (ep->par[0] < 0)
51891336b40SDon Lewis fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
51991336b40SDon Lewis else
52091336b40SDon Lewis fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
52191336b40SDon Lewis
52291336b40SDon Lewis if (ep->par[1] < 0)
52391336b40SDon Lewis fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
52491336b40SDon Lewis else
52591336b40SDon Lewis fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
52691336b40SDon Lewis
52791336b40SDon Lewis if (ep->par[2] < 0)
52891336b40SDon Lewis fqc_cfg->ccfg.flags = 0;
52991336b40SDon Lewis else
53091336b40SDon Lewis fqc_cfg->ccfg.flags = ep->par[2];
53191336b40SDon Lewis
53291336b40SDon Lewis /* FQ configurations */
53391336b40SDon Lewis if (ep->par[3] < 0)
53491336b40SDon Lewis fqc_cfg->quantum = fq_codel_sysctl.quantum;
53591336b40SDon Lewis else
53691336b40SDon Lewis fqc_cfg->quantum = ep->par[3];
53791336b40SDon Lewis
53891336b40SDon Lewis if (ep->par[4] < 0)
53991336b40SDon Lewis fqc_cfg->limit = fq_codel_sysctl.limit;
54091336b40SDon Lewis else
54191336b40SDon Lewis fqc_cfg->limit = ep->par[4];
54291336b40SDon Lewis
54391336b40SDon Lewis if (ep->par[5] < 0)
54491336b40SDon Lewis fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
54591336b40SDon Lewis else
54691336b40SDon Lewis fqc_cfg->flows_cnt = ep->par[5];
54791336b40SDon Lewis
54891336b40SDon Lewis /* Bound the configurations */
54991336b40SDon Lewis fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 ,
55091336b40SDon Lewis 5 * AQM_TIME_1S); ;
55191336b40SDon Lewis fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
55291336b40SDon Lewis 100 * AQM_TIME_1S);
55391336b40SDon Lewis
55491336b40SDon Lewis fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000);
55591336b40SDon Lewis fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480);
55691336b40SDon Lewis fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536);
55791336b40SDon Lewis }
55891336b40SDon Lewis else
55991336b40SDon Lewis return 1;
56091336b40SDon Lewis
56191336b40SDon Lewis return 0;
56291336b40SDon Lewis }
56391336b40SDon Lewis
56491336b40SDon Lewis /*
56591336b40SDon Lewis * Return fq_codel scheduler configurations
56691336b40SDon Lewis * the configurations for the scheduler is passed to userland.
56791336b40SDon Lewis */
56891336b40SDon Lewis static int
fq_codel_getconfig(struct dn_schk * _schk,struct dn_extra_parms * ep)56991336b40SDon Lewis fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
57091336b40SDon Lewis struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
57191336b40SDon Lewis struct dn_sch_fq_codel_parms *fqc_cfg;
57291336b40SDon Lewis
57391336b40SDon Lewis fqc_cfg = &schk->cfg;
57491336b40SDon Lewis
57591336b40SDon Lewis strcpy(ep->name, fq_codel_desc.name);
57691336b40SDon Lewis ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
57791336b40SDon Lewis ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
57891336b40SDon Lewis ep->par[2] = fqc_cfg->ccfg.flags;
57991336b40SDon Lewis
58091336b40SDon Lewis ep->par[3] = fqc_cfg->quantum;
58191336b40SDon Lewis ep->par[4] = fqc_cfg->limit;
58291336b40SDon Lewis ep->par[5] = fqc_cfg->flows_cnt;
58391336b40SDon Lewis
58491336b40SDon Lewis return 0;
58591336b40SDon Lewis }
58691336b40SDon Lewis
58791336b40SDon Lewis /*
58891336b40SDon Lewis * fq_codel scheduler descriptor
58991336b40SDon Lewis * contains the type of the scheduler, the name, the size of extra
59091336b40SDon Lewis * data structures, and function pointers.
59191336b40SDon Lewis */
59291336b40SDon Lewis static struct dn_alg fq_codel_desc = {
59391336b40SDon Lewis _SI( .type = ) DN_SCHED_FQ_CODEL,
59491336b40SDon Lewis _SI( .name = ) "FQ_CODEL",
59591336b40SDon Lewis _SI( .flags = ) 0,
59691336b40SDon Lewis
59791336b40SDon Lewis _SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
59891336b40SDon Lewis _SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
59991336b40SDon Lewis _SI( .q_datalen = ) 0,
60091336b40SDon Lewis
60191336b40SDon Lewis _SI( .enqueue = ) fq_codel_enqueue,
60291336b40SDon Lewis _SI( .dequeue = ) fq_codel_dequeue,
60391336b40SDon Lewis _SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
60491336b40SDon Lewis _SI( .destroy = ) NULL, /*sched x delete */
60591336b40SDon Lewis _SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
60691336b40SDon Lewis _SI( .free_sched = ) fq_codel_free_sched, /* delete schd instance */
60791336b40SDon Lewis _SI( .new_fsk = ) NULL,
60891336b40SDon Lewis _SI( .free_fsk = ) NULL,
60991336b40SDon Lewis _SI( .new_queue = ) NULL,
61091336b40SDon Lewis _SI( .free_queue = ) NULL,
61191336b40SDon Lewis _SI( .getconfig = ) fq_codel_getconfig,
61291336b40SDon Lewis _SI( .ref_count = ) 0
61391336b40SDon Lewis };
61491336b40SDon Lewis
61591336b40SDon Lewis DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);
616