xref: /freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.c (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6)
191336b40SDon Lewis /*
291336b40SDon Lewis  * FQ_Codel - The FlowQueue-Codel scheduler/AQM
391336b40SDon Lewis  *
491336b40SDon Lewis  * Copyright (C) 2016 Centre for Advanced Internet Architectures,
591336b40SDon Lewis  *  Swinburne University of Technology, Melbourne, Australia.
691336b40SDon Lewis  * Portions of this code were made possible in part by a gift from
791336b40SDon Lewis  *  The Comcast Innovation Fund.
891336b40SDon Lewis  * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
991336b40SDon Lewis  *
1091336b40SDon Lewis  * Redistribution and use in source and binary forms, with or without
1191336b40SDon Lewis  * modification, are permitted provided that the following conditions
1291336b40SDon Lewis  * are met:
1391336b40SDon Lewis  * 1. Redistributions of source code must retain the above copyright
1491336b40SDon Lewis  *    notice, this list of conditions and the following disclaimer.
1591336b40SDon Lewis  * 2. Redistributions in binary form must reproduce the above copyright
1691336b40SDon Lewis  *    notice, this list of conditions and the following disclaimer in the
1791336b40SDon Lewis  *    documentation and/or other materials provided with the distribution.
1891336b40SDon Lewis  *
1991336b40SDon Lewis  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2091336b40SDon Lewis  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2191336b40SDon Lewis  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2291336b40SDon Lewis  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2391336b40SDon Lewis  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2491336b40SDon Lewis  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2591336b40SDon Lewis  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2691336b40SDon Lewis  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2791336b40SDon Lewis  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2891336b40SDon Lewis  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2991336b40SDon Lewis  * SUCH DAMAGE.
3091336b40SDon Lewis  */
3191336b40SDon Lewis 
3291336b40SDon Lewis #ifdef _KERNEL
3391336b40SDon Lewis #include <sys/malloc.h>
3491336b40SDon Lewis #include <sys/socket.h>
3591336b40SDon Lewis //#include <sys/socketvar.h>
3691336b40SDon Lewis #include <sys/kernel.h>
3791336b40SDon Lewis #include <sys/mbuf.h>
3891336b40SDon Lewis #include <sys/module.h>
3991336b40SDon Lewis #include <net/if.h>	/* IFNAMSIZ */
4091336b40SDon Lewis #include <netinet/in.h>
4191336b40SDon Lewis #include <netinet/ip_var.h>		/* ipfw_rule_ref */
4291336b40SDon Lewis #include <netinet/ip_fw.h>	/* flow_id */
4391336b40SDon Lewis #include <netinet/ip_dummynet.h>
4491336b40SDon Lewis 
453f289c3fSJeff Roberson #include <sys/lock.h>
4691336b40SDon Lewis #include <sys/proc.h>
4791336b40SDon Lewis #include <sys/rwlock.h>
4891336b40SDon Lewis 
4991336b40SDon Lewis #include <netpfil/ipfw/ip_fw_private.h>
5091336b40SDon Lewis #include <sys/sysctl.h>
5191336b40SDon Lewis #include <netinet/ip.h>
5291336b40SDon Lewis #include <netinet/ip6.h>
5391336b40SDon Lewis #include <netinet/ip_icmp.h>
5491336b40SDon Lewis #include <netinet/tcp.h>
5591336b40SDon Lewis #include <netinet/udp.h>
5691336b40SDon Lewis #include <sys/queue.h>
5791336b40SDon Lewis #include <sys/hash.h>
5891336b40SDon Lewis 
5991336b40SDon Lewis #include <netpfil/ipfw/dn_heap.h>
6091336b40SDon Lewis #include <netpfil/ipfw/ip_dn_private.h>
6191336b40SDon Lewis 
6291336b40SDon Lewis #include <netpfil/ipfw/dn_aqm.h>
6391336b40SDon Lewis #include <netpfil/ipfw/dn_aqm_codel.h>
6491336b40SDon Lewis #include <netpfil/ipfw/dn_sched.h>
6591336b40SDon Lewis #include <netpfil/ipfw/dn_sched_fq_codel.h>
6691336b40SDon Lewis #include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
6791336b40SDon Lewis 
6891336b40SDon Lewis #else
6991336b40SDon Lewis #include <dn_test.h>
7091336b40SDon Lewis #endif
7191336b40SDon Lewis 
7291336b40SDon Lewis /* NOTE: In fq_codel module, we reimplements CoDel AQM functions
7391336b40SDon Lewis  * because fq_codel use different flows (sub-queues) structure and
7491336b40SDon Lewis  * dn_queue includes many variables not needed by a flow (sub-queue
7591336b40SDon Lewis  * )i.e. avoid extra overhead (88 bytes vs 208 bytes).
7691336b40SDon Lewis  * Also, CoDel functions manages stats of sub-queues as well as the main queue.
7791336b40SDon Lewis  */
7891336b40SDon Lewis 
7991336b40SDon Lewis #define DN_SCHED_FQ_CODEL 6
8091336b40SDon Lewis 
8191336b40SDon Lewis static struct dn_alg fq_codel_desc;
8291336b40SDon Lewis 
8391336b40SDon Lewis /* fq_codel default parameters including codel */
8491336b40SDon Lewis struct dn_sch_fq_codel_parms
8591336b40SDon Lewis fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
8691336b40SDon Lewis 	CODEL_ECN_ENABLED}, 1024, 10240, 1514};
8791336b40SDon Lewis 
8891336b40SDon Lewis static int
fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)8991336b40SDon Lewis fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
9091336b40SDon Lewis {
9191336b40SDon Lewis 	int error;
9291336b40SDon Lewis 	long  value;
9391336b40SDon Lewis 
9491336b40SDon Lewis 	value = fq_codel_sysctl.ccfg.interval;
9591336b40SDon Lewis 	value /= AQM_TIME_1US;
9691336b40SDon Lewis 	error = sysctl_handle_long(oidp, &value, 0, req);
9791336b40SDon Lewis 	if (error != 0 || req->newptr == NULL)
9891336b40SDon Lewis 		return (error);
9991336b40SDon Lewis 	if (value < 1 || value > 100 * AQM_TIME_1S)
10091336b40SDon Lewis 		return (EINVAL);
10191336b40SDon Lewis 	fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ;
10291336b40SDon Lewis 
10391336b40SDon Lewis 	return (0);
10491336b40SDon Lewis }
10591336b40SDon Lewis 
10691336b40SDon Lewis static int
fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)10791336b40SDon Lewis fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
10891336b40SDon Lewis {
10991336b40SDon Lewis 	int error;
11091336b40SDon Lewis 	long  value;
11191336b40SDon Lewis 
11291336b40SDon Lewis 	value = fq_codel_sysctl.ccfg.target;
11391336b40SDon Lewis 	value /= AQM_TIME_1US;
11491336b40SDon Lewis 	error = sysctl_handle_long(oidp, &value, 0, req);
11591336b40SDon Lewis 	if (error != 0 || req->newptr == NULL)
11691336b40SDon Lewis 		return (error);
11791336b40SDon Lewis 	if (value < 1 || value > 5 * AQM_TIME_1S)
11891336b40SDon Lewis 		return (EINVAL);
11991336b40SDon Lewis 	fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ;
12091336b40SDon Lewis 
12191336b40SDon Lewis 	return (0);
12291336b40SDon Lewis }
12391336b40SDon Lewis 
12491336b40SDon Lewis SYSBEGIN(f4)
12591336b40SDon Lewis 
12691336b40SDon Lewis SYSCTL_DECL(_net_inet);
12791336b40SDon Lewis SYSCTL_DECL(_net_inet_ip);
12891336b40SDon Lewis SYSCTL_DECL(_net_inet_ip_dummynet);
12991336b40SDon Lewis static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
1307029da5cSPawel Biernacki     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1317029da5cSPawel Biernacki     "FQ_CODEL");
13291336b40SDon Lewis 
13391336b40SDon Lewis #ifdef SYSCTL_NODE
13491336b40SDon Lewis 
13591336b40SDon Lewis SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
1367029da5cSPawel Biernacki     CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1377029da5cSPawel Biernacki     NULL, 0, fqcodel_sysctl_target_handler, "L",
13891336b40SDon Lewis     "FQ_CoDel target in microsecond");
13991336b40SDon Lewis SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
1407029da5cSPawel Biernacki     CTLTYPE_LONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1417029da5cSPawel Biernacki     NULL, 0, fqcodel_sysctl_interval_handler, "L",
14291336b40SDon Lewis     "FQ_CoDel interval in microsecond");
14391336b40SDon Lewis 
14491336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
14591336b40SDon Lewis 	CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
14691336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
14791336b40SDon Lewis 	CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024,
14891336b40SDon Lewis 	"Number of queues for FQ_CoDel");
14991336b40SDon Lewis SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
15091336b40SDon Lewis 	CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
15191336b40SDon Lewis #endif
15291336b40SDon Lewis 
15391336b40SDon Lewis /* Drop a packet form the head of codel queue */
15491336b40SDon Lewis static void
codel_drop_head(struct fq_codel_flow * q,struct fq_codel_si * si)15591336b40SDon Lewis codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
15691336b40SDon Lewis {
15791336b40SDon Lewis 	struct mbuf *m = q->mq.head;
15891336b40SDon Lewis 
15991336b40SDon Lewis 	if (m == NULL)
16091336b40SDon Lewis 		return;
16191336b40SDon Lewis 	q->mq.head = m->m_nextpkt;
16291336b40SDon Lewis 
16391336b40SDon Lewis 	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
16491336b40SDon Lewis 
16591336b40SDon Lewis 	if (si->main_q.ni.length == 0) /* queue is now idle */
166fe3bcfbdSTom Jones 			si->main_q.q_time = V_dn_cfg.curr_time;
16791336b40SDon Lewis 
16891336b40SDon Lewis 	FREE_PKT(m);
16991336b40SDon Lewis }
17091336b40SDon Lewis 
17191336b40SDon Lewis /* Enqueue a packet 'm' to a queue 'q' and add timestamp to that packet.
17291336b40SDon Lewis  * Return 1 when unable to add timestamp, otherwise return 0
17391336b40SDon Lewis  */
17491336b40SDon Lewis static int
codel_enqueue(struct fq_codel_flow * q,struct mbuf * m,struct fq_codel_si * si)17591336b40SDon Lewis codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
17691336b40SDon Lewis {
17791336b40SDon Lewis 	uint64_t len;
17891336b40SDon Lewis 
17991336b40SDon Lewis 	len = m->m_pkthdr.len;
18091336b40SDon Lewis 	/* finding maximum packet size */
18191336b40SDon Lewis 	if (len > q->cst.maxpkt_size)
18291336b40SDon Lewis 		q->cst.maxpkt_size = len;
18391336b40SDon Lewis 
18491336b40SDon Lewis 	/* Add timestamp to mbuf as MTAG */
18591336b40SDon Lewis 	struct m_tag *mtag;
18691336b40SDon Lewis 	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
18791336b40SDon Lewis 	if (mtag == NULL)
18891336b40SDon Lewis 		mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
18991336b40SDon Lewis 			M_NOWAIT);
190c4a6258dSMark Johnston 	if (mtag == NULL)
19191336b40SDon Lewis 		goto drop;
19291336b40SDon Lewis 	*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
19391336b40SDon Lewis 	m_tag_prepend(m, mtag);
19491336b40SDon Lewis 
195*26b9e1f0SKristof Provost 	if (m->m_pkthdr.rcvif != NULL)
196*26b9e1f0SKristof Provost 		m_rcvif_serialize(m);
197*26b9e1f0SKristof Provost 
19891336b40SDon Lewis 	mq_append(&q->mq, m);
19991336b40SDon Lewis 	fq_update_stats(q, si, len, 0);
20091336b40SDon Lewis 	return 0;
20191336b40SDon Lewis 
20291336b40SDon Lewis drop:
20391336b40SDon Lewis 	fq_update_stats(q, si, len, 1);
20491336b40SDon Lewis 	m_freem(m);
20591336b40SDon Lewis 	return 1;
20691336b40SDon Lewis }
20791336b40SDon Lewis 
20891336b40SDon Lewis /*
20991336b40SDon Lewis  * Classify a packet to queue number using Jenkins hash function.
21091336b40SDon Lewis  * Return: queue number
21191336b40SDon Lewis  * the input of the hash are protocol no, perturbation, src IP, dst IP,
21291336b40SDon Lewis  * src port, dst port,
21391336b40SDon Lewis  */
21491336b40SDon Lewis static inline int
fq_codel_classify_flow(struct mbuf * m,uint16_t fcount,struct fq_codel_si * si)21591336b40SDon Lewis fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
21691336b40SDon Lewis {
21791336b40SDon Lewis 	struct ip *ip;
21891336b40SDon Lewis 	struct tcphdr *th;
21991336b40SDon Lewis 	struct udphdr *uh;
22091336b40SDon Lewis 	uint8_t tuple[41];
22191336b40SDon Lewis 	uint16_t hash=0;
22291336b40SDon Lewis 
2234001fcbeSDon Lewis 	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
22491336b40SDon Lewis //#ifdef INET6
22591336b40SDon Lewis 	struct ip6_hdr *ip6;
22691336b40SDon Lewis 	int isip6;
2274001fcbeSDon Lewis 	isip6 = (ip->ip_v == 6);
22891336b40SDon Lewis 
22991336b40SDon Lewis 	if(isip6) {
2304001fcbeSDon Lewis 		ip6 = (struct ip6_hdr *)ip;
23191336b40SDon Lewis 		*((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
23291336b40SDon Lewis 		*((uint32_t *) &tuple[1]) = si->perturbation;
23391336b40SDon Lewis 		memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
23491336b40SDon Lewis 		memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
23591336b40SDon Lewis 
23691336b40SDon Lewis 		switch (ip6->ip6_nxt) {
23791336b40SDon Lewis 		case IPPROTO_TCP:
23891336b40SDon Lewis 			th = (struct tcphdr *)(ip6 + 1);
23991336b40SDon Lewis 			*((uint16_t *) &tuple[37]) = th->th_dport;
24091336b40SDon Lewis 			*((uint16_t *) &tuple[39]) = th->th_sport;
24191336b40SDon Lewis 			break;
24291336b40SDon Lewis 
24391336b40SDon Lewis 		case IPPROTO_UDP:
24491336b40SDon Lewis 			uh = (struct udphdr *)(ip6 + 1);
24591336b40SDon Lewis 			*((uint16_t *) &tuple[37]) = uh->uh_dport;
24691336b40SDon Lewis 			*((uint16_t *) &tuple[39]) = uh->uh_sport;
24791336b40SDon Lewis 			break;
24891336b40SDon Lewis 		default:
24991336b40SDon Lewis 			memset(&tuple[37], 0, 4);
25091336b40SDon Lewis 		}
25191336b40SDon Lewis 
25291336b40SDon Lewis 		hash = jenkins_hash(tuple, 41, HASHINIT) %  fcount;
25391336b40SDon Lewis 		return hash;
25491336b40SDon Lewis 	}
25591336b40SDon Lewis //#endif
25691336b40SDon Lewis 
25791336b40SDon Lewis 	/* IPv4 */
25891336b40SDon Lewis 	*((uint8_t *) &tuple[0]) = ip->ip_p;
25991336b40SDon Lewis 	*((uint32_t *) &tuple[1]) = si->perturbation;
26091336b40SDon Lewis 	*((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
26191336b40SDon Lewis 	*((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
26291336b40SDon Lewis 
26391336b40SDon Lewis 	switch (ip->ip_p) {
26491336b40SDon Lewis 		case IPPROTO_TCP:
26591336b40SDon Lewis 			th = (struct tcphdr *)(ip + 1);
26691336b40SDon Lewis 			*((uint16_t *) &tuple[13]) = th->th_dport;
26791336b40SDon Lewis 			*((uint16_t *) &tuple[15]) = th->th_sport;
26891336b40SDon Lewis 			break;
26991336b40SDon Lewis 
27091336b40SDon Lewis 		case IPPROTO_UDP:
27191336b40SDon Lewis 			uh = (struct udphdr *)(ip + 1);
27291336b40SDon Lewis 			*((uint16_t *) &tuple[13]) = uh->uh_dport;
27391336b40SDon Lewis 			*((uint16_t *) &tuple[15]) = uh->uh_sport;
27491336b40SDon Lewis 			break;
27591336b40SDon Lewis 		default:
27691336b40SDon Lewis 			memset(&tuple[13], 0, 4);
27791336b40SDon Lewis 	}
27891336b40SDon Lewis 	hash = jenkins_hash(tuple, 17, HASHINIT) %  fcount;
27991336b40SDon Lewis 
28091336b40SDon Lewis 	return hash;
28191336b40SDon Lewis }
28291336b40SDon Lewis 
28391336b40SDon Lewis /*
28491336b40SDon Lewis  * Enqueue a packet into an appropriate queue according to
28591336b40SDon Lewis  * FQ_CODEL algorithm.
28691336b40SDon Lewis  */
28791336b40SDon Lewis static int
fq_codel_enqueue(struct dn_sch_inst * _si,struct dn_queue * _q,struct mbuf * m)28891336b40SDon Lewis fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
28991336b40SDon Lewis 	struct mbuf *m)
29091336b40SDon Lewis {
29191336b40SDon Lewis 	struct fq_codel_si *si;
29291336b40SDon Lewis 	struct fq_codel_schk *schk;
29391336b40SDon Lewis 	struct dn_sch_fq_codel_parms *param;
29491336b40SDon Lewis 	struct dn_queue *mainq;
29591336b40SDon Lewis 	int idx, drop, i, maxidx;
29691336b40SDon Lewis 
29791336b40SDon Lewis 	mainq = (struct dn_queue *)(_si + 1);
29891336b40SDon Lewis 	si = (struct fq_codel_si *)_si;
29991336b40SDon Lewis 	schk = (struct fq_codel_schk *)(si->_si.sched+1);
30091336b40SDon Lewis 	param = &schk->cfg;
30191336b40SDon Lewis 
30291336b40SDon Lewis 	 /* classify a packet to queue number*/
30391336b40SDon Lewis 	idx = fq_codel_classify_flow(m, param->flows_cnt, si);
30491336b40SDon Lewis 	/* enqueue packet into appropriate queue using CoDel AQM.
30591336b40SDon Lewis 	 * Note: 'codel_enqueue' function returns 1 only when it unable to
30691336b40SDon Lewis 	 * add timestamp to packet (no limit check)*/
30791336b40SDon Lewis 	drop = codel_enqueue(&si->flows[idx], m, si);
30891336b40SDon Lewis 
30991336b40SDon Lewis 	/* codel unable to timestamp a packet */
31091336b40SDon Lewis 	if (drop)
31191336b40SDon Lewis 		return 1;
31291336b40SDon Lewis 
31391336b40SDon Lewis 	/* If the flow (sub-queue) is not active ,then add it to the tail of
31491336b40SDon Lewis 	 * new flows list, initialize and activate it.
31591336b40SDon Lewis 	 */
31691336b40SDon Lewis 	if (!si->flows[idx].active ) {
31791336b40SDon Lewis 		STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
31891336b40SDon Lewis 		si->flows[idx].deficit = param->quantum;
31991336b40SDon Lewis 		si->flows[idx].cst.dropping = false;
32091336b40SDon Lewis 		si->flows[idx].cst.first_above_time = 0;
32191336b40SDon Lewis 		si->flows[idx].active = 1;
32291336b40SDon Lewis 		//D("activate %d",idx);
32391336b40SDon Lewis 	}
32491336b40SDon Lewis 
32591336b40SDon Lewis 	/* check the limit for all queues and remove a packet from the
32691336b40SDon Lewis 	 * largest one
32791336b40SDon Lewis 	 */
32891336b40SDon Lewis 	if (mainq->ni.length > schk->cfg.limit) { D("over limit");
32991336b40SDon Lewis 		/* find first active flow */
33091336b40SDon Lewis 		for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
33191336b40SDon Lewis 			if (si->flows[maxidx].active)
33291336b40SDon Lewis 				break;
33391336b40SDon Lewis 		if (maxidx < schk->cfg.flows_cnt) {
33491336b40SDon Lewis 			/* find the largest sub- queue */
33591336b40SDon Lewis 			for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
33691336b40SDon Lewis 				if (si->flows[i].active && si->flows[i].stats.length >
33791336b40SDon Lewis 					si->flows[maxidx].stats.length)
33891336b40SDon Lewis 					maxidx = i;
33991336b40SDon Lewis 			codel_drop_head(&si->flows[maxidx], si);
34091336b40SDon Lewis 			D("maxidx = %d",maxidx);
34191336b40SDon Lewis 			drop = 1;
34291336b40SDon Lewis 		}
34391336b40SDon Lewis 	}
34491336b40SDon Lewis 
34591336b40SDon Lewis 	return drop;
34691336b40SDon Lewis }
34791336b40SDon Lewis 
34891336b40SDon Lewis /*
34991336b40SDon Lewis  * Dequeue a packet from an appropriate queue according to
35091336b40SDon Lewis  * FQ_CODEL algorithm.
35191336b40SDon Lewis  */
35291336b40SDon Lewis static struct mbuf *
fq_codel_dequeue(struct dn_sch_inst * _si)35391336b40SDon Lewis fq_codel_dequeue(struct dn_sch_inst *_si)
35491336b40SDon Lewis {
35591336b40SDon Lewis 	struct fq_codel_si *si;
35691336b40SDon Lewis 	struct fq_codel_schk *schk;
35791336b40SDon Lewis 	struct dn_sch_fq_codel_parms *param;
35891336b40SDon Lewis 	struct fq_codel_flow *f;
35991336b40SDon Lewis 	struct mbuf *mbuf;
36091336b40SDon Lewis 	struct fq_codel_list *fq_codel_flowlist;
36191336b40SDon Lewis 
36291336b40SDon Lewis 	si = (struct fq_codel_si *)_si;
36391336b40SDon Lewis 	schk = (struct fq_codel_schk *)(si->_si.sched+1);
36491336b40SDon Lewis 	param = &schk->cfg;
36591336b40SDon Lewis 
36691336b40SDon Lewis 	do {
36791336b40SDon Lewis 		/* select a list to start with */
36891336b40SDon Lewis 		if (STAILQ_EMPTY(&si->newflows))
36991336b40SDon Lewis 			fq_codel_flowlist = &si->oldflows;
37091336b40SDon Lewis 		else
37191336b40SDon Lewis 			fq_codel_flowlist = &si->newflows;
37291336b40SDon Lewis 
37391336b40SDon Lewis 		/* Both new and old queue lists are empty, return NULL */
37491336b40SDon Lewis 		if (STAILQ_EMPTY(fq_codel_flowlist))
37591336b40SDon Lewis 			return NULL;
37691336b40SDon Lewis 
37791336b40SDon Lewis 		f = STAILQ_FIRST(fq_codel_flowlist);
37891336b40SDon Lewis 		while (f != NULL)	{
37991336b40SDon Lewis 			/* if there is no flow(sub-queue) deficit, increase deficit
38091336b40SDon Lewis 			 * by quantum, move the flow to the tail of old flows list
38191336b40SDon Lewis 			 * and try another flow.
38291336b40SDon Lewis 			 * Otherwise, the flow will be used for dequeue.
38391336b40SDon Lewis 			 */
38491336b40SDon Lewis 			if (f->deficit < 0) {
38591336b40SDon Lewis 				 f->deficit += param->quantum;
38691336b40SDon Lewis 				 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
38791336b40SDon Lewis 				 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
38891336b40SDon Lewis 			 } else
38991336b40SDon Lewis 				 break;
39091336b40SDon Lewis 
39191336b40SDon Lewis 			f = STAILQ_FIRST(fq_codel_flowlist);
39291336b40SDon Lewis 		}
39391336b40SDon Lewis 
39491336b40SDon Lewis 		/* the new flows list is empty, try old flows list */
39591336b40SDon Lewis 		if (STAILQ_EMPTY(fq_codel_flowlist))
39691336b40SDon Lewis 			continue;
39791336b40SDon Lewis 
39891336b40SDon Lewis 		/* Dequeue a packet from the selected flow */
39991336b40SDon Lewis 		mbuf = fqc_codel_dequeue(f, si);
40091336b40SDon Lewis 
40191336b40SDon Lewis 		/* Codel did not return a packet */
40291336b40SDon Lewis 		if (!mbuf) {
40391336b40SDon Lewis 			/* If the selected flow belongs to new flows list, then move
40491336b40SDon Lewis 			 * it to the tail of old flows list. Otherwise, deactivate it and
40591336b40SDon Lewis 			 * remove it from the old list and
40691336b40SDon Lewis 			 */
40791336b40SDon Lewis 			if (fq_codel_flowlist == &si->newflows) {
40891336b40SDon Lewis 				STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
40991336b40SDon Lewis 				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
41091336b40SDon Lewis 			}	else {
41191336b40SDon Lewis 				f->active = 0;
41291336b40SDon Lewis 				STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
41391336b40SDon Lewis 			}
41491336b40SDon Lewis 			/* start again */
41591336b40SDon Lewis 			continue;
41691336b40SDon Lewis 		}
41791336b40SDon Lewis 
41891336b40SDon Lewis 		/* we have a packet to return,
41991336b40SDon Lewis 		 * update flow deficit and return the packet*/
42091336b40SDon Lewis 		f->deficit -= mbuf->m_pkthdr.len;
42191336b40SDon Lewis 		return mbuf;
42291336b40SDon Lewis 
42391336b40SDon Lewis 	} while (1);
42491336b40SDon Lewis 
42591336b40SDon Lewis 	/* unreachable point */
42691336b40SDon Lewis 	return NULL;
42791336b40SDon Lewis }
42891336b40SDon Lewis 
42991336b40SDon Lewis /*
43091336b40SDon Lewis  * Initialize fq_codel scheduler instance.
43191336b40SDon Lewis  * also, allocate memory for flows array.
43291336b40SDon Lewis  */
43391336b40SDon Lewis static int
fq_codel_new_sched(struct dn_sch_inst * _si)43491336b40SDon Lewis fq_codel_new_sched(struct dn_sch_inst *_si)
43591336b40SDon Lewis {
43691336b40SDon Lewis 	struct fq_codel_si *si;
43791336b40SDon Lewis 	struct dn_queue *q;
43891336b40SDon Lewis 	struct fq_codel_schk *schk;
43991336b40SDon Lewis 	int i;
44091336b40SDon Lewis 
44191336b40SDon Lewis 	si = (struct fq_codel_si *)_si;
44291336b40SDon Lewis 	schk = (struct fq_codel_schk *)(_si->sched+1);
44391336b40SDon Lewis 
44491336b40SDon Lewis 	if(si->flows) {
44591336b40SDon Lewis 		D("si already configured!");
44691336b40SDon Lewis 		return 0;
44791336b40SDon Lewis 	}
44891336b40SDon Lewis 
44991336b40SDon Lewis 	/* init the main queue */
45091336b40SDon Lewis 	q = &si->main_q;
45191336b40SDon Lewis 	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
45291336b40SDon Lewis 	q->_si = _si;
45391336b40SDon Lewis 	q->fs = _si->sched->fs;
45491336b40SDon Lewis 
45591336b40SDon Lewis 	/* allocate memory for flows array */
456454529cdSPedro F. Giffuni 	si->flows = mallocarray(schk->cfg.flows_cnt,
457454529cdSPedro F. Giffuni 	    sizeof(struct fq_codel_flow), M_DUMMYNET, M_NOWAIT | M_ZERO);
45891336b40SDon Lewis 	if (si->flows == NULL) {
45991336b40SDon Lewis 		D("cannot allocate memory for fq_codel configuration parameters");
46091336b40SDon Lewis 		return ENOMEM ;
46191336b40SDon Lewis 	}
46291336b40SDon Lewis 
46391336b40SDon Lewis 	/* init perturbation for this si */
46491336b40SDon Lewis 	si->perturbation = random();
46591336b40SDon Lewis 
46691336b40SDon Lewis 	/* init the old and new flows lists */
46791336b40SDon Lewis 	STAILQ_INIT(&si->newflows);
46891336b40SDon Lewis 	STAILQ_INIT(&si->oldflows);
46991336b40SDon Lewis 
47091336b40SDon Lewis 	/* init the flows (sub-queues) */
47191336b40SDon Lewis 	for (i = 0; i < schk->cfg.flows_cnt; i++) {
47291336b40SDon Lewis 		/* init codel */
47391336b40SDon Lewis 		si->flows[i].cst.maxpkt_size = 500;
47491336b40SDon Lewis 	}
47591336b40SDon Lewis 
47691336b40SDon Lewis 	fq_codel_desc.ref_count++;
47791336b40SDon Lewis 	return 0;
47891336b40SDon Lewis }
47991336b40SDon Lewis 
48091336b40SDon Lewis /*
48191336b40SDon Lewis  * Free fq_codel scheduler instance.
48291336b40SDon Lewis  */
48391336b40SDon Lewis static int
fq_codel_free_sched(struct dn_sch_inst * _si)48491336b40SDon Lewis fq_codel_free_sched(struct dn_sch_inst *_si)
48591336b40SDon Lewis {
48691336b40SDon Lewis 	struct fq_codel_si *si = (struct fq_codel_si *)_si ;
48791336b40SDon Lewis 
48891336b40SDon Lewis 	/* free the flows array */
48991336b40SDon Lewis 	free(si->flows , M_DUMMYNET);
49091336b40SDon Lewis 	si->flows = NULL;
49191336b40SDon Lewis 	fq_codel_desc.ref_count--;
49291336b40SDon Lewis 
49391336b40SDon Lewis 	return 0;
49491336b40SDon Lewis }
49591336b40SDon Lewis 
49691336b40SDon Lewis /*
49791336b40SDon Lewis  * Configure fq_codel scheduler.
49891336b40SDon Lewis  * the configurations for the scheduler is passed from userland.
49991336b40SDon Lewis  */
50091336b40SDon Lewis static int
fq_codel_config(struct dn_schk * _schk)50191336b40SDon Lewis fq_codel_config(struct dn_schk *_schk)
50291336b40SDon Lewis {
50391336b40SDon Lewis 	struct fq_codel_schk *schk;
50491336b40SDon Lewis 	struct dn_extra_parms *ep;
50591336b40SDon Lewis 	struct dn_sch_fq_codel_parms *fqc_cfg;
50691336b40SDon Lewis 
50791336b40SDon Lewis 	schk = (struct fq_codel_schk *)(_schk+1);
50891336b40SDon Lewis 	ep = (struct dn_extra_parms *) _schk->cfg;
50991336b40SDon Lewis 
51091336b40SDon Lewis 	/* par array contains fq_codel configuration as follow
51191336b40SDon Lewis 	 * Codel: 0- target,1- interval, 2- flags
51291336b40SDon Lewis 	 * FQ_CODEL: 3- quantum, 4- limit, 5- flows
51391336b40SDon Lewis 	 */
51491336b40SDon Lewis 	if (ep && ep->oid.len ==sizeof(*ep) &&
51591336b40SDon Lewis 		ep->oid.subtype == DN_SCH_PARAMS) {
51691336b40SDon Lewis 		fqc_cfg = &schk->cfg;
51791336b40SDon Lewis 		if (ep->par[0] < 0)
51891336b40SDon Lewis 			fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
51991336b40SDon Lewis 		else
52091336b40SDon Lewis 			fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
52191336b40SDon Lewis 
52291336b40SDon Lewis 		if (ep->par[1] < 0)
52391336b40SDon Lewis 			fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
52491336b40SDon Lewis 		else
52591336b40SDon Lewis 			fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
52691336b40SDon Lewis 
52791336b40SDon Lewis 		if (ep->par[2] < 0)
52891336b40SDon Lewis 			fqc_cfg->ccfg.flags = 0;
52991336b40SDon Lewis 		else
53091336b40SDon Lewis 			fqc_cfg->ccfg.flags = ep->par[2];
53191336b40SDon Lewis 
53291336b40SDon Lewis 		/* FQ configurations */
53391336b40SDon Lewis 		if (ep->par[3] < 0)
53491336b40SDon Lewis 			fqc_cfg->quantum = fq_codel_sysctl.quantum;
53591336b40SDon Lewis 		else
53691336b40SDon Lewis 			fqc_cfg->quantum = ep->par[3];
53791336b40SDon Lewis 
53891336b40SDon Lewis 		if (ep->par[4] < 0)
53991336b40SDon Lewis 			fqc_cfg->limit = fq_codel_sysctl.limit;
54091336b40SDon Lewis 		else
54191336b40SDon Lewis 			fqc_cfg->limit = ep->par[4];
54291336b40SDon Lewis 
54391336b40SDon Lewis 		if (ep->par[5] < 0)
54491336b40SDon Lewis 			fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
54591336b40SDon Lewis 		else
54691336b40SDon Lewis 			fqc_cfg->flows_cnt = ep->par[5];
54791336b40SDon Lewis 
54891336b40SDon Lewis 		/* Bound the configurations */
54991336b40SDon Lewis 		fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 ,
55091336b40SDon Lewis 			5 * AQM_TIME_1S); ;
55191336b40SDon Lewis 		fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
55291336b40SDon Lewis 			100 * AQM_TIME_1S);
55391336b40SDon Lewis 
55491336b40SDon Lewis 		fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000);
55591336b40SDon Lewis 		fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480);
55691336b40SDon Lewis 		fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536);
55791336b40SDon Lewis 	}
55891336b40SDon Lewis 	else
55991336b40SDon Lewis 		return 1;
56091336b40SDon Lewis 
56191336b40SDon Lewis 	return 0;
56291336b40SDon Lewis }
56391336b40SDon Lewis 
56491336b40SDon Lewis /*
56591336b40SDon Lewis  * Return fq_codel scheduler configurations
56691336b40SDon Lewis  * the configurations for the scheduler is passed to userland.
56791336b40SDon Lewis  */
56891336b40SDon Lewis static int
fq_codel_getconfig(struct dn_schk * _schk,struct dn_extra_parms * ep)56991336b40SDon Lewis fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
57091336b40SDon Lewis 	struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
57191336b40SDon Lewis 	struct dn_sch_fq_codel_parms *fqc_cfg;
57291336b40SDon Lewis 
57391336b40SDon Lewis 	fqc_cfg = &schk->cfg;
57491336b40SDon Lewis 
57591336b40SDon Lewis 	strcpy(ep->name, fq_codel_desc.name);
57691336b40SDon Lewis 	ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
57791336b40SDon Lewis 	ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
57891336b40SDon Lewis 	ep->par[2] = fqc_cfg->ccfg.flags;
57991336b40SDon Lewis 
58091336b40SDon Lewis 	ep->par[3] = fqc_cfg->quantum;
58191336b40SDon Lewis 	ep->par[4] = fqc_cfg->limit;
58291336b40SDon Lewis 	ep->par[5] = fqc_cfg->flows_cnt;
58391336b40SDon Lewis 
58491336b40SDon Lewis 	return 0;
58591336b40SDon Lewis }
58691336b40SDon Lewis 
58791336b40SDon Lewis /*
58891336b40SDon Lewis  * fq_codel scheduler descriptor
58991336b40SDon Lewis  * contains the type of the scheduler, the name, the size of extra
59091336b40SDon Lewis  * data structures, and function pointers.
59191336b40SDon Lewis  */
59291336b40SDon Lewis static struct dn_alg fq_codel_desc = {
59391336b40SDon Lewis 	_SI( .type = )  DN_SCHED_FQ_CODEL,
59491336b40SDon Lewis 	_SI( .name = ) "FQ_CODEL",
59591336b40SDon Lewis 	_SI( .flags = ) 0,
59691336b40SDon Lewis 
59791336b40SDon Lewis 	_SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
59891336b40SDon Lewis 	_SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
59991336b40SDon Lewis 	_SI( .q_datalen = ) 0,
60091336b40SDon Lewis 
60191336b40SDon Lewis 	_SI( .enqueue = ) fq_codel_enqueue,
60291336b40SDon Lewis 	_SI( .dequeue = ) fq_codel_dequeue,
60391336b40SDon Lewis 	_SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
60491336b40SDon Lewis 	_SI( .destroy = ) NULL,  /*sched x delete */
60591336b40SDon Lewis 	_SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
60691336b40SDon Lewis 	_SI( .free_sched = ) fq_codel_free_sched,	/* delete schd instance */
60791336b40SDon Lewis 	_SI( .new_fsk = ) NULL,
60891336b40SDon Lewis 	_SI( .free_fsk = ) NULL,
60991336b40SDon Lewis 	_SI( .new_queue = ) NULL,
61091336b40SDon Lewis 	_SI( .free_queue = ) NULL,
61191336b40SDon Lewis 	_SI( .getconfig = )  fq_codel_getconfig,
61291336b40SDon Lewis 	_SI( .ref_count = ) 0
61391336b40SDon Lewis };
61491336b40SDon Lewis 
61591336b40SDon Lewis DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);
616