1fe267a55SPedro F. Giffuni /*-
2*4d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3fe267a55SPedro F. Giffuni *
43b3a8eb9SGleb Smirnoff * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
53b3a8eb9SGleb Smirnoff * All rights reserved
63b3a8eb9SGleb Smirnoff *
73b3a8eb9SGleb Smirnoff * Redistribution and use in source and binary forms, with or without
83b3a8eb9SGleb Smirnoff * modification, are permitted provided that the following conditions
93b3a8eb9SGleb Smirnoff * are met:
103b3a8eb9SGleb Smirnoff * 1. Redistributions of source code must retain the above copyright
113b3a8eb9SGleb Smirnoff * notice, this list of conditions and the following disclaimer.
123b3a8eb9SGleb Smirnoff * 2. Redistributions in binary form must reproduce the above copyright
133b3a8eb9SGleb Smirnoff * notice, this list of conditions and the following disclaimer in the
143b3a8eb9SGleb Smirnoff * documentation and/or other materials provided with the distribution.
153b3a8eb9SGleb Smirnoff *
163b3a8eb9SGleb Smirnoff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
173b3a8eb9SGleb Smirnoff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
183b3a8eb9SGleb Smirnoff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
193b3a8eb9SGleb Smirnoff * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
203b3a8eb9SGleb Smirnoff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
213b3a8eb9SGleb Smirnoff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
223b3a8eb9SGleb Smirnoff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
233b3a8eb9SGleb Smirnoff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
243b3a8eb9SGleb Smirnoff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
253b3a8eb9SGleb Smirnoff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
263b3a8eb9SGleb Smirnoff * SUCH DAMAGE.
273b3a8eb9SGleb Smirnoff */
283b3a8eb9SGleb Smirnoff
293b3a8eb9SGleb Smirnoff /*
303b3a8eb9SGleb Smirnoff */
313b3a8eb9SGleb Smirnoff
323b3a8eb9SGleb Smirnoff #ifdef _KERNEL
333b3a8eb9SGleb Smirnoff #include <sys/malloc.h>
343b3a8eb9SGleb Smirnoff #include <sys/socket.h>
353b3a8eb9SGleb Smirnoff #include <sys/socketvar.h>
363b3a8eb9SGleb Smirnoff #include <sys/kernel.h>
374001fcbeSDon Lewis #include <sys/lock.h>
383b3a8eb9SGleb Smirnoff #include <sys/mbuf.h>
393b3a8eb9SGleb Smirnoff #include <sys/module.h>
404001fcbeSDon Lewis #include <sys/rwlock.h>
413b3a8eb9SGleb Smirnoff #include <net/if.h> /* IFNAMSIZ */
423b3a8eb9SGleb Smirnoff #include <netinet/in.h>
433b3a8eb9SGleb Smirnoff #include <netinet/ip_var.h> /* ipfw_rule_ref */
443b3a8eb9SGleb Smirnoff #include <netinet/ip_fw.h> /* flow_id */
453b3a8eb9SGleb Smirnoff #include <netinet/ip_dummynet.h>
464001fcbeSDon Lewis #include <netpfil/ipfw/ip_fw_private.h>
473b3a8eb9SGleb Smirnoff #include <netpfil/ipfw/dn_heap.h>
483b3a8eb9SGleb Smirnoff #include <netpfil/ipfw/ip_dn_private.h>
4991336b40SDon Lewis #ifdef NEW_AQM
5091336b40SDon Lewis #include <netpfil/ipfw/dn_aqm.h>
5191336b40SDon Lewis #endif
523b3a8eb9SGleb Smirnoff #include <netpfil/ipfw/dn_sched.h>
533b3a8eb9SGleb Smirnoff #else
543b3a8eb9SGleb Smirnoff #include <dn_test.h>
553b3a8eb9SGleb Smirnoff #endif
563b3a8eb9SGleb Smirnoff
573b3a8eb9SGleb Smirnoff #ifdef QFQ_DEBUG
58fa57c83cSLuigi Rizzo #define _P64 unsigned long long /* cast for printing uint64_t */
593b3a8eb9SGleb Smirnoff struct qfq_sched;
603b3a8eb9SGleb Smirnoff static void dump_sched(struct qfq_sched *q, const char *msg);
613b3a8eb9SGleb Smirnoff #define NO(x) x
623b3a8eb9SGleb Smirnoff #else
633b3a8eb9SGleb Smirnoff #define NO(x)
643b3a8eb9SGleb Smirnoff #endif
653b3a8eb9SGleb Smirnoff #define DN_SCHED_QFQ 4 // XXX Where?
663b3a8eb9SGleb Smirnoff typedef unsigned long bitmap;
673b3a8eb9SGleb Smirnoff
683b3a8eb9SGleb Smirnoff /*
693b3a8eb9SGleb Smirnoff * bitmaps ops are critical. Some linux versions have __fls
703b3a8eb9SGleb Smirnoff * and the bitmap ops. Some machines have ffs
71f51b072dSLuigi Rizzo * NOTE: fls() returns 1 for the least significant bit,
72f51b072dSLuigi Rizzo * __fls() returns 0 for the same case.
73f51b072dSLuigi Rizzo * We use the base-0 version __fls() to match the description in
74f51b072dSLuigi Rizzo * the ToN QFQ paper
753b3a8eb9SGleb Smirnoff */
763b3a8eb9SGleb Smirnoff #if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
fls(unsigned int n)773b3a8eb9SGleb Smirnoff int fls(unsigned int n)
783b3a8eb9SGleb Smirnoff {
793b3a8eb9SGleb Smirnoff int i = 0;
803b3a8eb9SGleb Smirnoff for (i = 0; n > 0; n >>= 1, i++)
813b3a8eb9SGleb Smirnoff ;
823b3a8eb9SGleb Smirnoff return i;
833b3a8eb9SGleb Smirnoff }
843b3a8eb9SGleb Smirnoff #endif
853b3a8eb9SGleb Smirnoff
863b3a8eb9SGleb Smirnoff #if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
__fls(unsigned long word)873b3a8eb9SGleb Smirnoff static inline unsigned long __fls(unsigned long word)
883b3a8eb9SGleb Smirnoff {
893b3a8eb9SGleb Smirnoff return fls(word) - 1;
903b3a8eb9SGleb Smirnoff }
913b3a8eb9SGleb Smirnoff #endif
923b3a8eb9SGleb Smirnoff
933b3a8eb9SGleb Smirnoff #if !defined(_KERNEL) || !defined(__linux__)
943b3a8eb9SGleb Smirnoff #ifdef QFQ_DEBUG
test_bit(int ix,bitmap * p)95fa57c83cSLuigi Rizzo static int test_bit(int ix, bitmap *p)
963b3a8eb9SGleb Smirnoff {
973b3a8eb9SGleb Smirnoff if (ix < 0 || ix > 31)
983b3a8eb9SGleb Smirnoff D("bad index %d", ix);
993b3a8eb9SGleb Smirnoff return *p & (1<<ix);
1003b3a8eb9SGleb Smirnoff }
__set_bit(int ix,bitmap * p)101fa57c83cSLuigi Rizzo static void __set_bit(int ix, bitmap *p)
1023b3a8eb9SGleb Smirnoff {
1033b3a8eb9SGleb Smirnoff if (ix < 0 || ix > 31)
1043b3a8eb9SGleb Smirnoff D("bad index %d", ix);
1053b3a8eb9SGleb Smirnoff *p |= (1<<ix);
1063b3a8eb9SGleb Smirnoff }
__clear_bit(int ix,bitmap * p)107fa57c83cSLuigi Rizzo static void __clear_bit(int ix, bitmap *p)
1083b3a8eb9SGleb Smirnoff {
1093b3a8eb9SGleb Smirnoff if (ix < 0 || ix > 31)
1103b3a8eb9SGleb Smirnoff D("bad index %d", ix);
1113b3a8eb9SGleb Smirnoff *p &= ~(1<<ix);
1123b3a8eb9SGleb Smirnoff }
1133b3a8eb9SGleb Smirnoff #else /* !QFQ_DEBUG */
1143b3a8eb9SGleb Smirnoff /* XXX do we have fast version, or leave it to the compiler ? */
1153b3a8eb9SGleb Smirnoff #define test_bit(ix, pData) ((*pData) & (1<<(ix)))
1163b3a8eb9SGleb Smirnoff #define __set_bit(ix, pData) (*pData) |= (1<<(ix))
1173b3a8eb9SGleb Smirnoff #define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
1183b3a8eb9SGleb Smirnoff #endif /* !QFQ_DEBUG */
1193b3a8eb9SGleb Smirnoff #endif /* !__linux__ */
1203b3a8eb9SGleb Smirnoff
1213b3a8eb9SGleb Smirnoff #ifdef __MIPSEL__
1223b3a8eb9SGleb Smirnoff #define __clear_bit(ix, pData) (*pData) &= ~(1<<(ix))
1233b3a8eb9SGleb Smirnoff #endif
1243b3a8eb9SGleb Smirnoff
1253b3a8eb9SGleb Smirnoff /*-------------------------------------------*/
1263b3a8eb9SGleb Smirnoff /*
1273b3a8eb9SGleb Smirnoff
1283b3a8eb9SGleb Smirnoff Virtual time computations.
1293b3a8eb9SGleb Smirnoff
1303b3a8eb9SGleb Smirnoff S, F and V are all computed in fixed point arithmetic with
1313b3a8eb9SGleb Smirnoff FRAC_BITS decimal bits.
1323b3a8eb9SGleb Smirnoff
1333b3a8eb9SGleb Smirnoff QFQ_MAX_INDEX is the maximum index allowed for a group. We need
1343b3a8eb9SGleb Smirnoff one bit per index.
1353b3a8eb9SGleb Smirnoff QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
1363b3a8eb9SGleb Smirnoff The layout of the bits is as below:
1373b3a8eb9SGleb Smirnoff
1383b3a8eb9SGleb Smirnoff [ MTU_SHIFT ][ FRAC_BITS ]
1393b3a8eb9SGleb Smirnoff [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
1403b3a8eb9SGleb Smirnoff ^.__grp->index = 0
1413b3a8eb9SGleb Smirnoff *.__grp->slot_shift
1423b3a8eb9SGleb Smirnoff
1433b3a8eb9SGleb Smirnoff where MIN_SLOT_SHIFT is derived by difference from the others.
1443b3a8eb9SGleb Smirnoff
1453b3a8eb9SGleb Smirnoff The max group index corresponds to Lmax/w_min, where
1463b3a8eb9SGleb Smirnoff Lmax=1<<MTU_SHIFT, w_min = 1 .
1473b3a8eb9SGleb Smirnoff From this, and knowing how many groups (MAX_INDEX) we want,
1483b3a8eb9SGleb Smirnoff we can derive the shift corresponding to each group.
1493b3a8eb9SGleb Smirnoff
1503b3a8eb9SGleb Smirnoff Because we often need to compute
1513b3a8eb9SGleb Smirnoff F = S + len/w_i and V = V + len/wsum
1523b3a8eb9SGleb Smirnoff instead of storing w_i store the value
1533b3a8eb9SGleb Smirnoff inv_w = (1<<FRAC_BITS)/w_i
1543b3a8eb9SGleb Smirnoff so we can do F = S + len * inv_w * wsum.
1553b3a8eb9SGleb Smirnoff We use W_TOT in the formulas so we can easily move between
1563b3a8eb9SGleb Smirnoff static and adaptive weight sum.
1573b3a8eb9SGleb Smirnoff
1583b3a8eb9SGleb Smirnoff The per-scheduler-instance data contain all the data structures
1593b3a8eb9SGleb Smirnoff for the scheduler: bitmaps and bucket lists.
1603b3a8eb9SGleb Smirnoff
1613b3a8eb9SGleb Smirnoff */
1623b3a8eb9SGleb Smirnoff /*
1633b3a8eb9SGleb Smirnoff * Maximum number of consecutive slots occupied by backlogged classes
1643b3a8eb9SGleb Smirnoff * inside a group. This is approx lmax/lmin + 5.
1653b3a8eb9SGleb Smirnoff * XXX check because it poses constraints on MAX_INDEX
1663b3a8eb9SGleb Smirnoff */
1673b3a8eb9SGleb Smirnoff #define QFQ_MAX_SLOTS 32
1683b3a8eb9SGleb Smirnoff /*
1693b3a8eb9SGleb Smirnoff * Shifts used for class<->group mapping. Class weights are
1703b3a8eb9SGleb Smirnoff * in the range [1, QFQ_MAX_WEIGHT], we to map each class i to the
1713b3a8eb9SGleb Smirnoff * group with the smallest index that can support the L_i / r_i
1723b3a8eb9SGleb Smirnoff * configured for the class.
1733b3a8eb9SGleb Smirnoff *
1743b3a8eb9SGleb Smirnoff * grp->index is the index of the group; and grp->slot_shift
1753b3a8eb9SGleb Smirnoff * is the shift for the corresponding (scaled) sigma_i.
1763b3a8eb9SGleb Smirnoff *
1773b3a8eb9SGleb Smirnoff * When computing the group index, we do (len<<FP_SHIFT)/weight,
1783b3a8eb9SGleb Smirnoff * then compute an FLS (which is like a log2()), and if the result
1793b3a8eb9SGleb Smirnoff * is below the MAX_INDEX region we use 0 (which is the same as
1803b3a8eb9SGleb Smirnoff * using a larger len).
1813b3a8eb9SGleb Smirnoff */
1823b3a8eb9SGleb Smirnoff #define QFQ_MAX_INDEX 19
1833b3a8eb9SGleb Smirnoff #define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
1843b3a8eb9SGleb Smirnoff
1853b3a8eb9SGleb Smirnoff #define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
1863b3a8eb9SGleb Smirnoff #define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
1873b3a8eb9SGleb Smirnoff
1883b3a8eb9SGleb Smirnoff #define FRAC_BITS 30 /* fixed point arithmetic */
1893b3a8eb9SGleb Smirnoff #define ONE_FP (1UL << FRAC_BITS)
1903b3a8eb9SGleb Smirnoff
1913b3a8eb9SGleb Smirnoff #define QFQ_MTU_SHIFT 11 /* log2(max_len) */
1923b3a8eb9SGleb Smirnoff #define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
1933b3a8eb9SGleb Smirnoff
1943b3a8eb9SGleb Smirnoff /*
1953b3a8eb9SGleb Smirnoff * Possible group states, also indexes for the bitmaps array in
1963b3a8eb9SGleb Smirnoff * struct qfq_queue. We rely on ER, IR, EB, IB being numbered 0..3
1973b3a8eb9SGleb Smirnoff */
1983b3a8eb9SGleb Smirnoff enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
1993b3a8eb9SGleb Smirnoff
2003b3a8eb9SGleb Smirnoff struct qfq_group;
2013b3a8eb9SGleb Smirnoff /*
2023b3a8eb9SGleb Smirnoff * additional queue info. Some of this info should come from
2033b3a8eb9SGleb Smirnoff * the flowset, we copy them here for faster processing.
2043b3a8eb9SGleb Smirnoff * This is an overlay of the struct dn_queue
2053b3a8eb9SGleb Smirnoff */
2063b3a8eb9SGleb Smirnoff struct qfq_class {
2073b3a8eb9SGleb Smirnoff struct dn_queue _q;
2083b3a8eb9SGleb Smirnoff uint64_t S, F; /* flow timestamps (exact) */
2093b3a8eb9SGleb Smirnoff struct qfq_class *next; /* Link for the slot list. */
2103b3a8eb9SGleb Smirnoff
2113b3a8eb9SGleb Smirnoff /* group we belong to. In principle we would need the index,
2123b3a8eb9SGleb Smirnoff * which is log_2(lmax/weight), but we never reference it
2133b3a8eb9SGleb Smirnoff * directly, only the group.
2143b3a8eb9SGleb Smirnoff */
2153b3a8eb9SGleb Smirnoff struct qfq_group *grp;
2163b3a8eb9SGleb Smirnoff
2173b3a8eb9SGleb Smirnoff /* these are copied from the flowset. */
2183b3a8eb9SGleb Smirnoff uint32_t inv_w; /* ONE_FP/weight */
2193b3a8eb9SGleb Smirnoff uint32_t lmax; /* Max packet size for this flow. */
2203b3a8eb9SGleb Smirnoff };
2213b3a8eb9SGleb Smirnoff
2223b3a8eb9SGleb Smirnoff /* Group descriptor, see the paper for details.
2233b3a8eb9SGleb Smirnoff * Basically this contains the bucket lists
2243b3a8eb9SGleb Smirnoff */
2253b3a8eb9SGleb Smirnoff struct qfq_group {
2263b3a8eb9SGleb Smirnoff uint64_t S, F; /* group timestamps (approx). */
2273b3a8eb9SGleb Smirnoff unsigned int slot_shift; /* Slot shift. */
2283b3a8eb9SGleb Smirnoff unsigned int index; /* Group index. */
2293b3a8eb9SGleb Smirnoff unsigned int front; /* Index of the front slot. */
2303b3a8eb9SGleb Smirnoff bitmap full_slots; /* non-empty slots */
2313b3a8eb9SGleb Smirnoff
2323b3a8eb9SGleb Smirnoff /* Array of lists of active classes. */
2333b3a8eb9SGleb Smirnoff struct qfq_class *slots[QFQ_MAX_SLOTS];
2343b3a8eb9SGleb Smirnoff };
2353b3a8eb9SGleb Smirnoff
2363b3a8eb9SGleb Smirnoff /* scheduler instance descriptor. */
2373b3a8eb9SGleb Smirnoff struct qfq_sched {
2383b3a8eb9SGleb Smirnoff uint64_t V; /* Precise virtual time. */
2393b3a8eb9SGleb Smirnoff uint32_t wsum; /* weight sum */
2404af7aed7SLuigi Rizzo uint32_t iwsum; /* inverse weight sum */
241fa57c83cSLuigi Rizzo NO(uint32_t i_wsum;) /* ONE_FP/w_sum */
242fa57c83cSLuigi Rizzo NO(uint32_t queued;) /* debugging */
243fa57c83cSLuigi Rizzo NO(uint32_t loops;) /* debugging */
2443b3a8eb9SGleb Smirnoff bitmap bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
2453b3a8eb9SGleb Smirnoff struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
2463b3a8eb9SGleb Smirnoff };
2473b3a8eb9SGleb Smirnoff
2483b3a8eb9SGleb Smirnoff /*---- support functions ----------------------------*/
2493b3a8eb9SGleb Smirnoff
2503b3a8eb9SGleb Smirnoff /* Generic comparison function, handling wraparound. */
qfq_gt(uint64_t a,uint64_t b)2513b3a8eb9SGleb Smirnoff static inline int qfq_gt(uint64_t a, uint64_t b)
2523b3a8eb9SGleb Smirnoff {
2533b3a8eb9SGleb Smirnoff return (int64_t)(a - b) > 0;
2543b3a8eb9SGleb Smirnoff }
2553b3a8eb9SGleb Smirnoff
2563b3a8eb9SGleb Smirnoff /* Round a precise timestamp to its slotted value. */
qfq_round_down(uint64_t ts,unsigned int shift)2573b3a8eb9SGleb Smirnoff static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
2583b3a8eb9SGleb Smirnoff {
2593b3a8eb9SGleb Smirnoff return ts & ~((1ULL << shift) - 1);
2603b3a8eb9SGleb Smirnoff }
2613b3a8eb9SGleb Smirnoff
2623b3a8eb9SGleb Smirnoff /* return the pointer to the group with lowest index in the bitmap */
qfq_ffs(struct qfq_sched * q,unsigned long bitmap)2633b3a8eb9SGleb Smirnoff static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
2643b3a8eb9SGleb Smirnoff unsigned long bitmap)
2653b3a8eb9SGleb Smirnoff {
2663b3a8eb9SGleb Smirnoff int index = ffs(bitmap) - 1; // zero-based
2673b3a8eb9SGleb Smirnoff return &q->groups[index];
2683b3a8eb9SGleb Smirnoff }
2693b3a8eb9SGleb Smirnoff
2703b3a8eb9SGleb Smirnoff /*
2713b3a8eb9SGleb Smirnoff * Calculate a flow index, given its weight and maximum packet length.
2723b3a8eb9SGleb Smirnoff * index = log_2(maxlen/weight) but we need to apply the scaling.
2733b3a8eb9SGleb Smirnoff * This is used only once at flow creation.
2743b3a8eb9SGleb Smirnoff */
qfq_calc_index(uint32_t inv_w,unsigned int maxlen)2753b3a8eb9SGleb Smirnoff static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
2763b3a8eb9SGleb Smirnoff {
2773b3a8eb9SGleb Smirnoff uint64_t slot_size = (uint64_t)maxlen *inv_w;
2783b3a8eb9SGleb Smirnoff unsigned long size_map;
2793b3a8eb9SGleb Smirnoff int index = 0;
2803b3a8eb9SGleb Smirnoff
2813b3a8eb9SGleb Smirnoff size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
2823b3a8eb9SGleb Smirnoff if (!size_map)
2833b3a8eb9SGleb Smirnoff goto out;
2843b3a8eb9SGleb Smirnoff
2853b3a8eb9SGleb Smirnoff index = __fls(size_map) + 1; // basically a log_2()
2863b3a8eb9SGleb Smirnoff index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
2873b3a8eb9SGleb Smirnoff
2883b3a8eb9SGleb Smirnoff if (index < 0)
2893b3a8eb9SGleb Smirnoff index = 0;
2903b3a8eb9SGleb Smirnoff
2913b3a8eb9SGleb Smirnoff out:
2923b3a8eb9SGleb Smirnoff ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
2933b3a8eb9SGleb Smirnoff return index;
2943b3a8eb9SGleb Smirnoff }
2953b3a8eb9SGleb Smirnoff /*---- end support functions ----*/
2963b3a8eb9SGleb Smirnoff
2973b3a8eb9SGleb Smirnoff /*-------- API calls --------------------------------*/
2983b3a8eb9SGleb Smirnoff /*
2993b3a8eb9SGleb Smirnoff * Validate and copy parameters from flowset.
3003b3a8eb9SGleb Smirnoff */
3013b3a8eb9SGleb Smirnoff static int
qfq_new_queue(struct dn_queue * _q)3023b3a8eb9SGleb Smirnoff qfq_new_queue(struct dn_queue *_q)
3033b3a8eb9SGleb Smirnoff {
3043b3a8eb9SGleb Smirnoff struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
3053b3a8eb9SGleb Smirnoff struct qfq_class *cl = (struct qfq_class *)_q;
3063b3a8eb9SGleb Smirnoff int i;
3073b3a8eb9SGleb Smirnoff uint32_t w; /* approximated weight */
3083b3a8eb9SGleb Smirnoff
3093b3a8eb9SGleb Smirnoff /* import parameters from the flowset. They should be correct
3103b3a8eb9SGleb Smirnoff * already.
3113b3a8eb9SGleb Smirnoff */
3123b3a8eb9SGleb Smirnoff w = _q->fs->fs.par[0];
3133b3a8eb9SGleb Smirnoff cl->lmax = _q->fs->fs.par[1];
3143b3a8eb9SGleb Smirnoff if (!w || w > QFQ_MAX_WEIGHT) {
3153b3a8eb9SGleb Smirnoff w = 1;
3163b3a8eb9SGleb Smirnoff D("rounding weight to 1");
3173b3a8eb9SGleb Smirnoff }
3183b3a8eb9SGleb Smirnoff cl->inv_w = ONE_FP/w;
3193b3a8eb9SGleb Smirnoff w = ONE_FP/cl->inv_w;
3203b3a8eb9SGleb Smirnoff if (q->wsum + w > QFQ_MAX_WSUM)
3213b3a8eb9SGleb Smirnoff return EINVAL;
3223b3a8eb9SGleb Smirnoff
3233b3a8eb9SGleb Smirnoff i = qfq_calc_index(cl->inv_w, cl->lmax);
3243b3a8eb9SGleb Smirnoff cl->grp = &q->groups[i];
3253b3a8eb9SGleb Smirnoff q->wsum += w;
3264af7aed7SLuigi Rizzo q->iwsum = ONE_FP / q->wsum; /* XXX note theory */
3273b3a8eb9SGleb Smirnoff // XXX cl->S = q->V; ?
3283b3a8eb9SGleb Smirnoff return 0;
3293b3a8eb9SGleb Smirnoff }
3303b3a8eb9SGleb Smirnoff
3313b3a8eb9SGleb Smirnoff /* remove an empty queue */
3323b3a8eb9SGleb Smirnoff static int
qfq_free_queue(struct dn_queue * _q)3333b3a8eb9SGleb Smirnoff qfq_free_queue(struct dn_queue *_q)
3343b3a8eb9SGleb Smirnoff {
3353b3a8eb9SGleb Smirnoff struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
3363b3a8eb9SGleb Smirnoff struct qfq_class *cl = (struct qfq_class *)_q;
3373b3a8eb9SGleb Smirnoff if (cl->inv_w) {
3383b3a8eb9SGleb Smirnoff q->wsum -= ONE_FP/cl->inv_w;
3394af7aed7SLuigi Rizzo if (q->wsum != 0)
3404af7aed7SLuigi Rizzo q->iwsum = ONE_FP / q->wsum;
3413b3a8eb9SGleb Smirnoff cl->inv_w = 0; /* reset weight to avoid run twice */
3423b3a8eb9SGleb Smirnoff }
3433b3a8eb9SGleb Smirnoff return 0;
3443b3a8eb9SGleb Smirnoff }
3453b3a8eb9SGleb Smirnoff
3463b3a8eb9SGleb Smirnoff /* Calculate a mask to mimic what would be ffs_from(). */
3473b3a8eb9SGleb Smirnoff static inline unsigned long
mask_from(unsigned long bitmap,int from)3483b3a8eb9SGleb Smirnoff mask_from(unsigned long bitmap, int from)
3493b3a8eb9SGleb Smirnoff {
3503b3a8eb9SGleb Smirnoff return bitmap & ~((1UL << from) - 1);
3513b3a8eb9SGleb Smirnoff }
3523b3a8eb9SGleb Smirnoff
3533b3a8eb9SGleb Smirnoff /*
3543b3a8eb9SGleb Smirnoff * The state computation relies on ER=0, IR=1, EB=2, IB=3
3553b3a8eb9SGleb Smirnoff * First compute eligibility comparing grp->S, q->V,
3563b3a8eb9SGleb Smirnoff * then check if someone is blocking us and possibly add EB
3573b3a8eb9SGleb Smirnoff */
3583b3a8eb9SGleb Smirnoff static inline unsigned int
qfq_calc_state(struct qfq_sched * q,struct qfq_group * grp)3593b3a8eb9SGleb Smirnoff qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
3603b3a8eb9SGleb Smirnoff {
3613b3a8eb9SGleb Smirnoff /* if S > V we are not eligible */
3623b3a8eb9SGleb Smirnoff unsigned int state = qfq_gt(grp->S, q->V);
3633b3a8eb9SGleb Smirnoff unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
3643b3a8eb9SGleb Smirnoff struct qfq_group *next;
3653b3a8eb9SGleb Smirnoff
3663b3a8eb9SGleb Smirnoff if (mask) {
3673b3a8eb9SGleb Smirnoff next = qfq_ffs(q, mask);
3683b3a8eb9SGleb Smirnoff if (qfq_gt(grp->F, next->F))
3693b3a8eb9SGleb Smirnoff state |= EB;
3703b3a8eb9SGleb Smirnoff }
3713b3a8eb9SGleb Smirnoff
3723b3a8eb9SGleb Smirnoff return state;
3733b3a8eb9SGleb Smirnoff }
3743b3a8eb9SGleb Smirnoff
3753b3a8eb9SGleb Smirnoff /*
3763b3a8eb9SGleb Smirnoff * In principle
3773b3a8eb9SGleb Smirnoff * q->bitmaps[dst] |= q->bitmaps[src] & mask;
3783b3a8eb9SGleb Smirnoff * q->bitmaps[src] &= ~mask;
3793b3a8eb9SGleb Smirnoff * but we should make sure that src != dst
3803b3a8eb9SGleb Smirnoff */
3813b3a8eb9SGleb Smirnoff static inline void
qfq_move_groups(struct qfq_sched * q,unsigned long mask,int src,int dst)3823b3a8eb9SGleb Smirnoff qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
3833b3a8eb9SGleb Smirnoff {
3843b3a8eb9SGleb Smirnoff q->bitmaps[dst] |= q->bitmaps[src] & mask;
3853b3a8eb9SGleb Smirnoff q->bitmaps[src] &= ~mask;
3863b3a8eb9SGleb Smirnoff }
3873b3a8eb9SGleb Smirnoff
3883b3a8eb9SGleb Smirnoff static inline void
qfq_unblock_groups(struct qfq_sched * q,int index,uint64_t old_finish)3893b3a8eb9SGleb Smirnoff qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
3903b3a8eb9SGleb Smirnoff {
3913b3a8eb9SGleb Smirnoff unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
3923b3a8eb9SGleb Smirnoff struct qfq_group *next;
3933b3a8eb9SGleb Smirnoff
3943b3a8eb9SGleb Smirnoff if (mask) {
3953b3a8eb9SGleb Smirnoff next = qfq_ffs(q, mask);
3963b3a8eb9SGleb Smirnoff if (!qfq_gt(next->F, old_finish))
3973b3a8eb9SGleb Smirnoff return;
3983b3a8eb9SGleb Smirnoff }
3993b3a8eb9SGleb Smirnoff
4003b3a8eb9SGleb Smirnoff mask = (1UL << index) - 1;
4013b3a8eb9SGleb Smirnoff qfq_move_groups(q, mask, EB, ER);
4023b3a8eb9SGleb Smirnoff qfq_move_groups(q, mask, IB, IR);
4033b3a8eb9SGleb Smirnoff }
4043b3a8eb9SGleb Smirnoff
4053b3a8eb9SGleb Smirnoff /*
4063b3a8eb9SGleb Smirnoff * perhaps
4073b3a8eb9SGleb Smirnoff *
4083b3a8eb9SGleb Smirnoff old_V ^= q->V;
4093b3a8eb9SGleb Smirnoff old_V >>= QFQ_MIN_SLOT_SHIFT;
4103b3a8eb9SGleb Smirnoff if (old_V) {
4113b3a8eb9SGleb Smirnoff ...
4123b3a8eb9SGleb Smirnoff }
4133b3a8eb9SGleb Smirnoff *
4143b3a8eb9SGleb Smirnoff */
4153b3a8eb9SGleb Smirnoff static inline void
qfq_make_eligible(struct qfq_sched * q,uint64_t old_V)4163b3a8eb9SGleb Smirnoff qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
4173b3a8eb9SGleb Smirnoff {
4183b3a8eb9SGleb Smirnoff unsigned long mask, vslot, old_vslot;
4193b3a8eb9SGleb Smirnoff
4203b3a8eb9SGleb Smirnoff vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
4213b3a8eb9SGleb Smirnoff old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
4223b3a8eb9SGleb Smirnoff
4233b3a8eb9SGleb Smirnoff if (vslot != old_vslot) {
424f51b072dSLuigi Rizzo /* must be 2ULL, see ToN QFQ article fig.5, we use base-0 fls */
425f51b072dSLuigi Rizzo mask = (2ULL << (__fls(vslot ^ old_vslot))) - 1;
4263b3a8eb9SGleb Smirnoff qfq_move_groups(q, mask, IR, ER);
4273b3a8eb9SGleb Smirnoff qfq_move_groups(q, mask, IB, EB);
4283b3a8eb9SGleb Smirnoff }
4293b3a8eb9SGleb Smirnoff }
4303b3a8eb9SGleb Smirnoff
4313b3a8eb9SGleb Smirnoff /*
4323b3a8eb9SGleb Smirnoff * XXX we should make sure that slot becomes less than 32.
4333b3a8eb9SGleb Smirnoff * This is guaranteed by the input values.
4343b3a8eb9SGleb Smirnoff * roundedS is always cl->S rounded on grp->slot_shift bits.
4353b3a8eb9SGleb Smirnoff */
4363b3a8eb9SGleb Smirnoff static inline void
qfq_slot_insert(struct qfq_group * grp,struct qfq_class * cl,uint64_t roundedS)4373b3a8eb9SGleb Smirnoff qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
4383b3a8eb9SGleb Smirnoff {
4393b3a8eb9SGleb Smirnoff uint64_t slot = (roundedS - grp->S) >> grp->slot_shift;
4403b3a8eb9SGleb Smirnoff unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
4413b3a8eb9SGleb Smirnoff
4423b3a8eb9SGleb Smirnoff cl->next = grp->slots[i];
4433b3a8eb9SGleb Smirnoff grp->slots[i] = cl;
4443b3a8eb9SGleb Smirnoff __set_bit(slot, &grp->full_slots);
4453b3a8eb9SGleb Smirnoff }
4463b3a8eb9SGleb Smirnoff
4473b3a8eb9SGleb Smirnoff /*
4483b3a8eb9SGleb Smirnoff * remove the entry from the slot
4493b3a8eb9SGleb Smirnoff */
4503b3a8eb9SGleb Smirnoff static inline void
qfq_front_slot_remove(struct qfq_group * grp)4513b3a8eb9SGleb Smirnoff qfq_front_slot_remove(struct qfq_group *grp)
4523b3a8eb9SGleb Smirnoff {
4533b3a8eb9SGleb Smirnoff struct qfq_class **h = &grp->slots[grp->front];
4543b3a8eb9SGleb Smirnoff
4553b3a8eb9SGleb Smirnoff *h = (*h)->next;
4563b3a8eb9SGleb Smirnoff if (!*h)
4573b3a8eb9SGleb Smirnoff __clear_bit(0, &grp->full_slots);
4583b3a8eb9SGleb Smirnoff }
4593b3a8eb9SGleb Smirnoff
4603b3a8eb9SGleb Smirnoff /*
4613b3a8eb9SGleb Smirnoff * Returns the first full queue in a group. As a side effect,
4623b3a8eb9SGleb Smirnoff * adjust the bucket list so the first non-empty bucket is at
4633b3a8eb9SGleb Smirnoff * position 0 in full_slots.
4643b3a8eb9SGleb Smirnoff */
4653b3a8eb9SGleb Smirnoff static inline struct qfq_class *
qfq_slot_scan(struct qfq_group * grp)4663b3a8eb9SGleb Smirnoff qfq_slot_scan(struct qfq_group *grp)
4673b3a8eb9SGleb Smirnoff {
4683b3a8eb9SGleb Smirnoff int i;
4693b3a8eb9SGleb Smirnoff
4703b3a8eb9SGleb Smirnoff ND("grp %d full %x", grp->index, grp->full_slots);
4713b3a8eb9SGleb Smirnoff if (!grp->full_slots)
4723b3a8eb9SGleb Smirnoff return NULL;
4733b3a8eb9SGleb Smirnoff
4743b3a8eb9SGleb Smirnoff i = ffs(grp->full_slots) - 1; // zero-based
4753b3a8eb9SGleb Smirnoff if (i > 0) {
4763b3a8eb9SGleb Smirnoff grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
4773b3a8eb9SGleb Smirnoff grp->full_slots >>= i;
4783b3a8eb9SGleb Smirnoff }
4793b3a8eb9SGleb Smirnoff
4803b3a8eb9SGleb Smirnoff return grp->slots[grp->front];
4813b3a8eb9SGleb Smirnoff }
4823b3a8eb9SGleb Smirnoff
4833b3a8eb9SGleb Smirnoff /*
4843b3a8eb9SGleb Smirnoff * adjust the bucket list. When the start time of a group decreases,
4853b3a8eb9SGleb Smirnoff * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
4863b3a8eb9SGleb Smirnoff * move the objects. The mask of occupied slots must be shifted
4873b3a8eb9SGleb Smirnoff * because we use ffs() to find the first non-empty slot.
4883b3a8eb9SGleb Smirnoff * This covers decreases in the group's start time, but what about
4893b3a8eb9SGleb Smirnoff * increases of the start time ?
4903b3a8eb9SGleb Smirnoff * Here too we should make sure that i is less than 32
4913b3a8eb9SGleb Smirnoff */
4923b3a8eb9SGleb Smirnoff static inline void
qfq_slot_rotate(struct qfq_sched * q,struct qfq_group * grp,uint64_t roundedS)4933b3a8eb9SGleb Smirnoff qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
4943b3a8eb9SGleb Smirnoff {
4953b3a8eb9SGleb Smirnoff unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
4963b3a8eb9SGleb Smirnoff
497fa57c83cSLuigi Rizzo (void)q;
4983b3a8eb9SGleb Smirnoff grp->full_slots <<= i;
4993b3a8eb9SGleb Smirnoff grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
5003b3a8eb9SGleb Smirnoff }
5013b3a8eb9SGleb Smirnoff
5023b3a8eb9SGleb Smirnoff static inline void
qfq_update_eligible(struct qfq_sched * q,uint64_t old_V)5033b3a8eb9SGleb Smirnoff qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
5043b3a8eb9SGleb Smirnoff {
5053b3a8eb9SGleb Smirnoff bitmap ineligible;
5063b3a8eb9SGleb Smirnoff
5073b3a8eb9SGleb Smirnoff ineligible = q->bitmaps[IR] | q->bitmaps[IB];
5083b3a8eb9SGleb Smirnoff if (ineligible) {
5093b3a8eb9SGleb Smirnoff if (!q->bitmaps[ER]) {
5103b3a8eb9SGleb Smirnoff struct qfq_group *grp;
5113b3a8eb9SGleb Smirnoff grp = qfq_ffs(q, ineligible);
5123b3a8eb9SGleb Smirnoff if (qfq_gt(grp->S, q->V))
5133b3a8eb9SGleb Smirnoff q->V = grp->S;
5143b3a8eb9SGleb Smirnoff }
5153b3a8eb9SGleb Smirnoff qfq_make_eligible(q, old_V);
5163b3a8eb9SGleb Smirnoff }
5173b3a8eb9SGleb Smirnoff }
5183b3a8eb9SGleb Smirnoff
5193b3a8eb9SGleb Smirnoff /*
5203b3a8eb9SGleb Smirnoff * Updates the class, returns true if also the group needs to be updated.
5213b3a8eb9SGleb Smirnoff */
5223b3a8eb9SGleb Smirnoff static inline int
qfq_update_class(struct qfq_sched * q,struct qfq_group * grp,struct qfq_class * cl)5233b3a8eb9SGleb Smirnoff qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
5243b3a8eb9SGleb Smirnoff struct qfq_class *cl)
5253b3a8eb9SGleb Smirnoff {
5263b3a8eb9SGleb Smirnoff
527fa57c83cSLuigi Rizzo (void)q;
5283b3a8eb9SGleb Smirnoff cl->S = cl->F;
5293b3a8eb9SGleb Smirnoff if (cl->_q.mq.head == NULL) {
5303b3a8eb9SGleb Smirnoff qfq_front_slot_remove(grp);
5313b3a8eb9SGleb Smirnoff } else {
5323b3a8eb9SGleb Smirnoff unsigned int len;
5333b3a8eb9SGleb Smirnoff uint64_t roundedS;
5343b3a8eb9SGleb Smirnoff
5353b3a8eb9SGleb Smirnoff len = cl->_q.mq.head->m_pkthdr.len;
5363b3a8eb9SGleb Smirnoff cl->F = cl->S + (uint64_t)len * cl->inv_w;
5373b3a8eb9SGleb Smirnoff roundedS = qfq_round_down(cl->S, grp->slot_shift);
5383b3a8eb9SGleb Smirnoff if (roundedS == grp->S)
5393b3a8eb9SGleb Smirnoff return 0;
5403b3a8eb9SGleb Smirnoff
5413b3a8eb9SGleb Smirnoff qfq_front_slot_remove(grp);
5423b3a8eb9SGleb Smirnoff qfq_slot_insert(grp, cl, roundedS);
5433b3a8eb9SGleb Smirnoff }
5443b3a8eb9SGleb Smirnoff return 1;
5453b3a8eb9SGleb Smirnoff }
5463b3a8eb9SGleb Smirnoff
5473b3a8eb9SGleb Smirnoff static struct mbuf *
qfq_dequeue(struct dn_sch_inst * si)5483b3a8eb9SGleb Smirnoff qfq_dequeue(struct dn_sch_inst *si)
5493b3a8eb9SGleb Smirnoff {
5503b3a8eb9SGleb Smirnoff struct qfq_sched *q = (struct qfq_sched *)(si + 1);
5513b3a8eb9SGleb Smirnoff struct qfq_group *grp;
5523b3a8eb9SGleb Smirnoff struct qfq_class *cl;
5533b3a8eb9SGleb Smirnoff struct mbuf *m;
5543b3a8eb9SGleb Smirnoff uint64_t old_V;
5553b3a8eb9SGleb Smirnoff
5563b3a8eb9SGleb Smirnoff NO(q->loops++;)
5573b3a8eb9SGleb Smirnoff if (!q->bitmaps[ER]) {
5583b3a8eb9SGleb Smirnoff NO(if (q->queued)
5593b3a8eb9SGleb Smirnoff dump_sched(q, "start dequeue");)
5603b3a8eb9SGleb Smirnoff return NULL;
5613b3a8eb9SGleb Smirnoff }
5623b3a8eb9SGleb Smirnoff
5633b3a8eb9SGleb Smirnoff grp = qfq_ffs(q, q->bitmaps[ER]);
5643b3a8eb9SGleb Smirnoff
5653b3a8eb9SGleb Smirnoff cl = grp->slots[grp->front];
5663b3a8eb9SGleb Smirnoff /* extract from the first bucket in the bucket list */
5673b3a8eb9SGleb Smirnoff m = dn_dequeue(&cl->_q);
5683b3a8eb9SGleb Smirnoff
5693b3a8eb9SGleb Smirnoff if (!m) {
5703b3a8eb9SGleb Smirnoff D("BUG/* non-workconserving leaf */");
5713b3a8eb9SGleb Smirnoff return NULL;
5723b3a8eb9SGleb Smirnoff }
5733b3a8eb9SGleb Smirnoff NO(q->queued--;)
5743b3a8eb9SGleb Smirnoff old_V = q->V;
5754af7aed7SLuigi Rizzo q->V += (uint64_t)m->m_pkthdr.len * q->iwsum;
5763b3a8eb9SGleb Smirnoff ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
5773b3a8eb9SGleb Smirnoff
5783b3a8eb9SGleb Smirnoff if (qfq_update_class(q, grp, cl)) {
5793b3a8eb9SGleb Smirnoff uint64_t old_F = grp->F;
5803b3a8eb9SGleb Smirnoff cl = qfq_slot_scan(grp);
5813b3a8eb9SGleb Smirnoff if (!cl) { /* group gone, remove from ER */
5823b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[ER]);
5833b3a8eb9SGleb Smirnoff // grp->S = grp->F + 1; // XXX debugging only
5843b3a8eb9SGleb Smirnoff } else {
5853b3a8eb9SGleb Smirnoff uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
5863b3a8eb9SGleb Smirnoff unsigned int s;
5873b3a8eb9SGleb Smirnoff
5883b3a8eb9SGleb Smirnoff if (grp->S == roundedS)
5893b3a8eb9SGleb Smirnoff goto skip_unblock;
5903b3a8eb9SGleb Smirnoff grp->S = roundedS;
5913b3a8eb9SGleb Smirnoff grp->F = roundedS + (2ULL << grp->slot_shift);
5923b3a8eb9SGleb Smirnoff /* remove from ER and put in the new set */
5933b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[ER]);
5943b3a8eb9SGleb Smirnoff s = qfq_calc_state(q, grp);
5953b3a8eb9SGleb Smirnoff __set_bit(grp->index, &q->bitmaps[s]);
5963b3a8eb9SGleb Smirnoff }
5973b3a8eb9SGleb Smirnoff /* we need to unblock even if the group has gone away */
5983b3a8eb9SGleb Smirnoff qfq_unblock_groups(q, grp->index, old_F);
5993b3a8eb9SGleb Smirnoff }
6003b3a8eb9SGleb Smirnoff
6013b3a8eb9SGleb Smirnoff skip_unblock:
6023b3a8eb9SGleb Smirnoff qfq_update_eligible(q, old_V);
6033b3a8eb9SGleb Smirnoff NO(if (!q->bitmaps[ER] && q->queued)
6043b3a8eb9SGleb Smirnoff dump_sched(q, "end dequeue");)
6053b3a8eb9SGleb Smirnoff
6063b3a8eb9SGleb Smirnoff return m;
6073b3a8eb9SGleb Smirnoff }
6083b3a8eb9SGleb Smirnoff
6093b3a8eb9SGleb Smirnoff /*
6103b3a8eb9SGleb Smirnoff * Assign a reasonable start time for a new flow k in group i.
6113b3a8eb9SGleb Smirnoff * Admissible values for \hat(F) are multiples of \sigma_i
6123b3a8eb9SGleb Smirnoff * no greater than V+\sigma_i . Larger values mean that
6133b3a8eb9SGleb Smirnoff * we had a wraparound so we consider the timestamp to be stale.
6143b3a8eb9SGleb Smirnoff *
6153b3a8eb9SGleb Smirnoff * If F is not stale and F >= V then we set S = F.
6163b3a8eb9SGleb Smirnoff * Otherwise we should assign S = V, but this may violate
6173b3a8eb9SGleb Smirnoff * the ordering in ER. So, if we have groups in ER, set S to
6183b3a8eb9SGleb Smirnoff * the F_j of the first group j which would be blocking us.
6193b3a8eb9SGleb Smirnoff * We are guaranteed not to move S backward because
6203b3a8eb9SGleb Smirnoff * otherwise our group i would still be blocked.
6213b3a8eb9SGleb Smirnoff */
6223b3a8eb9SGleb Smirnoff static inline void
qfq_update_start(struct qfq_sched * q,struct qfq_class * cl)6233b3a8eb9SGleb Smirnoff qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
6243b3a8eb9SGleb Smirnoff {
6253b3a8eb9SGleb Smirnoff unsigned long mask;
6263b3a8eb9SGleb Smirnoff uint64_t limit, roundedF;
6273b3a8eb9SGleb Smirnoff int slot_shift = cl->grp->slot_shift;
6283b3a8eb9SGleb Smirnoff
6293b3a8eb9SGleb Smirnoff roundedF = qfq_round_down(cl->F, slot_shift);
6304af7aed7SLuigi Rizzo limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
6313b3a8eb9SGleb Smirnoff
6323b3a8eb9SGleb Smirnoff if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
6333b3a8eb9SGleb Smirnoff /* timestamp was stale */
6343b3a8eb9SGleb Smirnoff mask = mask_from(q->bitmaps[ER], cl->grp->index);
6353b3a8eb9SGleb Smirnoff if (mask) {
6363b3a8eb9SGleb Smirnoff struct qfq_group *next = qfq_ffs(q, mask);
6373b3a8eb9SGleb Smirnoff if (qfq_gt(roundedF, next->F)) {
6384af7aed7SLuigi Rizzo /* from pv 71261956973ba9e0637848a5adb4a5819b4bae83 */
6394af7aed7SLuigi Rizzo if (qfq_gt(limit, next->F))
6403b3a8eb9SGleb Smirnoff cl->S = next->F;
6414af7aed7SLuigi Rizzo else /* preserve timestamp correctness */
6424af7aed7SLuigi Rizzo cl->S = limit;
6433b3a8eb9SGleb Smirnoff return;
6443b3a8eb9SGleb Smirnoff }
6453b3a8eb9SGleb Smirnoff }
6463b3a8eb9SGleb Smirnoff cl->S = q->V;
6473b3a8eb9SGleb Smirnoff } else { /* timestamp is not stale */
6483b3a8eb9SGleb Smirnoff cl->S = cl->F;
6493b3a8eb9SGleb Smirnoff }
6503b3a8eb9SGleb Smirnoff }
6513b3a8eb9SGleb Smirnoff
6523b3a8eb9SGleb Smirnoff static int
qfq_enqueue(struct dn_sch_inst * si,struct dn_queue * _q,struct mbuf * m)6533b3a8eb9SGleb Smirnoff qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
6543b3a8eb9SGleb Smirnoff {
6553b3a8eb9SGleb Smirnoff struct qfq_sched *q = (struct qfq_sched *)(si + 1);
6563b3a8eb9SGleb Smirnoff struct qfq_group *grp;
6573b3a8eb9SGleb Smirnoff struct qfq_class *cl = (struct qfq_class *)_q;
6583b3a8eb9SGleb Smirnoff uint64_t roundedS;
6593b3a8eb9SGleb Smirnoff int s;
6603b3a8eb9SGleb Smirnoff
6613b3a8eb9SGleb Smirnoff NO(q->loops++;)
6623b3a8eb9SGleb Smirnoff DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
6633b3a8eb9SGleb Smirnoff _q, cl->inv_w, cl->grp->index);
6643b3a8eb9SGleb Smirnoff /* XXX verify that the packet obeys the parameters */
6653b3a8eb9SGleb Smirnoff if (m != _q->mq.head) {
6663b3a8eb9SGleb Smirnoff if (dn_enqueue(_q, m, 0)) /* packet was dropped */
6673b3a8eb9SGleb Smirnoff return 1;
6683b3a8eb9SGleb Smirnoff NO(q->queued++;)
6693b3a8eb9SGleb Smirnoff if (m != _q->mq.head)
6703b3a8eb9SGleb Smirnoff return 0;
6713b3a8eb9SGleb Smirnoff }
6723b3a8eb9SGleb Smirnoff /* If reach this point, queue q was idle */
6733b3a8eb9SGleb Smirnoff grp = cl->grp;
6743b3a8eb9SGleb Smirnoff qfq_update_start(q, cl); /* adjust start time */
6753b3a8eb9SGleb Smirnoff /* compute new finish time and rounded start. */
6763b3a8eb9SGleb Smirnoff cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
6773b3a8eb9SGleb Smirnoff roundedS = qfq_round_down(cl->S, grp->slot_shift);
6783b3a8eb9SGleb Smirnoff
6793b3a8eb9SGleb Smirnoff /*
6803b3a8eb9SGleb Smirnoff * insert cl in the correct bucket.
6813b3a8eb9SGleb Smirnoff * If cl->S >= grp->S we don't need to adjust the
6823b3a8eb9SGleb Smirnoff * bucket list and simply go to the insertion phase.
6833b3a8eb9SGleb Smirnoff * Otherwise grp->S is decreasing, we must make room
6843b3a8eb9SGleb Smirnoff * in the bucket list, and also recompute the group state.
6853b3a8eb9SGleb Smirnoff * Finally, if there were no flows in this group and nobody
6863b3a8eb9SGleb Smirnoff * was in ER make sure to adjust V.
6873b3a8eb9SGleb Smirnoff */
6883b3a8eb9SGleb Smirnoff if (grp->full_slots) {
6893b3a8eb9SGleb Smirnoff if (!qfq_gt(grp->S, cl->S))
6903b3a8eb9SGleb Smirnoff goto skip_update;
6913b3a8eb9SGleb Smirnoff /* create a slot for this cl->S */
6923b3a8eb9SGleb Smirnoff qfq_slot_rotate(q, grp, roundedS);
6933b3a8eb9SGleb Smirnoff /* group was surely ineligible, remove */
6943b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[IR]);
6953b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[IB]);
6963b3a8eb9SGleb Smirnoff } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
6973b3a8eb9SGleb Smirnoff q->V = roundedS;
6983b3a8eb9SGleb Smirnoff
6993b3a8eb9SGleb Smirnoff grp->S = roundedS;
7003b3a8eb9SGleb Smirnoff grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
7013b3a8eb9SGleb Smirnoff s = qfq_calc_state(q, grp);
7023b3a8eb9SGleb Smirnoff __set_bit(grp->index, &q->bitmaps[s]);
7033b3a8eb9SGleb Smirnoff ND("new state %d 0x%x", s, q->bitmaps[s]);
7043b3a8eb9SGleb Smirnoff ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
7053b3a8eb9SGleb Smirnoff skip_update:
7063b3a8eb9SGleb Smirnoff qfq_slot_insert(grp, cl, roundedS);
7073b3a8eb9SGleb Smirnoff
7083b3a8eb9SGleb Smirnoff return 0;
7093b3a8eb9SGleb Smirnoff }
7103b3a8eb9SGleb Smirnoff
7113b3a8eb9SGleb Smirnoff #if 0
7123b3a8eb9SGleb Smirnoff static inline void
7133b3a8eb9SGleb Smirnoff qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
7143b3a8eb9SGleb Smirnoff struct qfq_class *cl, struct qfq_class **pprev)
7153b3a8eb9SGleb Smirnoff {
7163b3a8eb9SGleb Smirnoff unsigned int i, offset;
7173b3a8eb9SGleb Smirnoff uint64_t roundedS;
7183b3a8eb9SGleb Smirnoff
7193b3a8eb9SGleb Smirnoff roundedS = qfq_round_down(cl->S, grp->slot_shift);
7203b3a8eb9SGleb Smirnoff offset = (roundedS - grp->S) >> grp->slot_shift;
7213b3a8eb9SGleb Smirnoff i = (grp->front + offset) % QFQ_MAX_SLOTS;
7223b3a8eb9SGleb Smirnoff
7233b3a8eb9SGleb Smirnoff #ifdef notyet
7243b3a8eb9SGleb Smirnoff if (!pprev) {
7253b3a8eb9SGleb Smirnoff pprev = &grp->slots[i];
7263b3a8eb9SGleb Smirnoff while (*pprev && *pprev != cl)
7273b3a8eb9SGleb Smirnoff pprev = &(*pprev)->next;
7283b3a8eb9SGleb Smirnoff }
7293b3a8eb9SGleb Smirnoff #endif
7303b3a8eb9SGleb Smirnoff
7313b3a8eb9SGleb Smirnoff *pprev = cl->next;
7323b3a8eb9SGleb Smirnoff if (!grp->slots[i])
7333b3a8eb9SGleb Smirnoff __clear_bit(offset, &grp->full_slots);
7343b3a8eb9SGleb Smirnoff }
7353b3a8eb9SGleb Smirnoff
7363b3a8eb9SGleb Smirnoff /*
7373b3a8eb9SGleb Smirnoff * called to forcibly destroy a queue.
7383b3a8eb9SGleb Smirnoff * If the queue is not in the front bucket, or if it has
7393b3a8eb9SGleb Smirnoff * other queues in the front bucket, we can simply remove
7403b3a8eb9SGleb Smirnoff * the queue with no other side effects.
7413b3a8eb9SGleb Smirnoff * Otherwise we must propagate the event up.
7423b3a8eb9SGleb Smirnoff * XXX description to be completed.
7433b3a8eb9SGleb Smirnoff */
7443b3a8eb9SGleb Smirnoff static void
7453b3a8eb9SGleb Smirnoff qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
7463b3a8eb9SGleb Smirnoff struct qfq_class **pprev)
7473b3a8eb9SGleb Smirnoff {
7483b3a8eb9SGleb Smirnoff struct qfq_group *grp = &q->groups[cl->index];
7493b3a8eb9SGleb Smirnoff unsigned long mask;
7503b3a8eb9SGleb Smirnoff uint64_t roundedS;
7513b3a8eb9SGleb Smirnoff int s;
7523b3a8eb9SGleb Smirnoff
7533b3a8eb9SGleb Smirnoff cl->F = cl->S; // not needed if the class goes away.
7543b3a8eb9SGleb Smirnoff qfq_slot_remove(q, grp, cl, pprev);
7553b3a8eb9SGleb Smirnoff
7563b3a8eb9SGleb Smirnoff if (!grp->full_slots) {
7573b3a8eb9SGleb Smirnoff /* nothing left in the group, remove from all sets.
7583b3a8eb9SGleb Smirnoff * Do ER last because if we were blocking other groups
7593b3a8eb9SGleb Smirnoff * we must unblock them.
7603b3a8eb9SGleb Smirnoff */
7613b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[IR]);
7623b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[EB]);
7633b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[IB]);
7643b3a8eb9SGleb Smirnoff
7653b3a8eb9SGleb Smirnoff if (test_bit(grp->index, &q->bitmaps[ER]) &&
7663b3a8eb9SGleb Smirnoff !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
7673b3a8eb9SGleb Smirnoff mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
7683b3a8eb9SGleb Smirnoff if (mask)
7693b3a8eb9SGleb Smirnoff mask = ~((1UL << __fls(mask)) - 1);
7703b3a8eb9SGleb Smirnoff else
7713b3a8eb9SGleb Smirnoff mask = ~0UL;
7723b3a8eb9SGleb Smirnoff qfq_move_groups(q, mask, EB, ER);
7733b3a8eb9SGleb Smirnoff qfq_move_groups(q, mask, IB, IR);
7743b3a8eb9SGleb Smirnoff }
7753b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[ER]);
7763b3a8eb9SGleb Smirnoff } else if (!grp->slots[grp->front]) {
7773b3a8eb9SGleb Smirnoff cl = qfq_slot_scan(grp);
7783b3a8eb9SGleb Smirnoff roundedS = qfq_round_down(cl->S, grp->slot_shift);
7793b3a8eb9SGleb Smirnoff if (grp->S != roundedS) {
7803b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[ER]);
7813b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[IR]);
7823b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[EB]);
7833b3a8eb9SGleb Smirnoff __clear_bit(grp->index, &q->bitmaps[IB]);
7843b3a8eb9SGleb Smirnoff grp->S = roundedS;
7853b3a8eb9SGleb Smirnoff grp->F = roundedS + (2ULL << grp->slot_shift);
7863b3a8eb9SGleb Smirnoff s = qfq_calc_state(q, grp);
7873b3a8eb9SGleb Smirnoff __set_bit(grp->index, &q->bitmaps[s]);
7883b3a8eb9SGleb Smirnoff }
7893b3a8eb9SGleb Smirnoff }
7903b3a8eb9SGleb Smirnoff qfq_update_eligible(q, q->V);
7913b3a8eb9SGleb Smirnoff }
7923b3a8eb9SGleb Smirnoff #endif
7933b3a8eb9SGleb Smirnoff
7943b3a8eb9SGleb Smirnoff static int
qfq_new_fsk(struct dn_fsk * f)7953b3a8eb9SGleb Smirnoff qfq_new_fsk(struct dn_fsk *f)
7963b3a8eb9SGleb Smirnoff {
7973b3a8eb9SGleb Smirnoff ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
7983b3a8eb9SGleb Smirnoff ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen");
7993b3a8eb9SGleb Smirnoff ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
8003b3a8eb9SGleb Smirnoff return 0;
8013b3a8eb9SGleb Smirnoff }
8023b3a8eb9SGleb Smirnoff
8033b3a8eb9SGleb Smirnoff /*
8043b3a8eb9SGleb Smirnoff * initialize a new scheduler instance
8053b3a8eb9SGleb Smirnoff */
8063b3a8eb9SGleb Smirnoff static int
qfq_new_sched(struct dn_sch_inst * si)8073b3a8eb9SGleb Smirnoff qfq_new_sched(struct dn_sch_inst *si)
8083b3a8eb9SGleb Smirnoff {
8093b3a8eb9SGleb Smirnoff struct qfq_sched *q = (struct qfq_sched *)(si + 1);
8103b3a8eb9SGleb Smirnoff struct qfq_group *grp;
8113b3a8eb9SGleb Smirnoff int i;
8123b3a8eb9SGleb Smirnoff
8133b3a8eb9SGleb Smirnoff for (i = 0; i <= QFQ_MAX_INDEX; i++) {
8143b3a8eb9SGleb Smirnoff grp = &q->groups[i];
8153b3a8eb9SGleb Smirnoff grp->index = i;
8163b3a8eb9SGleb Smirnoff grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
8173b3a8eb9SGleb Smirnoff (QFQ_MAX_INDEX - i);
8183b3a8eb9SGleb Smirnoff }
8193b3a8eb9SGleb Smirnoff return 0;
8203b3a8eb9SGleb Smirnoff }
8213b3a8eb9SGleb Smirnoff
8223b3a8eb9SGleb Smirnoff /*
8233b3a8eb9SGleb Smirnoff * QFQ scheduler descriptor
8243b3a8eb9SGleb Smirnoff */
8253b3a8eb9SGleb Smirnoff static struct dn_alg qfq_desc = {
8263b3a8eb9SGleb Smirnoff _SI( .type = ) DN_SCHED_QFQ,
8273b3a8eb9SGleb Smirnoff _SI( .name = ) "QFQ",
8283b3a8eb9SGleb Smirnoff _SI( .flags = ) DN_MULTIQUEUE,
8293b3a8eb9SGleb Smirnoff
8303b3a8eb9SGleb Smirnoff _SI( .schk_datalen = ) 0,
8313b3a8eb9SGleb Smirnoff _SI( .si_datalen = ) sizeof(struct qfq_sched),
8323b3a8eb9SGleb Smirnoff _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
8333b3a8eb9SGleb Smirnoff
8343b3a8eb9SGleb Smirnoff _SI( .enqueue = ) qfq_enqueue,
8353b3a8eb9SGleb Smirnoff _SI( .dequeue = ) qfq_dequeue,
8363b3a8eb9SGleb Smirnoff
8373b3a8eb9SGleb Smirnoff _SI( .config = ) NULL,
8383b3a8eb9SGleb Smirnoff _SI( .destroy = ) NULL,
8393b3a8eb9SGleb Smirnoff _SI( .new_sched = ) qfq_new_sched,
8403b3a8eb9SGleb Smirnoff _SI( .free_sched = ) NULL,
8413b3a8eb9SGleb Smirnoff _SI( .new_fsk = ) qfq_new_fsk,
8423b3a8eb9SGleb Smirnoff _SI( .free_fsk = ) NULL,
8433b3a8eb9SGleb Smirnoff _SI( .new_queue = ) qfq_new_queue,
8443b3a8eb9SGleb Smirnoff _SI( .free_queue = ) qfq_free_queue,
84591336b40SDon Lewis #ifdef NEW_AQM
84691336b40SDon Lewis _SI( .getconfig = ) NULL,
84791336b40SDon Lewis #endif
8483b3a8eb9SGleb Smirnoff };
8493b3a8eb9SGleb Smirnoff
8503b3a8eb9SGleb Smirnoff DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
8513b3a8eb9SGleb Smirnoff
8523b3a8eb9SGleb Smirnoff #ifdef QFQ_DEBUG
8533b3a8eb9SGleb Smirnoff static void
dump_groups(struct qfq_sched * q,uint32_t mask)8543b3a8eb9SGleb Smirnoff dump_groups(struct qfq_sched *q, uint32_t mask)
8553b3a8eb9SGleb Smirnoff {
8563b3a8eb9SGleb Smirnoff int i, j;
8573b3a8eb9SGleb Smirnoff
8583b3a8eb9SGleb Smirnoff for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
8593b3a8eb9SGleb Smirnoff struct qfq_group *g = &q->groups[i];
8603b3a8eb9SGleb Smirnoff
8613b3a8eb9SGleb Smirnoff if (0 == (mask & (1<<i)))
8623b3a8eb9SGleb Smirnoff continue;
8633b3a8eb9SGleb Smirnoff for (j = 0; j < QFQ_MAX_SLOTS; j++) {
8643b3a8eb9SGleb Smirnoff if (g->slots[j])
8653b3a8eb9SGleb Smirnoff D(" bucket %d %p", j, g->slots[j]);
8663b3a8eb9SGleb Smirnoff }
867fa57c83cSLuigi Rizzo D("full_slots 0x%llx", (_P64)g->full_slots);
8683b3a8eb9SGleb Smirnoff D(" %2d S 0x%20llx F 0x%llx %c", i,
869fa57c83cSLuigi Rizzo (_P64)g->S, (_P64)g->F,
8703b3a8eb9SGleb Smirnoff mask & (1<<i) ? '1' : '0');
8713b3a8eb9SGleb Smirnoff }
8723b3a8eb9SGleb Smirnoff }
8733b3a8eb9SGleb Smirnoff
8743b3a8eb9SGleb Smirnoff static void
dump_sched(struct qfq_sched * q,const char * msg)8753b3a8eb9SGleb Smirnoff dump_sched(struct qfq_sched *q, const char *msg)
8763b3a8eb9SGleb Smirnoff {
8773b3a8eb9SGleb Smirnoff D("--- in %s: ---", msg);
878fa57c83cSLuigi Rizzo D("loops %d queued %d V 0x%llx", q->loops, q->queued, (_P64)q->V);
879fa57c83cSLuigi Rizzo D(" ER 0x%08x", (unsigned)q->bitmaps[ER]);
880fa57c83cSLuigi Rizzo D(" EB 0x%08x", (unsigned)q->bitmaps[EB]);
881fa57c83cSLuigi Rizzo D(" IR 0x%08x", (unsigned)q->bitmaps[IR]);
882fa57c83cSLuigi Rizzo D(" IB 0x%08x", (unsigned)q->bitmaps[IB]);
8833b3a8eb9SGleb Smirnoff dump_groups(q, 0xffffffff);
8843b3a8eb9SGleb Smirnoff };
8853b3a8eb9SGleb Smirnoff #endif /* QFQ_DEBUG */
886