1*585d763aSVinicius Costa Gomes /* 2*585d763aSVinicius Costa Gomes * net/sched/sch_cbs.c Credit Based Shaper 3*585d763aSVinicius Costa Gomes * 4*585d763aSVinicius Costa Gomes * This program is free software; you can redistribute it and/or 5*585d763aSVinicius Costa Gomes * modify it under the terms of the GNU General Public License 6*585d763aSVinicius Costa Gomes * as published by the Free Software Foundation; either version 7*585d763aSVinicius Costa Gomes * 2 of the License, or (at your option) any later version. 8*585d763aSVinicius Costa Gomes * 9*585d763aSVinicius Costa Gomes * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> 10*585d763aSVinicius Costa Gomes * 11*585d763aSVinicius Costa Gomes */ 12*585d763aSVinicius Costa Gomes 13*585d763aSVinicius Costa Gomes /* Credit Based Shaper (CBS) 14*585d763aSVinicius Costa Gomes * ========================= 15*585d763aSVinicius Costa Gomes * 16*585d763aSVinicius Costa Gomes * This is a simple rate-limiting shaper aimed at TSN applications on 17*585d763aSVinicius Costa Gomes * systems with known traffic workloads. 18*585d763aSVinicius Costa Gomes * 19*585d763aSVinicius Costa Gomes * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, 20*585d763aSVinicius Costa Gomes * Section 8.6.8.2, and explained in more detail in the Annex L of the 21*585d763aSVinicius Costa Gomes * same specification. 22*585d763aSVinicius Costa Gomes * 23*585d763aSVinicius Costa Gomes * There are four tunables to be considered: 24*585d763aSVinicius Costa Gomes * 25*585d763aSVinicius Costa Gomes * 'idleslope': Idleslope is the rate of credits that is 26*585d763aSVinicius Costa Gomes * accumulated (in kilobits per second) when there is at least 27*585d763aSVinicius Costa Gomes * one packet waiting for transmission. Packets are transmitted 28*585d763aSVinicius Costa Gomes * when the current value of credits is equal or greater than 29*585d763aSVinicius Costa Gomes * zero. When there is no packet to be transmitted the amount of 30*585d763aSVinicius Costa Gomes * credits is set to zero. This is the main tunable of the CBS 31*585d763aSVinicius Costa Gomes * algorithm. 32*585d763aSVinicius Costa Gomes * 33*585d763aSVinicius Costa Gomes * 'sendslope': 34*585d763aSVinicius Costa Gomes * Sendslope is the rate of credits that is depleted (it should be a 35*585d763aSVinicius Costa Gomes * negative number of kilobits per second) when a transmission is 36*585d763aSVinicius Costa Gomes * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section 37*585d763aSVinicius Costa Gomes * 8.6.8.2 item g): 38*585d763aSVinicius Costa Gomes * 39*585d763aSVinicius Costa Gomes * sendslope = idleslope - port_transmit_rate 40*585d763aSVinicius Costa Gomes * 41*585d763aSVinicius Costa Gomes * 'hicredit': Hicredit defines the maximum amount of credits (in 42*585d763aSVinicius Costa Gomes * bytes) that can be accumulated. Hicredit depends on the 43*585d763aSVinicius Costa Gomes * characteristics of interfering traffic, 44*585d763aSVinicius Costa Gomes * 'max_interference_size' is the maximum size of any burst of 45*585d763aSVinicius Costa Gomes * traffic that can delay the transmission of a frame that is 46*585d763aSVinicius Costa Gomes * available for transmission for this traffic class, (IEEE 47*585d763aSVinicius Costa Gomes * 802.1Q-2014 Annex L, Equation L-3): 48*585d763aSVinicius Costa Gomes * 49*585d763aSVinicius Costa Gomes * hicredit = max_interference_size * (idleslope / port_transmit_rate) 50*585d763aSVinicius Costa Gomes * 51*585d763aSVinicius Costa Gomes * 'locredit': Locredit is the minimum amount of credits that can 52*585d763aSVinicius Costa Gomes * be reached. It is a function of the traffic flowing through 53*585d763aSVinicius Costa Gomes * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): 54*585d763aSVinicius Costa Gomes * 55*585d763aSVinicius Costa Gomes * locredit = max_frame_size * (sendslope / port_transmit_rate) 56*585d763aSVinicius Costa Gomes */ 57*585d763aSVinicius Costa Gomes 58*585d763aSVinicius Costa Gomes #include <linux/module.h> 59*585d763aSVinicius Costa Gomes #include <linux/types.h> 60*585d763aSVinicius Costa Gomes #include <linux/kernel.h> 61*585d763aSVinicius Costa Gomes #include <linux/string.h> 62*585d763aSVinicius Costa Gomes #include <linux/errno.h> 63*585d763aSVinicius Costa Gomes #include <linux/skbuff.h> 64*585d763aSVinicius Costa Gomes #include <net/netlink.h> 65*585d763aSVinicius Costa Gomes #include <net/sch_generic.h> 66*585d763aSVinicius Costa Gomes #include <net/pkt_sched.h> 67*585d763aSVinicius Costa Gomes 68*585d763aSVinicius Costa Gomes #define BYTES_PER_KBIT (1000LL / 8) 69*585d763aSVinicius Costa Gomes 70*585d763aSVinicius Costa Gomes struct cbs_sched_data { 71*585d763aSVinicius Costa Gomes s64 port_rate; /* in bytes/s */ 72*585d763aSVinicius Costa Gomes s64 last; /* timestamp in ns */ 73*585d763aSVinicius Costa Gomes s64 credits; /* in bytes */ 74*585d763aSVinicius Costa Gomes s32 locredit; /* in bytes */ 75*585d763aSVinicius Costa Gomes s32 hicredit; /* in bytes */ 76*585d763aSVinicius Costa Gomes s64 sendslope; /* in bytes/s */ 77*585d763aSVinicius Costa Gomes s64 idleslope; /* in bytes/s */ 78*585d763aSVinicius Costa Gomes struct qdisc_watchdog watchdog; 79*585d763aSVinicius Costa Gomes int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); 80*585d763aSVinicius Costa Gomes struct sk_buff *(*dequeue)(struct Qdisc *sch); 81*585d763aSVinicius Costa Gomes }; 82*585d763aSVinicius Costa Gomes 83*585d763aSVinicius Costa Gomes static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) 84*585d763aSVinicius Costa Gomes { 85*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 86*585d763aSVinicius Costa Gomes 87*585d763aSVinicius Costa Gomes if (sch->q.qlen == 0 && q->credits > 0) { 88*585d763aSVinicius Costa Gomes /* We need to stop accumulating credits when there's 89*585d763aSVinicius Costa Gomes * no enqueued packets and q->credits is positive. 90*585d763aSVinicius Costa Gomes */ 91*585d763aSVinicius Costa Gomes q->credits = 0; 92*585d763aSVinicius Costa Gomes q->last = ktime_get_ns(); 93*585d763aSVinicius Costa Gomes } 94*585d763aSVinicius Costa Gomes 95*585d763aSVinicius Costa Gomes return qdisc_enqueue_tail(skb, sch); 96*585d763aSVinicius Costa Gomes } 97*585d763aSVinicius Costa Gomes 98*585d763aSVinicius Costa Gomes static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, 99*585d763aSVinicius Costa Gomes struct sk_buff **to_free) 100*585d763aSVinicius Costa Gomes { 101*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 102*585d763aSVinicius Costa Gomes 103*585d763aSVinicius Costa Gomes return q->enqueue(skb, sch); 104*585d763aSVinicius Costa Gomes } 105*585d763aSVinicius Costa Gomes 106*585d763aSVinicius Costa Gomes /* timediff is in ns, slope is in bytes/s */ 107*585d763aSVinicius Costa Gomes static s64 timediff_to_credits(s64 timediff, s64 slope) 108*585d763aSVinicius Costa Gomes { 109*585d763aSVinicius Costa Gomes return div64_s64(timediff * slope, NSEC_PER_SEC); 110*585d763aSVinicius Costa Gomes } 111*585d763aSVinicius Costa Gomes 112*585d763aSVinicius Costa Gomes static s64 delay_from_credits(s64 credits, s64 slope) 113*585d763aSVinicius Costa Gomes { 114*585d763aSVinicius Costa Gomes if (unlikely(slope == 0)) 115*585d763aSVinicius Costa Gomes return S64_MAX; 116*585d763aSVinicius Costa Gomes 117*585d763aSVinicius Costa Gomes return div64_s64(-credits * NSEC_PER_SEC, slope); 118*585d763aSVinicius Costa Gomes } 119*585d763aSVinicius Costa Gomes 120*585d763aSVinicius Costa Gomes static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) 121*585d763aSVinicius Costa Gomes { 122*585d763aSVinicius Costa Gomes if (unlikely(port_rate == 0)) 123*585d763aSVinicius Costa Gomes return S64_MAX; 124*585d763aSVinicius Costa Gomes 125*585d763aSVinicius Costa Gomes return div64_s64(len * slope, port_rate); 126*585d763aSVinicius Costa Gomes } 127*585d763aSVinicius Costa Gomes 128*585d763aSVinicius Costa Gomes static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) 129*585d763aSVinicius Costa Gomes { 130*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 131*585d763aSVinicius Costa Gomes s64 now = ktime_get_ns(); 132*585d763aSVinicius Costa Gomes struct sk_buff *skb; 133*585d763aSVinicius Costa Gomes s64 credits; 134*585d763aSVinicius Costa Gomes int len; 135*585d763aSVinicius Costa Gomes 136*585d763aSVinicius Costa Gomes if (q->credits < 0) { 137*585d763aSVinicius Costa Gomes credits = timediff_to_credits(now - q->last, q->idleslope); 138*585d763aSVinicius Costa Gomes 139*585d763aSVinicius Costa Gomes credits = q->credits + credits; 140*585d763aSVinicius Costa Gomes q->credits = min_t(s64, credits, q->hicredit); 141*585d763aSVinicius Costa Gomes 142*585d763aSVinicius Costa Gomes if (q->credits < 0) { 143*585d763aSVinicius Costa Gomes s64 delay; 144*585d763aSVinicius Costa Gomes 145*585d763aSVinicius Costa Gomes delay = delay_from_credits(q->credits, q->idleslope); 146*585d763aSVinicius Costa Gomes qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); 147*585d763aSVinicius Costa Gomes 148*585d763aSVinicius Costa Gomes q->last = now; 149*585d763aSVinicius Costa Gomes 150*585d763aSVinicius Costa Gomes return NULL; 151*585d763aSVinicius Costa Gomes } 152*585d763aSVinicius Costa Gomes } 153*585d763aSVinicius Costa Gomes 154*585d763aSVinicius Costa Gomes skb = qdisc_dequeue_head(sch); 155*585d763aSVinicius Costa Gomes if (!skb) 156*585d763aSVinicius Costa Gomes return NULL; 157*585d763aSVinicius Costa Gomes 158*585d763aSVinicius Costa Gomes len = qdisc_pkt_len(skb); 159*585d763aSVinicius Costa Gomes 160*585d763aSVinicius Costa Gomes /* As sendslope is a negative number, this will decrease the 161*585d763aSVinicius Costa Gomes * amount of q->credits. 162*585d763aSVinicius Costa Gomes */ 163*585d763aSVinicius Costa Gomes credits = credits_from_len(len, q->sendslope, q->port_rate); 164*585d763aSVinicius Costa Gomes credits += q->credits; 165*585d763aSVinicius Costa Gomes 166*585d763aSVinicius Costa Gomes q->credits = max_t(s64, credits, q->locredit); 167*585d763aSVinicius Costa Gomes q->last = now; 168*585d763aSVinicius Costa Gomes 169*585d763aSVinicius Costa Gomes return skb; 170*585d763aSVinicius Costa Gomes } 171*585d763aSVinicius Costa Gomes 172*585d763aSVinicius Costa Gomes static struct sk_buff *cbs_dequeue(struct Qdisc *sch) 173*585d763aSVinicius Costa Gomes { 174*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 175*585d763aSVinicius Costa Gomes 176*585d763aSVinicius Costa Gomes return q->dequeue(sch); 177*585d763aSVinicius Costa Gomes } 178*585d763aSVinicius Costa Gomes 179*585d763aSVinicius Costa Gomes static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { 180*585d763aSVinicius Costa Gomes [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, 181*585d763aSVinicius Costa Gomes }; 182*585d763aSVinicius Costa Gomes 183*585d763aSVinicius Costa Gomes static int cbs_change(struct Qdisc *sch, struct nlattr *opt) 184*585d763aSVinicius Costa Gomes { 185*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 186*585d763aSVinicius Costa Gomes struct net_device *dev = qdisc_dev(sch); 187*585d763aSVinicius Costa Gomes struct nlattr *tb[TCA_CBS_MAX + 1]; 188*585d763aSVinicius Costa Gomes struct ethtool_link_ksettings ecmd; 189*585d763aSVinicius Costa Gomes struct tc_cbs_qopt *qopt; 190*585d763aSVinicius Costa Gomes s64 link_speed; 191*585d763aSVinicius Costa Gomes int err; 192*585d763aSVinicius Costa Gomes 193*585d763aSVinicius Costa Gomes err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); 194*585d763aSVinicius Costa Gomes if (err < 0) 195*585d763aSVinicius Costa Gomes return err; 196*585d763aSVinicius Costa Gomes 197*585d763aSVinicius Costa Gomes if (!tb[TCA_CBS_PARMS]) 198*585d763aSVinicius Costa Gomes return -EINVAL; 199*585d763aSVinicius Costa Gomes 200*585d763aSVinicius Costa Gomes qopt = nla_data(tb[TCA_CBS_PARMS]); 201*585d763aSVinicius Costa Gomes 202*585d763aSVinicius Costa Gomes if (qopt->offload) 203*585d763aSVinicius Costa Gomes return -EOPNOTSUPP; 204*585d763aSVinicius Costa Gomes 205*585d763aSVinicius Costa Gomes if (!__ethtool_get_link_ksettings(dev, &ecmd)) 206*585d763aSVinicius Costa Gomes link_speed = ecmd.base.speed; 207*585d763aSVinicius Costa Gomes else 208*585d763aSVinicius Costa Gomes link_speed = SPEED_1000; 209*585d763aSVinicius Costa Gomes 210*585d763aSVinicius Costa Gomes q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; 211*585d763aSVinicius Costa Gomes 212*585d763aSVinicius Costa Gomes q->enqueue = cbs_enqueue_soft; 213*585d763aSVinicius Costa Gomes q->dequeue = cbs_dequeue_soft; 214*585d763aSVinicius Costa Gomes 215*585d763aSVinicius Costa Gomes q->hicredit = qopt->hicredit; 216*585d763aSVinicius Costa Gomes q->locredit = qopt->locredit; 217*585d763aSVinicius Costa Gomes q->idleslope = qopt->idleslope * BYTES_PER_KBIT; 218*585d763aSVinicius Costa Gomes q->sendslope = qopt->sendslope * BYTES_PER_KBIT; 219*585d763aSVinicius Costa Gomes 220*585d763aSVinicius Costa Gomes return 0; 221*585d763aSVinicius Costa Gomes } 222*585d763aSVinicius Costa Gomes 223*585d763aSVinicius Costa Gomes static int cbs_init(struct Qdisc *sch, struct nlattr *opt) 224*585d763aSVinicius Costa Gomes { 225*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 226*585d763aSVinicius Costa Gomes 227*585d763aSVinicius Costa Gomes if (!opt) 228*585d763aSVinicius Costa Gomes return -EINVAL; 229*585d763aSVinicius Costa Gomes 230*585d763aSVinicius Costa Gomes qdisc_watchdog_init(&q->watchdog, sch); 231*585d763aSVinicius Costa Gomes 232*585d763aSVinicius Costa Gomes return cbs_change(sch, opt); 233*585d763aSVinicius Costa Gomes } 234*585d763aSVinicius Costa Gomes 235*585d763aSVinicius Costa Gomes static void cbs_destroy(struct Qdisc *sch) 236*585d763aSVinicius Costa Gomes { 237*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 238*585d763aSVinicius Costa Gomes 239*585d763aSVinicius Costa Gomes qdisc_watchdog_cancel(&q->watchdog); 240*585d763aSVinicius Costa Gomes } 241*585d763aSVinicius Costa Gomes 242*585d763aSVinicius Costa Gomes static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) 243*585d763aSVinicius Costa Gomes { 244*585d763aSVinicius Costa Gomes struct cbs_sched_data *q = qdisc_priv(sch); 245*585d763aSVinicius Costa Gomes struct tc_cbs_qopt opt = { }; 246*585d763aSVinicius Costa Gomes struct nlattr *nest; 247*585d763aSVinicius Costa Gomes 248*585d763aSVinicius Costa Gomes nest = nla_nest_start(skb, TCA_OPTIONS); 249*585d763aSVinicius Costa Gomes if (!nest) 250*585d763aSVinicius Costa Gomes goto nla_put_failure; 251*585d763aSVinicius Costa Gomes 252*585d763aSVinicius Costa Gomes opt.hicredit = q->hicredit; 253*585d763aSVinicius Costa Gomes opt.locredit = q->locredit; 254*585d763aSVinicius Costa Gomes opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); 255*585d763aSVinicius Costa Gomes opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); 256*585d763aSVinicius Costa Gomes opt.offload = 0; 257*585d763aSVinicius Costa Gomes 258*585d763aSVinicius Costa Gomes if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) 259*585d763aSVinicius Costa Gomes goto nla_put_failure; 260*585d763aSVinicius Costa Gomes 261*585d763aSVinicius Costa Gomes return nla_nest_end(skb, nest); 262*585d763aSVinicius Costa Gomes 263*585d763aSVinicius Costa Gomes nla_put_failure: 264*585d763aSVinicius Costa Gomes nla_nest_cancel(skb, nest); 265*585d763aSVinicius Costa Gomes return -1; 266*585d763aSVinicius Costa Gomes } 267*585d763aSVinicius Costa Gomes 268*585d763aSVinicius Costa Gomes static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { 269*585d763aSVinicius Costa Gomes .id = "cbs", 270*585d763aSVinicius Costa Gomes .priv_size = sizeof(struct cbs_sched_data), 271*585d763aSVinicius Costa Gomes .enqueue = cbs_enqueue, 272*585d763aSVinicius Costa Gomes .dequeue = cbs_dequeue, 273*585d763aSVinicius Costa Gomes .peek = qdisc_peek_dequeued, 274*585d763aSVinicius Costa Gomes .init = cbs_init, 275*585d763aSVinicius Costa Gomes .reset = qdisc_reset_queue, 276*585d763aSVinicius Costa Gomes .destroy = cbs_destroy, 277*585d763aSVinicius Costa Gomes .change = cbs_change, 278*585d763aSVinicius Costa Gomes .dump = cbs_dump, 279*585d763aSVinicius Costa Gomes .owner = THIS_MODULE, 280*585d763aSVinicius Costa Gomes }; 281*585d763aSVinicius Costa Gomes 282*585d763aSVinicius Costa Gomes static int __init cbs_module_init(void) 283*585d763aSVinicius Costa Gomes { 284*585d763aSVinicius Costa Gomes return register_qdisc(&cbs_qdisc_ops); 285*585d763aSVinicius Costa Gomes } 286*585d763aSVinicius Costa Gomes 287*585d763aSVinicius Costa Gomes static void __exit cbs_module_exit(void) 288*585d763aSVinicius Costa Gomes { 289*585d763aSVinicius Costa Gomes unregister_qdisc(&cbs_qdisc_ops); 290*585d763aSVinicius Costa Gomes } 291*585d763aSVinicius Costa Gomes module_init(cbs_module_init) 292*585d763aSVinicius Costa Gomes module_exit(cbs_module_exit) 293*585d763aSVinicius Costa Gomes MODULE_LICENSE("GPL"); 294