xref: /linux/net/sched/sch_cbs.c (revision 585d763af09cc21daf48ecc873604ccdb70f6014)
1*585d763aSVinicius Costa Gomes /*
2*585d763aSVinicius Costa Gomes  * net/sched/sch_cbs.c	Credit Based Shaper
3*585d763aSVinicius Costa Gomes  *
4*585d763aSVinicius Costa Gomes  *		This program is free software; you can redistribute it and/or
5*585d763aSVinicius Costa Gomes  *		modify it under the terms of the GNU General Public License
6*585d763aSVinicius Costa Gomes  *		as published by the Free Software Foundation; either version
7*585d763aSVinicius Costa Gomes  *		2 of the License, or (at your option) any later version.
8*585d763aSVinicius Costa Gomes  *
9*585d763aSVinicius Costa Gomes  * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
10*585d763aSVinicius Costa Gomes  *
11*585d763aSVinicius Costa Gomes  */
12*585d763aSVinicius Costa Gomes 
13*585d763aSVinicius Costa Gomes /* Credit Based Shaper (CBS)
14*585d763aSVinicius Costa Gomes  * =========================
15*585d763aSVinicius Costa Gomes  *
16*585d763aSVinicius Costa Gomes  * This is a simple rate-limiting shaper aimed at TSN applications on
17*585d763aSVinicius Costa Gomes  * systems with known traffic workloads.
18*585d763aSVinicius Costa Gomes  *
19*585d763aSVinicius Costa Gomes  * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
20*585d763aSVinicius Costa Gomes  * Section 8.6.8.2, and explained in more detail in the Annex L of the
21*585d763aSVinicius Costa Gomes  * same specification.
22*585d763aSVinicius Costa Gomes  *
23*585d763aSVinicius Costa Gomes  * There are four tunables to be considered:
24*585d763aSVinicius Costa Gomes  *
25*585d763aSVinicius Costa Gomes  *	'idleslope': Idleslope is the rate of credits that is
26*585d763aSVinicius Costa Gomes  *	accumulated (in kilobits per second) when there is at least
27*585d763aSVinicius Costa Gomes  *	one packet waiting for transmission. Packets are transmitted
28*585d763aSVinicius Costa Gomes  *	when the current value of credits is equal or greater than
29*585d763aSVinicius Costa Gomes  *	zero. When there is no packet to be transmitted the amount of
30*585d763aSVinicius Costa Gomes  *	credits is set to zero. This is the main tunable of the CBS
31*585d763aSVinicius Costa Gomes  *	algorithm.
32*585d763aSVinicius Costa Gomes  *
33*585d763aSVinicius Costa Gomes  *	'sendslope':
34*585d763aSVinicius Costa Gomes  *	Sendslope is the rate of credits that is depleted (it should be a
35*585d763aSVinicius Costa Gomes  *	negative number of kilobits per second) when a transmission is
36*585d763aSVinicius Costa Gomes  *	occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
37*585d763aSVinicius Costa Gomes  *	8.6.8.2 item g):
38*585d763aSVinicius Costa Gomes  *
39*585d763aSVinicius Costa Gomes  *	sendslope = idleslope - port_transmit_rate
40*585d763aSVinicius Costa Gomes  *
41*585d763aSVinicius Costa Gomes  *	'hicredit': Hicredit defines the maximum amount of credits (in
42*585d763aSVinicius Costa Gomes  *	bytes) that can be accumulated. Hicredit depends on the
43*585d763aSVinicius Costa Gomes  *	characteristics of interfering traffic,
44*585d763aSVinicius Costa Gomes  *	'max_interference_size' is the maximum size of any burst of
45*585d763aSVinicius Costa Gomes  *	traffic that can delay the transmission of a frame that is
46*585d763aSVinicius Costa Gomes  *	available for transmission for this traffic class, (IEEE
47*585d763aSVinicius Costa Gomes  *	802.1Q-2014 Annex L, Equation L-3):
48*585d763aSVinicius Costa Gomes  *
49*585d763aSVinicius Costa Gomes  *	hicredit = max_interference_size * (idleslope / port_transmit_rate)
50*585d763aSVinicius Costa Gomes  *
51*585d763aSVinicius Costa Gomes  *	'locredit': Locredit is the minimum amount of credits that can
52*585d763aSVinicius Costa Gomes  *	be reached. It is a function of the traffic flowing through
53*585d763aSVinicius Costa Gomes  *	this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
54*585d763aSVinicius Costa Gomes  *
55*585d763aSVinicius Costa Gomes  *	locredit = max_frame_size * (sendslope / port_transmit_rate)
56*585d763aSVinicius Costa Gomes  */
57*585d763aSVinicius Costa Gomes 
58*585d763aSVinicius Costa Gomes #include <linux/module.h>
59*585d763aSVinicius Costa Gomes #include <linux/types.h>
60*585d763aSVinicius Costa Gomes #include <linux/kernel.h>
61*585d763aSVinicius Costa Gomes #include <linux/string.h>
62*585d763aSVinicius Costa Gomes #include <linux/errno.h>
63*585d763aSVinicius Costa Gomes #include <linux/skbuff.h>
64*585d763aSVinicius Costa Gomes #include <net/netlink.h>
65*585d763aSVinicius Costa Gomes #include <net/sch_generic.h>
66*585d763aSVinicius Costa Gomes #include <net/pkt_sched.h>
67*585d763aSVinicius Costa Gomes 
/* Conversion factor from kilobits (1000 bits) to bytes (8 bits each). */
#define BYTES_PER_KBIT (1000LL / 8)
69*585d763aSVinicius Costa Gomes 
70*585d763aSVinicius Costa Gomes struct cbs_sched_data {
71*585d763aSVinicius Costa Gomes 	s64 port_rate; /* in bytes/s */
72*585d763aSVinicius Costa Gomes 	s64 last; /* timestamp in ns */
73*585d763aSVinicius Costa Gomes 	s64 credits; /* in bytes */
74*585d763aSVinicius Costa Gomes 	s32 locredit; /* in bytes */
75*585d763aSVinicius Costa Gomes 	s32 hicredit; /* in bytes */
76*585d763aSVinicius Costa Gomes 	s64 sendslope; /* in bytes/s */
77*585d763aSVinicius Costa Gomes 	s64 idleslope; /* in bytes/s */
78*585d763aSVinicius Costa Gomes 	struct qdisc_watchdog watchdog;
79*585d763aSVinicius Costa Gomes 	int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
80*585d763aSVinicius Costa Gomes 	struct sk_buff *(*dequeue)(struct Qdisc *sch);
81*585d763aSVinicius Costa Gomes };
82*585d763aSVinicius Costa Gomes 
83*585d763aSVinicius Costa Gomes static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
84*585d763aSVinicius Costa Gomes {
85*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
86*585d763aSVinicius Costa Gomes 
87*585d763aSVinicius Costa Gomes 	if (sch->q.qlen == 0 && q->credits > 0) {
88*585d763aSVinicius Costa Gomes 		/* We need to stop accumulating credits when there's
89*585d763aSVinicius Costa Gomes 		 * no enqueued packets and q->credits is positive.
90*585d763aSVinicius Costa Gomes 		 */
91*585d763aSVinicius Costa Gomes 		q->credits = 0;
92*585d763aSVinicius Costa Gomes 		q->last = ktime_get_ns();
93*585d763aSVinicius Costa Gomes 	}
94*585d763aSVinicius Costa Gomes 
95*585d763aSVinicius Costa Gomes 	return qdisc_enqueue_tail(skb, sch);
96*585d763aSVinicius Costa Gomes }
97*585d763aSVinicius Costa Gomes 
98*585d763aSVinicius Costa Gomes static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
99*585d763aSVinicius Costa Gomes 		       struct sk_buff **to_free)
100*585d763aSVinicius Costa Gomes {
101*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
102*585d763aSVinicius Costa Gomes 
103*585d763aSVinicius Costa Gomes 	return q->enqueue(skb, sch);
104*585d763aSVinicius Costa Gomes }
105*585d763aSVinicius Costa Gomes 
106*585d763aSVinicius Costa Gomes /* timediff is in ns, slope is in bytes/s */
107*585d763aSVinicius Costa Gomes static s64 timediff_to_credits(s64 timediff, s64 slope)
108*585d763aSVinicius Costa Gomes {
109*585d763aSVinicius Costa Gomes 	return div64_s64(timediff * slope, NSEC_PER_SEC);
110*585d763aSVinicius Costa Gomes }
111*585d763aSVinicius Costa Gomes 
112*585d763aSVinicius Costa Gomes static s64 delay_from_credits(s64 credits, s64 slope)
113*585d763aSVinicius Costa Gomes {
114*585d763aSVinicius Costa Gomes 	if (unlikely(slope == 0))
115*585d763aSVinicius Costa Gomes 		return S64_MAX;
116*585d763aSVinicius Costa Gomes 
117*585d763aSVinicius Costa Gomes 	return div64_s64(-credits * NSEC_PER_SEC, slope);
118*585d763aSVinicius Costa Gomes }
119*585d763aSVinicius Costa Gomes 
120*585d763aSVinicius Costa Gomes static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
121*585d763aSVinicius Costa Gomes {
122*585d763aSVinicius Costa Gomes 	if (unlikely(port_rate == 0))
123*585d763aSVinicius Costa Gomes 		return S64_MAX;
124*585d763aSVinicius Costa Gomes 
125*585d763aSVinicius Costa Gomes 	return div64_s64(len * slope, port_rate);
126*585d763aSVinicius Costa Gomes }
127*585d763aSVinicius Costa Gomes 
128*585d763aSVinicius Costa Gomes static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
129*585d763aSVinicius Costa Gomes {
130*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
131*585d763aSVinicius Costa Gomes 	s64 now = ktime_get_ns();
132*585d763aSVinicius Costa Gomes 	struct sk_buff *skb;
133*585d763aSVinicius Costa Gomes 	s64 credits;
134*585d763aSVinicius Costa Gomes 	int len;
135*585d763aSVinicius Costa Gomes 
136*585d763aSVinicius Costa Gomes 	if (q->credits < 0) {
137*585d763aSVinicius Costa Gomes 		credits = timediff_to_credits(now - q->last, q->idleslope);
138*585d763aSVinicius Costa Gomes 
139*585d763aSVinicius Costa Gomes 		credits = q->credits + credits;
140*585d763aSVinicius Costa Gomes 		q->credits = min_t(s64, credits, q->hicredit);
141*585d763aSVinicius Costa Gomes 
142*585d763aSVinicius Costa Gomes 		if (q->credits < 0) {
143*585d763aSVinicius Costa Gomes 			s64 delay;
144*585d763aSVinicius Costa Gomes 
145*585d763aSVinicius Costa Gomes 			delay = delay_from_credits(q->credits, q->idleslope);
146*585d763aSVinicius Costa Gomes 			qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
147*585d763aSVinicius Costa Gomes 
148*585d763aSVinicius Costa Gomes 			q->last = now;
149*585d763aSVinicius Costa Gomes 
150*585d763aSVinicius Costa Gomes 			return NULL;
151*585d763aSVinicius Costa Gomes 		}
152*585d763aSVinicius Costa Gomes 	}
153*585d763aSVinicius Costa Gomes 
154*585d763aSVinicius Costa Gomes 	skb = qdisc_dequeue_head(sch);
155*585d763aSVinicius Costa Gomes 	if (!skb)
156*585d763aSVinicius Costa Gomes 		return NULL;
157*585d763aSVinicius Costa Gomes 
158*585d763aSVinicius Costa Gomes 	len = qdisc_pkt_len(skb);
159*585d763aSVinicius Costa Gomes 
160*585d763aSVinicius Costa Gomes 	/* As sendslope is a negative number, this will decrease the
161*585d763aSVinicius Costa Gomes 	 * amount of q->credits.
162*585d763aSVinicius Costa Gomes 	 */
163*585d763aSVinicius Costa Gomes 	credits = credits_from_len(len, q->sendslope, q->port_rate);
164*585d763aSVinicius Costa Gomes 	credits += q->credits;
165*585d763aSVinicius Costa Gomes 
166*585d763aSVinicius Costa Gomes 	q->credits = max_t(s64, credits, q->locredit);
167*585d763aSVinicius Costa Gomes 	q->last = now;
168*585d763aSVinicius Costa Gomes 
169*585d763aSVinicius Costa Gomes 	return skb;
170*585d763aSVinicius Costa Gomes }
171*585d763aSVinicius Costa Gomes 
172*585d763aSVinicius Costa Gomes static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
173*585d763aSVinicius Costa Gomes {
174*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
175*585d763aSVinicius Costa Gomes 
176*585d763aSVinicius Costa Gomes 	return q->dequeue(sch);
177*585d763aSVinicius Costa Gomes }
178*585d763aSVinicius Costa Gomes 
179*585d763aSVinicius Costa Gomes static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
180*585d763aSVinicius Costa Gomes 	[TCA_CBS_PARMS]	= { .len = sizeof(struct tc_cbs_qopt) },
181*585d763aSVinicius Costa Gomes };
182*585d763aSVinicius Costa Gomes 
183*585d763aSVinicius Costa Gomes static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
184*585d763aSVinicius Costa Gomes {
185*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
186*585d763aSVinicius Costa Gomes 	struct net_device *dev = qdisc_dev(sch);
187*585d763aSVinicius Costa Gomes 	struct nlattr *tb[TCA_CBS_MAX + 1];
188*585d763aSVinicius Costa Gomes 	struct ethtool_link_ksettings ecmd;
189*585d763aSVinicius Costa Gomes 	struct tc_cbs_qopt *qopt;
190*585d763aSVinicius Costa Gomes 	s64 link_speed;
191*585d763aSVinicius Costa Gomes 	int err;
192*585d763aSVinicius Costa Gomes 
193*585d763aSVinicius Costa Gomes 	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
194*585d763aSVinicius Costa Gomes 	if (err < 0)
195*585d763aSVinicius Costa Gomes 		return err;
196*585d763aSVinicius Costa Gomes 
197*585d763aSVinicius Costa Gomes 	if (!tb[TCA_CBS_PARMS])
198*585d763aSVinicius Costa Gomes 		return -EINVAL;
199*585d763aSVinicius Costa Gomes 
200*585d763aSVinicius Costa Gomes 	qopt = nla_data(tb[TCA_CBS_PARMS]);
201*585d763aSVinicius Costa Gomes 
202*585d763aSVinicius Costa Gomes 	if (qopt->offload)
203*585d763aSVinicius Costa Gomes 		return -EOPNOTSUPP;
204*585d763aSVinicius Costa Gomes 
205*585d763aSVinicius Costa Gomes 	if (!__ethtool_get_link_ksettings(dev, &ecmd))
206*585d763aSVinicius Costa Gomes 		link_speed = ecmd.base.speed;
207*585d763aSVinicius Costa Gomes 	else
208*585d763aSVinicius Costa Gomes 		link_speed = SPEED_1000;
209*585d763aSVinicius Costa Gomes 
210*585d763aSVinicius Costa Gomes 	q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
211*585d763aSVinicius Costa Gomes 
212*585d763aSVinicius Costa Gomes 	q->enqueue = cbs_enqueue_soft;
213*585d763aSVinicius Costa Gomes 	q->dequeue = cbs_dequeue_soft;
214*585d763aSVinicius Costa Gomes 
215*585d763aSVinicius Costa Gomes 	q->hicredit = qopt->hicredit;
216*585d763aSVinicius Costa Gomes 	q->locredit = qopt->locredit;
217*585d763aSVinicius Costa Gomes 	q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
218*585d763aSVinicius Costa Gomes 	q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
219*585d763aSVinicius Costa Gomes 
220*585d763aSVinicius Costa Gomes 	return 0;
221*585d763aSVinicius Costa Gomes }
222*585d763aSVinicius Costa Gomes 
223*585d763aSVinicius Costa Gomes static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
224*585d763aSVinicius Costa Gomes {
225*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
226*585d763aSVinicius Costa Gomes 
227*585d763aSVinicius Costa Gomes 	if (!opt)
228*585d763aSVinicius Costa Gomes 		return -EINVAL;
229*585d763aSVinicius Costa Gomes 
230*585d763aSVinicius Costa Gomes 	qdisc_watchdog_init(&q->watchdog, sch);
231*585d763aSVinicius Costa Gomes 
232*585d763aSVinicius Costa Gomes 	return cbs_change(sch, opt);
233*585d763aSVinicius Costa Gomes }
234*585d763aSVinicius Costa Gomes 
235*585d763aSVinicius Costa Gomes static void cbs_destroy(struct Qdisc *sch)
236*585d763aSVinicius Costa Gomes {
237*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
238*585d763aSVinicius Costa Gomes 
239*585d763aSVinicius Costa Gomes 	qdisc_watchdog_cancel(&q->watchdog);
240*585d763aSVinicius Costa Gomes }
241*585d763aSVinicius Costa Gomes 
242*585d763aSVinicius Costa Gomes static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
243*585d763aSVinicius Costa Gomes {
244*585d763aSVinicius Costa Gomes 	struct cbs_sched_data *q = qdisc_priv(sch);
245*585d763aSVinicius Costa Gomes 	struct tc_cbs_qopt opt = { };
246*585d763aSVinicius Costa Gomes 	struct nlattr *nest;
247*585d763aSVinicius Costa Gomes 
248*585d763aSVinicius Costa Gomes 	nest = nla_nest_start(skb, TCA_OPTIONS);
249*585d763aSVinicius Costa Gomes 	if (!nest)
250*585d763aSVinicius Costa Gomes 		goto nla_put_failure;
251*585d763aSVinicius Costa Gomes 
252*585d763aSVinicius Costa Gomes 	opt.hicredit = q->hicredit;
253*585d763aSVinicius Costa Gomes 	opt.locredit = q->locredit;
254*585d763aSVinicius Costa Gomes 	opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
255*585d763aSVinicius Costa Gomes 	opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
256*585d763aSVinicius Costa Gomes 	opt.offload = 0;
257*585d763aSVinicius Costa Gomes 
258*585d763aSVinicius Costa Gomes 	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
259*585d763aSVinicius Costa Gomes 		goto nla_put_failure;
260*585d763aSVinicius Costa Gomes 
261*585d763aSVinicius Costa Gomes 	return nla_nest_end(skb, nest);
262*585d763aSVinicius Costa Gomes 
263*585d763aSVinicius Costa Gomes nla_put_failure:
264*585d763aSVinicius Costa Gomes 	nla_nest_cancel(skb, nest);
265*585d763aSVinicius Costa Gomes 	return -1;
266*585d763aSVinicius Costa Gomes }
267*585d763aSVinicius Costa Gomes 
268*585d763aSVinicius Costa Gomes static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
269*585d763aSVinicius Costa Gomes 	.id		=	"cbs",
270*585d763aSVinicius Costa Gomes 	.priv_size	=	sizeof(struct cbs_sched_data),
271*585d763aSVinicius Costa Gomes 	.enqueue	=	cbs_enqueue,
272*585d763aSVinicius Costa Gomes 	.dequeue	=	cbs_dequeue,
273*585d763aSVinicius Costa Gomes 	.peek		=	qdisc_peek_dequeued,
274*585d763aSVinicius Costa Gomes 	.init		=	cbs_init,
275*585d763aSVinicius Costa Gomes 	.reset		=	qdisc_reset_queue,
276*585d763aSVinicius Costa Gomes 	.destroy	=	cbs_destroy,
277*585d763aSVinicius Costa Gomes 	.change		=	cbs_change,
278*585d763aSVinicius Costa Gomes 	.dump		=	cbs_dump,
279*585d763aSVinicius Costa Gomes 	.owner		=	THIS_MODULE,
280*585d763aSVinicius Costa Gomes };
281*585d763aSVinicius Costa Gomes 
282*585d763aSVinicius Costa Gomes static int __init cbs_module_init(void)
283*585d763aSVinicius Costa Gomes {
284*585d763aSVinicius Costa Gomes 	return register_qdisc(&cbs_qdisc_ops);
285*585d763aSVinicius Costa Gomes }
286*585d763aSVinicius Costa Gomes 
287*585d763aSVinicius Costa Gomes static void __exit cbs_module_exit(void)
288*585d763aSVinicius Costa Gomes {
289*585d763aSVinicius Costa Gomes 	unregister_qdisc(&cbs_qdisc_ops);
290*585d763aSVinicius Costa Gomes }
291*585d763aSVinicius Costa Gomes module_init(cbs_module_init)
292*585d763aSVinicius Costa Gomes module_exit(cbs_module_exit)
293*585d763aSVinicius Costa Gomes MODULE_LICENSE("GPL");
294