// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_ets.c         Enhanced Transmission Selection scheduler
 *
 * Description
 * -----------
 *
 * The Enhanced Transmission Selection scheduler is a classful queuing
 * discipline that merges the functionality of the PRIO and DRR qdiscs in one
 * scheduler. ETS makes it easy to configure a set of strict and
 * bandwidth-sharing bands to implement the transmission selection described
 * in 802.1Qaz.
 *
 * Although ETS is technically classful, it's not possible to add or remove
 * classes at will. Instead, one specifies the number of classes, how many are
 * PRIO-like and how many DRR-like, and the quanta for the latter.
 *
 * Algorithm
 * ---------
 *
 * The strict classes, if any, are tried for traffic first: first band 0, and
 * if it has no traffic, then band 1, etc.
 *
 * When there is no traffic in any of the strict queues, the bandwidth-sharing
 * ones are tried next. Each band is assigned a deficit counter, initialized
 * to the "quantum" of that band. ETS maintains a list of active
 * bandwidth-sharing bands whose qdiscs are non-empty. A packet is dequeued
 * from the band at the head of the list if the packet size is smaller than or
 * equal to the deficit counter. If the counter is too small, it is increased
 * by the "quantum" and the scheduler moves on to the next band in the active
 * list.
 */
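
/* Example configuration (a sketch, not taken from this file; it assumes the
 * iproute2 "ets" syntax and a hypothetical interface eth0): three bands, of
 * which band 0 is strict and bands 1-2 are bandwidth-sharing with quanta
 * 3000 and 1500, i.e. a 2:1 split:
 *
 *	tc qdisc add dev eth0 root handle 1: ets bands 3 strict 1 \
 *		quanta 3000 1500 priomap 0 1 2
 *
 * Quanta are supplied only for the bandwidth-sharing bands; strict bands
 * have no quantum.
 */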

#include <linux/module.h>
#include <net/gen_stats.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>

struct ets_class {
	struct list_head alist; /* In struct ets_sched.active. */
	struct Qdisc *qdisc;
	u32 quantum;
	u32 deficit;
	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_queue qstats;
};

struct ets_sched {
	struct list_head active;
	struct tcf_proto __rcu *filter_list;
	struct tcf_block *block;
	unsigned int nbands;
	unsigned int nstrict;
	u8 prio2band[TC_PRIO_MAX + 1];
	struct ets_class classes[TCQ_ETS_MAX_BANDS];
};

static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_NBANDS] = { .type = NLA_U8 },
	[TCA_ETS_NSTRICT] = { .type = NLA_U8 },
	[TCA_ETS_QUANTA] = { .type = NLA_NESTED },
	[TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
};

static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
};

static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};

static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};

static bool cl_is_active(struct ets_class *cl)
{
	return !list_empty(&cl->alist);
}

static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
			     unsigned int *quantum,
			     struct netlink_ext_ack *extack)
{
	*quantum = nla_get_u32(attr);
	if (!*quantum) {
		NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
		return -EINVAL;
	}
	return 0;
}

static struct ets_class *
ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
{
	struct ets_sched *q = qdisc_priv(sch);

	if (arg == 0 || arg > q->nbands)
		return NULL;
	return &q->classes[arg - 1];
}

static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
{
	struct ets_sched *q = qdisc_priv(sch);
	int band = cl - q->classes;

	return TC_H_MAKE(sch->handle, band + 1);
}
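
/* The two helpers above fix the mapping between bands and class identifiers:
 * bands are zero-based internally, while the minor numbers user space sees
 * are one-based under the qdisc's major handle. For example, band 0 of qdisc
 * 1: is class 1:1, and band 1 is class 1:2.
 */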

static void ets_offload_change(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct ets_sched *q = qdisc_priv(sch);
	struct tc_ets_qopt_offload qopt;
	unsigned int w_psum_prev = 0;
	unsigned int q_psum = 0;
	unsigned int q_sum = 0;
	unsigned int quantum;
	unsigned int w_psum;
	unsigned int weight;
	unsigned int i;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_ETS_REPLACE;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.replace_params.bands = q->nbands;
	qopt.replace_params.qstats = &sch->qstats;
	memcpy(&qopt.replace_params.priomap,
	       q->prio2band, sizeof(q->prio2band));

	for (i = 0; i < q->nbands; i++)
		q_sum += q->classes[i].quantum;

	for (i = 0; i < q->nbands; i++) {
		quantum = q->classes[i].quantum;
		q_psum += quantum;
		w_psum = quantum ? q_psum * 100 / q_sum : 0;
		weight = w_psum - w_psum_prev;
		w_psum_prev = w_psum;

		qopt.replace_params.quanta[i] = quantum;
		qopt.replace_params.weights[i] = weight;
	}

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
}
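
/* A note on the weight computation above: it converts quanta into the
 * per-band percentage weights that the offload interface expects. Using
 * prefix sums keeps the error from integer division bounded and makes the
 * weights total exactly 100. A worked example: quanta {1000, 3000, 4000}
 * give q_sum = 8000; the prefix sums 1000, 4000 and 8000 scale to 12, 50
 * and 100, yielding weights 12, 38 and 50. Strict bands have a quantum of
 * zero and thus get weight 0.
 */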

static void ets_offload_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_ets_qopt_offload qopt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	qopt.command = TC_ETS_DESTROY;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
}

static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
			      struct Qdisc *old, unsigned long arg,
			      struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_ets_qopt_offload qopt;

	qopt.command = TC_ETS_GRAFT;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.graft_params.band = arg - 1;
	qopt.graft_params.child_handle = new->handle;

	qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
				   &qopt, extack);
}

static int ets_offload_dump(struct Qdisc *sch)
{
	struct tc_ets_qopt_offload qopt;

	qopt.command = TC_ETS_STATS;
	qopt.handle = sch->handle;
	qopt.parent = sch->parent;
	qopt.stats.bstats = &sch->bstats;
	qopt.stats.qstats = &sch->qstats;

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
}

static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
{
	unsigned int band = cl - q->classes;

	return band < q->nstrict;
}

static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg,
			    struct netlink_ext_ack *extack)
{
	struct ets_class *cl = ets_class_from_arg(sch, *arg);
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_ETS_MAX + 1];
	unsigned int quantum;
	int err;

	/* Classes can be added and removed only through the Qdisc_ops.change
	 * interface.
	 */
	if (!cl) {
		NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
		return -EOPNOTSUPP;
	}

	if (!opt) {
		NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETS_QUANTA_BAND])
		/* Nothing to configure. */
		return 0;

	if (ets_class_is_strict(q, cl)) {
		NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
		return -EINVAL;
	}

	err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
				extack);
	if (err)
		return err;

	sch_tree_lock(sch);
	cl->quantum = quantum;
	sch_tree_unlock(sch);

	ets_offload_change(sch);
	return 0;
}

static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
			   struct Qdisc *new, struct Qdisc **old,
			   struct netlink_ext_ack *extack)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);

	if (!new) {
		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					ets_class_id(sch, cl), NULL);
		if (!new)
			new = &noop_qdisc;
		else
			qdisc_hash_add(new, true);
	}

	*old = qdisc_replace(sch, new, &cl->qdisc);
	ets_offload_graft(sch, new, *old, arg, extack);
	return 0;
}

static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);

	return cl->qdisc;
}

static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
{
	unsigned long band = TC_H_MIN(classid);
	struct ets_sched *q = qdisc_priv(sch);

	if (band - 1 >= q->nbands)
		return 0;
	return band;
}

static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);
	struct ets_sched *q = qdisc_priv(sch);

	/* We get notified about zero-length child Qdiscs as well if they are
	 * offloaded. Those aren't on the active list though, so don't attempt
	 * to remove them.
	 */
	if (!ets_class_is_strict(q, cl) && sch->q.qlen)
		list_del_init(&cl->alist);
}

static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *nest;

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = ets_class_id(sch, cl);
	tcm->tcm_info = cl->qdisc->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;
	if (!ets_class_is_strict(q, cl)) {
		if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
			goto nla_put_failure;
	}
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct ets_class *cl = ets_class_from_arg(sch, arg);
	struct Qdisc *cl_q = cl->qdisc;

	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
	    qdisc_qstats_copy(d, cl_q) < 0)
		return -1;

	return 0;
}

static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct ets_sched *q = qdisc_priv(sch);
	int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->nbands; i++) {
		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
			break;
	}
}

static struct tcf_block *
ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
		    struct netlink_ext_ack *extack)
{
	struct ets_sched *q = qdisc_priv(sch);

	if (cl) {
		NL_SET_ERR_MSG(extack, "ETS classid must be zero");
		return NULL;
	}

	return q->block;
}

static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
					u32 classid)
{
	return ets_class_find(sch, classid);
}

static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
}

static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct ets_sched *q = qdisc_priv(sch);
	u32 band = skb->priority;
	struct tcf_result res;
	struct tcf_proto *fl;
	int err;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	if (TC_H_MAJ(skb->priority) != sch->handle) {
		fl = rcu_dereference_bh(q->filter_list);
		err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
		switch (err) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		if (!fl || err < 0) {
			if (TC_H_MAJ(band))
				band = 0;
			return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
		}
		band = res.classid;
	}
	band = TC_H_MIN(band) - 1;
	if (band >= q->nbands)
		return &q->classes[q->prio2band[0]];
	return &q->classes[band];
}
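
/* Classification above proceeds in order of precedence: an skb whose
 * priority field carries this qdisc's major handle selects its band directly
 * from the minor number (e.g. with sch->handle 0x10000, skb->priority
 * 0x10002 picks band 1); otherwise the attached tc filters are consulted;
 * and if no filter resolves, skb->priority indexes the priomap.
 * Out-of-range results fall back to the band that priority 0 maps to.
 */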

static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	unsigned int len = qdisc_pkt_len(skb);
	struct ets_sched *q = qdisc_priv(sch);
	struct ets_class *cl;
	int err = 0;

	cl = ets_classify(skb, sch, &err);
	if (!cl) {
		if (err & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return err;
	}

	err = qdisc_enqueue(skb, cl->qdisc, to_free);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			qdisc_qstats_drop(sch);
		}
		return err;
	}

	if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) {
		list_add_tail(&cl->alist, &q->active);
		cl->deficit = cl->quantum;
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return err;
}

static struct sk_buff *
ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
{
	qdisc_bstats_update(sch, skb);
	qdisc_qstats_backlog_dec(sch, skb);
	sch->q.qlen--;
	return skb;
}

static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
{
	struct ets_sched *q = qdisc_priv(sch);
	struct ets_class *cl;
	struct sk_buff *skb;
	unsigned int band;
	unsigned int len;

	while (1) {
		for (band = 0; band < q->nstrict; band++) {
			cl = &q->classes[band];
			skb = qdisc_dequeue_peeked(cl->qdisc);
			if (skb)
				return ets_qdisc_dequeue_skb(sch, skb);
		}

		if (list_empty(&q->active))
			goto out;

		cl = list_first_entry(&q->active, struct ets_class, alist);
		skb = cl->qdisc->ops->peek(cl->qdisc);
		if (!skb) {
			qdisc_warn_nonwc(__func__, cl->qdisc);
			goto out;
		}

		len = qdisc_pkt_len(skb);
		if (len <= cl->deficit) {
			cl->deficit -= len;
			skb = qdisc_dequeue_peeked(cl->qdisc);
			if (unlikely(!skb))
				goto out;
			if (cl->qdisc->q.qlen == 0)
				list_del_init(&cl->alist);
			return ets_qdisc_dequeue_skb(sch, skb);
		}

		cl->deficit += cl->quantum;
		list_move_tail(&cl->alist, &q->active);
	}
out:
	return NULL;
}
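
/* A short DRR trace for the dequeue loop above, for one bandwidth-sharing
 * band with quantum 1500 holding two 1000-byte packets: on activation the
 * deficit is 1500, so the first packet is sent and the deficit drops to 500.
 * The second packet no longer fits, so the deficit is topped up to 2000 and
 * the band is rotated to the tail of the active list before it is served
 * again.
 */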

static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
				   unsigned int nbands, u8 *priomap,
				   struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int prio = 0;
	u8 band;
	int rem;
	int err;

	err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
				    ets_priomap_policy, NL_VALIDATE_STRICT,
				    extack);
	if (err)
		return err;

	nla_for_each_nested(attr, priomap_attr, rem) {
		switch (nla_type(attr)) {
		case TCA_ETS_PRIOMAP_BAND:
			if (prio > TC_PRIO_MAX) {
				NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
				return -EINVAL;
			}
			band = nla_get_u8(attr);
			if (band >= nbands) {
				NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
				return -EINVAL;
			}
			priomap[prio++] = band;
			break;
		default:
			WARN_ON_ONCE(1); /* Validate should have caught this. */
			return -EINVAL;
		}
	}

	return 0;
}
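
/* Priomap attributes are consumed positionally: the first
 * TCA_ETS_PRIOMAP_BAND attribute maps priority 0, the next maps priority 1,
 * and so on, up to TC_PRIO_MAX + 1 entries. Priorities the supplied map does
 * not cover keep the default chosen in ets_qdisc_change(), which is the last
 * band.
 */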

static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
				  unsigned int nbands, unsigned int nstrict,
				  unsigned int *quanta,
				  struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int band = nstrict;
	int rem;
	int err;

	err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
				    ets_quanta_policy, NL_VALIDATE_STRICT,
				    extack);
	if (err < 0)
		return err;

	nla_for_each_nested(attr, quanta_attr, rem) {
		switch (nla_type(attr)) {
		case TCA_ETS_QUANTA_BAND:
			if (band >= nbands) {
				NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
				return -EINVAL;
			}
			err = ets_quantum_parse(sch, attr, &quanta[band++],
						extack);
			if (err)
				return err;
			break;
		default:
			WARN_ON_ONCE(1); /* Validate should have caught this. */
			return -EINVAL;
		}
	}

	return 0;
}

static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
			    struct netlink_ext_ack *extack)
{
	unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
	struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_ETS_MAX + 1];
	unsigned int oldbands = q->nbands;
	u8 priomap[TC_PRIO_MAX + 1];
	unsigned int nstrict = 0;
	unsigned int nbands;
	unsigned int i;
	int err;

	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETS_NBANDS]) {
		NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
		return -EINVAL;
	}
	nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
	if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
		return -EINVAL;
	}
	/* Unless overridden, traffic goes to the last band. */
	memset(priomap, nbands - 1, sizeof(priomap));

	if (tb[TCA_ETS_NSTRICT]) {
		nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
		if (nstrict > nbands) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
			return -EINVAL;
		}
	}

	if (tb[TCA_ETS_PRIOMAP]) {
		err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
					      nbands, priomap, extack);
		if (err)
			return err;
	}

	if (tb[TCA_ETS_QUANTA]) {
		err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
					     nbands, nstrict, quanta, extack);
		if (err)
			return err;
	}
	/* If there are more bands than strict + quanta provided, the remaining
	 * ones are ETS with quantum of MTU. Initialize the missing values here.
	 */
	for (i = nstrict; i < nbands; i++) {
		if (!quanta[i])
			quanta[i] = psched_mtu(qdisc_dev(sch));
	}

	/* Before commit, make sure we can allocate all new qdiscs */
	for (i = oldbands; i < nbands; i++) {
		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					      ets_class_id(sch, &q->classes[i]),
					      extack);
		if (!queues[i]) {
			while (i > oldbands)
				qdisc_put(queues[--i]);
			return -ENOMEM;
		}
	}

	sch_tree_lock(sch);

	WRITE_ONCE(q->nbands, nbands);
	for (i = nstrict; i < q->nstrict; i++) {
		if (q->classes[i].qdisc->q.qlen) {
			list_add_tail(&q->classes[i].alist, &q->active);
			q->classes[i].deficit = quanta[i];
		}
	}
	for (i = q->nbands; i < oldbands; i++) {
		if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
			list_del_init(&q->classes[i].alist);
		qdisc_purge_queue(q->classes[i].qdisc);
	}
	WRITE_ONCE(q->nstrict, nstrict);
	memcpy(q->prio2band, priomap, sizeof(priomap));

	for (i = 0; i < q->nbands; i++)
		WRITE_ONCE(q->classes[i].quantum, quanta[i]);

	for (i = oldbands; i < q->nbands; i++) {
		q->classes[i].qdisc = queues[i];
		if (q->classes[i].qdisc != &noop_qdisc)
			qdisc_hash_add(q->classes[i].qdisc, true);
	}

	sch_tree_unlock(sch);

	ets_offload_change(sch);
	for (i = q->nbands; i < oldbands; i++) {
		qdisc_put(q->classes[i].qdisc);
		q->classes[i].qdisc = NULL;
		WRITE_ONCE(q->classes[i].quantum, 0);
		q->classes[i].deficit = 0;
		gnet_stats_basic_sync_init(&q->classes[i].bstats);
		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
	}
	return 0;
}
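
/* Reconfiguration above is staged so that an allocation failure cannot leave
 * the qdisc half-changed: new child qdiscs are allocated before the tree
 * lock is taken, queues of removed bands are purged under the lock, and the
 * removed qdiscs are freed only after the lock is released. As a sketch
 * (again assuming a hypothetical eth0), shrinking to four bands:
 *
 *	tc qdisc change dev eth0 root handle 1: ets bands 4 \
 *		quanta 1000 2000 3000 4000
 *
 * drops the higher bands and purges any traffic still queued in them.
 */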

static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			  struct netlink_ext_ack *extack)
{
	struct ets_sched *q = qdisc_priv(sch);
	int err, i;

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	INIT_LIST_HEAD(&q->active);
	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
		INIT_LIST_HEAD(&q->classes[i].alist);

	return ets_qdisc_change(sch, opt, extack);
}

static void ets_qdisc_reset(struct Qdisc *sch)
{
	struct ets_sched *q = qdisc_priv(sch);
	int band;

	for (band = q->nstrict; band < q->nbands; band++) {
		if (q->classes[band].qdisc->q.qlen)
			list_del_init(&q->classes[band].alist);
	}
	for (band = 0; band < q->nbands; band++)
		qdisc_reset(q->classes[band].qdisc);
}

static void ets_qdisc_destroy(struct Qdisc *sch)
{
	struct ets_sched *q = qdisc_priv(sch);
	int band;

	ets_offload_destroy(sch);
	tcf_block_put(q->block);
	for (band = 0; band < q->nbands; band++)
		qdisc_put(q->classes[band].qdisc);
}

static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *opts;
	struct nlattr *nest;
	u8 nbands, nstrict;
	int band;
	int prio;
	int err;

	err = ets_offload_dump(sch);
	if (err)
		return err;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!opts)
		goto nla_err;

	nbands = READ_ONCE(q->nbands);
	if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
		goto nla_err;

	nstrict = READ_ONCE(q->nstrict);
	if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
		goto nla_err;

	if (nbands > nstrict) {
		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
		if (!nest)
			goto nla_err;

		for (band = nstrict; band < nbands; band++) {
			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
					READ_ONCE(q->classes[band].quantum)))
				goto nla_err;
		}

		nla_nest_end(skb, nest);
	}

	nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
	if (!nest)
		goto nla_err;

	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
			       READ_ONCE(q->prio2band[prio])))
			goto nla_err;
	}

	nla_nest_end(skb, nest);

	return nla_nest_end(skb, opts);

nla_err:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static const struct Qdisc_class_ops ets_class_ops = {
	.change		= ets_class_change,
	.graft		= ets_class_graft,
	.leaf		= ets_class_leaf,
	.find		= ets_class_find,
	.qlen_notify	= ets_class_qlen_notify,
	.dump		= ets_class_dump,
	.dump_stats	= ets_class_dump_stats,
	.walk		= ets_qdisc_walk,
	.tcf_block	= ets_qdisc_tcf_block,
	.bind_tcf	= ets_qdisc_bind_tcf,
	.unbind_tcf	= ets_qdisc_unbind_tcf,
};

static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
	.cl_ops		= &ets_class_ops,
	.id		= "ets",
	.priv_size	= sizeof(struct ets_sched),
	.enqueue	= ets_qdisc_enqueue,
	.dequeue	= ets_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.change		= ets_qdisc_change,
	.init		= ets_qdisc_init,
	.reset		= ets_qdisc_reset,
	.destroy	= ets_qdisc_destroy,
	.dump		= ets_qdisc_dump,
	.owner		= THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("ets");

static int __init ets_init(void)
{
	return register_qdisc(&ets_qdisc_ops);
}

static void __exit ets_exit(void)
{
	unregister_qdisc(&ets_qdisc_ops);
}

module_init(ets_init);
module_exit(ets_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Enhanced Transmission Selection (ETS) scheduler");