xref: /linux/net/sched/sch_ets.c (revision 621cde16e49b3ecf7d59a8106a20aaebfb4a59a9)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/sched/sch_ets.c         Enhanced Transmission Selection scheduler
4  *
5  * Description
6  * -----------
7  *
8  * The Enhanced Transmission Selection scheduler is a classful queuing
9  * discipline that merges functionality of PRIO and DRR qdiscs in one scheduler.
10  * ETS makes it easy to configure a set of strict and bandwidth-sharing bands to
11  * implement the transmission selection described in 802.1Qaz.
12  *
13  * Although ETS is technically classful, it is not possible to add and remove
14  * classes at will. Instead, one specifies the number of classes, how many are
15  * PRIO-like and how many DRR-like, and the quanta for the latter.
16  *
17  * Algorithm
18  * ---------
19  *
20  * The strict classes, if any, are tried for traffic first: first band 0, if it
21  * has no traffic then band 1, etc.
22  *
23  * When there is no traffic in any of the strict queues, the bandwidth-sharing
24  * ones are tried next. Each band is assigned a deficit counter, initialized to
25  * "quantum" of that band. ETS maintains a list of active bandwidth-sharing
26  * bands whose qdiscs are non-empty. A packet is dequeued from the band at the
27  * head of the list if the packet size is smaller or equal to the deficit
28  * counter. If the counter is too small, it is increased by "quantum" and the
29  * scheduler moves on to the next band in the active list.
30  */
31 
#include <linux/math64.h>
#include <linux/module.h>
#include <net/gen_stats.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
38 
39 struct ets_class {
40 	struct list_head alist; /* In struct ets_sched.active. */
41 	struct Qdisc *qdisc;
42 	u32 quantum;
43 	u32 deficit;
44 	struct gnet_stats_basic_sync bstats;
45 	struct gnet_stats_queue qstats;
46 };
47 
48 struct ets_sched {
49 	struct list_head active;
50 	struct tcf_proto __rcu *filter_list;
51 	struct tcf_block *block;
52 	unsigned int nbands;
53 	unsigned int nstrict;
54 	u8 prio2band[TC_PRIO_MAX + 1];
55 	struct ets_class classes[TCQ_ETS_MAX_BANDS];
56 };
57 
58 static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
59 	[TCA_ETS_NBANDS] = { .type = NLA_U8 },
60 	[TCA_ETS_NSTRICT] = { .type = NLA_U8 },
61 	[TCA_ETS_QUANTA] = { .type = NLA_NESTED },
62 	[TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
63 };
64 
65 static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
66 	[TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
67 };
68 
69 static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
70 	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
71 };
72 
73 static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
74 	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
75 };
76 
ets_quantum_parse(struct Qdisc * sch,const struct nlattr * attr,unsigned int * quantum,struct netlink_ext_ack * extack)77 static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
78 			     unsigned int *quantum,
79 			     struct netlink_ext_ack *extack)
80 {
81 	*quantum = nla_get_u32(attr);
82 	if (!*quantum) {
83 		NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
84 		return -EINVAL;
85 	}
86 	return 0;
87 }
88 
89 static struct ets_class *
ets_class_from_arg(struct Qdisc * sch,unsigned long arg)90 ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
91 {
92 	struct ets_sched *q = qdisc_priv(sch);
93 
94 	return &q->classes[arg - 1];
95 }
96 
ets_class_id(struct Qdisc * sch,const struct ets_class * cl)97 static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
98 {
99 	struct ets_sched *q = qdisc_priv(sch);
100 	int band = cl - q->classes;
101 
102 	return TC_H_MAKE(sch->handle, band + 1);
103 }
104 
ets_offload_change(struct Qdisc * sch)105 static void ets_offload_change(struct Qdisc *sch)
106 {
107 	struct net_device *dev = qdisc_dev(sch);
108 	struct ets_sched *q = qdisc_priv(sch);
109 	struct tc_ets_qopt_offload qopt;
110 	unsigned int w_psum_prev = 0;
111 	unsigned int q_psum = 0;
112 	unsigned int q_sum = 0;
113 	unsigned int quantum;
114 	unsigned int w_psum;
115 	unsigned int weight;
116 	unsigned int i;
117 
118 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
119 		return;
120 
121 	qopt.command = TC_ETS_REPLACE;
122 	qopt.handle = sch->handle;
123 	qopt.parent = sch->parent;
124 	qopt.replace_params.bands = q->nbands;
125 	qopt.replace_params.qstats = &sch->qstats;
126 	memcpy(&qopt.replace_params.priomap,
127 	       q->prio2band, sizeof(q->prio2band));
128 
129 	for (i = 0; i < q->nbands; i++)
130 		q_sum += q->classes[i].quantum;
131 
132 	for (i = 0; i < q->nbands; i++) {
133 		quantum = q->classes[i].quantum;
134 		q_psum += quantum;
135 		w_psum = quantum ? q_psum * 100 / q_sum : 0;
136 		weight = w_psum - w_psum_prev;
137 		w_psum_prev = w_psum;
138 
139 		qopt.replace_params.quanta[i] = quantum;
140 		qopt.replace_params.weights[i] = weight;
141 	}
142 
143 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
144 }
145 
ets_offload_destroy(struct Qdisc * sch)146 static void ets_offload_destroy(struct Qdisc *sch)
147 {
148 	struct net_device *dev = qdisc_dev(sch);
149 	struct tc_ets_qopt_offload qopt;
150 
151 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
152 		return;
153 
154 	qopt.command = TC_ETS_DESTROY;
155 	qopt.handle = sch->handle;
156 	qopt.parent = sch->parent;
157 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
158 }
159 
ets_offload_graft(struct Qdisc * sch,struct Qdisc * new,struct Qdisc * old,unsigned long arg,struct netlink_ext_ack * extack)160 static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
161 			      struct Qdisc *old, unsigned long arg,
162 			      struct netlink_ext_ack *extack)
163 {
164 	struct net_device *dev = qdisc_dev(sch);
165 	struct tc_ets_qopt_offload qopt;
166 
167 	qopt.command = TC_ETS_GRAFT;
168 	qopt.handle = sch->handle;
169 	qopt.parent = sch->parent;
170 	qopt.graft_params.band = arg - 1;
171 	qopt.graft_params.child_handle = new->handle;
172 
173 	qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
174 				   &qopt, extack);
175 }
176 
ets_offload_dump(struct Qdisc * sch)177 static int ets_offload_dump(struct Qdisc *sch)
178 {
179 	struct tc_ets_qopt_offload qopt;
180 
181 	qopt.command = TC_ETS_STATS;
182 	qopt.handle = sch->handle;
183 	qopt.parent = sch->parent;
184 	qopt.stats.bstats = &sch->bstats;
185 	qopt.stats.qstats = &sch->qstats;
186 
187 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
188 }
189 
ets_class_is_strict(struct ets_sched * q,const struct ets_class * cl)190 static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
191 {
192 	unsigned int band = cl - q->classes;
193 
194 	return band < q->nstrict;
195 }
196 
/* Qdisc_class_ops.change: update the quantum of one bandwidth-sharing band.
 *
 * @arg points at the class argument identifying the band. The nested
 * options in tca[TCA_OPTIONS] may carry TCA_ETS_QUANTA_BAND; when the
 * attribute is absent there is nothing to do and 0 is returned.
 *
 * Returns 0 on success, -EOPNOTSUPP when no class is given, -EINVAL when
 * options are missing, the band is strict or the quantum is zero, or the
 * error from attribute parsing.
 */
static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg,
			    struct netlink_ext_ack *extack)
{
	struct ets_class *cl = ets_class_from_arg(sch, *arg);
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_ETS_MAX + 1];
	unsigned int quantum;
	int err;

	/* Classes can be added and removed only through Qdisc_ops.change
	 * interface.
	 */
	if (!cl) {
		NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
		return -EOPNOTSUPP;
	}

	if (!opt) {
		NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETS_QUANTA_BAND])
		/* Nothing to configure. */
		return 0;

	if (ets_class_is_strict(q, cl)) {
		NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
		return -EINVAL;
	}

	err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
				extack);
	if (err)
		return err;

	sch_tree_lock(sch);
	/* ets_qdisc_dump() reads the quantum with READ_ONCE(), so pair the
	 * store with WRITE_ONCE() as ets_qdisc_change() does.
	 */
	WRITE_ONCE(cl->quantum, quantum);
	sch_tree_unlock(sch);

	ets_offload_change(sch);
	return 0;
}
246 
ets_class_graft(struct Qdisc * sch,unsigned long arg,struct Qdisc * new,struct Qdisc ** old,struct netlink_ext_ack * extack)247 static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
248 			   struct Qdisc *new, struct Qdisc **old,
249 			   struct netlink_ext_ack *extack)
250 {
251 	struct ets_class *cl = ets_class_from_arg(sch, arg);
252 
253 	if (!new) {
254 		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
255 					ets_class_id(sch, cl), NULL);
256 		if (!new)
257 			new = &noop_qdisc;
258 		else
259 			qdisc_hash_add(new, true);
260 	}
261 
262 	*old = qdisc_replace(sch, new, &cl->qdisc);
263 	ets_offload_graft(sch, new, *old, arg, extack);
264 	return 0;
265 }
266 
ets_class_leaf(struct Qdisc * sch,unsigned long arg)267 static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
268 {
269 	struct ets_class *cl = ets_class_from_arg(sch, arg);
270 
271 	return cl->qdisc;
272 }
273 
/* Qdisc_class_ops.find: map @classid to a class argument.
 *
 * The minor number of @classid is the 1-based band number. Returns that
 * number when it names a configured band, 0 otherwise (a minor of 0 wraps
 * around in the unsigned subtraction and is rejected as well).
 */
static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
{
	struct ets_sched *q = qdisc_priv(sch);
	unsigned long minor = TC_H_MIN(classid);

	return (minor - 1 < q->nbands) ? minor : 0;
}
283 
ets_class_qlen_notify(struct Qdisc * sch,unsigned long arg)284 static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
285 {
286 	struct ets_class *cl = ets_class_from_arg(sch, arg);
287 	struct ets_sched *q = qdisc_priv(sch);
288 
289 	/* We get notified about zero-length child Qdiscs as well if they are
290 	 * offloaded. Those aren't on the active list though, so don't attempt
291 	 * to remove them.
292 	 */
293 	if (!ets_class_is_strict(q, cl) && sch->q.qlen)
294 		list_del(&cl->alist);
295 }
296 
ets_class_dump(struct Qdisc * sch,unsigned long arg,struct sk_buff * skb,struct tcmsg * tcm)297 static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
298 			  struct sk_buff *skb, struct tcmsg *tcm)
299 {
300 	struct ets_class *cl = ets_class_from_arg(sch, arg);
301 	struct ets_sched *q = qdisc_priv(sch);
302 	struct nlattr *nest;
303 
304 	tcm->tcm_parent = TC_H_ROOT;
305 	tcm->tcm_handle = ets_class_id(sch, cl);
306 	tcm->tcm_info = cl->qdisc->handle;
307 
308 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
309 	if (!nest)
310 		goto nla_put_failure;
311 	if (!ets_class_is_strict(q, cl)) {
312 		if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
313 			goto nla_put_failure;
314 	}
315 	return nla_nest_end(skb, nest);
316 
317 nla_put_failure:
318 	nla_nest_cancel(skb, nest);
319 	return -EMSGSIZE;
320 }
321 
ets_class_dump_stats(struct Qdisc * sch,unsigned long arg,struct gnet_dump * d)322 static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
323 				struct gnet_dump *d)
324 {
325 	struct ets_class *cl = ets_class_from_arg(sch, arg);
326 	struct Qdisc *cl_q = cl->qdisc;
327 
328 	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
329 	    qdisc_qstats_copy(d, cl_q) < 0)
330 		return -1;
331 
332 	return 0;
333 }
334 
ets_qdisc_walk(struct Qdisc * sch,struct qdisc_walker * arg)335 static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
336 {
337 	struct ets_sched *q = qdisc_priv(sch);
338 	int i;
339 
340 	if (arg->stop)
341 		return;
342 
343 	for (i = 0; i < q->nbands; i++) {
344 		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
345 			break;
346 	}
347 }
348 
349 static struct tcf_block *
ets_qdisc_tcf_block(struct Qdisc * sch,unsigned long cl,struct netlink_ext_ack * extack)350 ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
351 		    struct netlink_ext_ack *extack)
352 {
353 	struct ets_sched *q = qdisc_priv(sch);
354 
355 	if (cl) {
356 		NL_SET_ERR_MSG(extack, "ETS classid must be zero");
357 		return NULL;
358 	}
359 
360 	return q->block;
361 }
362 
/* Qdisc_class_ops.bind_tcf: resolve @classid exactly like
 * ets_class_find(); @parent is not used.
 */
static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
					u32 classid)
{
	unsigned long cl = ets_class_find(sch, classid);

	return cl;
}
368 
/* Qdisc_class_ops.unbind_tcf: intentionally empty. */
static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	/* ets_qdisc_bind_tcf() acquires nothing, so there is nothing to undo. */
}
372 
ets_classify(struct sk_buff * skb,struct Qdisc * sch,int * qerr)373 static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
374 				      int *qerr)
375 {
376 	struct ets_sched *q = qdisc_priv(sch);
377 	u32 band = skb->priority;
378 	struct tcf_result res;
379 	struct tcf_proto *fl;
380 	int err;
381 
382 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
383 	if (TC_H_MAJ(skb->priority) != sch->handle) {
384 		fl = rcu_dereference_bh(q->filter_list);
385 		err = tcf_classify(skb, NULL, fl, &res, false);
386 #ifdef CONFIG_NET_CLS_ACT
387 		switch (err) {
388 		case TC_ACT_STOLEN:
389 		case TC_ACT_QUEUED:
390 		case TC_ACT_TRAP:
391 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
392 			fallthrough;
393 		case TC_ACT_SHOT:
394 			return NULL;
395 		}
396 #endif
397 		if (!fl || err < 0) {
398 			if (TC_H_MAJ(band))
399 				band = 0;
400 			return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
401 		}
402 		band = res.classid;
403 	}
404 	band = TC_H_MIN(band) - 1;
405 	if (band >= q->nbands)
406 		return &q->classes[q->prio2band[0]];
407 	return &q->classes[band];
408 }
409 
ets_qdisc_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)410 static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
411 			     struct sk_buff **to_free)
412 {
413 	unsigned int len = qdisc_pkt_len(skb);
414 	struct ets_sched *q = qdisc_priv(sch);
415 	struct ets_class *cl;
416 	int err = 0;
417 	bool first;
418 
419 	cl = ets_classify(skb, sch, &err);
420 	if (!cl) {
421 		if (err & __NET_XMIT_BYPASS)
422 			qdisc_qstats_drop(sch);
423 		__qdisc_drop(skb, to_free);
424 		return err;
425 	}
426 
427 	first = !cl->qdisc->q.qlen;
428 	err = qdisc_enqueue(skb, cl->qdisc, to_free);
429 	if (unlikely(err != NET_XMIT_SUCCESS)) {
430 		if (net_xmit_drop_count(err)) {
431 			cl->qstats.drops++;
432 			qdisc_qstats_drop(sch);
433 		}
434 		return err;
435 	}
436 
437 	if (first && !ets_class_is_strict(q, cl)) {
438 		list_add_tail(&cl->alist, &q->active);
439 		cl->deficit = cl->quantum;
440 	}
441 
442 	sch->qstats.backlog += len;
443 	sch->q.qlen++;
444 	return err;
445 }
446 
447 static struct sk_buff *
ets_qdisc_dequeue_skb(struct Qdisc * sch,struct sk_buff * skb)448 ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
449 {
450 	qdisc_bstats_update(sch, skb);
451 	qdisc_qstats_backlog_dec(sch, skb);
452 	sch->q.qlen--;
453 	return skb;
454 }
455 
ets_qdisc_dequeue(struct Qdisc * sch)456 static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
457 {
458 	struct ets_sched *q = qdisc_priv(sch);
459 	struct ets_class *cl;
460 	struct sk_buff *skb;
461 	unsigned int band;
462 	unsigned int len;
463 
464 	while (1) {
465 		for (band = 0; band < q->nstrict; band++) {
466 			cl = &q->classes[band];
467 			skb = qdisc_dequeue_peeked(cl->qdisc);
468 			if (skb)
469 				return ets_qdisc_dequeue_skb(sch, skb);
470 		}
471 
472 		if (list_empty(&q->active))
473 			goto out;
474 
475 		cl = list_first_entry(&q->active, struct ets_class, alist);
476 		skb = cl->qdisc->ops->peek(cl->qdisc);
477 		if (!skb) {
478 			qdisc_warn_nonwc(__func__, cl->qdisc);
479 			goto out;
480 		}
481 
482 		len = qdisc_pkt_len(skb);
483 		if (len <= cl->deficit) {
484 			cl->deficit -= len;
485 			skb = qdisc_dequeue_peeked(cl->qdisc);
486 			if (unlikely(!skb))
487 				goto out;
488 			if (cl->qdisc->q.qlen == 0)
489 				list_del(&cl->alist);
490 			return ets_qdisc_dequeue_skb(sch, skb);
491 		}
492 
493 		cl->deficit += cl->quantum;
494 		list_move_tail(&cl->alist, &q->active);
495 	}
496 out:
497 	return NULL;
498 }
499 
/* Fill @priomap from the nested TCA_ETS_PRIOMAP attribute.
 *
 * The n-th TCA_ETS_PRIOMAP_BAND entry sets the band of priority n; entries
 * of @priomap beyond the supplied ones are left untouched. Returns 0 on
 * success, the validation error, or -EINVAL when there are more than
 * TC_PRIO_MAX + 1 entries or a band is not below @nbands.
 */
static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
				   unsigned int nbands, u8 *priomap,
				   struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int prio = 0;
	u8 band;
	int rem;
	int err;

	err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
				    ets_priomap_policy, NL_VALIDATE_STRICT,
				    extack);
	if (err)
		return err;

	nla_for_each_nested(attr, priomap_attr, rem) {
		if (nla_type(attr) != TCA_ETS_PRIOMAP_BAND) {
			WARN_ON_ONCE(1); /* Validate should have caught this. */
			return -EINVAL;
		}

		if (prio > TC_PRIO_MAX) {
			NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
			return -EINVAL;
		}

		band = nla_get_u8(attr);
		if (band >= nbands) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
			return -EINVAL;
		}

		priomap[prio++] = band;
	}

	return 0;
}
538 
ets_qdisc_quanta_parse(struct Qdisc * sch,struct nlattr * quanta_attr,unsigned int nbands,unsigned int nstrict,unsigned int * quanta,struct netlink_ext_ack * extack)539 static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
540 				  unsigned int nbands, unsigned int nstrict,
541 				  unsigned int *quanta,
542 				  struct netlink_ext_ack *extack)
543 {
544 	const struct nlattr *attr;
545 	int band = nstrict;
546 	int rem;
547 	int err;
548 
549 	err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
550 				    ets_quanta_policy, NL_VALIDATE_STRICT,
551 				    extack);
552 	if (err < 0)
553 		return err;
554 
555 	nla_for_each_nested(attr, quanta_attr, rem) {
556 		switch (nla_type(attr)) {
557 		case TCA_ETS_QUANTA_BAND:
558 			if (band >= nbands) {
559 				NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
560 				return -EINVAL;
561 			}
562 			err = ets_quantum_parse(sch, attr, &quanta[band++],
563 						extack);
564 			if (err)
565 				return err;
566 			break;
567 		default:
568 			WARN_ON_ONCE(1); /* Validate should have caught this. */
569 			return -EINVAL;
570 		}
571 	}
572 
573 	return 0;
574 }
575 
ets_qdisc_change(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)576 static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
577 			    struct netlink_ext_ack *extack)
578 {
579 	unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
580 	struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
581 	struct ets_sched *q = qdisc_priv(sch);
582 	struct nlattr *tb[TCA_ETS_MAX + 1];
583 	unsigned int oldbands = q->nbands;
584 	u8 priomap[TC_PRIO_MAX + 1];
585 	unsigned int nstrict = 0;
586 	unsigned int nbands;
587 	unsigned int i;
588 	int err;
589 
590 	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
591 	if (err < 0)
592 		return err;
593 
594 	if (!tb[TCA_ETS_NBANDS]) {
595 		NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
596 		return -EINVAL;
597 	}
598 	nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
599 	if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
600 		NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
601 		return -EINVAL;
602 	}
603 	/* Unless overridden, traffic goes to the last band. */
604 	memset(priomap, nbands - 1, sizeof(priomap));
605 
606 	if (tb[TCA_ETS_NSTRICT]) {
607 		nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
608 		if (nstrict > nbands) {
609 			NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
610 			return -EINVAL;
611 		}
612 	}
613 
614 	if (tb[TCA_ETS_PRIOMAP]) {
615 		err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
616 					      nbands, priomap, extack);
617 		if (err)
618 			return err;
619 	}
620 
621 	if (tb[TCA_ETS_QUANTA]) {
622 		err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
623 					     nbands, nstrict, quanta, extack);
624 		if (err)
625 			return err;
626 	}
627 	/* If there are more bands than strict + quanta provided, the remaining
628 	 * ones are ETS with quantum of MTU. Initialize the missing values here.
629 	 */
630 	for (i = nstrict; i < nbands; i++) {
631 		if (!quanta[i])
632 			quanta[i] = psched_mtu(qdisc_dev(sch));
633 	}
634 
635 	/* Before commit, make sure we can allocate all new qdiscs */
636 	for (i = oldbands; i < nbands; i++) {
637 		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
638 					      ets_class_id(sch, &q->classes[i]),
639 					      extack);
640 		if (!queues[i]) {
641 			while (i > oldbands)
642 				qdisc_put(queues[--i]);
643 			return -ENOMEM;
644 		}
645 	}
646 
647 	sch_tree_lock(sch);
648 
649 	WRITE_ONCE(q->nbands, nbands);
650 	for (i = nstrict; i < q->nstrict; i++) {
651 		if (q->classes[i].qdisc->q.qlen) {
652 			list_add_tail(&q->classes[i].alist, &q->active);
653 			q->classes[i].deficit = quanta[i];
654 		}
655 	}
656 	for (i = q->nbands; i < oldbands; i++) {
657 		if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
658 			list_del(&q->classes[i].alist);
659 		qdisc_tree_flush_backlog(q->classes[i].qdisc);
660 	}
661 	WRITE_ONCE(q->nstrict, nstrict);
662 	memcpy(q->prio2band, priomap, sizeof(priomap));
663 
664 	for (i = 0; i < q->nbands; i++)
665 		WRITE_ONCE(q->classes[i].quantum, quanta[i]);
666 
667 	for (i = oldbands; i < q->nbands; i++) {
668 		q->classes[i].qdisc = queues[i];
669 		if (q->classes[i].qdisc != &noop_qdisc)
670 			qdisc_hash_add(q->classes[i].qdisc, true);
671 	}
672 
673 	sch_tree_unlock(sch);
674 
675 	ets_offload_change(sch);
676 	for (i = q->nbands; i < oldbands; i++) {
677 		qdisc_put(q->classes[i].qdisc);
678 		q->classes[i].qdisc = NULL;
679 		WRITE_ONCE(q->classes[i].quantum, 0);
680 		q->classes[i].deficit = 0;
681 		gnet_stats_basic_sync_init(&q->classes[i].bstats);
682 		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
683 	}
684 	return 0;
685 }
686 
ets_qdisc_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)687 static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
688 			  struct netlink_ext_ack *extack)
689 {
690 	struct ets_sched *q = qdisc_priv(sch);
691 	int err, i;
692 
693 	if (!opt)
694 		return -EINVAL;
695 
696 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
697 	if (err)
698 		return err;
699 
700 	INIT_LIST_HEAD(&q->active);
701 	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
702 		INIT_LIST_HEAD(&q->classes[i].alist);
703 
704 	return ets_qdisc_change(sch, opt, extack);
705 }
706 
ets_qdisc_reset(struct Qdisc * sch)707 static void ets_qdisc_reset(struct Qdisc *sch)
708 {
709 	struct ets_sched *q = qdisc_priv(sch);
710 	int band;
711 
712 	for (band = q->nstrict; band < q->nbands; band++) {
713 		if (q->classes[band].qdisc->q.qlen)
714 			list_del(&q->classes[band].alist);
715 	}
716 	for (band = 0; band < q->nbands; band++)
717 		qdisc_reset(q->classes[band].qdisc);
718 }
719 
ets_qdisc_destroy(struct Qdisc * sch)720 static void ets_qdisc_destroy(struct Qdisc *sch)
721 {
722 	struct ets_sched *q = qdisc_priv(sch);
723 	int band;
724 
725 	ets_offload_destroy(sch);
726 	tcf_block_put(q->block);
727 	for (band = 0; band < q->nbands; band++)
728 		qdisc_put(q->classes[band].qdisc);
729 }
730 
ets_qdisc_dump(struct Qdisc * sch,struct sk_buff * skb)731 static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
732 {
733 	struct ets_sched *q = qdisc_priv(sch);
734 	struct nlattr *opts;
735 	struct nlattr *nest;
736 	u8 nbands, nstrict;
737 	int band;
738 	int prio;
739 	int err;
740 
741 	err = ets_offload_dump(sch);
742 	if (err)
743 		return err;
744 
745 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
746 	if (!opts)
747 		goto nla_err;
748 
749 	nbands = READ_ONCE(q->nbands);
750 	if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
751 		goto nla_err;
752 
753 	nstrict = READ_ONCE(q->nstrict);
754 	if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
755 		goto nla_err;
756 
757 	if (nbands > nstrict) {
758 		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
759 		if (!nest)
760 			goto nla_err;
761 
762 		for (band = nstrict; band < nbands; band++) {
763 			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
764 					READ_ONCE(q->classes[band].quantum)))
765 				goto nla_err;
766 		}
767 
768 		nla_nest_end(skb, nest);
769 	}
770 
771 	nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
772 	if (!nest)
773 		goto nla_err;
774 
775 	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
776 		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
777 			       READ_ONCE(q->prio2band[prio])))
778 			goto nla_err;
779 	}
780 
781 	nla_nest_end(skb, nest);
782 
783 	return nla_nest_end(skb, opts);
784 
785 nla_err:
786 	nla_nest_cancel(skb, opts);
787 	return -EMSGSIZE;
788 }
789 
790 static const struct Qdisc_class_ops ets_class_ops = {
791 	.change		= ets_class_change,
792 	.graft		= ets_class_graft,
793 	.leaf		= ets_class_leaf,
794 	.find		= ets_class_find,
795 	.qlen_notify	= ets_class_qlen_notify,
796 	.dump		= ets_class_dump,
797 	.dump_stats	= ets_class_dump_stats,
798 	.walk		= ets_qdisc_walk,
799 	.tcf_block	= ets_qdisc_tcf_block,
800 	.bind_tcf	= ets_qdisc_bind_tcf,
801 	.unbind_tcf	= ets_qdisc_unbind_tcf,
802 };
803 
804 static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
805 	.cl_ops		= &ets_class_ops,
806 	.id		= "ets",
807 	.priv_size	= sizeof(struct ets_sched),
808 	.enqueue	= ets_qdisc_enqueue,
809 	.dequeue	= ets_qdisc_dequeue,
810 	.peek		= qdisc_peek_dequeued,
811 	.change		= ets_qdisc_change,
812 	.init		= ets_qdisc_init,
813 	.reset		= ets_qdisc_reset,
814 	.destroy	= ets_qdisc_destroy,
815 	.dump		= ets_qdisc_dump,
816 	.owner		= THIS_MODULE,
817 };
818 MODULE_ALIAS_NET_SCH("ets");
819 
ets_init(void)820 static int __init ets_init(void)
821 {
822 	return register_qdisc(&ets_qdisc_ops);
823 }
824 
ets_exit(void)825 static void __exit ets_exit(void)
826 {
827 	unregister_qdisc(&ets_qdisc_ops);
828 }
829 
830 module_init(ets_init);
831 module_exit(ets_exit);
832 MODULE_LICENSE("GPL");
833 MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler");
834