xref: /linux/net/sched/sch_ets.c (revision 90e63d5354951d37fa2b3b91e6f17b95d2bf9bee)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/sched/sch_ets.c         Enhanced Transmission Selection scheduler
4  *
5  * Description
6  * -----------
7  *
8  * The Enhanced Transmission Selection scheduler is a classful queuing
9  * discipline that merges functionality of PRIO and DRR qdiscs in one scheduler.
10  * ETS makes it easy to configure a set of strict and bandwidth-sharing bands to
11  * implement the transmission selection described in 802.1Qaz.
12  *
 * Although ETS is technically classful, it's not possible to add and remove
 * classes at will. Instead one specifies the number of classes, how many are
 * PRIO-like and how many DRR-like, and the quanta for the latter.
16  *
17  * Algorithm
18  * ---------
19  *
20  * The strict classes, if any, are tried for traffic first: first band 0, if it
21  * has no traffic then band 1, etc.
22  *
23  * When there is no traffic in any of the strict queues, the bandwidth-sharing
24  * ones are tried next. Each band is assigned a deficit counter, initialized to
25  * "quantum" of that band. ETS maintains a list of active bandwidth-sharing
 * bands whose qdiscs are non-empty. A packet is dequeued from the band at the
 * head of the list if the packet size is smaller than or equal to the deficit
 * counter. If the counter is too small, it is increased by "quantum" and the
 * scheduler moves on to the next band in the active list.
30  */
31 
32 #include <linux/module.h>
33 #include <net/gen_stats.h>
34 #include <net/netlink.h>
35 #include <net/pkt_cls.h>
36 #include <net/pkt_sched.h>
37 #include <net/sch_generic.h>
38 
/* Per-band state. One instance exists for each slot of ets_sched.classes. */
struct ets_class {
	struct list_head alist; /* In struct ets_sched.active. */
	struct Qdisc *qdisc;	/* Child qdisc holding this band's packets. */
	u32 quantum;		/* Added to @deficit when the band cannot send. */
	u32 deficit;		/* Compared against packet length on dequeue. */
	struct gnet_stats_basic_sync bstats; /* Re-initialized when the band is removed. */
	struct gnet_stats_queue qstats;	/* drops is bumped on failed child enqueue. */
};
47 
/* Private data of one ETS qdisc instance, obtained through qdisc_priv(). */
struct ets_sched {
	struct list_head active;	/* Links ets_class.alist entries. */
	struct tcf_proto __rcu *filter_list; /* Classifier chain used by ets_classify(). */
	struct tcf_block *block;	/* Obtained via tcf_block_get() in init. */
	unsigned int nbands;		/* Number of bands currently configured. */
	unsigned int nstrict;		/* Bands [0, nstrict) are strict. */
	u8 prio2band[TC_PRIO_MAX + 1];	/* Priority -> index into @classes. */
	struct ets_class classes[TCQ_ETS_MAX_BANDS];
};
57 
/* Policy for the top-level qdisc options parsed in ets_qdisc_change(). */
static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_NBANDS] = { .type = NLA_U8 },
	[TCA_ETS_NSTRICT] = { .type = NLA_U8 },
	[TCA_ETS_QUANTA] = { .type = NLA_NESTED },
	[TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
};
64 
/* Policy for the entries nested inside TCA_ETS_PRIOMAP. */
static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
};
68 
/* Policy for the entries nested inside TCA_ETS_QUANTA. */
static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};
72 
/* Policy for the per-class options parsed in ets_class_change(). */
static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};
76 
77 static bool cl_is_active(struct ets_class *cl)
78 {
79 	return !list_empty(&cl->alist);
80 }
81 
82 static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
83 			     unsigned int *quantum,
84 			     struct netlink_ext_ack *extack)
85 {
86 	*quantum = nla_get_u32(attr);
87 	if (!*quantum) {
88 		NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
89 		return -EINVAL;
90 	}
91 	return 0;
92 }
93 
94 static struct ets_class *
95 ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
96 {
97 	struct ets_sched *q = qdisc_priv(sch);
98 
99 	if (arg == 0 || arg > q->nbands)
100 		return NULL;
101 	return &q->classes[arg - 1];
102 }
103 
104 static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
105 {
106 	struct ets_sched *q = qdisc_priv(sch);
107 	int band = cl - q->classes;
108 
109 	return TC_H_MAKE(sch->handle, band + 1);
110 }
111 
112 static void ets_offload_change(struct Qdisc *sch)
113 {
114 	struct net_device *dev = qdisc_dev(sch);
115 	struct ets_sched *q = qdisc_priv(sch);
116 	struct tc_ets_qopt_offload qopt;
117 	unsigned int w_psum_prev = 0;
118 	unsigned int quantum;
119 	unsigned int w_psum;
120 	unsigned int weight;
121 	unsigned int i;
122 	u64 q_psum = 0;
123 	u64 q_sum = 0;
124 
125 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
126 		return;
127 
128 	qopt.command = TC_ETS_REPLACE;
129 	qopt.handle = sch->handle;
130 	qopt.parent = sch->parent;
131 	qopt.replace_params.bands = q->nbands;
132 	qopt.replace_params.qstats = &sch->qstats;
133 	memcpy(&qopt.replace_params.priomap,
134 	       q->prio2band, sizeof(q->prio2band));
135 
136 	for (i = 0; i < q->nbands; i++)
137 		q_sum += q->classes[i].quantum;
138 
139 	for (i = 0; i < q->nbands; i++) {
140 		quantum = q->classes[i].quantum;
141 		if (quantum) {
142 			q_psum += quantum;
143 			w_psum = div64_u64(q_psum * 100, q_sum);
144 		} else {
145 			w_psum = 0;
146 		}
147 		weight = w_psum - w_psum_prev;
148 		w_psum_prev = w_psum;
149 
150 		qopt.replace_params.quanta[i] = quantum;
151 		qopt.replace_params.weights[i] = weight;
152 	}
153 
154 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
155 }
156 
157 static void ets_offload_destroy(struct Qdisc *sch)
158 {
159 	struct net_device *dev = qdisc_dev(sch);
160 	struct tc_ets_qopt_offload qopt;
161 
162 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
163 		return;
164 
165 	qopt.command = TC_ETS_DESTROY;
166 	qopt.handle = sch->handle;
167 	qopt.parent = sch->parent;
168 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
169 }
170 
171 static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
172 			      struct Qdisc *old, unsigned long arg,
173 			      struct netlink_ext_ack *extack)
174 {
175 	struct net_device *dev = qdisc_dev(sch);
176 	struct tc_ets_qopt_offload qopt;
177 
178 	qopt.command = TC_ETS_GRAFT;
179 	qopt.handle = sch->handle;
180 	qopt.parent = sch->parent;
181 	qopt.graft_params.band = arg - 1;
182 	qopt.graft_params.child_handle = new->handle;
183 
184 	qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
185 				   &qopt, extack);
186 }
187 
188 static int ets_offload_dump(struct Qdisc *sch)
189 {
190 	struct tc_ets_qopt_offload qopt;
191 
192 	qopt.command = TC_ETS_STATS;
193 	qopt.handle = sch->handle;
194 	qopt.parent = sch->parent;
195 	qopt.stats.bstats = &sch->bstats;
196 	qopt.stats.qstats = &sch->qstats;
197 
198 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
199 }
200 
201 static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
202 {
203 	unsigned int band = cl - q->classes;
204 
205 	return band < q->nstrict;
206 }
207 
208 static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
209 			    struct nlattr **tca, unsigned long *arg,
210 			    struct netlink_ext_ack *extack)
211 {
212 	struct ets_class *cl = ets_class_from_arg(sch, *arg);
213 	struct ets_sched *q = qdisc_priv(sch);
214 	struct nlattr *opt = tca[TCA_OPTIONS];
215 	struct nlattr *tb[TCA_ETS_MAX + 1];
216 	unsigned int quantum;
217 	int err;
218 
219 	/* Classes can be added and removed only through Qdisc_ops.change
220 	 * interface.
221 	 */
222 	if (!cl) {
223 		NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
224 		return -EOPNOTSUPP;
225 	}
226 
227 	if (!opt) {
228 		NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
229 		return -EINVAL;
230 	}
231 
232 	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
233 	if (err < 0)
234 		return err;
235 
236 	if (!tb[TCA_ETS_QUANTA_BAND])
237 		/* Nothing to configure. */
238 		return 0;
239 
240 	if (ets_class_is_strict(q, cl)) {
241 		NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
242 		return -EINVAL;
243 	}
244 
245 	err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
246 				extack);
247 	if (err)
248 		return err;
249 
250 	WRITE_ONCE(cl->quantum, quantum);
251 
252 	ets_offload_change(sch);
253 	return 0;
254 }
255 
256 static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
257 			   struct Qdisc *new, struct Qdisc **old,
258 			   struct netlink_ext_ack *extack)
259 {
260 	struct ets_class *cl = ets_class_from_arg(sch, arg);
261 
262 	if (!new) {
263 		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
264 					ets_class_id(sch, cl), NULL);
265 		if (!new)
266 			new = &noop_qdisc;
267 		else
268 			qdisc_hash_add(new, true);
269 	}
270 
271 	*old = qdisc_replace(sch, new, &cl->qdisc);
272 	ets_offload_graft(sch, new, *old, arg, extack);
273 	return 0;
274 }
275 
276 static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
277 {
278 	struct ets_class *cl = ets_class_from_arg(sch, arg);
279 
280 	return cl->qdisc;
281 }
282 
283 static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
284 {
285 	unsigned long band = TC_H_MIN(classid);
286 	struct ets_sched *q = qdisc_priv(sch);
287 
288 	if (band - 1 >= q->nbands)
289 		return 0;
290 	return band;
291 }
292 
293 static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
294 {
295 	struct ets_class *cl = ets_class_from_arg(sch, arg);
296 	struct ets_sched *q = qdisc_priv(sch);
297 
298 	/* We get notified about zero-length child Qdiscs as well if they are
299 	 * offloaded. Those aren't on the active list though, so don't attempt
300 	 * to remove them.
301 	 */
302 	if (!ets_class_is_strict(q, cl) && sch->q.qlen)
303 		list_del_init(&cl->alist);
304 }
305 
306 static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
307 			  struct sk_buff *skb, struct tcmsg *tcm)
308 {
309 	struct ets_class *cl = ets_class_from_arg(sch, arg);
310 	struct ets_sched *q = qdisc_priv(sch);
311 	struct nlattr *nest;
312 
313 	tcm->tcm_parent = TC_H_ROOT;
314 	tcm->tcm_handle = ets_class_id(sch, cl);
315 	tcm->tcm_info = cl->qdisc->handle;
316 
317 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
318 	if (!nest)
319 		goto nla_put_failure;
320 	if (!ets_class_is_strict(q, cl)) {
321 		if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, READ_ONCE(cl->quantum)))
322 			goto nla_put_failure;
323 	}
324 	return nla_nest_end(skb, nest);
325 
326 nla_put_failure:
327 	nla_nest_cancel(skb, nest);
328 	return -EMSGSIZE;
329 }
330 
331 static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
332 				struct gnet_dump *d)
333 {
334 	struct ets_class *cl = ets_class_from_arg(sch, arg);
335 	struct Qdisc *cl_q = cl->qdisc;
336 
337 	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
338 	    qdisc_qstats_copy(d, cl_q) < 0)
339 		return -1;
340 
341 	return 0;
342 }
343 
344 static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
345 {
346 	struct ets_sched *q = qdisc_priv(sch);
347 	int i;
348 
349 	if (arg->stop)
350 		return;
351 
352 	for (i = 0; i < q->nbands; i++) {
353 		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
354 			break;
355 	}
356 }
357 
358 static struct tcf_block *
359 ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
360 		    struct netlink_ext_ack *extack)
361 {
362 	struct ets_sched *q = qdisc_priv(sch);
363 
364 	if (cl) {
365 		NL_SET_ERR_MSG(extack, "ETS classid must be zero");
366 		return NULL;
367 	}
368 
369 	return q->block;
370 }
371 
372 static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
373 					u32 classid)
374 {
375 	return ets_class_find(sch, classid);
376 }
377 
/* Counterpart of ets_qdisc_bind_tcf(). Binding only performs a lookup, so
 * there is nothing to release here.
 */
static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
}
381 
382 static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
383 				      int *qerr)
384 {
385 	struct ets_sched *q = qdisc_priv(sch);
386 	u32 band = skb->priority;
387 	struct tcf_result res;
388 	struct tcf_proto *fl;
389 	int err;
390 
391 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
392 	if (TC_H_MAJ(skb->priority) != sch->handle) {
393 		fl = rcu_dereference_bh(q->filter_list);
394 		err = tcf_classify(skb, NULL, fl, &res, false);
395 #ifdef CONFIG_NET_CLS_ACT
396 		switch (err) {
397 		case TC_ACT_STOLEN:
398 		case TC_ACT_QUEUED:
399 		case TC_ACT_TRAP:
400 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
401 			fallthrough;
402 		case TC_ACT_SHOT:
403 			return NULL;
404 		}
405 #endif
406 		if (!fl || err < 0) {
407 			if (TC_H_MAJ(band))
408 				band = 0;
409 			return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
410 		}
411 		band = res.classid;
412 	}
413 	band = TC_H_MIN(band) - 1;
414 	if (band >= q->nbands)
415 		return &q->classes[q->prio2band[0]];
416 	return &q->classes[band];
417 }
418 
419 static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
420 			     struct sk_buff **to_free)
421 {
422 	unsigned int len = qdisc_pkt_len(skb);
423 	struct ets_sched *q = qdisc_priv(sch);
424 	struct ets_class *cl;
425 	int err = 0;
426 
427 	cl = ets_classify(skb, sch, &err);
428 	if (!cl) {
429 		if (err & __NET_XMIT_BYPASS)
430 			qdisc_qstats_drop(sch);
431 		__qdisc_drop(skb, to_free);
432 		return err;
433 	}
434 
435 	err = qdisc_enqueue(skb, cl->qdisc, to_free);
436 	if (unlikely(err != NET_XMIT_SUCCESS)) {
437 		if (net_xmit_drop_count(err)) {
438 			cl->qstats.drops++;
439 			qdisc_qstats_drop(sch);
440 		}
441 		return err;
442 	}
443 
444 	if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) {
445 		list_add_tail(&cl->alist, &q->active);
446 		cl->deficit = READ_ONCE(cl->quantum);
447 	}
448 
449 	qstats_backlog_add(sch, len);
450 	qdisc_qlen_inc(sch);
451 	return err;
452 }
453 
/* Account for @skb leaving the ETS qdisc: update byte/packet stats, reduce
 * the backlog and the queue length, then hand the same skb back.
 */
static struct sk_buff *
ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
{
	qdisc_bstats_update(sch, skb);
	qdisc_qstats_backlog_dec(sch, skb);
	qdisc_qlen_dec(sch);
	return skb;
}
462 
463 static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
464 {
465 	struct ets_sched *q = qdisc_priv(sch);
466 	struct ets_class *cl;
467 	struct sk_buff *skb;
468 	unsigned int band;
469 	unsigned int len;
470 
471 	while (1) {
472 		for (band = 0; band < q->nstrict; band++) {
473 			cl = &q->classes[band];
474 			skb = qdisc_dequeue_peeked(cl->qdisc);
475 			if (skb)
476 				return ets_qdisc_dequeue_skb(sch, skb);
477 		}
478 
479 		if (list_empty(&q->active))
480 			goto out;
481 
482 		cl = list_first_entry(&q->active, struct ets_class, alist);
483 		skb = cl->qdisc->ops->peek(cl->qdisc);
484 		if (!skb) {
485 			qdisc_warn_nonwc(__func__, cl->qdisc);
486 			goto out;
487 		}
488 
489 		len = qdisc_pkt_len(skb);
490 		if (len <= cl->deficit) {
491 			cl->deficit -= len;
492 			skb = qdisc_dequeue_peeked(cl->qdisc);
493 			if (unlikely(!skb))
494 				goto out;
495 			if (cl->qdisc->q.qlen == 0)
496 				list_del_init(&cl->alist);
497 			return ets_qdisc_dequeue_skb(sch, skb);
498 		}
499 
500 		cl->deficit += READ_ONCE(cl->quantum);
501 		list_move_tail(&cl->alist, &q->active);
502 	}
503 out:
504 	return NULL;
505 }
506 
507 static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
508 				   unsigned int nbands, u8 *priomap,
509 				   struct netlink_ext_ack *extack)
510 {
511 	const struct nlattr *attr;
512 	int prio = 0;
513 	u8 band;
514 	int rem;
515 	int err;
516 
517 	err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
518 				    ets_priomap_policy, NL_VALIDATE_STRICT,
519 				    extack);
520 	if (err)
521 		return err;
522 
523 	nla_for_each_nested(attr, priomap_attr, rem) {
524 		switch (nla_type(attr)) {
525 		case TCA_ETS_PRIOMAP_BAND:
526 			if (prio > TC_PRIO_MAX) {
527 				NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
528 				return -EINVAL;
529 			}
530 			band = nla_get_u8(attr);
531 			if (band >= nbands) {
532 				NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
533 				return -EINVAL;
534 			}
535 			priomap[prio++] = band;
536 			break;
537 		default:
538 			WARN_ON_ONCE(1); /* Validate should have caught this. */
539 			return -EINVAL;
540 		}
541 	}
542 
543 	return 0;
544 }
545 
546 static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
547 				  unsigned int nbands, unsigned int nstrict,
548 				  unsigned int *quanta,
549 				  struct netlink_ext_ack *extack)
550 {
551 	const struct nlattr *attr;
552 	int band = nstrict;
553 	int rem;
554 	int err;
555 
556 	err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
557 				    ets_quanta_policy, NL_VALIDATE_STRICT,
558 				    extack);
559 	if (err < 0)
560 		return err;
561 
562 	nla_for_each_nested(attr, quanta_attr, rem) {
563 		switch (nla_type(attr)) {
564 		case TCA_ETS_QUANTA_BAND:
565 			if (band >= nbands) {
566 				NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
567 				return -EINVAL;
568 			}
569 			err = ets_quantum_parse(sch, attr, &quanta[band++],
570 						extack);
571 			if (err)
572 				return err;
573 			break;
574 		default:
575 			WARN_ON_ONCE(1); /* Validate should have caught this. */
576 			return -EINVAL;
577 		}
578 	}
579 
580 	return 0;
581 }
582 
/* Apply a complete ETS configuration taken from @opt.
 *
 * The function first parses and validates everything into local variables
 * (band count, strict count, priomap, quanta), then allocates child qdiscs
 * for any bands being added, and only after that commits the result under
 * the qdisc tree lock. Child qdiscs of bands being removed are released after
 * the lock is dropped and the offload path has been updated.
 *
 * Returns 0 on success, -EINVAL or a parse error for invalid input, and
 * -ENOMEM when a child qdisc for a new band cannot be created.
 */
static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
			    struct netlink_ext_ack *extack)
{
	unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
	struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
	struct ets_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_ETS_MAX + 1];
	unsigned int oldbands = q->nbands;
	u8 priomap[TC_PRIO_MAX + 1];
	unsigned int nstrict = 0;
	unsigned int nbands;
	unsigned int i;
	int err;

	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETS_NBANDS]) {
		NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
		return -EINVAL;
	}
	nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
	if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
		return -EINVAL;
	}
	/* Unless overridden, traffic goes to the last band. */
	memset(priomap, nbands - 1, sizeof(priomap));

	/* The number of strict bands is optional and defaults to zero. */
	if (tb[TCA_ETS_NSTRICT]) {
		nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
		if (nstrict > nbands) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
			return -EINVAL;
		}
	}

	if (tb[TCA_ETS_PRIOMAP]) {
		err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
					      nbands, priomap, extack);
		if (err)
			return err;
	}

	if (tb[TCA_ETS_QUANTA]) {
		err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
					     nbands, nstrict, quanta, extack);
		if (err)
			return err;
	}
	/* If there are more bands than strict + quanta provided, the remaining
	 * ones are ETS with quantum of MTU. Initialize the missing values here.
	 */
	for (i = nstrict; i < nbands; i++) {
		if (!quanta[i])
			quanta[i] = psched_mtu(qdisc_dev(sch));
	}

	/* Before commit, make sure we can allocate all new qdiscs */
	for (i = oldbands; i < nbands; i++) {
		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					      ets_class_id(sch, &q->classes[i]),
					      extack);
		if (!queues[i]) {
			/* Undo the allocations made so far in this loop. */
			while (i > oldbands)
				qdisc_put(queues[--i]);
			return -ENOMEM;
		}
	}

	sch_tree_lock(sch);

	/* Bands that are going away: unlink them from the active list and
	 * purge whatever their child qdiscs still hold.
	 */
	for (i = nbands; i < oldbands; i++) {
		if (cl_is_active(&q->classes[i]))
			list_del_init(&q->classes[i].alist);
		qdisc_purge_queue(q->classes[i].qdisc);
	}

	WRITE_ONCE(q->nbands, nbands);
	/* Bands that were strict and are not any more: if they hold packets,
	 * put them on the active list with a deficit of one new quantum.
	 */
	for (i = nstrict; i < q->nstrict; i++) {
		if (q->classes[i].qdisc->q.qlen) {
			list_add_tail(&q->classes[i].alist, &q->active);
			q->classes[i].deficit = quanta[i];
		}
	}
	/* Bands that become strict must leave the active list. */
	for (i = q->nstrict; i < nstrict; i++) {
		if (cl_is_active(&q->classes[i]))
			list_del_init(&q->classes[i].alist);
	}
	WRITE_ONCE(q->nstrict, nstrict);
	memcpy(q->prio2band, priomap, sizeof(priomap));

	for (i = 0; i < q->nbands; i++)
		WRITE_ONCE(q->classes[i].quantum, quanta[i]);

	/* Install the child qdiscs allocated above for the new bands. */
	for (i = oldbands; i < q->nbands; i++) {
		q->classes[i].qdisc = queues[i];
		if (q->classes[i].qdisc != &noop_qdisc)
			qdisc_hash_add(q->classes[i].qdisc, true);
	}

	sch_tree_unlock(sch);

	ets_offload_change(sch);
	/* Release the child qdiscs of removed bands and clear their state. */
	for (i = q->nbands; i < oldbands; i++) {
		qdisc_put(q->classes[i].qdisc);
		q->classes[i].qdisc = NULL;
		WRITE_ONCE(q->classes[i].quantum, 0);
		q->classes[i].deficit = 0;
		gnet_stats_basic_sync_init(&q->classes[i].bstats);
		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
	}
	return 0;
}
698 
699 static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
700 			  struct netlink_ext_ack *extack)
701 {
702 	struct ets_sched *q = qdisc_priv(sch);
703 	int err, i;
704 
705 	if (!opt)
706 		return -EINVAL;
707 
708 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
709 	if (err)
710 		return err;
711 
712 	INIT_LIST_HEAD(&q->active);
713 	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
714 		INIT_LIST_HEAD(&q->classes[i].alist);
715 
716 	return ets_qdisc_change(sch, opt, extack);
717 }
718 
719 static void ets_qdisc_reset(struct Qdisc *sch)
720 {
721 	struct ets_sched *q = qdisc_priv(sch);
722 	int band;
723 
724 	for (band = q->nstrict; band < q->nbands; band++) {
725 		if (q->classes[band].qdisc->q.qlen)
726 			list_del_init(&q->classes[band].alist);
727 	}
728 	for (band = 0; band < q->nbands; band++)
729 		qdisc_reset(q->classes[band].qdisc);
730 }
731 
732 static void ets_qdisc_destroy(struct Qdisc *sch)
733 {
734 	struct ets_sched *q = qdisc_priv(sch);
735 	int band;
736 
737 	ets_offload_destroy(sch);
738 	tcf_block_put(q->block);
739 	for (band = 0; band < q->nbands; band++)
740 		qdisc_put(q->classes[band].qdisc);
741 }
742 
743 static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
744 {
745 	struct ets_sched *q = qdisc_priv(sch);
746 	struct nlattr *opts;
747 	struct nlattr *nest;
748 	u8 nbands, nstrict;
749 	int band;
750 	int prio;
751 	int err;
752 
753 	err = ets_offload_dump(sch);
754 	if (err)
755 		return err;
756 
757 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
758 	if (!opts)
759 		goto nla_err;
760 
761 	nbands = READ_ONCE(q->nbands);
762 	if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
763 		goto nla_err;
764 
765 	nstrict = READ_ONCE(q->nstrict);
766 	if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
767 		goto nla_err;
768 
769 	if (nbands > nstrict) {
770 		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
771 		if (!nest)
772 			goto nla_err;
773 
774 		for (band = nstrict; band < nbands; band++) {
775 			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
776 					READ_ONCE(q->classes[band].quantum)))
777 				goto nla_err;
778 		}
779 
780 		nla_nest_end(skb, nest);
781 	}
782 
783 	nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
784 	if (!nest)
785 		goto nla_err;
786 
787 	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
788 		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
789 			       READ_ONCE(q->prio2band[prio])))
790 			goto nla_err;
791 	}
792 
793 	nla_nest_end(skb, nest);
794 
795 	return nla_nest_end(skb, opts);
796 
797 nla_err:
798 	nla_nest_cancel(skb, opts);
799 	return -EMSGSIZE;
800 }
801 
/* Class-level operations of the ETS qdisc. */
static const struct Qdisc_class_ops ets_class_ops = {
	.change		= ets_class_change,
	.graft		= ets_class_graft,
	.leaf		= ets_class_leaf,
	.find		= ets_class_find,
	.qlen_notify	= ets_class_qlen_notify,
	.dump		= ets_class_dump,
	.dump_stats	= ets_class_dump_stats,
	.walk		= ets_qdisc_walk,
	.tcf_block	= ets_qdisc_tcf_block,
	.bind_tcf	= ets_qdisc_bind_tcf,
	.unbind_tcf	= ets_qdisc_unbind_tcf,
};
815 
/* Qdisc-level operations, registered under the id "ets". Both .init and
 * .change end up in ets_qdisc_change(); .peek uses the generic
 * qdisc_peek_dequeued() helper.
 */
static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
	.cl_ops		= &ets_class_ops,
	.id		= "ets",
	.priv_size	= sizeof(struct ets_sched),
	.enqueue	= ets_qdisc_enqueue,
	.dequeue	= ets_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.change		= ets_qdisc_change,
	.init		= ets_qdisc_init,
	.reset		= ets_qdisc_reset,
	.destroy	= ets_qdisc_destroy,
	.dump		= ets_qdisc_dump,
	.owner		= THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("ets");
831 
832 static int __init ets_init(void)
833 {
834 	return register_qdisc(&ets_qdisc_ops);
835 }
836 
/* Module exit point: unregister the ETS qdisc operations. */
static void __exit ets_exit(void)
{
	unregister_qdisc(&ets_qdisc_ops);
}
841 
/* Hook the entry/exit functions into the module loader and describe the module. */
module_init(ets_init);
module_exit(ets_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler");
846