1 // SPDX-License-Identifier: GPL-2.0 2 3 /* net/sched/sch_taprio.c Time Aware Priority Scheduler 4 * 5 * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> 6 * 7 */ 8 9 #include <linux/types.h> 10 #include <linux/slab.h> 11 #include <linux/kernel.h> 12 #include <linux/string.h> 13 #include <linux/list.h> 14 #include <linux/errno.h> 15 #include <linux/skbuff.h> 16 #include <linux/math64.h> 17 #include <linux/module.h> 18 #include <linux/spinlock.h> 19 #include <net/netlink.h> 20 #include <net/pkt_sched.h> 21 #include <net/pkt_cls.h> 22 #include <net/sch_generic.h> 23 24 static LIST_HEAD(taprio_list); 25 static DEFINE_SPINLOCK(taprio_list_lock); 26 27 #define TAPRIO_ALL_GATES_OPEN -1 28 29 struct sched_entry { 30 struct list_head list; 31 32 /* The instant that this entry "closes" and the next one 33 * should open, the qdisc will make some effort so that no 34 * packet leaves after this time. 35 */ 36 ktime_t close_time; 37 atomic_t budget; 38 int index; 39 u32 gate_mask; 40 u32 interval; 41 u8 command; 42 }; 43 44 struct taprio_sched { 45 struct Qdisc **qdiscs; 46 struct Qdisc *root; 47 s64 base_time; 48 int clockid; 49 atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ 50 * speeds it's sub-nanoseconds per byte 51 */ 52 size_t num_entries; 53 54 /* Protects the update side of the RCU protected current_entry */ 55 spinlock_t current_entry_lock; 56 struct sched_entry __rcu *current_entry; 57 struct list_head entries; 58 ktime_t (*get_time)(void); 59 struct hrtimer advance_timer; 60 struct list_head taprio_list; 61 }; 62 63 static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, 64 struct sk_buff **to_free) 65 { 66 struct taprio_sched *q = qdisc_priv(sch); 67 struct Qdisc *child; 68 int queue; 69 70 queue = skb_get_queue_mapping(skb); 71 72 child = q->qdiscs[queue]; 73 if (unlikely(!child)) 74 return qdisc_drop(skb, sch, to_free); 75 76 qdisc_qstats_backlog_inc(sch, skb); 77 sch->q.qlen++; 78 79 return qdisc_enqueue(skb, child, to_free); 80 } 81 82 static struct sk_buff *taprio_peek(struct Qdisc *sch) 83 { 84 struct taprio_sched *q = qdisc_priv(sch); 85 struct net_device *dev = qdisc_dev(sch); 86 struct sched_entry *entry; 87 struct sk_buff *skb; 88 u32 gate_mask; 89 int i; 90 91 rcu_read_lock(); 92 entry = rcu_dereference(q->current_entry); 93 gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; 94 rcu_read_unlock(); 95 96 if (!gate_mask) 97 return NULL; 98 99 for (i = 0; i < dev->num_tx_queues; i++) { 100 struct Qdisc *child = q->qdiscs[i]; 101 int prio; 102 u8 tc; 103 104 if (unlikely(!child)) 105 continue; 106 107 skb = child->ops->peek(child); 108 if (!skb) 109 continue; 110 111 prio = skb->priority; 112 tc = netdev_get_prio_tc_map(dev, prio); 113 114 if (!(gate_mask & BIT(tc))) 115 continue; 116 117 return skb; 118 } 119 120 return NULL; 121 } 122 123 static inline int length_to_duration(struct taprio_sched *q, int len) 124 { 125 return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); 126 } 127 128 static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) 129 { 130 atomic_set(&entry->budget, 131 div64_u64((u64)entry->interval * 1000, 132 atomic64_read(&q->picos_per_byte))); 133 } 134 135 static struct sk_buff *taprio_dequeue(struct Qdisc *sch) 136 { 137 struct taprio_sched *q = qdisc_priv(sch); 138 struct net_device *dev = qdisc_dev(sch); 139 struct sched_entry *entry; 140 struct sk_buff *skb; 141 u32 gate_mask; 142 int i; 143 144 if (atomic64_read(&q->picos_per_byte) == -1) { 145 WARN_ONCE(1, "taprio: dequeue() called with unknown picos per byte."); 146 return NULL; 147 } 148 149 rcu_read_lock(); 150 entry = rcu_dereference(q->current_entry); 151 /* if there's no entry, it means that the schedule didn't 152 * start yet, so force all gates to be open, this is in 153 * accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5 154 * "AdminGateSates" 155 */ 156 gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN; 157 rcu_read_unlock(); 158 159 if (!gate_mask) 160 return NULL; 161 162 for (i = 0; i < dev->num_tx_queues; i++) { 163 struct Qdisc *child = q->qdiscs[i]; 164 ktime_t guard; 165 int prio; 166 int len; 167 u8 tc; 168 169 if (unlikely(!child)) 170 continue; 171 172 skb = child->ops->peek(child); 173 if (!skb) 174 continue; 175 176 prio = skb->priority; 177 tc = netdev_get_prio_tc_map(dev, prio); 178 179 if (!(gate_mask & BIT(tc))) 180 continue; 181 182 len = qdisc_pkt_len(skb); 183 guard = ktime_add_ns(q->get_time(), 184 length_to_duration(q, len)); 185 186 /* In the case that there's no gate entry, there's no 187 * guard band ... 188 */ 189 if (gate_mask != TAPRIO_ALL_GATES_OPEN && 190 ktime_after(guard, entry->close_time)) 191 continue; 192 193 /* ... and no budget. */ 194 if (gate_mask != TAPRIO_ALL_GATES_OPEN && 195 atomic_sub_return(len, &entry->budget) < 0) 196 continue; 197 198 skb = child->ops->dequeue(child); 199 if (unlikely(!skb)) 200 return NULL; 201 202 qdisc_bstats_update(sch, skb); 203 qdisc_qstats_backlog_dec(sch, skb); 204 sch->q.qlen--; 205 206 return skb; 207 } 208 209 return NULL; 210 } 211 212 static enum hrtimer_restart advance_sched(struct hrtimer *timer) 213 { 214 struct taprio_sched *q = container_of(timer, struct taprio_sched, 215 advance_timer); 216 struct sched_entry *entry, *next; 217 struct Qdisc *sch = q->root; 218 ktime_t close_time; 219 220 spin_lock(&q->current_entry_lock); 221 entry = rcu_dereference_protected(q->current_entry, 222 lockdep_is_held(&q->current_entry_lock)); 223 224 /* This is the case that it's the first time that the schedule 225 * runs, so it only happens once per schedule. The first entry 226 * is pre-calculated during the schedule initialization. 227 */ 228 if (unlikely(!entry)) { 229 next = list_first_entry(&q->entries, struct sched_entry, 230 list); 231 close_time = next->close_time; 232 goto first_run; 233 } 234 235 if (list_is_last(&entry->list, &q->entries)) 236 next = list_first_entry(&q->entries, struct sched_entry, 237 list); 238 else 239 next = list_next_entry(entry, list); 240 241 close_time = ktime_add_ns(entry->close_time, next->interval); 242 243 next->close_time = close_time; 244 taprio_set_budget(q, next); 245 246 first_run: 247 rcu_assign_pointer(q->current_entry, next); 248 spin_unlock(&q->current_entry_lock); 249 250 hrtimer_set_expires(&q->advance_timer, close_time); 251 252 rcu_read_lock(); 253 __netif_schedule(sch); 254 rcu_read_unlock(); 255 256 return HRTIMER_RESTART; 257 } 258 259 static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { 260 [TCA_TAPRIO_SCHED_ENTRY_INDEX] = { .type = NLA_U32 }, 261 [TCA_TAPRIO_SCHED_ENTRY_CMD] = { .type = NLA_U8 }, 262 [TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 }, 263 [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, 264 }; 265 266 static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { 267 [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, 268 }; 269 270 static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { 271 [TCA_TAPRIO_ATTR_PRIOMAP] = { 272 .len = sizeof(struct tc_mqprio_qopt) 273 }, 274 [TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] = { .type = NLA_NESTED }, 275 [TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 }, 276 [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED }, 277 [TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 }, 278 }; 279 280 static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, 281 struct netlink_ext_ack *extack) 282 { 283 u32 interval = 0; 284 285 if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) 286 entry->command = nla_get_u8( 287 tb[TCA_TAPRIO_SCHED_ENTRY_CMD]); 288 289 if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]) 290 entry->gate_mask = nla_get_u32( 291 tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]); 292 293 if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]) 294 interval = nla_get_u32( 295 tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); 296 297 if (interval == 0) { 298 NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); 299 return -EINVAL; 300 } 301 302 entry->interval = interval; 303 304 return 0; 305 } 306 307 static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, 308 int index, struct netlink_ext_ack *extack) 309 { 310 struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; 311 int err; 312 313 err = nla_parse_nested(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n, 314 entry_policy, NULL); 315 if (err < 0) { 316 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 317 return -EINVAL; 318 } 319 320 entry->index = index; 321 322 return fill_sched_entry(tb, entry, extack); 323 } 324 325 /* Returns the number of entries in case of success */ 326 static int parse_sched_single_entry(struct nlattr *n, 327 struct taprio_sched *q, 328 struct netlink_ext_ack *extack) 329 { 330 struct nlattr *tb_entry[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; 331 struct nlattr *tb_list[TCA_TAPRIO_SCHED_MAX + 1] = { }; 332 struct sched_entry *entry; 333 bool found = false; 334 u32 index; 335 int err; 336 337 err = nla_parse_nested(tb_list, TCA_TAPRIO_SCHED_MAX, 338 n, entry_list_policy, NULL); 339 if (err < 0) { 340 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 341 return -EINVAL; 342 } 343 344 if (!tb_list[TCA_TAPRIO_SCHED_ENTRY]) { 345 NL_SET_ERR_MSG(extack, "Single-entry must include an entry"); 346 return -EINVAL; 347 } 348 349 err = nla_parse_nested(tb_entry, TCA_TAPRIO_SCHED_ENTRY_MAX, 350 tb_list[TCA_TAPRIO_SCHED_ENTRY], 351 entry_policy, NULL); 352 if (err < 0) { 353 NL_SET_ERR_MSG(extack, "Could not parse nested entry"); 354 return -EINVAL; 355 } 356 357 if (!tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]) { 358 NL_SET_ERR_MSG(extack, "Entry must specify an index\n"); 359 return -EINVAL; 360 } 361 362 index = nla_get_u32(tb_entry[TCA_TAPRIO_SCHED_ENTRY_INDEX]); 363 if (index >= q->num_entries) { 364 NL_SET_ERR_MSG(extack, "Index for single entry exceeds number of entries in schedule"); 365 return -EINVAL; 366 } 367 368 list_for_each_entry(entry, &q->entries, list) { 369 if (entry->index == index) { 370 found = true; 371 break; 372 } 373 } 374 375 if (!found) { 376 NL_SET_ERR_MSG(extack, "Could not find entry"); 377 return -ENOENT; 378 } 379 380 err = fill_sched_entry(tb_entry, entry, extack); 381 if (err < 0) 382 return err; 383 384 return q->num_entries; 385 } 386 387 static int parse_sched_list(struct nlattr *list, 388 struct taprio_sched *q, 389 struct netlink_ext_ack *extack) 390 { 391 struct nlattr *n; 392 int err, rem; 393 int i = 0; 394 395 if (!list) 396 return -EINVAL; 397 398 nla_for_each_nested(n, list, rem) { 399 struct sched_entry *entry; 400 401 if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) { 402 NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'"); 403 continue; 404 } 405 406 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 407 if (!entry) { 408 NL_SET_ERR_MSG(extack, "Not enough memory for entry"); 409 return -ENOMEM; 410 } 411 412 err = parse_sched_entry(n, entry, i, extack); 413 if (err < 0) { 414 kfree(entry); 415 return err; 416 } 417 418 list_add_tail(&entry->list, &q->entries); 419 i++; 420 } 421 422 q->num_entries = i; 423 424 return i; 425 } 426 427 /* Returns the number of entries in case of success */ 428 static int parse_taprio_opt(struct nlattr **tb, struct taprio_sched *q, 429 struct netlink_ext_ack *extack) 430 { 431 int err = 0; 432 int clockid; 433 434 if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST] && 435 tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) 436 return -EINVAL; 437 438 if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] && q->num_entries == 0) 439 return -EINVAL; 440 441 if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) 442 return -EINVAL; 443 444 if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]) 445 q->base_time = nla_get_s64( 446 tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]); 447 448 if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { 449 clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); 450 451 /* We only support static clockids and we don't allow 452 * for it to be modified after the first init. 453 */ 454 if (clockid < 0 || (q->clockid != -1 && q->clockid != clockid)) 455 return -EINVAL; 456 457 q->clockid = clockid; 458 } 459 460 if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) 461 err = parse_sched_list( 462 tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], q, extack); 463 else if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) 464 err = parse_sched_single_entry( 465 tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY], q, extack); 466 467 /* parse_sched_* return the number of entries in the schedule, 468 * a schedule with zero entries is an error. 469 */ 470 if (err == 0) { 471 NL_SET_ERR_MSG(extack, "The schedule should contain at least one entry"); 472 return -EINVAL; 473 } 474 475 return err; 476 } 477 478 static int taprio_parse_mqprio_opt(struct net_device *dev, 479 struct tc_mqprio_qopt *qopt, 480 struct netlink_ext_ack *extack) 481 { 482 int i, j; 483 484 if (!qopt) { 485 NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); 486 return -EINVAL; 487 } 488 489 /* Verify num_tc is not out of max range */ 490 if (qopt->num_tc > TC_MAX_QUEUE) { 491 NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range"); 492 return -EINVAL; 493 } 494 495 /* taprio imposes that traffic classes map 1:n to tx queues */ 496 if (qopt->num_tc > dev->num_tx_queues) { 497 NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues"); 498 return -EINVAL; 499 } 500 501 /* Verify priority mapping uses valid tcs */ 502 for (i = 0; i < TC_BITMASK + 1; i++) { 503 if (qopt->prio_tc_map[i] >= qopt->num_tc) { 504 NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping"); 505 return -EINVAL; 506 } 507 } 508 509 for (i = 0; i < qopt->num_tc; i++) { 510 unsigned int last = qopt->offset[i] + qopt->count[i]; 511 512 /* Verify the queue count is in tx range being equal to the 513 * real_num_tx_queues indicates the last queue is in use. 514 */ 515 if (qopt->offset[i] >= dev->num_tx_queues || 516 !qopt->count[i] || 517 last > dev->real_num_tx_queues) { 518 NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping"); 519 return -EINVAL; 520 } 521 522 /* Verify that the offset and counts do not overlap */ 523 for (j = i + 1; j < qopt->num_tc; j++) { 524 if (last > qopt->offset[j]) { 525 NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping"); 526 return -EINVAL; 527 } 528 } 529 } 530 531 return 0; 532 } 533 534 static int taprio_get_start_time(struct Qdisc *sch, ktime_t *start) 535 { 536 struct taprio_sched *q = qdisc_priv(sch); 537 struct sched_entry *entry; 538 ktime_t now, base, cycle; 539 s64 n; 540 541 base = ns_to_ktime(q->base_time); 542 now = q->get_time(); 543 544 if (ktime_after(base, now)) { 545 *start = base; 546 return 0; 547 } 548 549 /* Calculate the cycle_time, by summing all the intervals. 550 */ 551 cycle = 0; 552 list_for_each_entry(entry, &q->entries, list) 553 cycle = ktime_add_ns(cycle, entry->interval); 554 555 /* The qdisc is expected to have at least one sched_entry. Moreover, 556 * any entry must have 'interval' > 0. Thus if the cycle time is zero, 557 * something went really wrong. In that case, we should warn about this 558 * inconsistent state and return error. 559 */ 560 if (WARN_ON(!cycle)) 561 return -EFAULT; 562 563 /* Schedule the start time for the beginning of the next 564 * cycle. 565 */ 566 n = div64_s64(ktime_sub_ns(now, base), cycle); 567 *start = ktime_add_ns(base, (n + 1) * cycle); 568 return 0; 569 } 570 571 static void taprio_start_sched(struct Qdisc *sch, ktime_t start) 572 { 573 struct taprio_sched *q = qdisc_priv(sch); 574 struct sched_entry *first; 575 unsigned long flags; 576 577 spin_lock_irqsave(&q->current_entry_lock, flags); 578 579 first = list_first_entry(&q->entries, struct sched_entry, 580 list); 581 582 first->close_time = ktime_add_ns(start, first->interval); 583 taprio_set_budget(q, first); 584 rcu_assign_pointer(q->current_entry, NULL); 585 586 spin_unlock_irqrestore(&q->current_entry_lock, flags); 587 588 hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS); 589 } 590 591 static void taprio_set_picos_per_byte(struct net_device *dev, 592 struct taprio_sched *q) 593 { 594 struct ethtool_link_ksettings ecmd; 595 int picos_per_byte = -1; 596 597 if (!__ethtool_get_link_ksettings(dev, &ecmd) && 598 ecmd.base.speed != SPEED_UNKNOWN) 599 picos_per_byte = div64_s64(NSEC_PER_SEC * 1000LL * 8, 600 ecmd.base.speed * 1000 * 1000); 601 602 atomic64_set(&q->picos_per_byte, picos_per_byte); 603 netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", 604 dev->name, (long long)atomic64_read(&q->picos_per_byte), 605 ecmd.base.speed); 606 } 607 608 static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, 609 void *ptr) 610 { 611 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 612 struct net_device *qdev; 613 struct taprio_sched *q; 614 bool found = false; 615 616 ASSERT_RTNL(); 617 618 if (event != NETDEV_UP && event != NETDEV_CHANGE) 619 return NOTIFY_DONE; 620 621 spin_lock(&taprio_list_lock); 622 list_for_each_entry(q, &taprio_list, taprio_list) { 623 qdev = qdisc_dev(q->root); 624 if (qdev == dev) { 625 found = true; 626 break; 627 } 628 } 629 spin_unlock(&taprio_list_lock); 630 631 if (found) 632 taprio_set_picos_per_byte(dev, q); 633 634 return NOTIFY_DONE; 635 } 636 637 static int taprio_change(struct Qdisc *sch, struct nlattr *opt, 638 struct netlink_ext_ack *extack) 639 { 640 struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; 641 struct taprio_sched *q = qdisc_priv(sch); 642 struct net_device *dev = qdisc_dev(sch); 643 struct tc_mqprio_qopt *mqprio = NULL; 644 int i, err, size; 645 ktime_t start; 646 647 err = nla_parse_nested(tb, TCA_TAPRIO_ATTR_MAX, opt, 648 taprio_policy, extack); 649 if (err < 0) 650 return err; 651 652 if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) 653 mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); 654 655 err = taprio_parse_mqprio_opt(dev, mqprio, extack); 656 if (err < 0) 657 return err; 658 659 /* A schedule with less than one entry is an error */ 660 size = parse_taprio_opt(tb, q, extack); 661 if (size < 0) 662 return size; 663 664 hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); 665 q->advance_timer.function = advance_sched; 666 667 switch (q->clockid) { 668 case CLOCK_REALTIME: 669 q->get_time = ktime_get_real; 670 break; 671 case CLOCK_MONOTONIC: 672 q->get_time = ktime_get; 673 break; 674 case CLOCK_BOOTTIME: 675 q->get_time = ktime_get_boottime; 676 break; 677 case CLOCK_TAI: 678 q->get_time = ktime_get_clocktai; 679 break; 680 default: 681 return -ENOTSUPP; 682 } 683 684 for (i = 0; i < dev->num_tx_queues; i++) { 685 struct netdev_queue *dev_queue; 686 struct Qdisc *qdisc; 687 688 dev_queue = netdev_get_tx_queue(dev, i); 689 qdisc = qdisc_create_dflt(dev_queue, 690 &pfifo_qdisc_ops, 691 TC_H_MAKE(TC_H_MAJ(sch->handle), 692 TC_H_MIN(i + 1)), 693 extack); 694 if (!qdisc) 695 return -ENOMEM; 696 697 if (i < dev->real_num_tx_queues) 698 qdisc_hash_add(qdisc, false); 699 700 q->qdiscs[i] = qdisc; 701 } 702 703 if (mqprio) { 704 netdev_set_num_tc(dev, mqprio->num_tc); 705 for (i = 0; i < mqprio->num_tc; i++) 706 netdev_set_tc_queue(dev, i, 707 mqprio->count[i], 708 mqprio->offset[i]); 709 710 /* Always use supplied priority mappings */ 711 for (i = 0; i < TC_BITMASK + 1; i++) 712 netdev_set_prio_tc_map(dev, i, 713 mqprio->prio_tc_map[i]); 714 } 715 716 taprio_set_picos_per_byte(dev, q); 717 718 err = taprio_get_start_time(sch, &start); 719 if (err < 0) { 720 NL_SET_ERR_MSG(extack, "Internal error: failed get start time"); 721 return err; 722 } 723 724 taprio_start_sched(sch, start); 725 726 return 0; 727 } 728 729 static void taprio_destroy(struct Qdisc *sch) 730 { 731 struct taprio_sched *q = qdisc_priv(sch); 732 struct net_device *dev = qdisc_dev(sch); 733 struct sched_entry *entry, *n; 734 unsigned int i; 735 736 spin_lock(&taprio_list_lock); 737 list_del(&q->taprio_list); 738 spin_unlock(&taprio_list_lock); 739 740 hrtimer_cancel(&q->advance_timer); 741 742 if (q->qdiscs) { 743 for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) 744 qdisc_put(q->qdiscs[i]); 745 746 kfree(q->qdiscs); 747 } 748 q->qdiscs = NULL; 749 750 netdev_set_num_tc(dev, 0); 751 752 list_for_each_entry_safe(entry, n, &q->entries, list) { 753 list_del(&entry->list); 754 kfree(entry); 755 } 756 } 757 758 static int taprio_init(struct Qdisc *sch, struct nlattr *opt, 759 struct netlink_ext_ack *extack) 760 { 761 struct taprio_sched *q = qdisc_priv(sch); 762 struct net_device *dev = qdisc_dev(sch); 763 764 INIT_LIST_HEAD(&q->entries); 765 spin_lock_init(&q->current_entry_lock); 766 767 /* We may overwrite the configuration later */ 768 hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); 769 770 q->root = sch; 771 772 /* We only support static clockids. Use an invalid value as default 773 * and get the valid one on taprio_change(). 774 */ 775 q->clockid = -1; 776 777 if (sch->parent != TC_H_ROOT) 778 return -EOPNOTSUPP; 779 780 if (!netif_is_multiqueue(dev)) 781 return -EOPNOTSUPP; 782 783 /* pre-allocate qdisc, attachment can't fail */ 784 q->qdiscs = kcalloc(dev->num_tx_queues, 785 sizeof(q->qdiscs[0]), 786 GFP_KERNEL); 787 788 if (!q->qdiscs) 789 return -ENOMEM; 790 791 if (!opt) 792 return -EINVAL; 793 794 spin_lock(&taprio_list_lock); 795 list_add(&q->taprio_list, &taprio_list); 796 spin_unlock(&taprio_list_lock); 797 798 return taprio_change(sch, opt, extack); 799 } 800 801 static struct netdev_queue *taprio_queue_get(struct Qdisc *sch, 802 unsigned long cl) 803 { 804 struct net_device *dev = qdisc_dev(sch); 805 unsigned long ntx = cl - 1; 806 807 if (ntx >= dev->num_tx_queues) 808 return NULL; 809 810 return netdev_get_tx_queue(dev, ntx); 811 } 812 813 static int taprio_graft(struct Qdisc *sch, unsigned long cl, 814 struct Qdisc *new, struct Qdisc **old, 815 struct netlink_ext_ack *extack) 816 { 817 struct taprio_sched *q = qdisc_priv(sch); 818 struct net_device *dev = qdisc_dev(sch); 819 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 820 821 if (!dev_queue) 822 return -EINVAL; 823 824 if (dev->flags & IFF_UP) 825 dev_deactivate(dev); 826 827 *old = q->qdiscs[cl - 1]; 828 q->qdiscs[cl - 1] = new; 829 830 if (new) 831 new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 832 833 if (dev->flags & IFF_UP) 834 dev_activate(dev); 835 836 return 0; 837 } 838 839 static int dump_entry(struct sk_buff *msg, 840 const struct sched_entry *entry) 841 { 842 struct nlattr *item; 843 844 item = nla_nest_start(msg, TCA_TAPRIO_SCHED_ENTRY); 845 if (!item) 846 return -ENOSPC; 847 848 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index)) 849 goto nla_put_failure; 850 851 if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command)) 852 goto nla_put_failure; 853 854 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, 855 entry->gate_mask)) 856 goto nla_put_failure; 857 858 if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL, 859 entry->interval)) 860 goto nla_put_failure; 861 862 return nla_nest_end(msg, item); 863 864 nla_put_failure: 865 nla_nest_cancel(msg, item); 866 return -1; 867 } 868 869 static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) 870 { 871 struct taprio_sched *q = qdisc_priv(sch); 872 struct net_device *dev = qdisc_dev(sch); 873 struct tc_mqprio_qopt opt = { 0 }; 874 struct nlattr *nest, *entry_list; 875 struct sched_entry *entry; 876 unsigned int i; 877 878 opt.num_tc = netdev_get_num_tc(dev); 879 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); 880 881 for (i = 0; i < netdev_get_num_tc(dev); i++) { 882 opt.count[i] = dev->tc_to_txq[i].count; 883 opt.offset[i] = dev->tc_to_txq[i].offset; 884 } 885 886 nest = nla_nest_start(skb, TCA_OPTIONS); 887 if (!nest) 888 return -ENOSPC; 889 890 if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) 891 goto options_error; 892 893 if (nla_put_s64(skb, TCA_TAPRIO_ATTR_SCHED_BASE_TIME, 894 q->base_time, TCA_TAPRIO_PAD)) 895 goto options_error; 896 897 if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) 898 goto options_error; 899 900 entry_list = nla_nest_start(skb, TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST); 901 if (!entry_list) 902 goto options_error; 903 904 list_for_each_entry(entry, &q->entries, list) { 905 if (dump_entry(skb, entry) < 0) 906 goto options_error; 907 } 908 909 nla_nest_end(skb, entry_list); 910 911 return nla_nest_end(skb, nest); 912 913 options_error: 914 nla_nest_cancel(skb, nest); 915 return -1; 916 } 917 918 static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) 919 { 920 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 921 922 if (!dev_queue) 923 return NULL; 924 925 return dev_queue->qdisc_sleeping; 926 } 927 928 static unsigned long taprio_find(struct Qdisc *sch, u32 classid) 929 { 930 unsigned int ntx = TC_H_MIN(classid); 931 932 if (!taprio_queue_get(sch, ntx)) 933 return 0; 934 return ntx; 935 } 936 937 static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, 938 struct sk_buff *skb, struct tcmsg *tcm) 939 { 940 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 941 942 tcm->tcm_parent = TC_H_ROOT; 943 tcm->tcm_handle |= TC_H_MIN(cl); 944 tcm->tcm_info = dev_queue->qdisc_sleeping->handle; 945 946 return 0; 947 } 948 949 static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, 950 struct gnet_dump *d) 951 __releases(d->lock) 952 __acquires(d->lock) 953 { 954 struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); 955 956 sch = dev_queue->qdisc_sleeping; 957 if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || 958 qdisc_qstats_copy(d, sch) < 0) 959 return -1; 960 return 0; 961 } 962 963 static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) 964 { 965 struct net_device *dev = qdisc_dev(sch); 966 unsigned long ntx; 967 968 if (arg->stop) 969 return; 970 971 arg->count = arg->skip; 972 for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { 973 if (arg->fn(sch, ntx + 1, arg) < 0) { 974 arg->stop = 1; 975 break; 976 } 977 arg->count++; 978 } 979 } 980 981 static struct netdev_queue *taprio_select_queue(struct Qdisc *sch, 982 struct tcmsg *tcm) 983 { 984 return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); 985 } 986 987 static const struct Qdisc_class_ops taprio_class_ops = { 988 .graft = taprio_graft, 989 .leaf = taprio_leaf, 990 .find = taprio_find, 991 .walk = taprio_walk, 992 .dump = taprio_dump_class, 993 .dump_stats = taprio_dump_class_stats, 994 .select_queue = taprio_select_queue, 995 }; 996 997 static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { 998 .cl_ops = &taprio_class_ops, 999 .id = "taprio", 1000 .priv_size = sizeof(struct taprio_sched), 1001 .init = taprio_init, 1002 .destroy = taprio_destroy, 1003 .peek = taprio_peek, 1004 .dequeue = taprio_dequeue, 1005 .enqueue = taprio_enqueue, 1006 .dump = taprio_dump, 1007 .owner = THIS_MODULE, 1008 }; 1009 1010 static struct notifier_block taprio_device_notifier = { 1011 .notifier_call = taprio_dev_notifier, 1012 }; 1013 1014 static int __init taprio_module_init(void) 1015 { 1016 int err = register_netdevice_notifier(&taprio_device_notifier); 1017 1018 if (err) 1019 return err; 1020 1021 return register_qdisc(&taprio_qdisc_ops); 1022 } 1023 1024 static void __exit taprio_module_exit(void) 1025 { 1026 unregister_qdisc(&taprio_qdisc_ops); 1027 unregister_netdevice_notifier(&taprio_device_notifier); 1028 } 1029 1030 module_init(taprio_module_init); 1031 module_exit(taprio_module_exit); 1032 MODULE_LICENSE("GPL"); 1033