1 // SPDX-License-Identifier: GPL-2.0 2 3 /* net/sched/sch_etf.c Earliest TxTime First queueing discipline. 4 * 5 * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com> 6 * Vinicius Costa Gomes <vinicius.gomes@intel.com> 7 */ 8 9 #include <linux/module.h> 10 #include <linux/types.h> 11 #include <linux/kernel.h> 12 #include <linux/string.h> 13 #include <linux/errno.h> 14 #include <linux/errqueue.h> 15 #include <linux/rbtree.h> 16 #include <linux/skbuff.h> 17 #include <linux/posix-timers.h> 18 #include <net/netlink.h> 19 #include <net/sch_generic.h> 20 #include <net/pkt_sched.h> 21 #include <net/sock.h> 22 23 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON) 24 #define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON) 25 #define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK) 26 27 struct etf_sched_data { 28 bool offload; 29 bool deadline_mode; 30 bool skip_sock_check; 31 int clockid; 32 int queue; 33 s32 delta; /* in ns */ 34 ktime_t last; /* The txtime of the last skb sent to the netdevice. */ 35 struct rb_root_cached head; 36 struct qdisc_watchdog watchdog; 37 ktime_t (*get_time)(void); 38 }; 39 40 static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = { 41 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) }, 42 }; 43 44 static inline int validate_input_params(struct tc_etf_qopt *qopt, 45 struct netlink_ext_ack *extack) 46 { 47 /* Check if params comply to the following rules: 48 * * Clockid and delta must be valid. 49 * 50 * * Dynamic clockids are not supported. 51 * 52 * * Delta must be a positive integer. 53 * 54 * Also note that for the HW offload case, we must 55 * expect that system clocks have been synchronized to PHC. 56 */ 57 if (qopt->clockid < 0) { 58 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported"); 59 return -ENOTSUPP; 60 } 61 62 if (qopt->clockid != CLOCK_TAI) { 63 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used"); 64 return -EINVAL; 65 } 66 67 if (qopt->delta < 0) { 68 NL_SET_ERR_MSG(extack, "Delta must be positive"); 69 return -EINVAL; 70 } 71 72 return 0; 73 } 74 75 static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb) 76 { 77 struct etf_sched_data *q = qdisc_priv(sch); 78 ktime_t txtime = nskb->tstamp; 79 struct sock *sk = nskb->sk; 80 ktime_t now; 81 82 if (q->skip_sock_check) 83 goto skip; 84 85 if (!sk || !sk_fullsock(sk)) 86 return false; 87 88 if (!sock_flag(sk, SOCK_TXTIME)) 89 return false; 90 91 /* We don't perform crosstimestamping. 92 * Drop if packet's clockid differs from qdisc's. 93 */ 94 if (sk->sk_clockid != q->clockid) 95 return false; 96 97 if (sk->sk_txtime_deadline_mode != q->deadline_mode) 98 return false; 99 100 skip: 101 now = q->get_time(); 102 if (ktime_before(txtime, now) || ktime_before(txtime, q->last)) 103 return false; 104 105 return true; 106 } 107 108 static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch) 109 { 110 struct etf_sched_data *q = qdisc_priv(sch); 111 struct rb_node *p; 112 113 p = rb_first_cached(&q->head); 114 if (!p) 115 return NULL; 116 117 return rb_to_skb(p); 118 } 119 120 static void reset_watchdog(struct Qdisc *sch) 121 { 122 struct etf_sched_data *q = qdisc_priv(sch); 123 struct sk_buff *skb = etf_peek_timesortedlist(sch); 124 ktime_t next; 125 126 if (!skb) { 127 qdisc_watchdog_cancel(&q->watchdog); 128 return; 129 } 130 131 next = ktime_sub_ns(skb->tstamp, q->delta); 132 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next)); 133 } 134 135 static void report_sock_error(struct sk_buff *skb, u32 err, u8 code) 136 { 137 struct sock_exterr_skb *serr; 138 struct sk_buff *clone; 139 ktime_t txtime = skb->tstamp; 140 struct sock *sk = skb->sk; 141 142 if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors)) 143 return; 144 145 clone = skb_clone(skb, GFP_ATOMIC); 146 if (!clone) 147 return; 148 149 serr = SKB_EXT_ERR(clone); 150 serr->ee.ee_errno = err; 151 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME; 152 serr->ee.ee_type = 0; 153 serr->ee.ee_code = code; 154 serr->ee.ee_pad = 0; 155 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */ 156 serr->ee.ee_info = txtime; /* low part of tstamp */ 157 158 if (sock_queue_err_skb(sk, clone)) 159 kfree_skb(clone); 160 } 161 162 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch, 163 struct sk_buff **to_free) 164 { 165 struct etf_sched_data *q = qdisc_priv(sch); 166 struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL; 167 ktime_t txtime = nskb->tstamp; 168 bool leftmost = true; 169 170 if (!is_packet_valid(sch, nskb)) { 171 report_sock_error(nskb, EINVAL, 172 SO_EE_CODE_TXTIME_INVALID_PARAM); 173 return qdisc_drop(nskb, sch, to_free); 174 } 175 176 while (*p) { 177 struct sk_buff *skb; 178 179 parent = *p; 180 skb = rb_to_skb(parent); 181 if (ktime_compare(txtime, skb->tstamp) >= 0) { 182 p = &parent->rb_right; 183 leftmost = false; 184 } else { 185 p = &parent->rb_left; 186 } 187 } 188 rb_link_node(&nskb->rbnode, parent, p); 189 rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost); 190 191 qdisc_qstats_backlog_inc(sch, nskb); 192 sch->q.qlen++; 193 194 /* Now we may need to re-arm the qdisc watchdog for the next packet. */ 195 reset_watchdog(sch); 196 197 return NET_XMIT_SUCCESS; 198 } 199 200 static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb, 201 ktime_t now) 202 { 203 struct etf_sched_data *q = qdisc_priv(sch); 204 struct sk_buff *to_free = NULL; 205 struct sk_buff *tmp = NULL; 206 207 skb_rbtree_walk_from_safe(skb, tmp) { 208 if (ktime_after(skb->tstamp, now)) 209 break; 210 211 rb_erase_cached(&skb->rbnode, &q->head); 212 213 /* The rbnode field in the skb re-uses these fields, now that 214 * we are done with the rbnode, reset them. 215 */ 216 skb->next = NULL; 217 skb->prev = NULL; 218 skb->dev = qdisc_dev(sch); 219 220 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED); 221 222 qdisc_qstats_backlog_dec(sch, skb); 223 qdisc_drop(skb, sch, &to_free); 224 qdisc_qstats_overlimit(sch); 225 sch->q.qlen--; 226 } 227 228 kfree_skb_list(to_free); 229 } 230 231 static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb) 232 { 233 struct etf_sched_data *q = qdisc_priv(sch); 234 235 rb_erase_cached(&skb->rbnode, &q->head); 236 237 /* The rbnode field in the skb re-uses these fields, now that 238 * we are done with the rbnode, reset them. 239 */ 240 skb->next = NULL; 241 skb->prev = NULL; 242 skb->dev = qdisc_dev(sch); 243 244 qdisc_qstats_backlog_dec(sch, skb); 245 246 qdisc_bstats_update(sch, skb); 247 248 q->last = skb->tstamp; 249 250 sch->q.qlen--; 251 } 252 253 static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch) 254 { 255 struct etf_sched_data *q = qdisc_priv(sch); 256 struct sk_buff *skb; 257 ktime_t now, next; 258 259 skb = etf_peek_timesortedlist(sch); 260 if (!skb) 261 return NULL; 262 263 now = q->get_time(); 264 265 /* Drop if packet has expired while in queue. */ 266 if (ktime_before(skb->tstamp, now)) { 267 timesortedlist_drop(sch, skb, now); 268 skb = NULL; 269 goto out; 270 } 271 272 /* When in deadline mode, dequeue as soon as possible and change the 273 * txtime from deadline to (now + delta). 274 */ 275 if (q->deadline_mode) { 276 timesortedlist_remove(sch, skb); 277 skb->tstamp = now; 278 goto out; 279 } 280 281 next = ktime_sub_ns(skb->tstamp, q->delta); 282 283 /* Dequeue only if now is within the [txtime - delta, txtime] range. */ 284 if (ktime_after(now, next)) 285 timesortedlist_remove(sch, skb); 286 else 287 skb = NULL; 288 289 out: 290 /* Now we may need to re-arm the qdisc watchdog for the next packet. */ 291 reset_watchdog(sch); 292 293 return skb; 294 } 295 296 static void etf_disable_offload(struct net_device *dev, 297 struct etf_sched_data *q) 298 { 299 struct tc_etf_qopt_offload etf = { }; 300 const struct net_device_ops *ops; 301 int err; 302 303 if (!q->offload) 304 return; 305 306 ops = dev->netdev_ops; 307 if (!ops->ndo_setup_tc) 308 return; 309 310 etf.queue = q->queue; 311 etf.enable = 0; 312 313 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); 314 if (err < 0) 315 pr_warn("Couldn't disable ETF offload for queue %d\n", 316 etf.queue); 317 } 318 319 static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q, 320 struct netlink_ext_ack *extack) 321 { 322 const struct net_device_ops *ops = dev->netdev_ops; 323 struct tc_etf_qopt_offload etf = { }; 324 int err; 325 326 if (!ops->ndo_setup_tc) { 327 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload"); 328 return -EOPNOTSUPP; 329 } 330 331 etf.queue = q->queue; 332 etf.enable = 1; 333 334 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf); 335 if (err < 0) { 336 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload"); 337 return err; 338 } 339 340 return 0; 341 } 342 343 static int etf_init(struct Qdisc *sch, struct nlattr *opt, 344 struct netlink_ext_ack *extack) 345 { 346 struct etf_sched_data *q = qdisc_priv(sch); 347 struct net_device *dev = qdisc_dev(sch); 348 struct nlattr *tb[TCA_ETF_MAX + 1]; 349 struct tc_etf_qopt *qopt; 350 int err; 351 352 if (!opt) { 353 NL_SET_ERR_MSG(extack, 354 "Missing ETF qdisc options which are mandatory"); 355 return -EINVAL; 356 } 357 358 err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy, 359 extack); 360 if (err < 0) 361 return err; 362 363 if (!tb[TCA_ETF_PARMS]) { 364 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters"); 365 return -EINVAL; 366 } 367 368 qopt = nla_data(tb[TCA_ETF_PARMS]); 369 370 pr_debug("delta %d clockid %d offload %s deadline %s\n", 371 qopt->delta, qopt->clockid, 372 OFFLOAD_IS_ON(qopt) ? "on" : "off", 373 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off"); 374 375 err = validate_input_params(qopt, extack); 376 if (err < 0) 377 return err; 378 379 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); 380 381 if (OFFLOAD_IS_ON(qopt)) { 382 err = etf_enable_offload(dev, q, extack); 383 if (err < 0) 384 return err; 385 } 386 387 /* Everything went OK, save the parameters used. */ 388 q->delta = qopt->delta; 389 q->clockid = qopt->clockid; 390 q->offload = OFFLOAD_IS_ON(qopt); 391 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt); 392 q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt); 393 394 switch (q->clockid) { 395 case CLOCK_REALTIME: 396 q->get_time = ktime_get_real; 397 break; 398 case CLOCK_MONOTONIC: 399 q->get_time = ktime_get; 400 break; 401 case CLOCK_BOOTTIME: 402 q->get_time = ktime_get_boottime; 403 break; 404 case CLOCK_TAI: 405 q->get_time = ktime_get_clocktai; 406 break; 407 default: 408 NL_SET_ERR_MSG(extack, "Clockid is not supported"); 409 return -ENOTSUPP; 410 } 411 412 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); 413 414 return 0; 415 } 416 417 static void timesortedlist_clear(struct Qdisc *sch) 418 { 419 struct etf_sched_data *q = qdisc_priv(sch); 420 struct rb_node *p = rb_first_cached(&q->head); 421 422 while (p) { 423 struct sk_buff *skb = rb_to_skb(p); 424 425 p = rb_next(p); 426 427 rb_erase_cached(&skb->rbnode, &q->head); 428 rtnl_kfree_skbs(skb, skb); 429 sch->q.qlen--; 430 } 431 } 432 433 static void etf_reset(struct Qdisc *sch) 434 { 435 struct etf_sched_data *q = qdisc_priv(sch); 436 437 /* Only cancel watchdog if it's been initialized. */ 438 if (q->watchdog.qdisc == sch) 439 qdisc_watchdog_cancel(&q->watchdog); 440 441 /* No matter which mode we are on, it's safe to clear both lists. */ 442 timesortedlist_clear(sch); 443 __qdisc_reset_queue(&sch->q); 444 445 q->last = 0; 446 } 447 448 static void etf_destroy(struct Qdisc *sch) 449 { 450 struct etf_sched_data *q = qdisc_priv(sch); 451 struct net_device *dev = qdisc_dev(sch); 452 453 /* Only cancel watchdog if it's been initialized. */ 454 if (q->watchdog.qdisc == sch) 455 qdisc_watchdog_cancel(&q->watchdog); 456 457 etf_disable_offload(dev, q); 458 } 459 460 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb) 461 { 462 struct etf_sched_data *q = qdisc_priv(sch); 463 struct tc_etf_qopt opt = { }; 464 struct nlattr *nest; 465 466 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 467 if (!nest) 468 goto nla_put_failure; 469 470 opt.delta = q->delta; 471 opt.clockid = q->clockid; 472 if (q->offload) 473 opt.flags |= TC_ETF_OFFLOAD_ON; 474 475 if (q->deadline_mode) 476 opt.flags |= TC_ETF_DEADLINE_MODE_ON; 477 478 if (q->skip_sock_check) 479 opt.flags |= TC_ETF_SKIP_SOCK_CHECK; 480 481 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt)) 482 goto nla_put_failure; 483 484 return nla_nest_end(skb, nest); 485 486 nla_put_failure: 487 nla_nest_cancel(skb, nest); 488 return -1; 489 } 490 491 static struct Qdisc_ops etf_qdisc_ops __read_mostly = { 492 .id = "etf", 493 .priv_size = sizeof(struct etf_sched_data), 494 .enqueue = etf_enqueue_timesortedlist, 495 .dequeue = etf_dequeue_timesortedlist, 496 .peek = etf_peek_timesortedlist, 497 .init = etf_init, 498 .reset = etf_reset, 499 .destroy = etf_destroy, 500 .dump = etf_dump, 501 .owner = THIS_MODULE, 502 }; 503 MODULE_ALIAS_NET_SCH("etf"); 504 505 static int __init etf_module_init(void) 506 { 507 return register_qdisc(&etf_qdisc_ops); 508 } 509 510 static void __exit etf_module_exit(void) 511 { 512 unregister_qdisc(&etf_qdisc_ops); 513 } 514 module_init(etf_module_init) 515 module_exit(etf_module_exit) 516 MODULE_LICENSE("GPL"); 517 MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); 518