// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
/*
 * Author: Koen De Schepper <koen.de_schepper@nokia-bell-labs.com>
 * Author: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
 *
 * - Supports congestion controls that comply with the Prague requirements
 *   in RFC9331 (e.g. TCP-Prague)
 * - Supports coupled dual-queue with PI2 as defined in RFC9332
 * - Supports ECN L4S-identifier (IP.ECN==0b*1)
 *
 * Note: although DCTCP and BBRv3 can use shallow-threshold ECN marks,
 * they do not satisfy the Prague requirements of RFC9331.
 *
 * References:
 * - RFC9332: https://datatracker.ietf.org/doc/html/rfc9332
 * - De Schepper, Koen, et al. "PI2: A linearized AQM for both classic and
 *   scalable TCP." Proc. ACM CoNEXT, 2016.
 */
/* The netlink-provided alpha/beta values can use at most 31b, i.e. be at
 * most (2^31)-1.
 */
#define ALPHA_BETA_MAX ((1U << 31) - 1)

#define ALPHA_BETA_SCALING (ALPHA_BETA_SHIFT - ALPHA_BETA_GRANULARITY)
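/* Worked example (editorial, not from the original source): the defaults in
 * dualpi2_reset_default() below pass alpha = 41 and beta = 819 through
 * dualpi2_scale_alpha_beta(), annotated "~0.16 Hz * 256" and "~3.20 Hz * 256".
 * That is consistent with the netlink alpha/beta carrying fixed-point values
 * in units of 1/256: 41 / 256 ~= 0.16 and 819 / 256 ~= 3.2. With at most
 * 31 significant bits, the largest configurable gain is therefore
 * ((2^31)-1) / 256 ~= 8388608.
 */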
/* qdisc private data */
        struct Qdisc *l_queue;  /* The L4S Low latency queue (L-queue) */
        struct Qdisc *sch;      /* The Classic queue (C-queue) */

        /* Step AQM (L-queue only) parameters */

        /* C-queue starvation protection */
        u8 c_protection_wc;     /* C-queue weight (between 0 and MAX_WC) */
        u8 c_protection_wl;     /* L-queue weight (MAX_WC - wc) */

        u8 ecn_mask;            /* Mask to match packets into L-queue */

        u64 c_head_ts;          /* Enqueue timestamp of the C-queue head */
        u64 l_head_ts;          /* Enqueue timestamp of the L-queue head */

        u32 packets_in_c;       /* Enqueue packet counter of the C-queue */
        u32 packets_in_l;       /* Enqueue packet counter of the L-queue */
        u32 maxq;               /* Maximum queue size of the C-queue */
/* L4S/classic classification results */
        DUALPI2_C_CLASSIC = 0,  /* C-queue */
        DUALPI2_C_L4S = 1,      /* L-queue (scale mark/classic drop) */
        DUALPI2_C_LLLL = 2,     /* L-queue (no drops/marks) */
        return (struct dualpi2_skb_cb *)qdisc_skb_cb(skb)->data;        /* in dualpi2_skb_cb() */
        return reference - dualpi2_skb_cb(skb)->ts;                     /* in dualpi2_sojourn_time() */
        return skb ? dualpi2_skb_cb(skb)->ts : 0;                       /* in head_enqueue_time() */
        return ktime_add_ns(ktime_get_ns(), q->pi2_tupdate);            /* in next_pi2_timeout() */
        return dualpi2_skb_cb(skb)->classified == DUALPI2_C_L4S;        /* in skb_is_l4s() */
        return dualpi2_skb_cb(skb)->classified != DUALPI2_C_CLASSIC;    /* in skb_in_l_queue() */
        return skb_is_l4s(skb) && qdisc_qlen(q->l_queue) >= q->min_qlen_step;  /* in skb_apply_step() */
/* in dualpi2_mark() */
        q->ecn_mark++;

/* in dualpi2_reset_c_protection() */
        q->c_protection_credit = q->c_protection_init;

/* in dualpi2_calculate_c_protection() */
        q->c_protection_wc = wc;
        q->c_protection_wl = MAX_WC - wc;
        q->c_protection_init = (s32)psched_mtu(qdisc_dev(sch)) *
                               ((int)q->c_protection_wc - (int)q->c_protection_wl);
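/* Worked example (editorial): with the defaults below (wc = 10, hence
 * wl = MAX_WC - wc = 90, assuming MAX_WC = 100) and a 1500-byte MTU,
 * c_protection_init = 1500 * (10 - 90) = -120000. A negative initial credit
 * means the L-queue is preferred right after a reset, since dequeue_packet()
 * below serves the L-queue while the credit is <= 0.
 */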
/* Packets in the C-queue are subject to a marking probability pC, which is
 * the square of the internal PI probability.
 */
/* in dualpi2_classic_marking() */
        if (overload || dualpi2_skb_cb(skb)->ect == INET_ECN_NOT_ECT)

/* Packets in the L-queue are subject to a marking probability pL given by the
 * internal PI probability scaled by the coupling factor. On overload:
 * - if the qdisc is configured to trade losses to preserve latency (i.e.,
 *   @q->drop_overload), apply classic drops first before marking.
 * - otherwise, preserve the "no loss" property of ECN at the cost of queueing
 *   delay, eventually resulting in taildrop behavior once sch->limit is
 *   reached.
 */
/* in dualpi2_scalable_marking() */
        if (!q->drop_overload ||

        /* Non-ECT packets could have been classified as L4S by filters. */
        if (dualpi2_skb_cb(skb)->ect == INET_ECN_NOT_ECT)
/* in must_drop() */
        if (sch->qstats.backlog < 2 * psched_mtu(qdisc_dev(sch)))

        prob = READ_ONCE(q->pi2_prob);
        local_l_prob = (u64)prob * q->coupling_factor;

        switch (dualpi2_skb_cb(skb)->classified) {
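/* Editorial sketch (userspace, not kernel code) of the probability coupling
 * used by must_drop(): per RFC 9332 the classic queue drops/marks with
 * pC = (p')^2 while the L4S queue marks with pL = k * p'. The 32-bit
 * fixed-point scale below (all-ones ~= probability 1.0) is an assumption
 * based on the u32 arithmetic above; names prefixed "ex_" are hypothetical.
 */
#include <stdint.h>

#define EX_MAX_PROB UINT32_MAX  /* stand-in for the qdisc's MAX_PROB */

/* pL = k * p', saturated at 1.0; mirrors local_l_prob above */
static uint64_t ex_l4s_prob(uint32_t prob, uint8_t coupling_factor)
{
        uint64_t p = (uint64_t)prob * coupling_factor;

        return p > EX_MAX_PROB ? EX_MAX_PROB : p;
}

/* pC = (p')^2 in fixed point: (p * p) >> 32 approximates p^2 / MAX_PROB */
static uint32_t ex_classic_prob(uint32_t prob)
{
        return (uint32_t)(((uint64_t)prob * prob) >> 32);
}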
/* in dualpi2_read_ect() */
        struct dualpi2_skb_cb *cb = dualpi2_skb_cb(skb);

                cb->ect = ipv4_get_dsfield(ip_hdr(skb)) & INET_ECN_MASK;

                cb->ect = ipv6_get_dsfield(ipv6_hdr(skb)) & INET_ECN_MASK;

        cb->ect = INET_ECN_NOT_ECT;
/* in dualpi2_skb_classify() */
        struct dualpi2_skb_cb *cb = dualpi2_skb_cb(skb);

        if (cb->ect & q->ecn_mask) {
                cb->classified = DUALPI2_C_L4S;

        if (TC_H_MAJ(skb->priority) == q->sch->handle &&
            TC_H_MIN(skb->priority) < __DUALPI2_C_MAX) {
                cb->classified = TC_H_MIN(skb->priority);

        fl = rcu_dereference_bh(q->tcf_filters);

                cb->classified = DUALPI2_C_CLASSIC;

                cb->classified = TC_H_MIN(res.classid) < __DUALPI2_C_MAX ?
/* in dualpi2_enqueue_skb() */
        struct dualpi2_skb_cb *cb;

        if (unlikely(qdisc_qlen(sch) >= sch->limit) ||
            unlikely((u64)q->memory_used + skb->truesize > q->memory_limit)) {

                        qdisc_qstats_overlimit(q->l_queue);

        if (q->drop_early && must_drop(sch, q, skb)) {

        cb = dualpi2_skb_cb(skb);
        cb->ts = ktime_get_ns();
        q->memory_used += skb->truesize;
        if (q->memory_used > q->max_memory_used)
                q->max_memory_used = q->memory_used;

        if (qdisc_qlen(sch) > q->maxq)
                q->maxq = qdisc_qlen(sch);

                /* Apply step thresh if skb is L4S && L-queue len >= min_qlen */
                dualpi2_skb_cb(skb)->apply_step = skb_apply_step(skb, q);

        ++sch->q.qlen;

                ++q->packets_in_l;
                if (!q->l_head_ts)
                        q->l_head_ts = cb->ts;
                return qdisc_enqueue_tail(skb, q->l_queue);

        ++q->packets_in_c;
        if (!q->c_head_ts)
                q->c_head_ts = cb->ts;
/* Splitting GSO skbs at enqueue (q->split_gso) yields:
 * - Finer-grained AQM actions as the sub-packets of a burst no longer share
 *   the same enqueue timestamp, and thus the same sojourn time.
 * - Improved precision of the starvation protection/WRR scheduler at dequeue,
 *   as decisions are taken per segment rather than per GSO burst.
 */
/* in dualpi2_qdisc_enqueue() */
        if (q->split_gso && skb_is_gso(skb)) {

                        qdisc_skb_cb(nskb)->pkt_len = nskb->len;
                        dualpi2_skb_cb(nskb)->classified =
                                dualpi2_skb_cb(skb)->classified;
                        dualpi2_skb_cb(nskb)->ect = dualpi2_skb_cb(skb)->ect;

                        /* Compute the backlog adjustment that needs to be
                         * propagated in the qdisc tree to reflect all new
                         * skbs successfully enqueued.
                         */
                        byte_len += nskb->len;

                --cnt;
                byte_len -= orig_len;
                qdisc_tree_reduce_backlog(sch, -cnt, -byte_len);
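/* Editorial note: after the split, cnt and byte_len hold the *extra* packets
 * and bytes created by segmentation (the original skb's count and orig_len
 * are subtracted above). Passing them negated to qdisc_tree_reduce_backlog()
 * therefore *increases* the backlog recorded by the parent qdiscs, keeping
 * their counters in sync with the segments actually enqueued.
 */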
/* in dequeue_packet() */
        c_len = qdisc_qlen(sch) - qdisc_qlen(q->l_queue);
        if (qdisc_qlen(q->l_queue) && (!c_len || q->c_protection_credit <= 0)) {
                skb = __qdisc_dequeue_head(&q->l_queue->q);
                WRITE_ONCE(q->l_head_ts, head_enqueue_time(q->l_queue));

                        *credit_change = q->c_protection_wc;
                qdisc_qstats_backlog_dec(q->l_queue, skb);

                --sch->q.qlen;
                q->memory_used -= skb->truesize;

                skb = __qdisc_dequeue_head(&sch->q);
                WRITE_ONCE(q->c_head_ts, head_enqueue_time(sch));
                if (qdisc_qlen(q->l_queue))
                        *credit_change = ~((s32)q->c_protection_wl) + 1;
                q->memory_used -= skb->truesize;
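/* Editorial note on the WRR credit above: serving the L-queue adds
 * c_protection_wc to the credit, serving the C-queue subtracts
 * c_protection_wl (the "~x + 1" idiom is two's-complement negation, i.e.
 * -wl); the MTU-based initialisation of c_protection_init suggests the full
 * source scales these weights by packet size. With both queues backlogged
 * the credit oscillates around zero, so service settles at a wc : wl ratio;
 * with the wc=10/wl=90 defaults the C-queue is guaranteed roughly a 10%
 * share, which is its starvation protection.
 */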
/* in do_step_aqm() */
        if (q->step_in_packets)
                qdelay = qdisc_qlen(q->l_queue);

        if (dualpi2_skb_cb(skb)->apply_step && qdelay > q->step_thresh) {
                if (!dualpi2_skb_cb(skb)->ect) {
                        /* Drop this non-ECT packet */

                ++q->step_marks;

        qdisc_bstats_update(q->l_queue, skb);
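/* Editorial note: q->step_thresh is a single field with two interpretations.
 * When step_in_packets is set, qdelay above is the L-queue length in packets
 * and the threshold is a packet count; otherwise qdelay is a sojourn time
 * and the threshold is stored in nanoseconds (see the convert_us_to_nsec()
 * call in dualpi2_change()). Packets crossing it are CE-marked if
 * ECN-capable and dropped otherwise.
 */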
/* in drop_and_retry() */
        ++q->deferred_drops_cnt;
        q->deferred_drops_len += qdisc_pkt_len(skb);
/* in dualpi2_qdisc_dequeue() */
        if (!q->drop_early && must_drop(sch, q, skb)) {

                qdisc_qstats_drop(q->l_queue);

        q->c_protection_credit += credit_change;

        if (q->deferred_drops_cnt) {
                qdisc_tree_reduce_backlog(sch, q->deferred_drops_cnt,
                                          q->deferred_drops_len);
                q->deferred_drops_cnt = 0;
                q->deferred_drops_len = 0;
/* in get_queue_delays() */
        qc = READ_ONCE(q->c_head_ts);
        ql = READ_ONCE(q->l_head_ts);

        *qdelay_c = qc ? now - qc : 0;
        *qdelay_l = ql ? now - ql : 0;
/* in calculate_probability() */
        delta = ((s64)qdelay - (s64)q->pi2_target) * q->pi2_alpha;
        delta += ((s64)qdelay - (s64)q->last_qdelay) * q->pi2_beta;
        q->last_qdelay = qdelay;

                new_prob = __scale_delta(delta) + q->pi2_prob;
                if (new_prob < q->pi2_prob)

                new_prob = q->pi2_prob - __scale_delta(~delta + 1);
                if (new_prob > q->pi2_prob)

        if (!q->drop_overload)
                return min_t(u32, new_prob, MAX_PROB / q->coupling_factor);
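/* Editorial sketch (userspace model, not kernel code) of the PI update in
 * calculate_probability(): alpha weighs the error against the target delay,
 * beta weighs the delay trend, and the resulting delta moves a saturating
 * fixed-point probability. EX_SCALE is a placeholder for the rescaling done
 * by the kernel's __scale_delta(); all "ex_" names are hypothetical.
 */
#include <stdint.h>

#define EX_MAX_PROB UINT32_MAX
#define EX_SCALE 32     /* placeholder scaling shift, not the kernel's value */

static uint32_t ex_pi2_update(uint32_t prob, int64_t qdelay_ns,
                              int64_t target_ns, int64_t *last_qdelay_ns,
                              int64_t alpha, int64_t beta)
{
        int64_t delta;

        delta = (qdelay_ns - target_ns) * alpha;
        delta += (qdelay_ns - *last_qdelay_ns) * beta;
        *last_qdelay_ns = qdelay_ns;

        if (delta >= 0) {
                uint64_t inc = (uint64_t)delta >> EX_SCALE;

                /* saturate instead of wrapping past probability 1.0 */
                return inc > EX_MAX_PROB - prob ? EX_MAX_PROB
                                                : prob + (uint32_t)inc;
        } else {
                uint64_t dec = (uint64_t)(-delta) >> EX_SCALE;

                /* floor at probability 0 instead of wrapping */
                return dec > prob ? 0 : prob - (uint32_t)dec;
        }
}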
/* in dualpi2_timer() */
        struct Qdisc *sch = q->sch;

        WRITE_ONCE(q->pi2_prob, calculate_probability(sch));
        hrtimer_set_expires(&q->pi2_timer, next_pi2_timeout(q));
/* in dualpi2_change() */
                return -EINVAL;

                return -EINVAL;

                WRITE_ONCE(sch->limit, limit);
                WRITE_ONCE(q->memory_limit, get_memory_limit(sch, limit));

                WRITE_ONCE(q->memory_limit,

                WRITE_ONCE(q->pi2_target, target * NSEC_PER_USEC);

                WRITE_ONCE(q->pi2_tupdate, convert_us_to_nsec(tupdate));

                WRITE_ONCE(q->pi2_alpha, dualpi2_scale_alpha_beta(alpha));

                WRITE_ONCE(q->pi2_beta, dualpi2_scale_alpha_beta(beta));

                WRITE_ONCE(q->step_in_packets, true);
                WRITE_ONCE(q->step_thresh, step_th);

                WRITE_ONCE(q->step_in_packets, false);
                WRITE_ONCE(q->step_thresh, convert_us_to_nsec(step_th));

                WRITE_ONCE(q->min_qlen_step,

                WRITE_ONCE(q->coupling_factor, coupling);

                WRITE_ONCE(q->drop_overload, (bool)drop_overload);

                WRITE_ONCE(q->drop_early, (bool)drop_early);

                WRITE_ONCE(q->ecn_mask, ecn_mask);

                WRITE_ONCE(q->split_gso, (bool)split_gso);

        old_backlog = sch->qstats.backlog;
        while (qdisc_qlen(sch) > sch->limit ||
               q->memory_used > q->memory_limit) {

                q->memory_used -= skb->truesize;

        qdisc_tree_reduce_backlog(sch, old_qlen - qdisc_qlen(sch),
                                  old_backlog - sch->qstats.backlog);
/* in dualpi2_reset_default() */
        q->sch->limit = 10000;                          /* Max 125ms at 1Gbps */
        q->memory_limit = get_memory_limit(sch, q->sch->limit);

        q->pi2_target = 15 * NSEC_PER_MSEC;
        q->pi2_tupdate = 16 * NSEC_PER_MSEC;
        q->pi2_alpha = dualpi2_scale_alpha_beta(41);    /* ~0.16 Hz * 256 */
        q->pi2_beta = dualpi2_scale_alpha_beta(819);    /* ~3.20 Hz * 256 */

        q->step_thresh = 1 * NSEC_PER_MSEC;
        q->step_in_packets = false;

        dualpi2_calculate_c_protection(q->sch, q, 10);  /* wc=10%, wl=90% */

        q->ecn_mask = TC_DUALPI2_ECN_MASK_L4S_ECT;      /* INET_ECN_ECT_1 */
        q->min_qlen_step = 0;           /* Always apply step mark in L-queue */
        q->coupling_factor = 2;         /* window fairness for equal RTTs */
        q->drop_overload = TC_DUALPI2_DROP_OVERLOAD_DROP;       /* Drop overload */
        q->drop_early = TC_DUALPI2_DROP_EARLY_DROP_DEQUEUE;     /* Drop dequeue */
        q->split_gso = TC_DUALPI2_SPLIT_GSO_SPLIT_GSO;          /* Split GSO */
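/* Editorial check of "Max 125ms at 1Gbps": 10000 full-size packets of 1500 B
 * are 10000 * 1500 * 8 = 120 Mbit, i.e. 120 ms of buffering at 1 Gb/s; with
 * Ethernet framing overhead the drain time approaches the quoted 125 ms.
 */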
/* in dualpi2_init() */
        q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
                                       TC_H_MAKE(sch->handle, 1), extack);
        if (!q->l_queue)
                return -ENOMEM;

        err = tcf_block_get(&q->tcf_block, &q->tcf_filters, sch, extack);

        q->sch = sch;

        hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC,

        hrtimer_start(&q->pi2_timer, next_pi2_timeout(q),
/* in dualpi2_dump() */
        step_in_pkts = READ_ONCE(q->step_in_packets);
        step_th = READ_ONCE(q->step_thresh);

        /* Two near-identical attribute chains follow in the source: one for
         * step_in_pkts (threshold dumped in packets) and one for the
         * time-based threshold (dumped in microseconds).
         */
            (nla_put_u32(skb, TCA_DUALPI2_LIMIT, READ_ONCE(sch->limit)) ||
                         READ_ONCE(q->memory_limit)) ||
                         convert_ns_to_usec(READ_ONCE(q->pi2_target))) ||
                         convert_ns_to_usec(READ_ONCE(q->pi2_tupdate))) ||
                         dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_alpha))) ||
                         dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_beta))) ||
                         READ_ONCE(q->min_qlen_step)) ||
                         READ_ONCE(q->coupling_factor)) ||
                         READ_ONCE(q->drop_overload)) ||
                         READ_ONCE(q->drop_early)) ||
                         READ_ONCE(q->c_protection_wc)) ||
             nla_put_u8(skb, TCA_DUALPI2_ECN_MASK, READ_ONCE(q->ecn_mask)) ||
             nla_put_u8(skb, TCA_DUALPI2_SPLIT_GSO, READ_ONCE(q->split_gso))))

            (nla_put_u32(skb, TCA_DUALPI2_LIMIT, READ_ONCE(sch->limit)) ||
                         READ_ONCE(q->memory_limit)) ||
                         convert_ns_to_usec(READ_ONCE(q->pi2_target))) ||
                         convert_ns_to_usec(READ_ONCE(q->pi2_tupdate))) ||
                         dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_alpha))) ||
                         dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_beta))) ||
                         READ_ONCE(q->min_qlen_step)) ||
                         READ_ONCE(q->coupling_factor)) ||
                         READ_ONCE(q->drop_overload)) ||
                         READ_ONCE(q->drop_early)) ||
                         READ_ONCE(q->c_protection_wc)) ||
             nla_put_u8(skb, TCA_DUALPI2_ECN_MASK, READ_ONCE(q->ecn_mask)) ||
             nla_put_u8(skb, TCA_DUALPI2_SPLIT_GSO, READ_ONCE(q->split_gso))))

        return -1;
/* in dualpi2_dump_stats() */
                .prob = READ_ONCE(q->pi2_prob),
                .packets_in_c = q->packets_in_c,
                .packets_in_l = q->packets_in_l,
                .maxq = q->maxq,
                .ecn_mark = q->ecn_mark,
                .credit = q->c_protection_credit,
                .step_marks = q->step_marks,
                .memory_used = q->memory_used,
                .max_memory_used = q->max_memory_used,
                .memory_limit = q->memory_limit,
/* Reset both L-queue and C-queue, internal packet counters, PI probability,
 * C-queue protection credit, and timestamps, while preserving the current
 * configuration.
 */
/* in dualpi2_reset() */
        qdisc_reset_queue(q->l_queue);
        q->c_head_ts = 0;
        q->l_head_ts = 0;
        q->pi2_prob = 0;
        q->packets_in_c = 0;
        q->packets_in_l = 0;
        q->maxq = 0;
        q->ecn_mark = 0;
        q->step_marks = 0;
        q->memory_used = 0;
        q->max_memory_used = 0;
/* in dualpi2_destroy() */
        q->pi2_tupdate = 0;
        hrtimer_cancel(&q->pi2_timer);
        if (q->l_queue)
                qdisc_put(q->l_queue);
        tcf_block_put(q->tcf_block);
/* in dualpi2_tcf_block() */
        return q->tcf_block;

/* in dualpi2_walk() */
        if (arg->stop)

                if (arg->count < arg->skip) {
                        arg->count++;

                if (arg->fn(sch, i + 1, arg) < 0) {
                        arg->stop = 1;

                arg->count++;
MODULE_AUTHOR("Koen De Schepper <koen.de_schepper@nokia-bell-labs.com>");
MODULE_AUTHOR("Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>");