xref: /linux/net/rxrpc/input_rack.c (revision 0ad9617c78acbc71373fb341a6f75d4012b01d69)
// SPDX-License-Identifier: GPL-2.0-or-later
/* RACK-TLP [RFC8985] Implementation
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "ar-internal.h"

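/*
 * Determine whether the first segment was sent after the second (cf.
 * RACK_sent_after() in RFC8985): a later transmission time wins, with the
 * sequence number as the tie-breaker.
 */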
static bool rxrpc_rack_sent_after(ktime_t t1, rxrpc_seq_t seq1,
				  ktime_t t2, rxrpc_seq_t seq2)
{
	if (ktime_after(t1, t2))
		return true;
	return t1 == t2 && after(seq1, seq2);
}

/*
 * Mark a packet lost.
 */
static void rxrpc_rack_mark_lost(struct rxrpc_call *call,
				 struct rxrpc_txqueue *tq, unsigned int ix)
{
	if (__test_and_set_bit(ix, &tq->segment_lost)) {
		if (__test_and_clear_bit(ix, &tq->segment_retransmitted))
			call->tx_nr_resent--;
	} else {
		call->tx_nr_lost++;
	}
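	/* UINT_MAX means "no valid transmission time"; rxrpc_get_xmit_ts()
	 * will return KTIME_MAX for this segment.
	 */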
	tq->segment_xmit_ts[ix] = UINT_MAX;
}

/*
 * Get the transmission time of a packet in the Tx queue.
 */
static ktime_t rxrpc_get_xmit_ts(const struct rxrpc_txqueue *tq, unsigned int ix)
{
	if (tq->segment_xmit_ts[ix] == UINT_MAX)
		return KTIME_MAX;
	return ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
}

/*
 * Get a bitmask of nack bits for a queue segment and mask off any that aren't
 * yet reported.
 */
static unsigned long rxrpc_tq_nacks(const struct rxrpc_txqueue *tq)
{
	unsigned long nacks = ~tq->segment_acked;

	if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE)
		nacks &= (1UL << tq->nr_reported_acks) - 1;
	return nacks;
}

/*
 * Update the RACK state for the most recently sent packet that has been
 * delivered [RFC8985 6.2 Step 2].
 */
static void rxrpc_rack_update(struct rxrpc_call *call,
			      struct rxrpc_ack_summary *summary,
			      struct rxrpc_txqueue *tq,
			      unsigned int ix)
{
	rxrpc_seq_t seq = tq->qbase + ix;
	ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);
	ktime_t rtt = ktime_sub(call->acks_latest_ts, xmit_ts);

	if (__test_and_clear_bit(ix, &tq->segment_lost))
		call->tx_nr_lost--;

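	/* If the segment was retransmitted, this ACK may refer to the
	 * original transmission and would then give a bogus RTT sample.
	 * Skip it if the peer can't yet have seen the retransmission's
	 * serial or if the RTT comes out below the minimum observed so far
	 * [RFC8985 6.2 Step 2].
	 */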
	if (test_bit(ix, &tq->segment_retransmitted)) {
		/* Use Rx.serial instead of TCP.ACK.ts_option.echo_reply. */
		if (before(call->acks_highest_serial, tq->segment_serial[ix]))
			return;
		if (rtt < minmax_get(&call->min_rtt))
			return;
	}

	/* The RACK algorithm requires the segment ACKs to be traversed in
	 * order of segment transmission - but the only thing this seems to
	 * matter for is that RACK.rtt is set to the rtt of the most recently
	 * transmitted segment.  We should be able to achieve the same by only
	 * setting RACK.rtt if the xmit time is greater.
	 */
	if (ktime_after(xmit_ts, call->rack_rtt_ts)) {
		call->rack_rtt	  = rtt;
		call->rack_rtt_ts = xmit_ts;
	}

	if (rxrpc_rack_sent_after(xmit_ts, seq, call->rack_xmit_ts, call->rack_end_seq)) {
		call->rack_rtt = rtt;
		call->rack_xmit_ts = xmit_ts;
		call->rack_end_seq = seq;
	}
}

/*
 * Detect data segment reordering [RFC8985 6.2 Step 3].
 */
static void rxrpc_rack_detect_reordering(struct rxrpc_call *call,
					 struct rxrpc_ack_summary *summary,
					 struct rxrpc_txqueue *tq,
					 unsigned int ix)
{
	rxrpc_seq_t seq = tq->qbase + ix;

	/* Track the highest sequence number so far ACK'd.  This is not
	 * necessarily the same as ack.firstPacket + ack.nAcks - 1 as the peer
	 * could put a NACK in the last SACK slot.
	 */
	if (after(seq, call->rack_fack))
		call->rack_fack = seq;
	else if (before(seq, call->rack_fack) &&
		 test_bit(ix, &tq->segment_retransmitted))
		call->rack_reordering_seen = true;
}

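/*
 * Process one newly ACK'd segment: update the RACK state [RFC8985 6.2 Step 2]
 * and check for reordering [RFC8985 6.2 Step 3].
 */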
void rxrpc_input_rack_one(struct rxrpc_call *call,
			  struct rxrpc_ack_summary *summary,
			  struct rxrpc_txqueue *tq,
			  unsigned int ix)
{
	rxrpc_rack_update(call, summary, tq, ix);
	rxrpc_rack_detect_reordering(call, summary, tq, ix);
}

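/*
 * Run RACK processing over every segment in a Tx queue that was newly ACK'd
 * by the ACK packet currently being processed.
 */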
void rxrpc_input_rack(struct rxrpc_call *call,
		      struct rxrpc_ack_summary *summary,
		      struct rxrpc_txqueue *tq,
		      unsigned long new_acks)
{
	while (new_acks) {
		unsigned int ix = __ffs(new_acks);

		__clear_bit(ix, &new_acks);
		rxrpc_input_rack_one(call, summary, tq, ix);
	}

	trace_rxrpc_rack_update(call, summary);
}

/*
 * Update the reordering window [RFC8985 6.2 Step 4].  Returns the updated
 * duration of the reordering window.
 *
 * Note that the Rx protocol doesn't have a 'DSACK option' per se, but ACKs can
 * be given a 'DUPLICATE' reason with the serial number referring to the
 * duplicated DATA packet.  Rx does not indicate whether this was a reception
 * of the same packet twice or of a retransmission of a packet we already
 * received (though this could be determined by the transmitter based on the
 * serial number).
 */
static ktime_t rxrpc_rack_update_reo_wnd(struct rxrpc_call *call,
					 struct rxrpc_ack_summary *summary)
{
	rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
	rxrpc_seq_t snd_nxt = call->tx_transmitted + 1; /* Next seq to be sent */
	bool have_dsack_option = summary->ack_reason == RXRPC_ACK_DUPLICATE;
	int dup_thresh = 3;

	/* DSACK-based reordering window adaptation */
	if (!call->rack_dsack_round_none &&
	    after_eq(snd_una, call->rack_dsack_round))
		call->rack_dsack_round_none = true;

	/* Grow the reordering window per round that sees DSACK.  Reset the
	 * window after 16 DSACK-free recoveries.
	 */
	if (call->rack_dsack_round_none && have_dsack_option) {
		call->rack_dsack_round_none = false;
		call->rack_dsack_round = snd_nxt;
		call->rack_reo_wnd_mult++;
		call->rack_reo_wnd_persist = 16;
	} else if (summary->exiting_fast_or_rto_recovery) {
		call->rack_reo_wnd_persist--;
		if (call->rack_reo_wnd_persist <= 0)
			call->rack_reo_wnd_mult = 1;
	}

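	/* If no reordering has been seen, use a zero reordering window (so
	 * losses are detected immediately) once we're in fast or RTO recovery
	 * or once enough selective ACKs have arrived to hit the classic
	 * duplicate threshold [RFC8985 6.2 Step 4].
	 */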
	if (!call->rack_reordering_seen) {
		if (summary->in_fast_or_rto_recovery)
			return 0;
		if (call->acks_nr_sacks >= dup_thresh)
			return 0;
	}

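	/* Reo_wnd = min(reo_wnd_mult * min_RTT / 4, SRTT).  srtt_us is kept
	 * scaled by 8, as in TCP, hence the shift to get microseconds.
	 */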
	return us_to_ktime(umin(call->rack_reo_wnd_mult * minmax_get(&call->min_rtt) / 4,
				call->srtt_us >> 3));
}

/*
 * Detect losses [RFC8985 6.2 Step 5].
 */
static ktime_t rxrpc_rack_detect_loss(struct rxrpc_call *call,
				      struct rxrpc_ack_summary *summary)
{
	struct rxrpc_txqueue *tq;
	ktime_t timeout = 0, lost_after, now = ktime_get_real();

	call->rack_reo_wnd = rxrpc_rack_update_reo_wnd(call, summary);
	lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
	trace_rxrpc_rack_scan_loss(call);

	for (tq = call->tx_queue; tq; tq = tq->next) {
		unsigned long nacks = rxrpc_tq_nacks(tq);

		if (after(tq->qbase, call->tx_transmitted))
			break;
		trace_rxrpc_rack_scan_loss_tq(call, tq, nacks);

		/* Skip ones marked lost but not yet retransmitted */
		nacks &= ~tq->segment_lost | tq->segment_retransmitted;

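		/* A NACK'd segment is deemed lost if it was sent before the
		 * most recently delivered segment and more than RACK.rtt +
		 * reo_wnd has elapsed since it went out; otherwise note how
		 * long until that point so that a timer can be set.
		 */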
		while (nacks) {
			unsigned int ix = __ffs(nacks);
			rxrpc_seq_t seq = tq->qbase + ix;
			ktime_t remaining;
			ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);

			__clear_bit(ix, &nacks);

			if (rxrpc_rack_sent_after(call->rack_xmit_ts, call->rack_end_seq,
						  xmit_ts, seq)) {
				remaining = ktime_sub(ktime_add(xmit_ts, lost_after), now);
				if (remaining <= 0) {
					rxrpc_rack_mark_lost(call, tq, ix);
					trace_rxrpc_rack_detect_loss(call, summary, seq);
				} else {
					timeout = max(remaining, timeout);
				}
			}
		}
	}

	return timeout;
}

/*
 * Detect losses and set a timer to retry the detection [RFC8985 6.2 Step 5].
 */
void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
					  struct rxrpc_ack_summary *summary)
{
	ktime_t timeout = rxrpc_rack_detect_loss(call, summary);

	if (timeout) {
		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RACK_REORDER;
		call->rack_timo_at = ktime_add(ktime_get_real(), timeout);
		trace_rxrpc_rack_timer(call, timeout, false);
		trace_rxrpc_timer_set(call, timeout, rxrpc_timer_trace_rack_reo);
	}
}

/*
 * Handle RACK-TLP RTO expiration [RFC8985 6.3].
 */
static void rxrpc_rack_mark_losses_on_rto(struct rxrpc_call *call)
{
	struct rxrpc_txqueue *tq;
	rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
	ktime_t lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
	ktime_t deadline = ktime_sub(ktime_get_real(), lost_after);

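	/* Mark as lost every transmitted-but-unACK'd segment that is either
	 * the lowest outstanding sequence or was sent more than RACK.rtt +
	 * reo_wnd ago [RFC8985 6.3].
	 */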
	for (tq = call->tx_queue; tq; tq = tq->next) {
		unsigned long unacked = ~tq->segment_acked;

		trace_rxrpc_rack_mark_loss_tq(call, tq);
		while (unacked) {
			unsigned int ix = __ffs(unacked);
			rxrpc_seq_t seq = tq->qbase + ix;
			ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);

			if (after(seq, call->tx_transmitted))
				return;
			__clear_bit(ix, &unacked);

			if (seq == snd_una ||
			    ktime_before(xmit_ts, deadline))
				rxrpc_rack_mark_lost(call, tq, ix);
		}
	}
}

/*
 * Calculate the TLP loss probe timeout (PTO) [RFC8985 7.2].
 */
ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now)
{
	unsigned int flight_size = rxrpc_tx_in_flight(call);
	ktime_t rto_at = ktime_add(call->tx_last_sent,
				   rxrpc_get_rto_backoff(call, false));
	ktime_t pto;

	if (call->rtt_count > 0) {
		/* Use 2*SRTT as the timeout. */
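		/* (srtt_us is kept scaled by 8, as in TCP, so srtt_us / 4
		 * corresponds to 2 * SRTT in microseconds.)
		 */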
		pto = ns_to_ktime(call->srtt_us * NSEC_PER_USEC / 4);
		if (flight_size)
			pto = ktime_add(pto, call->tlp_max_ack_delay);
	} else {
		pto = NSEC_PER_SEC;
	}

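	/* Don't let the probe fire later than the RTO would; clamp the PTO so
	 * that it expires no later than the RTO deadline [RFC8985 7.2].
	 */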
	if (ktime_after(ktime_add(now, pto), rto_at))
		pto = ktime_sub(rto_at, now);
	return pto;
}

/*
 * Send a TLP loss probe on PTO expiration [RFC8985 7.3].
 */
void rxrpc_tlp_send_probe(struct rxrpc_call *call)
{
	unsigned int in_flight = rxrpc_tx_in_flight(call);

	if (after_eq(call->acks_hard_ack, call->tx_transmitted))
		return; /* Everything we transmitted has been acked. */

	/* There must be no other loss probe still in flight and we need to
	 * have taken a new RTT sample since the last probe or the start of
	 * the connection.
	 */
	if (!call->tlp_serial &&
	    call->tlp_rtt_taken != call->rtt_taken) {
		call->tlp_is_retrans = false;
		if (after(call->send_top, call->tx_transmitted) &&
		    rxrpc_tx_window_space(call) > 0) {
			/* Transmit the lowest-sequence unsent DATA */
			call->tx_last_serial = 0;
			rxrpc_transmit_some_data(call, 1, rxrpc_txdata_tlp_new_data);
			call->tlp_serial = call->tx_last_serial;
			call->tlp_seq = call->tx_transmitted;
			trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_transmit_new);
			in_flight = rxrpc_tx_in_flight(call);
		} else {
			/* Retransmit the highest-sequence DATA sent */
			call->tx_last_serial = 0;
			rxrpc_resend_tlp(call);
			call->tlp_is_retrans = true;
			trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_retransmit);
		}
	} else {
		trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_busy);
	}

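	/* Whilst data remains in flight, rearm the RACK-TLP timer in RTO mode
	 * so that losses still get marked if no further ACKs arrive.
	 */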
	if (in_flight != 0) {
		ktime_t rto = rxrpc_get_rto_backoff(call, false);

		call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RTO;
		call->rack_timo_at = ktime_add(ktime_get_real(), rto);
		trace_rxrpc_rack_timer(call, rto, false);
		trace_rxrpc_timer_set(call, rto, rxrpc_timer_trace_rack_rto);
	}
}

/*
 * Detect losses using the ACK of a TLP loss probe [RFC8985 7.4].
 */
void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary)
{
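	/* Nothing to do if there's no probe outstanding or the probe's
	 * sequence hasn't yet been cumulatively ACK'd.
	 */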
	if (!call->tlp_serial || after(call->tlp_seq, call->acks_hard_ack))
		return;

	if (!call->tlp_is_retrans) {
		/* TLP of new data delivered */
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_new_data);
		call->tlp_serial = 0;
	} else if (summary->ack_reason == RXRPC_ACK_DUPLICATE &&
		   summary->acked_serial == call->tlp_serial) {
		/* General Case: Detected packet losses using RACK [7.4.1] */
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_dup_acked);
		call->tlp_serial = 0;
	} else if (after(call->acks_hard_ack, call->tlp_seq)) {
		/* Repaired the single loss */
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_hard_beyond);
		call->tlp_serial = 0;
		// TODO: Invoke congestion control to react to the loss
		// event the probe has repaired
	} else if (summary->tlp_probe_acked) {
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_acked);
		/* Special Case: Detected a single loss repaired by the loss
		 * probe [7.4.2]
		 */
		call->tlp_serial = 0;
	} else {
		trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_incomplete);
	}
}

/*
 * Handle expiration of the RACK-TLP timer, performing whichever action the
 * mode the timer was armed in calls for.
 */
void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by)
{
	struct rxrpc_ack_summary summary = {};
	enum rxrpc_rack_timer_mode mode = call->rack_timer_mode;

	trace_rxrpc_rack_timer(call, overran_by, true);
	call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF;

	switch (mode) {
	case RXRPC_CALL_RACKTIMER_RACK_REORDER:
		rxrpc_rack_detect_loss_and_arm_timer(call, &summary);
		break;
	case RXRPC_CALL_RACKTIMER_TLP_PTO:
		rxrpc_tlp_send_probe(call);
		break;
	case RXRPC_CALL_RACKTIMER_RTO:
		// Might need to poke the congestion algo in some way
		rxrpc_rack_mark_losses_on_rto(call);
		break;
	//case RXRPC_CALL_RACKTIMER_ZEROWIN:
	default:
		pr_warn("Unexpected rack timer %u\n", mode);
	}
}
419