xref: /linux/include/net/tcp_ecn.h (revision aa55a7dde7ec506bb23448a5005ae3f4f809d022)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 #ifndef _TCP_ECN_H
3 #define _TCP_ECN_H
4 
5 #include <linux/tcp.h>
6 #include <linux/skbuff.h>
7 #include <linux/bitfield.h>
8 
9 #include <net/inet_connection_sock.h>
10 #include <net/sock.h>
11 #include <net/tcp.h>
12 #include <net/inet_ecn.h>
13 
/* Highest ECN variant (Accurate ECN, classic ECN, or no ECN) that is
 * attempted to be negotiated. Each enumerator names the variant used for
 * incoming connections (IN_*) first and the variant requested for outgoing
 * connections (OUT_*) second.
 */
enum tcp_ecn_mode {
	TCP_ECN_IN_NOECN_OUT_NOECN	= 0,	/* in: no ECN, out: no ECN */
	TCP_ECN_IN_ECN_OUT_ECN		= 1,	/* in: ECN,    out: ECN    */
	TCP_ECN_IN_ECN_OUT_NOECN	= 2,	/* in: ECN,    out: no ECN */
	TCP_ECN_IN_ACCECN_OUT_ACCECN	= 3,	/* in: AccECN, out: AccECN */
	TCP_ECN_IN_ACCECN_OUT_ECN	= 4,	/* in: AccECN, out: ECN    */
	TCP_ECN_IN_ACCECN_OUT_NOECN	= 5,	/* in: AccECN, out: no ECN */
};
26 
/* How the AccECN option is sent once AccECN has been successfully
 * negotiated.
 */
enum tcp_accecn_option {
	TCP_ACCECN_OPTION_DISABLED	= 0,
	TCP_ACCECN_OPTION_MINIMUM	= 1,
	TCP_ACCECN_OPTION_FULL		= 2,
};
33 
34 static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
35 {
36 	/* Do not set CWR if in AccECN mode! */
37 	if (tcp_ecn_mode_rfc3168(tp))
38 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
39 }
40 
41 static inline void tcp_ecn_accept_cwr(struct sock *sk,
42 				      const struct sk_buff *skb)
43 {
44 	struct tcp_sock *tp = tcp_sk(sk);
45 
46 	if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) {
47 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
48 
49 		/* If the sender is telling us it has entered CWR, then its
50 		 * cwnd may be very low (even just 1 packet), so we should ACK
51 		 * immediately.
52 		 */
53 		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
54 			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
55 	}
56 }
57 
58 static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
59 {
60 	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
61 }
62 
63 /* tp->accecn_fail_mode */
64 #define TCP_ACCECN_ACE_FAIL_SEND	BIT(0)
65 #define TCP_ACCECN_ACE_FAIL_RECV	BIT(1)
66 #define TCP_ACCECN_OPT_FAIL_SEND	BIT(2)
67 #define TCP_ACCECN_OPT_FAIL_RECV	BIT(3)
68 
69 static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
70 {
71 	return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
72 }
73 
74 static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp)
75 {
76 	return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV;
77 }
78 
79 static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp)
80 {
81 	return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND;
82 }
83 
84 static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp)
85 {
86 	return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV;
87 }
88 
89 static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
90 {
91 	tp->accecn_fail_mode |= mode;
92 }
93 
94 static inline u8 tcp_accecn_ace(const struct tcphdr *th)
95 {
96 	return (th->ae << 2) | (th->cwr << 1) | th->ece;
97 }
98 
99 /* Infer the ECT value our SYN arrived with from the echoed ACE field */
100 static inline int tcp_accecn_extract_syn_ect(u8 ace)
101 {
102 	/* Below is an excerpt from the 1st block of Table 2 of AccECN spec */
103 	static const int ace_to_ecn[8] = {
104 		INET_ECN_ECT_0,		/* 0b000 (Undefined) */
105 		INET_ECN_ECT_1,		/* 0b001 (Undefined) */
106 		INET_ECN_NOT_ECT,	/* 0b010 (Not-ECT is received) */
107 		INET_ECN_ECT_1,		/* 0b011 (ECT-1 is received) */
108 		INET_ECN_ECT_0,		/* 0b100 (ECT-0 is received) */
109 		INET_ECN_ECT_1,		/* 0b101 (Reserved) */
110 		INET_ECN_CE,		/* 0b110 (CE is received) */
111 		INET_ECN_ECT_1		/* 0b111 (Undefined) */
112 	};
113 
114 	return ace_to_ecn[ace & 0x7];
115 }
116 
117 /* Check ECN field transition to detect invalid transitions */
118 static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv)
119 {
120 	if (rcv == snt)
121 		return true;
122 
123 	/* Non-ECT altered to something or something became non-ECT */
124 	if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT)
125 		return false;
126 	/* CE -> ECT(0/1)? */
127 	if (snt == INET_ECN_CE)
128 		return false;
129 	return true;
130 }
131 
132 static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace,
133 						    u8 sent_ect)
134 {
135 	u8 ect = tcp_accecn_extract_syn_ect(ace);
136 	struct tcp_sock *tp = tcp_sk(sk);
137 
138 	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
139 		return true;
140 
141 	if (!tcp_ect_transition_valid(sent_ect, ect)) {
142 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
143 		return false;
144 	}
145 
146 	return true;
147 }
148 
149 /* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */
150 static inline void tcp_accecn_third_ack(struct sock *sk,
151 					const struct sk_buff *skb, u8 sent_ect)
152 {
153 	u8 ace = tcp_accecn_ace(tcp_hdr(skb));
154 	struct tcp_sock *tp = tcp_sk(sk);
155 
156 	switch (ace) {
157 	case 0x0:
158 		/* Invalid value */
159 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
160 		break;
161 	case 0x7:
162 	case 0x5:
163 	case 0x1:
164 		/* Unused but legal values */
165 		break;
166 	default:
167 		/* Validation only applies to first non-data packet */
168 		if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
169 		    !TCP_SKB_CB(skb)->sacked &&
170 		    tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) {
171 			if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) &&
172 			    !tp->delivered_ce)
173 				tp->delivered_ce++;
174 		}
175 		break;
176 	}
177 }
178 
179 /* Demand the minimum # to send AccECN optnio */
180 static inline void tcp_accecn_opt_demand_min(struct sock *sk,
181 					     u8 opt_demand_min)
182 {
183 	struct tcp_sock *tp = tcp_sk(sk);
184 	u8 opt_demand;
185 
186 	opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand);
187 	tp->accecn_opt_demand = opt_demand;
188 }
189 
190 /* Maps IP ECN field ECT/CE code point to AccECN option field number, given
191  * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0).
192  */
193 static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield)
194 {
195 	switch (ecnfield & INET_ECN_MASK) {
196 	case INET_ECN_NOT_ECT:
197 		return 0;	/* AccECN does not send counts of NOT_ECT */
198 	case INET_ECN_ECT_1:
199 		return 1;
200 	case INET_ECN_CE:
201 		return 2;
202 	case INET_ECN_ECT_0:
203 		return 3;
204 	}
205 	return 0;
206 }
207 
208 /* Maps IP ECN field ECT/CE code point to AccECN option field value offset.
209  * Some fields do not start from zero, to detect zeroing by middleboxes.
210  */
211 static inline u32 tcp_accecn_field_init_offset(u8 ecnfield)
212 {
213 	switch (ecnfield & INET_ECN_MASK) {
214 	case INET_ECN_NOT_ECT:
215 		return 0;	/* AccECN does not send counts of NOT_ECT */
216 	case INET_ECN_ECT_1:
217 		return TCP_ACCECN_E1B_INIT_OFFSET;
218 	case INET_ECN_CE:
219 		return TCP_ACCECN_CEB_INIT_OFFSET;
220 	case INET_ECN_ECT_0:
221 		return TCP_ACCECN_E0B_INIT_OFFSET;
222 	}
223 	return 0;
224 }
225 
226 /* Maps AccECN option field #nr to IP ECN field ECT/CE bits */
227 static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option,
228 							   bool order)
229 {
230 	/* Based on Table 5 of the AccECN spec to map (option, order) to
231 	 * the corresponding ECN conuters (ECT-1, ECT-0, or CE).
232 	 */
233 	static const u8 optfield_lookup[2][3] = {
234 		/* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */
235 		{ INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 },
236 		/* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */
237 		{ INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 }
238 	};
239 
240 	return optfield_lookup[order][option % 3];
241 }
242 
243 /* Handles AccECN option ECT and CE 24-bit byte counters update into
244  * the u32 value in tcp_sock. As we're processing TCP options, it is
245  * safe to access from - 1.
246  */
247 static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from,
248 				       u32 init_offset)
249 {
250 	u32 truncated = (get_unaligned_be32(from - 1) - init_offset) &
251 			0xFFFFFFU;
252 	u32 delta = (truncated - *cnt) & 0xFFFFFFU;
253 
254 	/* If delta has the highest bit set (24th bit) indicating
255 	 * negative, sign extend to correct an estimation using
256 	 * sign_extend32(delta, 24 - 1)
257 	 */
258 	delta = sign_extend32(delta, 23);
259 	*cnt += delta;
260 	return (s32)delta;
261 }
262 
263 /* Updates Accurate ECN received counters from the received IP ECN field */
264 static inline void tcp_ecn_received_counters(struct sock *sk,
265 					     const struct sk_buff *skb, u32 len)
266 {
267 	u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
268 	u8 is_ce = INET_ECN_is_ce(ecnfield);
269 	struct tcp_sock *tp = tcp_sk(sk);
270 	bool ecn_edge;
271 
272 	if (!INET_ECN_is_not_ect(ecnfield)) {
273 		u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs);
274 
275 		/* As for accurate ECN, the TCP_ECN_SEEN flag is set by
276 		 * tcp_ecn_received_counters() when the ECN codepoint of
277 		 * received TCP data or ACK contains ECT(0), ECT(1), or CE.
278 		 */
279 		if (!tcp_ecn_mode_rfc3168(tp))
280 			tp->ecn_flags |= TCP_ECN_SEEN;
281 
282 		/* ACE counter tracks *all* segments including pure ACKs */
283 		tp->received_ce += pcount;
284 		tp->received_ce_pending = min(tp->received_ce_pending + pcount,
285 					      0xfU);
286 
287 		if (len > 0) {
288 			u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield);
289 			u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1];
290 			u32 bytes_mask = GENMASK_U32(31, 22);
291 
292 			tp->received_ecn_bytes[ecnfield - 1] += len;
293 			tp->accecn_minlen = max_t(u8, tp->accecn_minlen,
294 						  minlen);
295 
296 			/* Send AccECN option at least once per 2^22-byte
297 			 * increase in any ECN byte counter.
298 			 */
299 			if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) &
300 			    bytes_mask) {
301 				tcp_accecn_opt_demand_min(sk, 1);
302 			}
303 		}
304 	}
305 
306 	ecn_edge = tp->prev_ecnfield != ecnfield;
307 	if (ecn_edge || is_ce) {
308 		tp->prev_ecnfield = ecnfield;
309 		/* Demand Accurate ECN change-triggered ACKs. Two ACK are
310 		 * demanded to indicate unambiguously the ecnfield value
311 		 * in the latter ACK.
312 		 */
313 		if (tcp_ecn_mode_accecn(tp)) {
314 			if (ecn_edge)
315 				inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
316 			tp->accecn_opt_demand = 2;
317 		}
318 	}
319 }
320 
321 /* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
322  * initialized at the start of	the half-connection. [...] These byte counters
323  * reflect only the TCP payload length, excluding TCP header and TCP options.
324  */
325 static inline void tcp_ecn_received_counters_payload(struct sock *sk,
326 						     const struct sk_buff *skb)
327 {
328 	const struct tcphdr *th = (const struct tcphdr *)skb->data;
329 
330 	tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
331 }
332 
333 /* AccECN specification, 5.1: [...] a server can determine that it
334  * negotiated AccECN as [...] if the ACK contains an ACE field with
335  * the value 0b010 to 0b111 (decimal 2 to 7).
336  */
337 static inline bool cookie_accecn_ok(const struct tcphdr *th)
338 {
339 	return tcp_accecn_ace(th) > 0x1;
340 }
341 
342 /* Used to form the ACE flags for SYN/ACK */
343 static inline u16 tcp_accecn_reflector_flags(u8 ect)
344 {
345 	/* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN.
346 	 * Below is an excerpt from the 1st block of Table 2 of AccECN spec,
347 	 * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE
348 	 */
349 	static const u8 ecn_to_ace_flags[4] = {
350 		0b010,	/* Not-ECT is received */
351 		0b011,	/* ECT(1) is received */
352 		0b100,	/* ECT(0) is received */
353 		0b110	/* CE is received */
354 	};
355 
356 	return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]);
357 }
358 
359 /* AccECN specification, 3.1.2: If a TCP server that implements AccECN
360  * receives a SYN with the three TCP header flags (AE, CWR and ECE) set
361  * to any combination other than 000, 011 or 111, it MUST negotiate the
362  * use of AccECN as if they had been set to 111.
363  */
364 static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
365 {
366 	u8 ace = tcp_accecn_ace(th);
367 
368 	return ace && ace != 0x3;
369 }
370 
371 static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
372 {
373 	BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
374 	BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
375 	BUILD_BUG_ON(INET_ECN_CE != 0x3);
376 
377 	counter_array[INET_ECN_ECT_1 - 1] = 0;
378 	counter_array[INET_ECN_ECT_0 - 1] = 0;
379 	counter_array[INET_ECN_CE - 1] = 0;
380 }
381 
382 static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
383 {
384 	tp->received_ce = 0;
385 	tp->received_ce_pending = 0;
386 	__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
387 	__tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
388 	tp->accecn_minlen = 0;
389 	tp->accecn_opt_demand = 0;
390 	tp->est_ecnfield = 0;
391 }
392 
393 /* Used for make_synack to form the ACE flags */
394 static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
395 {
396 	/* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received
397 	 * from SYN. Below is an excerpt from Table 2 of the AccECN spec:
398 	 * +====================+====================================+
399 	 * |  IP-ECN codepoint  |  Respective ACE falgs on SYN/ACK   |
400 	 * |   received on SYN  |       AE       CWR       ECE       |
401 	 * +====================+====================================+
402 	 * |      Not-ECT       |       0         1         0        |
403 	 * |      ECT(1)        |       0         1         1        |
404 	 * |      ECT(0)        |       1         0         0        |
405 	 * |        CE          |       1         1         0        |
406 	 * +====================+====================================+
407 	 */
408 	th->ae = !!(ect & INET_ECN_ECT_0);
409 	th->cwr = ect != INET_ECN_ECT_0;
410 	th->ece = ect == INET_ECN_ECT_1;
411 }
412 
413 static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb,
414 				      struct tcphdr *th)
415 {
416 	u32 wire_ace;
417 
418 	/* The final packet of the 3WHS or anything like it must reflect
419 	 * the SYN/ACK ECT instead of putting CEP into ACE field, such
420 	 * case show up in tcp_flags.
421 	 */
422 	if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) {
423 		wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET;
424 		th->ece = !!(wire_ace & 0x1);
425 		th->cwr = !!(wire_ace & 0x2);
426 		th->ae = !!(wire_ace & 0x4);
427 		tp->received_ce_pending = 0;
428 	}
429 }
430 
431 /* See Table 2 of the AccECN draft */
432 static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th,
433 				      u8 ip_dsfield)
434 {
435 	struct tcp_sock *tp = tcp_sk(sk);
436 	u8 ace = tcp_accecn_ace(th);
437 
438 	switch (ace) {
439 	case 0x0:
440 	case 0x7:
441 		/* +========+========+============+=============+
442 		 * | A      | B      |  SYN/ACK   |  Feedback   |
443 		 * |        |        |    B->A    |  Mode of A  |
444 		 * |        |        | AE CWR ECE |             |
445 		 * +========+========+============+=============+
446 		 * | AccECN | No ECN | 0   0   0  |   Not ECN   |
447 		 * | AccECN | Broken | 1   1   1  |   Not ECN   |
448 		 * +========+========+============+=============+
449 		 */
450 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
451 		break;
452 	case 0x1:
453 	case 0x5:
454 		/* +========+========+============+=============+
455 		 * | A      | B      |  SYN/ACK   |  Feedback   |
456 		 * |        |        |    B->A    |  Mode of A  |
457 		 * |        |        | AE CWR ECE |             |
458 		 * +========+========+============+=============+
459 		 * | AccECN | Nonce  | 1   0   1  | (Reserved)  |
460 		 * | AccECN | ECN    | 0   0   1  | Classic ECN |
461 		 * | Nonce  | AccECN | 0   0   1  | Classic ECN |
462 		 * | ECN    | AccECN | 0   0   1  | Classic ECN |
463 		 * +========+========+============+=============+
464 		 */
465 		if (tcp_ecn_mode_pending(tp))
466 			/* Downgrade from AccECN, or requested initially */
467 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
468 		break;
469 	default:
470 		tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
471 		tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
472 		tp->accecn_opt_demand = 2;
473 		if (INET_ECN_is_ce(ip_dsfield) &&
474 		    tcp_accecn_validate_syn_feedback(sk, ace,
475 						     tp->syn_ect_snt)) {
476 			tp->received_ce++;
477 			tp->received_ce_pending++;
478 		}
479 		break;
480 	}
481 }
482 
483 static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
484 				   const struct sk_buff *skb)
485 {
486 	if (tcp_ecn_mode_pending(tp)) {
487 		if (!tcp_accecn_syn_requested(th)) {
488 			/* Downgrade to classic ECN feedback */
489 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
490 		} else {
491 			tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
492 					  INET_ECN_MASK;
493 			tp->prev_ecnfield = tp->syn_ect_rcv;
494 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
495 		}
496 	}
497 	if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
498 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
499 }
500 
501 static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
502 					const struct tcphdr *th)
503 {
504 	if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
505 		return true;
506 	return false;
507 }
508 
509 /* Packet ECN state for a SYN-ACK */
510 static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
511 {
512 	struct tcp_sock *tp = tcp_sk(sk);
513 
514 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
515 	if (tcp_ecn_disabled(tp))
516 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
517 	else if (tcp_ca_needs_ecn(sk) ||
518 		 tcp_bpf_ca_needs_ecn(sk))
519 		INET_ECN_xmit(sk);
520 
521 	if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
522 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
523 		TCP_SKB_CB(skb)->tcp_flags |=
524 			tcp_accecn_reflector_flags(tp->syn_ect_rcv);
525 		tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
526 	}
527 }
528 
529 /* Packet ECN state for a SYN.  */
530 static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
531 {
532 	struct tcp_sock *tp = tcp_sk(sk);
533 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
534 	bool use_ecn, use_accecn;
535 	u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
536 
537 	use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
538 	use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
539 		  tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
540 		  tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
541 
542 	if (!use_ecn) {
543 		const struct dst_entry *dst = __sk_dst_get(sk);
544 
545 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
546 			use_ecn = true;
547 	}
548 
549 	tp->ecn_flags = 0;
550 
551 	if (use_ecn) {
552 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
553 			INET_ECN_xmit(sk);
554 
555 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
556 		if (use_accecn) {
557 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE;
558 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING);
559 			tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
560 		} else {
561 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
562 		}
563 	}
564 }
565 
566 static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
567 {
568 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) {
569 		/* tp->ecn_flags are cleared at a later point in time when
570 		 * SYN ACK is ultimatively being received.
571 		 */
572 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
573 	}
574 }
575 
576 static inline void
577 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
578 {
579 	if (tcp_rsk(req)->accecn_ok)
580 		tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
581 	else if (inet_rsk(req)->ecn_ok)
582 		th->ece = 1;
583 }
584 
585 static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
586 {
587 	u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon);
588 	const struct tcp_sock *tp = tcp_sk(sk);
589 
590 	if (!ecn_beacon)
591 		return false;
592 
593 	return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >=
594 	       (tp->srtt_us >> 3);
595 }
596 
597 #endif /* _TCP_ECN_H */
598