xref: /linux/include/net/tcp_ecn.h (revision f60034689f2c6076c88d247c534ff1ce85a67f47)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 #ifndef _TCP_ECN_H
3 #define _TCP_ECN_H
4 
5 #include <linux/tcp.h>
6 #include <linux/skbuff.h>
7 #include <linux/bitfield.h>
8 
9 #include <net/inet_connection_sock.h>
10 #include <net/sock.h>
11 #include <net/tcp.h>
12 #include <net/inet_ecn.h>
13 
/* ECN negotiation policy. Each value names the highest ECN variant
 * (Accurate ECN, classic ECN, or no ECN) that is attempted to be
 * negotiated: the "IN" part applies to incoming connections and the
 * "OUT" part to outgoing connections.
 */
enum tcp_ecn_mode {
	TCP_ECN_IN_NOECN_OUT_NOECN	= 0,	/* in: none,   out: none   */
	TCP_ECN_IN_ECN_OUT_ECN		= 1,	/* in: ECN,    out: ECN    */
	TCP_ECN_IN_ECN_OUT_NOECN	= 2,	/* in: ECN,    out: none   */
	TCP_ECN_IN_ACCECN_OUT_ACCECN	= 3,	/* in: AccECN, out: AccECN */
	TCP_ECN_IN_ACCECN_OUT_ECN	= 4,	/* in: AccECN, out: ECN    */
	TCP_ECN_IN_ACCECN_OUT_NOECN	= 5,	/* in: AccECN, out: none   */
};
26 
/* Policy for sending the AccECN TCP option once AccECN has been
 * successfully negotiated on a connection.
 */
enum tcp_accecn_option {
	TCP_ACCECN_OPTION_DISABLED	= 0,
	TCP_ACCECN_OPTION_MINIMUM	= 1,
	TCP_ACCECN_OPTION_FULL		= 2,
};
33 
34 static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
35 {
36 	/* Do not set CWR if in AccECN mode! */
37 	if (tcp_ecn_mode_rfc3168(tp))
38 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
39 }
40 
41 static inline void tcp_ecn_accept_cwr(struct sock *sk,
42 				      const struct sk_buff *skb)
43 {
44 	struct tcp_sock *tp = tcp_sk(sk);
45 
46 	if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) {
47 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
48 
49 		/* If the sender is telling us it has entered CWR, then its
50 		 * cwnd may be very low (even just 1 packet), so we should ACK
51 		 * immediately.
52 		 */
53 		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
54 			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
55 	}
56 }
57 
58 static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
59 {
60 	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
61 }
62 
/* Bit flags kept in tp->accecn_fail_mode. They are OR-ed in through
 * tcp_accecn_fail_mode_set() and tested by the tcp_accecn_ace_fail_*()
 * and tcp_accecn_opt_fail_*() helpers.
 */
#define TCP_ACCECN_ACE_FAIL_SEND	BIT(0)	/* ACE, send side */
#define TCP_ACCECN_ACE_FAIL_RECV	BIT(1)	/* ACE, receive side */
#define TCP_ACCECN_OPT_FAIL_SEND	BIT(2)	/* option, send side */
#define TCP_ACCECN_OPT_FAIL_RECV	BIT(3)	/* option, receive side */
68 
69 static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
70 {
71 	return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
72 }
73 
74 static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp)
75 {
76 	return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV;
77 }
78 
79 static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp)
80 {
81 	return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND;
82 }
83 
84 static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp)
85 {
86 	return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV;
87 }
88 
89 static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
90 {
91 	tp->accecn_fail_mode |= mode;
92 }
93 
/* Values stored in tp->saw_accecn_opt (see tcp_accecn_option_init() and
 * tcp_accecn_saw_opt_fail_recv()). The numeric order matters: callers
 * compare against TCP_ACCECN_OPT_COUNTER_SEEN with '<'.
 */
#define TCP_ACCECN_OPT_NOT_SEEN		0x0
#define TCP_ACCECN_OPT_EMPTY_SEEN	0x1	/* option without any field */
#define TCP_ACCECN_OPT_COUNTER_SEEN	0x2	/* option with counter field(s) */
#define TCP_ACCECN_OPT_FAIL_SEEN	0x3	/* option failed validation */
98 
99 static inline u8 tcp_accecn_ace(const struct tcphdr *th)
100 {
101 	return (th->ae << 2) | (th->cwr << 1) | th->ece;
102 }
103 
104 /* Infer the ECT value our SYN arrived with from the echoed ACE field */
105 static inline int tcp_accecn_extract_syn_ect(u8 ace)
106 {
107 	/* Below is an excerpt from the 1st block of Table 2 of AccECN spec */
108 	static const int ace_to_ecn[8] = {
109 		INET_ECN_ECT_0,		/* 0b000 (Undefined) */
110 		INET_ECN_ECT_1,		/* 0b001 (Undefined) */
111 		INET_ECN_NOT_ECT,	/* 0b010 (Not-ECT is received) */
112 		INET_ECN_ECT_1,		/* 0b011 (ECT-1 is received) */
113 		INET_ECN_ECT_0,		/* 0b100 (ECT-0 is received) */
114 		INET_ECN_ECT_1,		/* 0b101 (Reserved) */
115 		INET_ECN_CE,		/* 0b110 (CE is received) */
116 		INET_ECN_ECT_1		/* 0b111 (Undefined) */
117 	};
118 
119 	return ace_to_ecn[ace & 0x7];
120 }
121 
122 /* Check ECN field transition to detect invalid transitions */
123 static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv)
124 {
125 	if (rcv == snt)
126 		return true;
127 
128 	/* Non-ECT altered to something or something became non-ECT */
129 	if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT)
130 		return false;
131 	/* CE -> ECT(0/1)? */
132 	if (snt == INET_ECN_CE)
133 		return false;
134 	return true;
135 }
136 
137 static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace,
138 						    u8 sent_ect)
139 {
140 	u8 ect = tcp_accecn_extract_syn_ect(ace);
141 	struct tcp_sock *tp = tcp_sk(sk);
142 
143 	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
144 		return true;
145 
146 	if (!tcp_ect_transition_valid(sent_ect, ect)) {
147 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
148 		return false;
149 	}
150 
151 	return true;
152 }
153 
154 static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp,
155 						u8 saw_opt)
156 {
157 	tp->saw_accecn_opt = saw_opt;
158 	if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN)
159 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV);
160 }
161 
162 /* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */
163 static inline void tcp_accecn_third_ack(struct sock *sk,
164 					const struct sk_buff *skb, u8 sent_ect)
165 {
166 	u8 ace = tcp_accecn_ace(tcp_hdr(skb));
167 	struct tcp_sock *tp = tcp_sk(sk);
168 
169 	switch (ace) {
170 	case 0x0:
171 		/* Invalid value */
172 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
173 		break;
174 	case 0x7:
175 	case 0x5:
176 	case 0x1:
177 		/* Unused but legal values */
178 		break;
179 	default:
180 		/* Validation only applies to first non-data packet */
181 		if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
182 		    !TCP_SKB_CB(skb)->sacked &&
183 		    tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) {
184 			if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) &&
185 			    !tp->delivered_ce)
186 				tp->delivered_ce++;
187 		}
188 		break;
189 	}
190 }
191 
192 /* Demand the minimum # to send AccECN optnio */
193 static inline void tcp_accecn_opt_demand_min(struct sock *sk,
194 					     u8 opt_demand_min)
195 {
196 	struct tcp_sock *tp = tcp_sk(sk);
197 	u8 opt_demand;
198 
199 	opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand);
200 	tp->accecn_opt_demand = opt_demand;
201 }
202 
203 /* Maps IP ECN field ECT/CE code point to AccECN option field number, given
204  * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0).
205  */
206 static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield)
207 {
208 	switch (ecnfield & INET_ECN_MASK) {
209 	case INET_ECN_NOT_ECT:
210 		return 0;	/* AccECN does not send counts of NOT_ECT */
211 	case INET_ECN_ECT_1:
212 		return 1;
213 	case INET_ECN_CE:
214 		return 2;
215 	case INET_ECN_ECT_0:
216 		return 3;
217 	}
218 	return 0;
219 }
220 
221 /* Maps IP ECN field ECT/CE code point to AccECN option field value offset.
222  * Some fields do not start from zero, to detect zeroing by middleboxes.
223  */
224 static inline u32 tcp_accecn_field_init_offset(u8 ecnfield)
225 {
226 	switch (ecnfield & INET_ECN_MASK) {
227 	case INET_ECN_NOT_ECT:
228 		return 0;	/* AccECN does not send counts of NOT_ECT */
229 	case INET_ECN_ECT_1:
230 		return TCP_ACCECN_E1B_INIT_OFFSET;
231 	case INET_ECN_CE:
232 		return TCP_ACCECN_CEB_INIT_OFFSET;
233 	case INET_ECN_ECT_0:
234 		return TCP_ACCECN_E0B_INIT_OFFSET;
235 	}
236 	return 0;
237 }
238 
239 /* Maps AccECN option field #nr to IP ECN field ECT/CE bits */
240 static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option,
241 							   bool order)
242 {
243 	/* Based on Table 5 of the AccECN spec to map (option, order) to
244 	 * the corresponding ECN conuters (ECT-1, ECT-0, or CE).
245 	 */
246 	static const u8 optfield_lookup[2][3] = {
247 		/* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */
248 		{ INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 },
249 		/* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */
250 		{ INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 }
251 	};
252 
253 	return optfield_lookup[order][option % 3];
254 }
255 
256 /* Handles AccECN option ECT and CE 24-bit byte counters update into
257  * the u32 value in tcp_sock. As we're processing TCP options, it is
258  * safe to access from - 1.
259  */
260 static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from,
261 				       u32 init_offset)
262 {
263 	u32 truncated = (get_unaligned_be32(from - 1) - init_offset) &
264 			0xFFFFFFU;
265 	u32 delta = (truncated - *cnt) & 0xFFFFFFU;
266 
267 	/* If delta has the highest bit set (24th bit) indicating
268 	 * negative, sign extend to correct an estimation using
269 	 * sign_extend32(delta, 24 - 1)
270 	 */
271 	delta = sign_extend32(delta, 23);
272 	*cnt += delta;
273 	return (s32)delta;
274 }
275 
276 /* Updates Accurate ECN received counters from the received IP ECN field */
277 static inline void tcp_ecn_received_counters(struct sock *sk,
278 					     const struct sk_buff *skb, u32 len)
279 {
280 	u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
281 	u8 is_ce = INET_ECN_is_ce(ecnfield);
282 	struct tcp_sock *tp = tcp_sk(sk);
283 	bool ecn_edge;
284 
285 	if (!INET_ECN_is_not_ect(ecnfield)) {
286 		u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs);
287 
288 		/* As for accurate ECN, the TCP_ECN_SEEN flag is set by
289 		 * tcp_ecn_received_counters() when the ECN codepoint of
290 		 * received TCP data or ACK contains ECT(0), ECT(1), or CE.
291 		 */
292 		if (!tcp_ecn_mode_rfc3168(tp))
293 			tp->ecn_flags |= TCP_ECN_SEEN;
294 
295 		/* ACE counter tracks *all* segments including pure ACKs */
296 		tp->received_ce += pcount;
297 		tp->received_ce_pending = min(tp->received_ce_pending + pcount,
298 					      0xfU);
299 
300 		if (len > 0) {
301 			u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield);
302 			u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1];
303 			u32 bytes_mask = GENMASK_U32(31, 22);
304 
305 			tp->received_ecn_bytes[ecnfield - 1] += len;
306 			tp->accecn_minlen = max_t(u8, tp->accecn_minlen,
307 						  minlen);
308 
309 			/* Send AccECN option at least once per 2^22-byte
310 			 * increase in any ECN byte counter.
311 			 */
312 			if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) &
313 			    bytes_mask) {
314 				tcp_accecn_opt_demand_min(sk, 1);
315 			}
316 		}
317 	}
318 
319 	ecn_edge = tp->prev_ecnfield != ecnfield;
320 	if (ecn_edge || is_ce) {
321 		tp->prev_ecnfield = ecnfield;
322 		/* Demand Accurate ECN change-triggered ACKs. Two ACK are
323 		 * demanded to indicate unambiguously the ecnfield value
324 		 * in the latter ACK.
325 		 */
326 		if (tcp_ecn_mode_accecn(tp)) {
327 			if (ecn_edge)
328 				inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
329 			tp->accecn_opt_demand = 2;
330 		}
331 	}
332 }
333 
334 /* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
335  * initialized at the start of	the half-connection. [...] These byte counters
336  * reflect only the TCP payload length, excluding TCP header and TCP options.
337  */
338 static inline void tcp_ecn_received_counters_payload(struct sock *sk,
339 						     const struct sk_buff *skb)
340 {
341 	const struct tcphdr *th = (const struct tcphdr *)skb->data;
342 
343 	tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
344 }
345 
346 /* AccECN specification, 5.1: [...] a server can determine that it
347  * negotiated AccECN as [...] if the ACK contains an ACE field with
348  * the value 0b010 to 0b111 (decimal 2 to 7).
349  */
350 static inline bool cookie_accecn_ok(const struct tcphdr *th)
351 {
352 	return tcp_accecn_ace(th) > 0x1;
353 }
354 
355 /* Used to form the ACE flags for SYN/ACK */
356 static inline u16 tcp_accecn_reflector_flags(u8 ect)
357 {
358 	/* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN.
359 	 * Below is an excerpt from the 1st block of Table 2 of AccECN spec,
360 	 * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE
361 	 */
362 	static const u8 ecn_to_ace_flags[4] = {
363 		0b010,	/* Not-ECT is received */
364 		0b011,	/* ECT(1) is received */
365 		0b100,	/* ECT(0) is received */
366 		0b110	/* CE is received */
367 	};
368 
369 	return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]);
370 }
371 
372 /* AccECN specification, 3.1.2: If a TCP server that implements AccECN
373  * receives a SYN with the three TCP header flags (AE, CWR and ECE) set
374  * to any combination other than 000, 011 or 111, it MUST negotiate the
375  * use of AccECN as if they had been set to 111.
376  */
377 static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
378 {
379 	u8 ace = tcp_accecn_ace(th);
380 
381 	return ace && ace != 0x3;
382 }
383 
384 static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
385 {
386 	BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
387 	BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
388 	BUILD_BUG_ON(INET_ECN_CE != 0x3);
389 
390 	counter_array[INET_ECN_ECT_1 - 1] = 0;
391 	counter_array[INET_ECN_ECT_0 - 1] = 0;
392 	counter_array[INET_ECN_CE - 1] = 0;
393 }
394 
395 static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
396 {
397 	tp->received_ce = 0;
398 	tp->received_ce_pending = 0;
399 	__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
400 	__tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
401 	tp->accecn_minlen = 0;
402 	tp->accecn_opt_demand = 0;
403 	tp->est_ecnfield = 0;
404 }
405 
406 /* Used for make_synack to form the ACE flags */
407 static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
408 {
409 	/* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received
410 	 * from SYN. Below is an excerpt from Table 2 of the AccECN spec:
411 	 * +====================+====================================+
412 	 * |  IP-ECN codepoint  |  Respective ACE falgs on SYN/ACK   |
413 	 * |   received on SYN  |       AE       CWR       ECE       |
414 	 * +====================+====================================+
415 	 * |      Not-ECT       |       0         1         0        |
416 	 * |      ECT(1)        |       0         1         1        |
417 	 * |      ECT(0)        |       1         0         0        |
418 	 * |        CE          |       1         1         0        |
419 	 * +====================+====================================+
420 	 */
421 	th->ae = !!(ect & INET_ECN_ECT_0);
422 	th->cwr = ect != INET_ECN_ECT_0;
423 	th->ece = ect == INET_ECN_ECT_1;
424 }
425 
426 static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb,
427 				      struct tcphdr *th)
428 {
429 	u32 wire_ace;
430 
431 	/* The final packet of the 3WHS or anything like it must reflect
432 	 * the SYN/ACK ECT instead of putting CEP into ACE field, such
433 	 * case show up in tcp_flags.
434 	 */
435 	if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) {
436 		wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET;
437 		th->ece = !!(wire_ace & 0x1);
438 		th->cwr = !!(wire_ace & 0x2);
439 		th->ae = !!(wire_ace & 0x4);
440 		tp->received_ce_pending = 0;
441 	}
442 }
443 
444 static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
445 					u8 opt_offset)
446 {
447 	u8 *ptr = skb_transport_header(skb) + opt_offset;
448 	unsigned int optlen = ptr[1] - 2;
449 
450 	if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1))
451 		return TCP_ACCECN_OPT_FAIL_SEEN;
452 	ptr += 2;
453 
454 	/* Detect option zeroing: an AccECN connection "MAY check that the
455 	 * initial value of the EE0B field or the EE1B field is non-zero"
456 	 */
457 	if (optlen < TCPOLEN_ACCECN_PERFIELD)
458 		return TCP_ACCECN_OPT_EMPTY_SEEN;
459 	if (get_unaligned_be24(ptr) == 0)
460 		return TCP_ACCECN_OPT_FAIL_SEEN;
461 	if (optlen < TCPOLEN_ACCECN_PERFIELD * 3)
462 		return TCP_ACCECN_OPT_COUNTER_SEEN;
463 	ptr += TCPOLEN_ACCECN_PERFIELD * 2;
464 	if (get_unaligned_be24(ptr) == 0)
465 		return TCP_ACCECN_OPT_FAIL_SEEN;
466 
467 	return TCP_ACCECN_OPT_COUNTER_SEEN;
468 }
469 
470 /* See Table 2 of the AccECN draft */
471 static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
472 				      const struct tcphdr *th, u8 ip_dsfield)
473 {
474 	struct tcp_sock *tp = tcp_sk(sk);
475 	u8 ace = tcp_accecn_ace(th);
476 
477 	switch (ace) {
478 	case 0x0:
479 	case 0x7:
480 		/* +========+========+============+=============+
481 		 * | A      | B      |  SYN/ACK   |  Feedback   |
482 		 * |        |        |    B->A    |  Mode of A  |
483 		 * |        |        | AE CWR ECE |             |
484 		 * +========+========+============+=============+
485 		 * | AccECN | No ECN | 0   0   0  |   Not ECN   |
486 		 * | AccECN | Broken | 1   1   1  |   Not ECN   |
487 		 * +========+========+============+=============+
488 		 */
489 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
490 		break;
491 	case 0x1:
492 	case 0x5:
493 		/* +========+========+============+=============+
494 		 * | A      | B      |  SYN/ACK   |  Feedback   |
495 		 * |        |        |    B->A    |  Mode of A  |
496 		 * |        |        | AE CWR ECE |             |
497 		 * +========+========+============+=============+
498 		 * | AccECN | Nonce  | 1   0   1  | (Reserved)  |
499 		 * | AccECN | ECN    | 0   0   1  | Classic ECN |
500 		 * | Nonce  | AccECN | 0   0   1  | Classic ECN |
501 		 * | ECN    | AccECN | 0   0   1  | Classic ECN |
502 		 * +========+========+============+=============+
503 		 */
504 		if (tcp_ecn_mode_pending(tp))
505 			/* Downgrade from AccECN, or requested initially */
506 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
507 		break;
508 	default:
509 		tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
510 		tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
511 		if (tp->rx_opt.accecn &&
512 		    tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
513 			u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
514 
515 			tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
516 			tp->accecn_opt_demand = 2;
517 		}
518 		if (INET_ECN_is_ce(ip_dsfield) &&
519 		    tcp_accecn_validate_syn_feedback(sk, ace,
520 						     tp->syn_ect_snt)) {
521 			tp->received_ce++;
522 			tp->received_ce_pending++;
523 		}
524 		break;
525 	}
526 }
527 
528 static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
529 				   const struct sk_buff *skb)
530 {
531 	if (tcp_ecn_mode_pending(tp)) {
532 		if (!tcp_accecn_syn_requested(th)) {
533 			/* Downgrade to classic ECN feedback */
534 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
535 		} else {
536 			tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
537 					  INET_ECN_MASK;
538 			tp->prev_ecnfield = tp->syn_ect_rcv;
539 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
540 		}
541 	}
542 	if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
543 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
544 }
545 
546 static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
547 					const struct tcphdr *th)
548 {
549 	if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
550 		return true;
551 	return false;
552 }
553 
554 /* Packet ECN state for a SYN-ACK */
555 static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
556 {
557 	struct tcp_sock *tp = tcp_sk(sk);
558 
559 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
560 	if (tcp_ecn_disabled(tp))
561 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
562 	else if (tcp_ca_needs_ecn(sk) ||
563 		 tcp_bpf_ca_needs_ecn(sk))
564 		INET_ECN_xmit(sk);
565 
566 	if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
567 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
568 		TCP_SKB_CB(skb)->tcp_flags |=
569 			tcp_accecn_reflector_flags(tp->syn_ect_rcv);
570 		tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
571 	}
572 }
573 
574 /* Packet ECN state for a SYN.  */
575 static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
576 {
577 	struct tcp_sock *tp = tcp_sk(sk);
578 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
579 	bool use_ecn, use_accecn;
580 	u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
581 
582 	use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
583 	use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
584 		  tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
585 		  tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
586 
587 	if (!use_ecn) {
588 		const struct dst_entry *dst = __sk_dst_get(sk);
589 
590 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
591 			use_ecn = true;
592 	}
593 
594 	tp->ecn_flags = 0;
595 
596 	if (use_ecn) {
597 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
598 			INET_ECN_xmit(sk);
599 
600 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
601 		if (use_accecn) {
602 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE;
603 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING);
604 			tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
605 		} else {
606 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
607 		}
608 	}
609 }
610 
611 static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
612 {
613 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) {
614 		/* tp->ecn_flags are cleared at a later point in time when
615 		 * SYN ACK is ultimatively being received.
616 		 */
617 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
618 	}
619 }
620 
621 static inline void
622 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
623 {
624 	if (tcp_rsk(req)->accecn_ok)
625 		tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
626 	else if (inet_rsk(req)->ecn_ok)
627 		th->ece = 1;
628 }
629 
630 static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
631 {
632 	u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon);
633 	const struct tcp_sock *tp = tcp_sk(sk);
634 
635 	if (!ecn_beacon)
636 		return false;
637 
638 	return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >=
639 	       (tp->srtt_us >> 3);
640 }
641 
#endif /* _TCP_ECN_H */
643