xref: /linux/include/net/tcp_ecn.h (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 #ifndef _TCP_ECN_H
3 #define _TCP_ECN_H
4 
5 #include <linux/tcp.h>
6 #include <linux/skbuff.h>
7 #include <linux/bitfield.h>
8 
9 #include <net/inet_connection_sock.h>
10 #include <net/sock.h>
11 #include <net/tcp.h>
12 #include <net/inet_ecn.h>
13 
/* The highest ECN variant (Accurate ECN, classic ECN, or no ECN) that is
 * attempted to be negotiated for incoming connections (IN) and requested
 * for outgoing connections (OUT), respectively.
 */
enum tcp_ecn_mode {
	TCP_ECN_IN_NOECN_OUT_NOECN	= 0,	/* in: no ECN, out: no ECN */
	TCP_ECN_IN_ECN_OUT_ECN		= 1,	/* in: ECN,    out: ECN    */
	TCP_ECN_IN_ECN_OUT_NOECN	= 2,	/* in: ECN,    out: no ECN */
	TCP_ECN_IN_ACCECN_OUT_ACCECN	= 3,	/* in: AccECN, out: AccECN */
	TCP_ECN_IN_ACCECN_OUT_ECN	= 4,	/* in: AccECN, out: ECN    */
	TCP_ECN_IN_ACCECN_OUT_NOECN	= 5,	/* in: AccECN, out: no ECN */
};
26 
/* AccECN option sending policy, applicable once AccECN has been
 * successfully negotiated.
 */
enum tcp_accecn_option {
	TCP_ACCECN_OPTION_DISABLED	= 0,
	TCP_ACCECN_OPTION_MINIMUM	= 1,
	TCP_ACCECN_OPTION_FULL		= 2,
	TCP_ACCECN_OPTION_PERSIST	= 3,
};
34 
/* Mark @sk for ECN-capable transmission, choosing between ECT(0) and ECT(1)
 * according to what tcp_ca_ect_1_negotiation() reports for the socket
 * (the TCP_CONG_ECT_1_NEGOTIATION flag).
 */
static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk)
{
	__INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk));
}
40 
41 static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
42 {
43 	/* Do not set CWR if in AccECN mode! */
44 	if (tcp_ecn_mode_rfc3168(tp))
45 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
46 }
47 
48 static inline void tcp_ecn_accept_cwr(struct sock *sk,
49 				      const struct sk_buff *skb)
50 {
51 	struct tcp_sock *tp = tcp_sk(sk);
52 
53 	if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) {
54 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
55 
56 		/* If the sender is telling us it has entered CWR, then its
57 		 * cwnd may be very low (even just 1 packet), so we should ACK
58 		 * immediately.
59 		 */
60 		if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
61 			inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
62 	}
63 }
64 
65 static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
66 {
67 	tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
68 }
69 
70 static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
71 {
72 	return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
73 }
74 
75 static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp)
76 {
77 	return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV;
78 }
79 
80 static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp)
81 {
82 	return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND;
83 }
84 
85 static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp)
86 {
87 	return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV;
88 }
89 
90 static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
91 {
92 	tp->accecn_fail_mode |= mode;
93 }
94 
95 static inline u8 tcp_accecn_ace(const struct tcphdr *th)
96 {
97 	return (th->ae << 2) | (th->cwr << 1) | th->ece;
98 }
99 
100 /* Infer the ECT value our SYN arrived with from the echoed ACE field */
101 static inline int tcp_accecn_extract_syn_ect(u8 ace)
102 {
103 	/* Below is an excerpt from the 1st block of Table 2 of AccECN spec */
104 	static const int ace_to_ecn[8] = {
105 		INET_ECN_ECT_0,		/* 0b000 (Undefined) */
106 		INET_ECN_ECT_1,		/* 0b001 (Undefined) */
107 		INET_ECN_NOT_ECT,	/* 0b010 (Not-ECT is received) */
108 		INET_ECN_ECT_1,		/* 0b011 (ECT-1 is received) */
109 		INET_ECN_ECT_0,		/* 0b100 (ECT-0 is received) */
110 		INET_ECN_ECT_1,		/* 0b101 (Reserved) */
111 		INET_ECN_CE,		/* 0b110 (CE is received) */
112 		INET_ECN_ECT_1		/* 0b111 (Undefined) */
113 	};
114 
115 	return ace_to_ecn[ace & 0x7];
116 }
117 
118 /* Check ECN field transition to detect invalid transitions */
119 static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv)
120 {
121 	if (rcv == snt)
122 		return true;
123 
124 	/* Non-ECT altered to something or something became non-ECT */
125 	if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT)
126 		return false;
127 	/* CE -> ECT(0/1)? */
128 	if (snt == INET_ECN_CE)
129 		return false;
130 	return true;
131 }
132 
133 static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace,
134 						    u8 sent_ect)
135 {
136 	u8 ect = tcp_accecn_extract_syn_ect(ace);
137 	struct tcp_sock *tp = tcp_sk(sk);
138 
139 	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
140 		return true;
141 
142 	if (!tcp_ect_transition_valid(sent_ect, ect)) {
143 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
144 		return false;
145 	}
146 
147 	return true;
148 }
149 
150 static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp,
151 						u8 saw_opt)
152 {
153 	tp->saw_accecn_opt = saw_opt;
154 	if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN)
155 		tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV);
156 }
157 
158 /* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */
159 static inline void tcp_accecn_third_ack(struct sock *sk,
160 					const struct sk_buff *skb, u8 sent_ect)
161 {
162 	u8 ace = tcp_accecn_ace(tcp_hdr(skb));
163 	struct tcp_sock *tp = tcp_sk(sk);
164 
165 	switch (ace) {
166 	case 0x0:
167 		/* Invalid value */
168 		if (!TCP_SKB_CB(skb)->sacked)
169 			tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV |
170 						     TCP_ACCECN_OPT_FAIL_RECV);
171 		break;
172 	case 0x7:
173 	case 0x5:
174 	case 0x1:
175 		/* Unused but legal values */
176 		break;
177 	default:
178 		/* Validation only applies to first non-data packet */
179 		if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
180 		    !TCP_SKB_CB(skb)->sacked &&
181 		    tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) {
182 			if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) &&
183 			    !tp->delivered_ce)
184 				tp->delivered_ce++;
185 		}
186 		break;
187 	}
188 }
189 
190 /* Demand the minimum # to send AccECN optnio */
191 static inline void tcp_accecn_opt_demand_min(struct sock *sk,
192 					     u8 opt_demand_min)
193 {
194 	struct tcp_sock *tp = tcp_sk(sk);
195 	u8 opt_demand;
196 
197 	opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand);
198 	tp->accecn_opt_demand = opt_demand;
199 }
200 
201 /* Maps IP ECN field ECT/CE code point to AccECN option field number, given
202  * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0).
203  */
204 static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield)
205 {
206 	switch (ecnfield & INET_ECN_MASK) {
207 	case INET_ECN_NOT_ECT:
208 		return 0;	/* AccECN does not send counts of NOT_ECT */
209 	case INET_ECN_ECT_1:
210 		return 1;
211 	case INET_ECN_CE:
212 		return 2;
213 	case INET_ECN_ECT_0:
214 		return 3;
215 	}
216 	return 0;
217 }
218 
219 /* Maps IP ECN field ECT/CE code point to AccECN option field value offset.
220  * Some fields do not start from zero, to detect zeroing by middleboxes.
221  */
222 static inline u32 tcp_accecn_field_init_offset(u8 ecnfield)
223 {
224 	switch (ecnfield & INET_ECN_MASK) {
225 	case INET_ECN_NOT_ECT:
226 		return 0;	/* AccECN does not send counts of NOT_ECT */
227 	case INET_ECN_ECT_1:
228 		return TCP_ACCECN_E1B_INIT_OFFSET;
229 	case INET_ECN_CE:
230 		return TCP_ACCECN_CEB_INIT_OFFSET;
231 	case INET_ECN_ECT_0:
232 		return TCP_ACCECN_E0B_INIT_OFFSET;
233 	}
234 	return 0;
235 }
236 
237 /* Maps AccECN option field #nr to IP ECN field ECT/CE bits */
238 static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option,
239 							   bool order)
240 {
241 	/* Based on Table 5 of the AccECN spec to map (option, order) to
242 	 * the corresponding ECN conuters (ECT-1, ECT-0, or CE).
243 	 */
244 	static const u8 optfield_lookup[2][3] = {
245 		/* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */
246 		{ INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 },
247 		/* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */
248 		{ INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 }
249 	};
250 
251 	return optfield_lookup[order][option % 3];
252 }
253 
254 /* Handles AccECN option ECT and CE 24-bit byte counters update into
255  * the u32 value in tcp_sock. As we're processing TCP options, it is
256  * safe to access from - 1.
257  */
258 static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from,
259 				       u32 init_offset)
260 {
261 	u32 truncated = (get_unaligned_be32(from - 1) - init_offset) &
262 			0xFFFFFFU;
263 	u32 delta = (truncated - *cnt) & 0xFFFFFFU;
264 
265 	/* If delta has the highest bit set (24th bit) indicating
266 	 * negative, sign extend to correct an estimation using
267 	 * sign_extend32(delta, 24 - 1)
268 	 */
269 	delta = sign_extend32(delta, 23);
270 	*cnt += delta;
271 	return (s32)delta;
272 }
273 
274 /* Updates Accurate ECN received counters from the received IP ECN field */
275 static inline void tcp_ecn_received_counters(struct sock *sk,
276 					     const struct sk_buff *skb, u32 len)
277 {
278 	u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
279 	u8 is_ce = INET_ECN_is_ce(ecnfield);
280 	struct tcp_sock *tp = tcp_sk(sk);
281 	bool ecn_edge;
282 
283 	if (!INET_ECN_is_not_ect(ecnfield)) {
284 		u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs);
285 
286 		/* As for accurate ECN, the TCP_ECN_SEEN flag is set by
287 		 * tcp_ecn_received_counters() when the ECN codepoint of
288 		 * received TCP data or ACK contains ECT(0), ECT(1), or CE.
289 		 */
290 		if (!tcp_ecn_mode_rfc3168(tp))
291 			tp->ecn_flags |= TCP_ECN_SEEN;
292 
293 		/* ACE counter tracks *all* segments including pure ACKs */
294 		tp->received_ce += pcount;
295 		tp->received_ce_pending = min(tp->received_ce_pending + pcount,
296 					      0xfU);
297 
298 		if (len > 0) {
299 			u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield);
300 			u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1];
301 			u32 bytes_mask = GENMASK_U32(31, 22);
302 
303 			tp->received_ecn_bytes[ecnfield - 1] += len;
304 			tp->accecn_minlen = max_t(u8, tp->accecn_minlen,
305 						  minlen);
306 
307 			/* Send AccECN option at least once per 2^22-byte
308 			 * increase in any ECN byte counter.
309 			 */
310 			if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) &
311 			    bytes_mask) {
312 				tcp_accecn_opt_demand_min(sk, 1);
313 			}
314 		}
315 	}
316 
317 	ecn_edge = tp->prev_ecnfield != ecnfield;
318 	if (ecn_edge || is_ce) {
319 		tp->prev_ecnfield = ecnfield;
320 		/* Demand Accurate ECN change-triggered ACKs. Two ACK are
321 		 * demanded to indicate unambiguously the ecnfield value
322 		 * in the latter ACK.
323 		 */
324 		if (tcp_ecn_mode_accecn(tp)) {
325 			if (ecn_edge)
326 				inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
327 			tp->accecn_opt_demand = 2;
328 		}
329 	}
330 }
331 
332 /* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
333  * initialized at the start of	the half-connection. [...] These byte counters
334  * reflect only the TCP payload length, excluding TCP header and TCP options.
335  */
336 static inline void tcp_ecn_received_counters_payload(struct sock *sk,
337 						     const struct sk_buff *skb)
338 {
339 	const struct tcphdr *th = (const struct tcphdr *)skb->data;
340 
341 	tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
342 }
343 
344 /* AccECN specification, 5.1: [...] a server can determine that it
345  * negotiated AccECN as [...] if the ACK contains an ACE field with
346  * the value 0b010 to 0b111 (decimal 2 to 7).
347  */
348 static inline bool cookie_accecn_ok(const struct tcphdr *th)
349 {
350 	return tcp_accecn_ace(th) > 0x1;
351 }
352 
353 /* Used to form the ACE flags for SYN/ACK */
354 static inline u16 tcp_accecn_reflector_flags(u8 ect)
355 {
356 	/* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN.
357 	 * Below is an excerpt from the 1st block of Table 2 of AccECN spec,
358 	 * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE
359 	 */
360 	static const u8 ecn_to_ace_flags[4] = {
361 		0b010,	/* Not-ECT is received */
362 		0b011,	/* ECT(1) is received */
363 		0b100,	/* ECT(0) is received */
364 		0b110	/* CE is received */
365 	};
366 
367 	return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]);
368 }
369 
370 /* AccECN specification, 3.1.2: If a TCP server that implements AccECN
371  * receives a SYN with the three TCP header flags (AE, CWR and ECE) set
372  * to any combination other than 000, 011 or 111, it MUST negotiate the
373  * use of AccECN as if they had been set to 111.
374  */
375 static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
376 {
377 	u8 ace = tcp_accecn_ace(th);
378 
379 	return ace && ace != 0x3;
380 }
381 
382 static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
383 {
384 	BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
385 	BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
386 	BUILD_BUG_ON(INET_ECN_CE != 0x3);
387 
388 	counter_array[INET_ECN_ECT_1 - 1] = 0;
389 	counter_array[INET_ECN_ECT_0 - 1] = 0;
390 	counter_array[INET_ECN_CE - 1] = 0;
391 }
392 
393 static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
394 {
395 	tp->received_ce = 0;
396 	tp->received_ce_pending = 0;
397 	__tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
398 	__tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
399 	tp->accecn_opt_sent_w_dsack = 0;
400 	tp->accecn_minlen = 0;
401 	tp->accecn_opt_demand = 0;
402 	tp->est_ecnfield = 0;
403 }
404 
405 /* Used for make_synack to form the ACE flags */
406 static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
407 {
408 	/* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received
409 	 * from SYN. Below is an excerpt from Table 2 of the AccECN spec:
410 	 * +====================+====================================+
411 	 * |  IP-ECN codepoint  |  Respective ACE falgs on SYN/ACK   |
412 	 * |   received on SYN  |       AE       CWR       ECE       |
413 	 * +====================+====================================+
414 	 * |      Not-ECT       |       0         1         0        |
415 	 * |      ECT(1)        |       0         1         1        |
416 	 * |      ECT(0)        |       1         0         0        |
417 	 * |        CE          |       1         1         0        |
418 	 * +====================+====================================+
419 	 */
420 	th->ae = !!(ect & INET_ECN_ECT_0);
421 	th->cwr = ect != INET_ECN_ECT_0;
422 	th->ece = ect == INET_ECN_ECT_1;
423 }
424 
425 static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb,
426 				      struct tcphdr *th)
427 {
428 	u32 wire_ace;
429 
430 	/* The final packet of the 3WHS or anything like it must reflect
431 	 * the SYN/ACK ECT instead of putting CEP into ACE field, such
432 	 * case show up in tcp_flags.
433 	 */
434 	if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) {
435 		wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET;
436 		th->ece = !!(wire_ace & 0x1);
437 		th->cwr = !!(wire_ace & 0x2);
438 		th->ae = !!(wire_ace & 0x4);
439 		tp->received_ce_pending = 0;
440 	}
441 }
442 
443 static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
444 					u8 opt_offset)
445 {
446 	u8 *ptr = skb_transport_header(skb) + opt_offset;
447 	unsigned int optlen = ptr[1] - 2;
448 
449 	if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1))
450 		return TCP_ACCECN_OPT_FAIL_SEEN;
451 	ptr += 2;
452 
453 	/* Detect option zeroing: an AccECN connection "MAY check that the
454 	 * initial value of the EE0B field or the EE1B field is non-zero"
455 	 */
456 	if (optlen < TCPOLEN_ACCECN_PERFIELD)
457 		return TCP_ACCECN_OPT_EMPTY_SEEN;
458 	if (get_unaligned_be24(ptr) == 0)
459 		return TCP_ACCECN_OPT_FAIL_SEEN;
460 	if (optlen < TCPOLEN_ACCECN_PERFIELD * 3)
461 		return TCP_ACCECN_OPT_COUNTER_SEEN;
462 	ptr += TCPOLEN_ACCECN_PERFIELD * 2;
463 	if (get_unaligned_be24(ptr) == 0)
464 		return TCP_ACCECN_OPT_FAIL_SEEN;
465 
466 	return TCP_ACCECN_OPT_COUNTER_SEEN;
467 }
468 
469 static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk,
470 					     const struct sk_buff *skb, u8 dsf)
471 {
472 	struct tcp_sock *tp = tcp_sk(sk);
473 
474 	tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
475 	tp->syn_ect_rcv = dsf & INET_ECN_MASK;
476 	/* Demand Accurate ECN option in response to the SYN on the SYN/ACK
477 	 * and the TCP server will try to send one more packet with an AccECN
478 	 * Option at a later point during the connection.
479 	 */
480 	if (tp->rx_opt.accecn &&
481 	    tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
482 		u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
483 
484 		tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
485 		tp->accecn_opt_demand = 2;
486 	}
487 }
488 
489 /* See Table 2 of the AccECN draft */
490 static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
491 				      const struct tcphdr *th, u8 ip_dsfield)
492 {
493 	struct tcp_sock *tp = tcp_sk(sk);
494 	u8 ace = tcp_accecn_ace(th);
495 
496 	switch (ace) {
497 	case 0x0:
498 	case 0x7:
499 		/* +========+========+============+=============+
500 		 * | A      | B      |  SYN/ACK   |  Feedback   |
501 		 * |        |        |    B->A    |  Mode of A  |
502 		 * |        |        | AE CWR ECE |             |
503 		 * +========+========+============+=============+
504 		 * | AccECN | No ECN | 0   0   0  |   Not ECN   |
505 		 * | AccECN | Broken | 1   1   1  |   Not ECN   |
506 		 * +========+========+============+=============+
507 		 */
508 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
509 		break;
510 	case 0x1:
511 		/* +========+========+============+=============+
512 		 * | A      | B      |  SYN/ACK   |  Feedback   |
513 		 * |        |        |    B->A    |  Mode of A  |
514 		 * |        |        | AE CWR ECE |             |
515 		 * +========+========+============+=============+
516 		 * | AccECN | ECN    | 0   0   1  | Classic ECN |
517 		 * | Nonce  | AccECN | 0   0   1  | Classic ECN |
518 		 * | ECN    | AccECN | 0   0   1  | Classic ECN |
519 		 * +========+========+============+=============+
520 		 */
521 		if (tcp_ca_no_fallback_rfc3168(sk))
522 			tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
523 		else
524 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
525 		break;
526 	case 0x5:
527 		if (tcp_ecn_mode_pending(tp)) {
528 			tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
529 			if (INET_ECN_is_ce(ip_dsfield)) {
530 				tp->received_ce++;
531 				tp->received_ce_pending++;
532 			}
533 		}
534 		break;
535 	default:
536 		tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield);
537 		if (INET_ECN_is_ce(ip_dsfield) &&
538 		    tcp_accecn_validate_syn_feedback(sk, ace,
539 						     tp->syn_ect_snt)) {
540 			tp->received_ce++;
541 			tp->received_ce_pending++;
542 		}
543 		break;
544 	}
545 }
546 
547 static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th,
548 				   const struct sk_buff *skb)
549 {
550 	struct tcp_sock *tp = tcp_sk(sk);
551 
552 	if (tcp_ecn_mode_pending(tp)) {
553 		if (!tcp_accecn_syn_requested(th)) {
554 			/* Downgrade to classic ECN feedback */
555 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
556 		} else {
557 			tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
558 					  INET_ECN_MASK;
559 			tp->prev_ecnfield = tp->syn_ect_rcv;
560 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
561 		}
562 	}
563 	if (tcp_ecn_mode_rfc3168(tp) &&
564 	    (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk)))
565 		tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
566 }
567 
568 static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
569 					const struct tcphdr *th)
570 {
571 	if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
572 		return true;
573 	return false;
574 }
575 
576 /* Packet ECN state for a SYN-ACK */
577 static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
578 {
579 	struct tcp_sock *tp = tcp_sk(sk);
580 
581 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
582 	if (tcp_ecn_disabled(tp))
583 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
584 	else if (tcp_ca_needs_ecn(sk) ||
585 		 tcp_bpf_ca_needs_ecn(sk))
586 		INET_ECN_xmit_ect_1_negotiation(sk);
587 
588 	if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
589 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
590 		TCP_SKB_CB(skb)->tcp_flags |=
591 			tcp_accecn_reflector_flags(tp->syn_ect_rcv);
592 		tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
593 	}
594 }
595 
596 /* Packet ECN state for a SYN.  */
597 static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
598 {
599 	struct tcp_sock *tp = tcp_sk(sk);
600 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
601 	bool use_ecn, use_accecn;
602 	u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
603 
604 	use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN ||
605 		     tcp_ca_needs_accecn(sk);
606 	use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
607 		  tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
608 		  tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
609 
610 	if (!use_ecn) {
611 		const struct dst_entry *dst = __sk_dst_get(sk);
612 
613 		if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
614 			use_ecn = true;
615 	}
616 
617 	tp->ecn_flags = 0;
618 
619 	if (use_ecn) {
620 		if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
621 			INET_ECN_xmit_ect_1_negotiation(sk);
622 
623 		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
624 		if (use_accecn) {
625 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE;
626 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING);
627 			tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
628 		} else {
629 			tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
630 		}
631 	}
632 }
633 
634 static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
635 {
636 	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) {
637 		/* tp->ecn_flags are cleared at a later point in time when
638 		 * SYN ACK is ultimatively being received.
639 		 */
640 		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
641 	}
642 }
643 
644 static inline void
645 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
646 		    enum tcp_synack_type synack_type)
647 {
648 	/* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the
649 	 * previously retransmitted SYN/ACK also times out.
650 	 */
651 	if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) {
652 		if (tcp_rsk(req)->accecn_ok)
653 			tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
654 		else if (inet_rsk(req)->ecn_ok)
655 			th->ece = 1;
656 	} else if (tcp_rsk(req)->accecn_ok) {
657 		th->ae  = 0;
658 		th->cwr = 0;
659 		th->ece = 0;
660 	}
661 }
662 
663 static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
664 {
665 	u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon);
666 	const struct tcp_sock *tp = tcp_sk(sk);
667 
668 	if (!ecn_beacon)
669 		return false;
670 
671 	return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >=
672 	       (tp->srtt_us >> 3);
673 }
674 
#endif /* _TCP_ECN_H */
676