1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 #ifndef _TCP_ECN_H 3 #define _TCP_ECN_H 4 5 #include <linux/tcp.h> 6 #include <linux/skbuff.h> 7 #include <linux/bitfield.h> 8 9 #include <net/inet_connection_sock.h> 10 #include <net/sock.h> 11 #include <net/tcp.h> 12 #include <net/inet_ecn.h> 13 14 /* The highest ECN variant (Accurate ECN, ECN, or no ECN) that is 15 * attemped to be negotiated and requested for incoming connection 16 * and outgoing connection, respectively. 17 */ 18 enum tcp_ecn_mode { 19 TCP_ECN_IN_NOECN_OUT_NOECN = 0, 20 TCP_ECN_IN_ECN_OUT_ECN = 1, 21 TCP_ECN_IN_ECN_OUT_NOECN = 2, 22 TCP_ECN_IN_ACCECN_OUT_ACCECN = 3, 23 TCP_ECN_IN_ACCECN_OUT_ECN = 4, 24 TCP_ECN_IN_ACCECN_OUT_NOECN = 5, 25 }; 26 27 /* AccECN option sending when AccECN has been successfully negotiated */ 28 enum tcp_accecn_option { 29 TCP_ACCECN_OPTION_DISABLED = 0, 30 TCP_ACCECN_OPTION_MINIMUM = 1, 31 TCP_ACCECN_OPTION_FULL = 2, 32 }; 33 34 static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) 35 { 36 /* Do not set CWR if in AccECN mode! */ 37 if (tcp_ecn_mode_rfc3168(tp)) 38 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 39 } 40 41 static inline void tcp_ecn_accept_cwr(struct sock *sk, 42 const struct sk_buff *skb) 43 { 44 struct tcp_sock *tp = tcp_sk(sk); 45 46 if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) { 47 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 48 49 /* If the sender is telling us it has entered CWR, then its 50 * cwnd may be very low (even just 1 packet), so we should ACK 51 * immediately. 52 */ 53 if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) 54 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; 55 } 56 } 57 58 static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) 59 { 60 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 61 } 62 63 /* tp->accecn_fail_mode */ 64 #define TCP_ACCECN_ACE_FAIL_SEND BIT(0) 65 #define TCP_ACCECN_ACE_FAIL_RECV BIT(1) 66 #define TCP_ACCECN_OPT_FAIL_SEND BIT(2) 67 #define TCP_ACCECN_OPT_FAIL_RECV BIT(3) 68 69 static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp) 70 { 71 return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND; 72 } 73 74 static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp) 75 { 76 return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV; 77 } 78 79 static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp) 80 { 81 return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND; 82 } 83 84 static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp) 85 { 86 return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV; 87 } 88 89 static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) 90 { 91 tp->accecn_fail_mode |= mode; 92 } 93 94 #define TCP_ACCECN_OPT_NOT_SEEN 0x0 95 #define TCP_ACCECN_OPT_EMPTY_SEEN 0x1 96 #define TCP_ACCECN_OPT_COUNTER_SEEN 0x2 97 #define TCP_ACCECN_OPT_FAIL_SEEN 0x3 98 99 static inline u8 tcp_accecn_ace(const struct tcphdr *th) 100 { 101 return (th->ae << 2) | (th->cwr << 1) | th->ece; 102 } 103 104 /* Infer the ECT value our SYN arrived with from the echoed ACE field */ 105 static inline int tcp_accecn_extract_syn_ect(u8 ace) 106 { 107 /* Below is an excerpt from the 1st block of Table 2 of AccECN spec */ 108 static const int ace_to_ecn[8] = { 109 INET_ECN_ECT_0, /* 0b000 (Undefined) */ 110 INET_ECN_ECT_1, /* 0b001 (Undefined) */ 111 INET_ECN_NOT_ECT, /* 0b010 (Not-ECT is received) */ 112 INET_ECN_ECT_1, /* 0b011 (ECT-1 is received) */ 113 INET_ECN_ECT_0, /* 0b100 (ECT-0 is received) */ 114 INET_ECN_ECT_1, /* 0b101 (Reserved) */ 115 INET_ECN_CE, /* 0b110 (CE is received) */ 116 INET_ECN_ECT_1 /* 0b111 (Undefined) */ 117 }; 118 119 return ace_to_ecn[ace & 0x7]; 120 } 121 122 /* Check ECN field transition to detect invalid transitions */ 123 static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv) 124 { 125 if (rcv == snt) 126 return true; 127 128 /* Non-ECT altered to something or something became non-ECT */ 129 if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT) 130 return false; 131 /* CE -> ECT(0/1)? */ 132 if (snt == INET_ECN_CE) 133 return false; 134 return true; 135 } 136 137 static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, 138 u8 sent_ect) 139 { 140 u8 ect = tcp_accecn_extract_syn_ect(ace); 141 struct tcp_sock *tp = tcp_sk(sk); 142 143 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) 144 return true; 145 146 if (!tcp_ect_transition_valid(sent_ect, ect)) { 147 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); 148 return false; 149 } 150 151 return true; 152 } 153 154 static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp, 155 u8 saw_opt) 156 { 157 tp->saw_accecn_opt = saw_opt; 158 if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN) 159 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV); 160 } 161 162 /* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */ 163 static inline void tcp_accecn_third_ack(struct sock *sk, 164 const struct sk_buff *skb, u8 sent_ect) 165 { 166 u8 ace = tcp_accecn_ace(tcp_hdr(skb)); 167 struct tcp_sock *tp = tcp_sk(sk); 168 169 switch (ace) { 170 case 0x0: 171 /* Invalid value */ 172 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); 173 break; 174 case 0x7: 175 case 0x5: 176 case 0x1: 177 /* Unused but legal values */ 178 break; 179 default: 180 /* Validation only applies to first non-data packet */ 181 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq && 182 !TCP_SKB_CB(skb)->sacked && 183 tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) { 184 if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) && 185 !tp->delivered_ce) 186 tp->delivered_ce++; 187 } 188 break; 189 } 190 } 191 192 /* Demand the minimum # to send AccECN optnio */ 193 static inline void tcp_accecn_opt_demand_min(struct sock *sk, 194 u8 opt_demand_min) 195 { 196 struct tcp_sock *tp = tcp_sk(sk); 197 u8 opt_demand; 198 199 opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand); 200 tp->accecn_opt_demand = opt_demand; 201 } 202 203 /* Maps IP ECN field ECT/CE code point to AccECN option field number, given 204 * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0). 205 */ 206 static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield) 207 { 208 switch (ecnfield & INET_ECN_MASK) { 209 case INET_ECN_NOT_ECT: 210 return 0; /* AccECN does not send counts of NOT_ECT */ 211 case INET_ECN_ECT_1: 212 return 1; 213 case INET_ECN_CE: 214 return 2; 215 case INET_ECN_ECT_0: 216 return 3; 217 } 218 return 0; 219 } 220 221 /* Maps IP ECN field ECT/CE code point to AccECN option field value offset. 222 * Some fields do not start from zero, to detect zeroing by middleboxes. 223 */ 224 static inline u32 tcp_accecn_field_init_offset(u8 ecnfield) 225 { 226 switch (ecnfield & INET_ECN_MASK) { 227 case INET_ECN_NOT_ECT: 228 return 0; /* AccECN does not send counts of NOT_ECT */ 229 case INET_ECN_ECT_1: 230 return TCP_ACCECN_E1B_INIT_OFFSET; 231 case INET_ECN_CE: 232 return TCP_ACCECN_CEB_INIT_OFFSET; 233 case INET_ECN_ECT_0: 234 return TCP_ACCECN_E0B_INIT_OFFSET; 235 } 236 return 0; 237 } 238 239 /* Maps AccECN option field #nr to IP ECN field ECT/CE bits */ 240 static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option, 241 bool order) 242 { 243 /* Based on Table 5 of the AccECN spec to map (option, order) to 244 * the corresponding ECN conuters (ECT-1, ECT-0, or CE). 245 */ 246 static const u8 optfield_lookup[2][3] = { 247 /* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */ 248 { INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 }, 249 /* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */ 250 { INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 } 251 }; 252 253 return optfield_lookup[order][option % 3]; 254 } 255 256 /* Handles AccECN option ECT and CE 24-bit byte counters update into 257 * the u32 value in tcp_sock. As we're processing TCP options, it is 258 * safe to access from - 1. 259 */ 260 static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from, 261 u32 init_offset) 262 { 263 u32 truncated = (get_unaligned_be32(from - 1) - init_offset) & 264 0xFFFFFFU; 265 u32 delta = (truncated - *cnt) & 0xFFFFFFU; 266 267 /* If delta has the highest bit set (24th bit) indicating 268 * negative, sign extend to correct an estimation using 269 * sign_extend32(delta, 24 - 1) 270 */ 271 delta = sign_extend32(delta, 23); 272 *cnt += delta; 273 return (s32)delta; 274 } 275 276 /* Updates Accurate ECN received counters from the received IP ECN field */ 277 static inline void tcp_ecn_received_counters(struct sock *sk, 278 const struct sk_buff *skb, u32 len) 279 { 280 u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; 281 u8 is_ce = INET_ECN_is_ce(ecnfield); 282 struct tcp_sock *tp = tcp_sk(sk); 283 bool ecn_edge; 284 285 if (!INET_ECN_is_not_ect(ecnfield)) { 286 u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs); 287 288 /* As for accurate ECN, the TCP_ECN_SEEN flag is set by 289 * tcp_ecn_received_counters() when the ECN codepoint of 290 * received TCP data or ACK contains ECT(0), ECT(1), or CE. 291 */ 292 if (!tcp_ecn_mode_rfc3168(tp)) 293 tp->ecn_flags |= TCP_ECN_SEEN; 294 295 /* ACE counter tracks *all* segments including pure ACKs */ 296 tp->received_ce += pcount; 297 tp->received_ce_pending = min(tp->received_ce_pending + pcount, 298 0xfU); 299 300 if (len > 0) { 301 u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield); 302 u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1]; 303 u32 bytes_mask = GENMASK_U32(31, 22); 304 305 tp->received_ecn_bytes[ecnfield - 1] += len; 306 tp->accecn_minlen = max_t(u8, tp->accecn_minlen, 307 minlen); 308 309 /* Send AccECN option at least once per 2^22-byte 310 * increase in any ECN byte counter. 311 */ 312 if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) & 313 bytes_mask) { 314 tcp_accecn_opt_demand_min(sk, 1); 315 } 316 } 317 } 318 319 ecn_edge = tp->prev_ecnfield != ecnfield; 320 if (ecn_edge || is_ce) { 321 tp->prev_ecnfield = ecnfield; 322 /* Demand Accurate ECN change-triggered ACKs. Two ACK are 323 * demanded to indicate unambiguously the ecnfield value 324 * in the latter ACK. 325 */ 326 if (tcp_ecn_mode_accecn(tp)) { 327 if (ecn_edge) 328 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; 329 tp->accecn_opt_demand = 2; 330 } 331 } 332 } 333 334 /* AccECN specification, 2.2: [...] A Data Receiver maintains four counters 335 * initialized at the start of the half-connection. [...] These byte counters 336 * reflect only the TCP payload length, excluding TCP header and TCP options. 337 */ 338 static inline void tcp_ecn_received_counters_payload(struct sock *sk, 339 const struct sk_buff *skb) 340 { 341 const struct tcphdr *th = (const struct tcphdr *)skb->data; 342 343 tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4); 344 } 345 346 /* AccECN specification, 5.1: [...] a server can determine that it 347 * negotiated AccECN as [...] if the ACK contains an ACE field with 348 * the value 0b010 to 0b111 (decimal 2 to 7). 349 */ 350 static inline bool cookie_accecn_ok(const struct tcphdr *th) 351 { 352 return tcp_accecn_ace(th) > 0x1; 353 } 354 355 /* Used to form the ACE flags for SYN/ACK */ 356 static inline u16 tcp_accecn_reflector_flags(u8 ect) 357 { 358 /* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN. 359 * Below is an excerpt from the 1st block of Table 2 of AccECN spec, 360 * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE 361 */ 362 static const u8 ecn_to_ace_flags[4] = { 363 0b010, /* Not-ECT is received */ 364 0b011, /* ECT(1) is received */ 365 0b100, /* ECT(0) is received */ 366 0b110 /* CE is received */ 367 }; 368 369 return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]); 370 } 371 372 /* AccECN specification, 3.1.2: If a TCP server that implements AccECN 373 * receives a SYN with the three TCP header flags (AE, CWR and ECE) set 374 * to any combination other than 000, 011 or 111, it MUST negotiate the 375 * use of AccECN as if they had been set to 111. 376 */ 377 static inline bool tcp_accecn_syn_requested(const struct tcphdr *th) 378 { 379 u8 ace = tcp_accecn_ace(th); 380 381 return ace && ace != 0x3; 382 } 383 384 static inline void __tcp_accecn_init_bytes_counters(int *counter_array) 385 { 386 BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1); 387 BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2); 388 BUILD_BUG_ON(INET_ECN_CE != 0x3); 389 390 counter_array[INET_ECN_ECT_1 - 1] = 0; 391 counter_array[INET_ECN_ECT_0 - 1] = 0; 392 counter_array[INET_ECN_CE - 1] = 0; 393 } 394 395 static inline void tcp_accecn_init_counters(struct tcp_sock *tp) 396 { 397 tp->received_ce = 0; 398 tp->received_ce_pending = 0; 399 __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); 400 __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); 401 tp->accecn_minlen = 0; 402 tp->accecn_opt_demand = 0; 403 tp->est_ecnfield = 0; 404 } 405 406 /* Used for make_synack to form the ACE flags */ 407 static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect) 408 { 409 /* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received 410 * from SYN. Below is an excerpt from Table 2 of the AccECN spec: 411 * +====================+====================================+ 412 * | IP-ECN codepoint | Respective ACE falgs on SYN/ACK | 413 * | received on SYN | AE CWR ECE | 414 * +====================+====================================+ 415 * | Not-ECT | 0 1 0 | 416 * | ECT(1) | 0 1 1 | 417 * | ECT(0) | 1 0 0 | 418 * | CE | 1 1 0 | 419 * +====================+====================================+ 420 */ 421 th->ae = !!(ect & INET_ECN_ECT_0); 422 th->cwr = ect != INET_ECN_ECT_0; 423 th->ece = ect == INET_ECN_ECT_1; 424 } 425 426 static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb, 427 struct tcphdr *th) 428 { 429 u32 wire_ace; 430 431 /* The final packet of the 3WHS or anything like it must reflect 432 * the SYN/ACK ECT instead of putting CEP into ACE field, such 433 * case show up in tcp_flags. 434 */ 435 if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) { 436 wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET; 437 th->ece = !!(wire_ace & 0x1); 438 th->cwr = !!(wire_ace & 0x2); 439 th->ae = !!(wire_ace & 0x4); 440 tp->received_ce_pending = 0; 441 } 442 } 443 444 static inline u8 tcp_accecn_option_init(const struct sk_buff *skb, 445 u8 opt_offset) 446 { 447 u8 *ptr = skb_transport_header(skb) + opt_offset; 448 unsigned int optlen = ptr[1] - 2; 449 450 if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1)) 451 return TCP_ACCECN_OPT_FAIL_SEEN; 452 ptr += 2; 453 454 /* Detect option zeroing: an AccECN connection "MAY check that the 455 * initial value of the EE0B field or the EE1B field is non-zero" 456 */ 457 if (optlen < TCPOLEN_ACCECN_PERFIELD) 458 return TCP_ACCECN_OPT_EMPTY_SEEN; 459 if (get_unaligned_be24(ptr) == 0) 460 return TCP_ACCECN_OPT_FAIL_SEEN; 461 if (optlen < TCPOLEN_ACCECN_PERFIELD * 3) 462 return TCP_ACCECN_OPT_COUNTER_SEEN; 463 ptr += TCPOLEN_ACCECN_PERFIELD * 2; 464 if (get_unaligned_be24(ptr) == 0) 465 return TCP_ACCECN_OPT_FAIL_SEEN; 466 467 return TCP_ACCECN_OPT_COUNTER_SEEN; 468 } 469 470 /* See Table 2 of the AccECN draft */ 471 static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb, 472 const struct tcphdr *th, u8 ip_dsfield) 473 { 474 struct tcp_sock *tp = tcp_sk(sk); 475 u8 ace = tcp_accecn_ace(th); 476 477 switch (ace) { 478 case 0x0: 479 case 0x7: 480 /* +========+========+============+=============+ 481 * | A | B | SYN/ACK | Feedback | 482 * | | | B->A | Mode of A | 483 * | | | AE CWR ECE | | 484 * +========+========+============+=============+ 485 * | AccECN | No ECN | 0 0 0 | Not ECN | 486 * | AccECN | Broken | 1 1 1 | Not ECN | 487 * +========+========+============+=============+ 488 */ 489 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); 490 break; 491 case 0x1: 492 case 0x5: 493 /* +========+========+============+=============+ 494 * | A | B | SYN/ACK | Feedback | 495 * | | | B->A | Mode of A | 496 * | | | AE CWR ECE | | 497 * +========+========+============+=============+ 498 * | AccECN | Nonce | 1 0 1 | (Reserved) | 499 * | AccECN | ECN | 0 0 1 | Classic ECN | 500 * | Nonce | AccECN | 0 0 1 | Classic ECN | 501 * | ECN | AccECN | 0 0 1 | Classic ECN | 502 * +========+========+============+=============+ 503 */ 504 if (tcp_ecn_mode_pending(tp)) 505 /* Downgrade from AccECN, or requested initially */ 506 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); 507 break; 508 default: 509 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); 510 tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK; 511 if (tp->rx_opt.accecn && 512 tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { 513 u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); 514 515 tcp_accecn_saw_opt_fail_recv(tp, saw_opt); 516 tp->accecn_opt_demand = 2; 517 } 518 if (INET_ECN_is_ce(ip_dsfield) && 519 tcp_accecn_validate_syn_feedback(sk, ace, 520 tp->syn_ect_snt)) { 521 tp->received_ce++; 522 tp->received_ce_pending++; 523 } 524 break; 525 } 526 } 527 528 static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th, 529 const struct sk_buff *skb) 530 { 531 if (tcp_ecn_mode_pending(tp)) { 532 if (!tcp_accecn_syn_requested(th)) { 533 /* Downgrade to classic ECN feedback */ 534 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); 535 } else { 536 tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & 537 INET_ECN_MASK; 538 tp->prev_ecnfield = tp->syn_ect_rcv; 539 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); 540 } 541 } 542 if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr)) 543 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); 544 } 545 546 static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, 547 const struct tcphdr *th) 548 { 549 if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp)) 550 return true; 551 return false; 552 } 553 554 /* Packet ECN state for a SYN-ACK */ 555 static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) 556 { 557 struct tcp_sock *tp = tcp_sk(sk); 558 559 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; 560 if (tcp_ecn_disabled(tp)) 561 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; 562 else if (tcp_ca_needs_ecn(sk) || 563 tcp_bpf_ca_needs_ecn(sk)) 564 INET_ECN_xmit(sk); 565 566 if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { 567 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; 568 TCP_SKB_CB(skb)->tcp_flags |= 569 tcp_accecn_reflector_flags(tp->syn_ect_rcv); 570 tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; 571 } 572 } 573 574 /* Packet ECN state for a SYN. */ 575 static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) 576 { 577 struct tcp_sock *tp = tcp_sk(sk); 578 bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); 579 bool use_ecn, use_accecn; 580 u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); 581 582 use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN; 583 use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || 584 tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || 585 tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; 586 587 if (!use_ecn) { 588 const struct dst_entry *dst = __sk_dst_get(sk); 589 590 if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) 591 use_ecn = true; 592 } 593 594 tp->ecn_flags = 0; 595 596 if (use_ecn) { 597 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) 598 INET_ECN_xmit(sk); 599 600 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; 601 if (use_accecn) { 602 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE; 603 tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING); 604 tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; 605 } else { 606 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); 607 } 608 } 609 } 610 611 static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) 612 { 613 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) { 614 /* tp->ecn_flags are cleared at a later point in time when 615 * SYN ACK is ultimatively being received. 616 */ 617 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; 618 } 619 } 620 621 static inline void 622 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) 623 { 624 if (tcp_rsk(req)->accecn_ok) 625 tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); 626 else if (inet_rsk(req)->ecn_ok) 627 th->ece = 1; 628 } 629 630 static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) 631 { 632 u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon); 633 const struct tcp_sock *tp = tcp_sk(sk); 634 635 if (!ecn_beacon) 636 return false; 637 638 return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >= 639 (tp->srtt_us >> 3); 640 } 641 642 #endif /* _LINUX_TCP_ECN_H */ 643