1 /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 #ifndef _TCP_ECN_H 3 #define _TCP_ECN_H 4 5 #include <linux/tcp.h> 6 #include <linux/skbuff.h> 7 #include <linux/bitfield.h> 8 9 #include <net/inet_connection_sock.h> 10 #include <net/sock.h> 11 #include <net/tcp.h> 12 #include <net/inet_ecn.h> 13 14 /* The highest ECN variant (Accurate ECN, ECN, or no ECN) that is 15 * attemped to be negotiated and requested for incoming connection 16 * and outgoing connection, respectively. 17 */ 18 enum tcp_ecn_mode { 19 TCP_ECN_IN_NOECN_OUT_NOECN = 0, 20 TCP_ECN_IN_ECN_OUT_ECN = 1, 21 TCP_ECN_IN_ECN_OUT_NOECN = 2, 22 TCP_ECN_IN_ACCECN_OUT_ACCECN = 3, 23 TCP_ECN_IN_ACCECN_OUT_ECN = 4, 24 TCP_ECN_IN_ACCECN_OUT_NOECN = 5, 25 }; 26 27 /* AccECN option sending when AccECN has been successfully negotiated */ 28 enum tcp_accecn_option { 29 TCP_ACCECN_OPTION_DISABLED = 0, 30 TCP_ACCECN_OPTION_MINIMUM = 1, 31 TCP_ACCECN_OPTION_FULL = 2, 32 TCP_ACCECN_OPTION_PERSIST = 3, 33 }; 34 35 /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */ 36 static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk) 37 { 38 __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk)); 39 } 40 41 static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp) 42 { 43 /* Do not set CWR if in AccECN mode! */ 44 if (tcp_ecn_mode_rfc3168(tp)) 45 tp->ecn_flags |= TCP_ECN_QUEUE_CWR; 46 } 47 48 static inline void tcp_ecn_accept_cwr(struct sock *sk, 49 const struct sk_buff *skb) 50 { 51 struct tcp_sock *tp = tcp_sk(sk); 52 53 if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) { 54 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 55 56 /* If the sender is telling us it has entered CWR, then its 57 * cwnd may be very low (even just 1 packet), so we should ACK 58 * immediately. 59 */ 60 if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) 61 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; 62 } 63 } 64 65 static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) 66 { 67 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 68 } 69 70 static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp) 71 { 72 return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND; 73 } 74 75 static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp) 76 { 77 return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV; 78 } 79 80 static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp) 81 { 82 return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND; 83 } 84 85 static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp) 86 { 87 return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV; 88 } 89 90 static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode) 91 { 92 tp->accecn_fail_mode |= mode; 93 } 94 95 static inline u8 tcp_accecn_ace(const struct tcphdr *th) 96 { 97 return (th->ae << 2) | (th->cwr << 1) | th->ece; 98 } 99 100 /* Infer the ECT value our SYN arrived with from the echoed ACE field */ 101 static inline int tcp_accecn_extract_syn_ect(u8 ace) 102 { 103 /* Below is an excerpt from the 1st block of Table 2 of AccECN spec */ 104 static const int ace_to_ecn[8] = { 105 INET_ECN_ECT_0, /* 0b000 (Undefined) */ 106 INET_ECN_ECT_1, /* 0b001 (Undefined) */ 107 INET_ECN_NOT_ECT, /* 0b010 (Not-ECT is received) */ 108 INET_ECN_ECT_1, /* 0b011 (ECT-1 is received) */ 109 INET_ECN_ECT_0, /* 0b100 (ECT-0 is received) */ 110 INET_ECN_ECT_1, /* 0b101 (Reserved) */ 111 INET_ECN_CE, /* 0b110 (CE is received) */ 112 INET_ECN_ECT_1 /* 0b111 (Undefined) */ 113 }; 114 115 return ace_to_ecn[ace & 0x7]; 116 } 117 118 /* Check ECN field transition to detect invalid transitions */ 119 static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv) 120 { 121 if (rcv == snt) 122 return true; 123 124 /* Non-ECT altered to something or something became non-ECT */ 125 if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT) 126 return false; 127 /* CE -> ECT(0/1)? */ 128 if (snt == INET_ECN_CE) 129 return false; 130 return true; 131 } 132 133 static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, 134 u8 sent_ect) 135 { 136 u8 ect = tcp_accecn_extract_syn_ect(ace); 137 struct tcp_sock *tp = tcp_sk(sk); 138 139 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) 140 return true; 141 142 if (!tcp_ect_transition_valid(sent_ect, ect)) { 143 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV); 144 return false; 145 } 146 147 return true; 148 } 149 150 static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp, 151 u8 saw_opt) 152 { 153 tp->saw_accecn_opt = saw_opt; 154 if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN) 155 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV); 156 } 157 158 /* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */ 159 static inline void tcp_accecn_third_ack(struct sock *sk, 160 const struct sk_buff *skb, u8 sent_ect) 161 { 162 u8 ace = tcp_accecn_ace(tcp_hdr(skb)); 163 struct tcp_sock *tp = tcp_sk(sk); 164 165 switch (ace) { 166 case 0x0: 167 /* Invalid value */ 168 if (!TCP_SKB_CB(skb)->sacked) 169 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV | 170 TCP_ACCECN_OPT_FAIL_RECV); 171 break; 172 case 0x7: 173 case 0x5: 174 case 0x1: 175 /* Unused but legal values */ 176 break; 177 default: 178 /* Validation only applies to first non-data packet */ 179 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq && 180 !TCP_SKB_CB(skb)->sacked && 181 tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) { 182 if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) && 183 !tp->delivered_ce) 184 tp->delivered_ce++; 185 } 186 break; 187 } 188 } 189 190 /* Demand the minimum # to send AccECN optnio */ 191 static inline void tcp_accecn_opt_demand_min(struct sock *sk, 192 u8 opt_demand_min) 193 { 194 struct tcp_sock *tp = tcp_sk(sk); 195 u8 opt_demand; 196 197 opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand); 198 tp->accecn_opt_demand = opt_demand; 199 } 200 201 /* Maps IP ECN field ECT/CE code point to AccECN option field number, given 202 * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0). 203 */ 204 static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield) 205 { 206 switch (ecnfield & INET_ECN_MASK) { 207 case INET_ECN_NOT_ECT: 208 return 0; /* AccECN does not send counts of NOT_ECT */ 209 case INET_ECN_ECT_1: 210 return 1; 211 case INET_ECN_CE: 212 return 2; 213 case INET_ECN_ECT_0: 214 return 3; 215 } 216 return 0; 217 } 218 219 /* Maps IP ECN field ECT/CE code point to AccECN option field value offset. 220 * Some fields do not start from zero, to detect zeroing by middleboxes. 221 */ 222 static inline u32 tcp_accecn_field_init_offset(u8 ecnfield) 223 { 224 switch (ecnfield & INET_ECN_MASK) { 225 case INET_ECN_NOT_ECT: 226 return 0; /* AccECN does not send counts of NOT_ECT */ 227 case INET_ECN_ECT_1: 228 return TCP_ACCECN_E1B_INIT_OFFSET; 229 case INET_ECN_CE: 230 return TCP_ACCECN_CEB_INIT_OFFSET; 231 case INET_ECN_ECT_0: 232 return TCP_ACCECN_E0B_INIT_OFFSET; 233 } 234 return 0; 235 } 236 237 /* Maps AccECN option field #nr to IP ECN field ECT/CE bits */ 238 static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option, 239 bool order) 240 { 241 /* Based on Table 5 of the AccECN spec to map (option, order) to 242 * the corresponding ECN conuters (ECT-1, ECT-0, or CE). 243 */ 244 static const u8 optfield_lookup[2][3] = { 245 /* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */ 246 { INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 }, 247 /* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */ 248 { INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 } 249 }; 250 251 return optfield_lookup[order][option % 3]; 252 } 253 254 /* Handles AccECN option ECT and CE 24-bit byte counters update into 255 * the u32 value in tcp_sock. As we're processing TCP options, it is 256 * safe to access from - 1. 257 */ 258 static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from, 259 u32 init_offset) 260 { 261 u32 truncated = (get_unaligned_be32(from - 1) - init_offset) & 262 0xFFFFFFU; 263 u32 delta = (truncated - *cnt) & 0xFFFFFFU; 264 265 /* If delta has the highest bit set (24th bit) indicating 266 * negative, sign extend to correct an estimation using 267 * sign_extend32(delta, 24 - 1) 268 */ 269 delta = sign_extend32(delta, 23); 270 *cnt += delta; 271 return (s32)delta; 272 } 273 274 /* Updates Accurate ECN received counters from the received IP ECN field */ 275 static inline void tcp_ecn_received_counters(struct sock *sk, 276 const struct sk_buff *skb, u32 len) 277 { 278 u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK; 279 u8 is_ce = INET_ECN_is_ce(ecnfield); 280 struct tcp_sock *tp = tcp_sk(sk); 281 bool ecn_edge; 282 283 if (!INET_ECN_is_not_ect(ecnfield)) { 284 u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs); 285 286 /* As for accurate ECN, the TCP_ECN_SEEN flag is set by 287 * tcp_ecn_received_counters() when the ECN codepoint of 288 * received TCP data or ACK contains ECT(0), ECT(1), or CE. 289 */ 290 if (!tcp_ecn_mode_rfc3168(tp)) 291 tp->ecn_flags |= TCP_ECN_SEEN; 292 293 /* ACE counter tracks *all* segments including pure ACKs */ 294 tp->received_ce += pcount; 295 tp->received_ce_pending = min(tp->received_ce_pending + pcount, 296 0xfU); 297 298 if (len > 0) { 299 u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield); 300 u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1]; 301 u32 bytes_mask = GENMASK_U32(31, 22); 302 303 tp->received_ecn_bytes[ecnfield - 1] += len; 304 tp->accecn_minlen = max_t(u8, tp->accecn_minlen, 305 minlen); 306 307 /* Send AccECN option at least once per 2^22-byte 308 * increase in any ECN byte counter. 309 */ 310 if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) & 311 bytes_mask) { 312 tcp_accecn_opt_demand_min(sk, 1); 313 } 314 } 315 } 316 317 ecn_edge = tp->prev_ecnfield != ecnfield; 318 if (ecn_edge || is_ce) { 319 tp->prev_ecnfield = ecnfield; 320 /* Demand Accurate ECN change-triggered ACKs. Two ACK are 321 * demanded to indicate unambiguously the ecnfield value 322 * in the latter ACK. 323 */ 324 if (tcp_ecn_mode_accecn(tp)) { 325 if (ecn_edge) 326 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; 327 tp->accecn_opt_demand = 2; 328 } 329 } 330 } 331 332 /* AccECN specification, 2.2: [...] A Data Receiver maintains four counters 333 * initialized at the start of the half-connection. [...] These byte counters 334 * reflect only the TCP payload length, excluding TCP header and TCP options. 335 */ 336 static inline void tcp_ecn_received_counters_payload(struct sock *sk, 337 const struct sk_buff *skb) 338 { 339 const struct tcphdr *th = (const struct tcphdr *)skb->data; 340 341 tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4); 342 } 343 344 /* AccECN specification, 5.1: [...] a server can determine that it 345 * negotiated AccECN as [...] if the ACK contains an ACE field with 346 * the value 0b010 to 0b111 (decimal 2 to 7). 347 */ 348 static inline bool cookie_accecn_ok(const struct tcphdr *th) 349 { 350 return tcp_accecn_ace(th) > 0x1; 351 } 352 353 /* Used to form the ACE flags for SYN/ACK */ 354 static inline u16 tcp_accecn_reflector_flags(u8 ect) 355 { 356 /* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN. 357 * Below is an excerpt from the 1st block of Table 2 of AccECN spec, 358 * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE 359 */ 360 static const u8 ecn_to_ace_flags[4] = { 361 0b010, /* Not-ECT is received */ 362 0b011, /* ECT(1) is received */ 363 0b100, /* ECT(0) is received */ 364 0b110 /* CE is received */ 365 }; 366 367 return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]); 368 } 369 370 /* AccECN specification, 3.1.2: If a TCP server that implements AccECN 371 * receives a SYN with the three TCP header flags (AE, CWR and ECE) set 372 * to any combination other than 000, 011 or 111, it MUST negotiate the 373 * use of AccECN as if they had been set to 111. 374 */ 375 static inline bool tcp_accecn_syn_requested(const struct tcphdr *th) 376 { 377 u8 ace = tcp_accecn_ace(th); 378 379 return ace && ace != 0x3; 380 } 381 382 static inline void __tcp_accecn_init_bytes_counters(int *counter_array) 383 { 384 BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1); 385 BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2); 386 BUILD_BUG_ON(INET_ECN_CE != 0x3); 387 388 counter_array[INET_ECN_ECT_1 - 1] = 0; 389 counter_array[INET_ECN_ECT_0 - 1] = 0; 390 counter_array[INET_ECN_CE - 1] = 0; 391 } 392 393 static inline void tcp_accecn_init_counters(struct tcp_sock *tp) 394 { 395 tp->received_ce = 0; 396 tp->received_ce_pending = 0; 397 __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes); 398 __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes); 399 tp->accecn_opt_sent_w_dsack = 0; 400 tp->accecn_minlen = 0; 401 tp->accecn_opt_demand = 0; 402 tp->est_ecnfield = 0; 403 } 404 405 /* Used for make_synack to form the ACE flags */ 406 static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect) 407 { 408 /* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received 409 * from SYN. Below is an excerpt from Table 2 of the AccECN spec: 410 * +====================+====================================+ 411 * | IP-ECN codepoint | Respective ACE falgs on SYN/ACK | 412 * | received on SYN | AE CWR ECE | 413 * +====================+====================================+ 414 * | Not-ECT | 0 1 0 | 415 * | ECT(1) | 0 1 1 | 416 * | ECT(0) | 1 0 0 | 417 * | CE | 1 1 0 | 418 * +====================+====================================+ 419 */ 420 th->ae = !!(ect & INET_ECN_ECT_0); 421 th->cwr = ect != INET_ECN_ECT_0; 422 th->ece = ect == INET_ECN_ECT_1; 423 } 424 425 static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb, 426 struct tcphdr *th) 427 { 428 u32 wire_ace; 429 430 /* The final packet of the 3WHS or anything like it must reflect 431 * the SYN/ACK ECT instead of putting CEP into ACE field, such 432 * case show up in tcp_flags. 433 */ 434 if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) { 435 wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET; 436 th->ece = !!(wire_ace & 0x1); 437 th->cwr = !!(wire_ace & 0x2); 438 th->ae = !!(wire_ace & 0x4); 439 tp->received_ce_pending = 0; 440 } 441 } 442 443 static inline u8 tcp_accecn_option_init(const struct sk_buff *skb, 444 u8 opt_offset) 445 { 446 u8 *ptr = skb_transport_header(skb) + opt_offset; 447 unsigned int optlen = ptr[1] - 2; 448 449 if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1)) 450 return TCP_ACCECN_OPT_FAIL_SEEN; 451 ptr += 2; 452 453 /* Detect option zeroing: an AccECN connection "MAY check that the 454 * initial value of the EE0B field or the EE1B field is non-zero" 455 */ 456 if (optlen < TCPOLEN_ACCECN_PERFIELD) 457 return TCP_ACCECN_OPT_EMPTY_SEEN; 458 if (get_unaligned_be24(ptr) == 0) 459 return TCP_ACCECN_OPT_FAIL_SEEN; 460 if (optlen < TCPOLEN_ACCECN_PERFIELD * 3) 461 return TCP_ACCECN_OPT_COUNTER_SEEN; 462 ptr += TCPOLEN_ACCECN_PERFIELD * 2; 463 if (get_unaligned_be24(ptr) == 0) 464 return TCP_ACCECN_OPT_FAIL_SEEN; 465 466 return TCP_ACCECN_OPT_COUNTER_SEEN; 467 } 468 469 static inline void tcp_ecn_rcv_synack_accecn(struct sock *sk, 470 const struct sk_buff *skb, u8 dsf) 471 { 472 struct tcp_sock *tp = tcp_sk(sk); 473 474 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); 475 tp->syn_ect_rcv = dsf & INET_ECN_MASK; 476 /* Demand Accurate ECN option in response to the SYN on the SYN/ACK 477 * and the TCP server will try to send one more packet with an AccECN 478 * Option at a later point during the connection. 479 */ 480 if (tp->rx_opt.accecn && 481 tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) { 482 u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn); 483 484 tcp_accecn_saw_opt_fail_recv(tp, saw_opt); 485 tp->accecn_opt_demand = 2; 486 } 487 } 488 489 /* See Table 2 of the AccECN draft */ 490 static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb, 491 const struct tcphdr *th, u8 ip_dsfield) 492 { 493 struct tcp_sock *tp = tcp_sk(sk); 494 u8 ace = tcp_accecn_ace(th); 495 496 switch (ace) { 497 case 0x0: 498 case 0x7: 499 /* +========+========+============+=============+ 500 * | A | B | SYN/ACK | Feedback | 501 * | | | B->A | Mode of A | 502 * | | | AE CWR ECE | | 503 * +========+========+============+=============+ 504 * | AccECN | No ECN | 0 0 0 | Not ECN | 505 * | AccECN | Broken | 1 1 1 | Not ECN | 506 * +========+========+============+=============+ 507 */ 508 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); 509 break; 510 case 0x1: 511 /* +========+========+============+=============+ 512 * | A | B | SYN/ACK | Feedback | 513 * | | | B->A | Mode of A | 514 * | | | AE CWR ECE | | 515 * +========+========+============+=============+ 516 * | AccECN | ECN | 0 0 1 | Classic ECN | 517 * | Nonce | AccECN | 0 0 1 | Classic ECN | 518 * | ECN | AccECN | 0 0 1 | Classic ECN | 519 * +========+========+============+=============+ 520 */ 521 if (tcp_ca_no_fallback_rfc3168(sk)) 522 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); 523 else 524 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); 525 break; 526 case 0x5: 527 if (tcp_ecn_mode_pending(tp)) { 528 tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); 529 if (INET_ECN_is_ce(ip_dsfield)) { 530 tp->received_ce++; 531 tp->received_ce_pending++; 532 } 533 } 534 break; 535 default: 536 tcp_ecn_rcv_synack_accecn(sk, skb, ip_dsfield); 537 if (INET_ECN_is_ce(ip_dsfield) && 538 tcp_accecn_validate_syn_feedback(sk, ace, 539 tp->syn_ect_snt)) { 540 tp->received_ce++; 541 tp->received_ce_pending++; 542 } 543 break; 544 } 545 } 546 547 static inline void tcp_ecn_rcv_syn(struct sock *sk, const struct tcphdr *th, 548 const struct sk_buff *skb) 549 { 550 struct tcp_sock *tp = tcp_sk(sk); 551 552 if (tcp_ecn_mode_pending(tp)) { 553 if (!tcp_accecn_syn_requested(th)) { 554 /* Downgrade to classic ECN feedback */ 555 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); 556 } else { 557 tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & 558 INET_ECN_MASK; 559 tp->prev_ecnfield = tp->syn_ect_rcv; 560 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN); 561 } 562 } 563 if (tcp_ecn_mode_rfc3168(tp) && 564 (!th->ece || !th->cwr || tcp_ca_no_fallback_rfc3168(sk))) 565 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED); 566 } 567 568 static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, 569 const struct tcphdr *th) 570 { 571 if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp)) 572 return true; 573 return false; 574 } 575 576 /* Packet ECN state for a SYN-ACK */ 577 static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) 578 { 579 struct tcp_sock *tp = tcp_sk(sk); 580 581 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; 582 if (tcp_ecn_disabled(tp)) 583 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; 584 else if (tcp_ca_needs_ecn(sk) || 585 tcp_bpf_ca_needs_ecn(sk)) 586 INET_ECN_xmit_ect_1_negotiation(sk); 587 588 if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) { 589 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; 590 TCP_SKB_CB(skb)->tcp_flags |= 591 tcp_accecn_reflector_flags(tp->syn_ect_rcv); 592 tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; 593 } 594 } 595 596 /* Packet ECN state for a SYN. */ 597 static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) 598 { 599 struct tcp_sock *tp = tcp_sk(sk); 600 bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); 601 bool use_ecn, use_accecn; 602 u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); 603 604 use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN || 605 tcp_ca_needs_accecn(sk); 606 use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN || 607 tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN || 608 tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn; 609 610 if (!use_ecn) { 611 const struct dst_entry *dst = __sk_dst_get(sk); 612 613 if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) 614 use_ecn = true; 615 } 616 617 tp->ecn_flags = 0; 618 619 if (use_ecn) { 620 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) 621 INET_ECN_xmit_ect_1_negotiation(sk); 622 623 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; 624 if (use_accecn) { 625 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE; 626 tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING); 627 tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK; 628 } else { 629 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168); 630 } 631 } 632 } 633 634 static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) 635 { 636 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) { 637 /* tp->ecn_flags are cleared at a later point in time when 638 * SYN ACK is ultimatively being received. 639 */ 640 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE; 641 } 642 } 643 644 static inline void 645 tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, 646 enum tcp_synack_type synack_type) 647 { 648 /* Accurate ECN shall retransmit SYN/ACK with ACE=0 if the 649 * previously retransmitted SYN/ACK also times out. 650 */ 651 if (!req->num_timeout || synack_type != TCP_SYNACK_RETRANS) { 652 if (tcp_rsk(req)->accecn_ok) 653 tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv); 654 else if (inet_rsk(req)->ecn_ok) 655 th->ece = 1; 656 } else if (tcp_rsk(req)->accecn_ok) { 657 th->ae = 0; 658 th->cwr = 0; 659 th->ece = 0; 660 } 661 } 662 663 static inline bool tcp_accecn_option_beacon_check(const struct sock *sk) 664 { 665 u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon); 666 const struct tcp_sock *tp = tcp_sk(sk); 667 668 if (!ecn_beacon) 669 return false; 670 671 return tcp_stamp_us_delta(tp->tcp_mstamp, tp->accecn_opt_tstamp) * ecn_beacon >= 672 (tp->srtt_us >> 3); 673 } 674 675 #endif /* _LINUX_TCP_ECN_H */ 676