1 /* 2 * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include "internal/quic_ackm.h" 11 #include "internal/uint_set.h" 12 #include "internal/common.h" 13 #include <assert.h> 14 15 DEFINE_LIST_OF(tx_history, OSSL_ACKM_TX_PKT); 16 17 /* 18 * TX Packet History 19 * ***************** 20 * 21 * The TX Packet History object tracks information about packets which have been 22 * sent for which we later expect to receive an ACK. It is essentially a simple 23 * database keeping a list of packet information structures in packet number 24 * order which can also be looked up directly by packet number. 25 * 26 * We currently only allow packets to be appended to the list (i.e. the packet 27 * numbers of the packets appended to the list must monotonically increase), as 28 * we should not currently need more general functionality such as a sorted list 29 * insert. 30 */ 31 struct tx_pkt_history_st { 32 /* A linked list of all our packets. */ 33 OSSL_LIST(tx_history) packets; 34 35 /* 36 * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *) 37 * 38 * Invariant: A packet is in this map if and only if it is in the linked 39 * list. 40 */ 41 LHASH_OF(OSSL_ACKM_TX_PKT) *map; 42 43 /* 44 * The lowest packet number which may currently be added to the history list 45 * (inclusive). We do not allow packet numbers to be added to the history 46 * list non-monotonically, so packet numbers must be greater than or equal 47 * to this value. 48 */ 49 uint64_t watermark; 50 51 /* 52 * Packet number of the highest packet info structure we have yet appended 53 * to the list. This is usually one less than watermark, except when we have 54 * not added any packet yet. 55 */ 56 uint64_t highest_sent; 57 }; 58 59 DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT); 60 61 static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt) 62 { 63 /* Using low bits of the packet number as the hash should be enough */ 64 return (unsigned long)pkt->pkt_num; 65 } 66 67 static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a, 68 const OSSL_ACKM_TX_PKT *b) 69 { 70 if (a->pkt_num < b->pkt_num) 71 return -1; 72 if (a->pkt_num > b->pkt_num) 73 return 1; 74 return 0; 75 } 76 77 static int 78 tx_pkt_history_init(struct tx_pkt_history_st *h) 79 { 80 ossl_list_tx_history_init(&h->packets); 81 h->watermark = 0; 82 h->highest_sent = 0; 83 84 h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare); 85 if (h->map == NULL) 86 return 0; 87 88 return 1; 89 } 90 91 static void 92 tx_pkt_history_destroy(struct tx_pkt_history_st *h) 93 { 94 lh_OSSL_ACKM_TX_PKT_free(h->map); 95 h->map = NULL; 96 ossl_list_tx_history_init(&h->packets); 97 } 98 99 static int 100 tx_pkt_history_add_actual(struct tx_pkt_history_st *h, 101 OSSL_ACKM_TX_PKT *pkt) 102 { 103 OSSL_ACKM_TX_PKT *existing; 104 105 /* 106 * There should not be any existing packet with this number 107 * in our mapping. 108 */ 109 existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt); 110 if (!ossl_assert(existing == NULL)) 111 return 0; 112 113 /* Should not already be in a list. */ 114 if (!ossl_assert(ossl_list_tx_history_next(pkt) == NULL 115 && ossl_list_tx_history_prev(pkt) == NULL)) 116 return 0; 117 118 lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt); 119 120 ossl_list_tx_history_insert_tail(&h->packets, pkt); 121 return 1; 122 } 123 124 /* Adds a packet information structure to the history list. */ 125 static int 126 tx_pkt_history_add(struct tx_pkt_history_st *h, 127 OSSL_ACKM_TX_PKT *pkt) 128 { 129 if (!ossl_assert(pkt->pkt_num >= h->watermark)) 130 return 0; 131 132 if (tx_pkt_history_add_actual(h, pkt) < 1) 133 return 0; 134 135 h->watermark = pkt->pkt_num + 1; 136 h->highest_sent = pkt->pkt_num; 137 return 1; 138 } 139 140 /* Retrieve a packet information structure by packet number. */ 141 static OSSL_ACKM_TX_PKT * 142 tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num) 143 { 144 OSSL_ACKM_TX_PKT key; 145 146 key.pkt_num = pkt_num; 147 148 return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key); 149 } 150 151 /* Remove a packet information structure from the history log. */ 152 static int 153 tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num) 154 { 155 OSSL_ACKM_TX_PKT key, *pkt; 156 key.pkt_num = pkt_num; 157 158 pkt = tx_pkt_history_by_pkt_num(h, pkt_num); 159 if (pkt == NULL) 160 return 0; 161 162 ossl_list_tx_history_remove(&h->packets, pkt); 163 lh_OSSL_ACKM_TX_PKT_delete(h->map, &key); 164 return 1; 165 } 166 167 /* 168 * RX Packet Number Tracking 169 * ************************* 170 * 171 * **Background.** The RX side of the ACK manager must track packets we have 172 * received for which we have to generate ACK frames. Broadly, this means we 173 * store a set of packet numbers which we have received but which we do not know 174 * for a fact that the transmitter knows we have received. 175 * 176 * This must handle various situations: 177 * 178 * 1. We receive a packet but have not sent an ACK yet, so the transmitter 179 * does not know whether we have received it or not yet. 180 * 181 * 2. We receive a packet and send an ACK which is lost. We do not 182 * immediately know that the ACK was lost and the transmitter does not know 183 * that we have received the packet. 184 * 185 * 3. We receive a packet and send an ACK which is received by the 186 * transmitter. The transmitter does not immediately respond with an ACK, 187 * or responds with an ACK which is lost. The transmitter knows that we 188 * have received the packet, but we do not know for sure that it knows, 189 * because the ACK we sent could have been lost. 190 * 191 * 4. We receive a packet and send an ACK which is received by the 192 * transmitter. The transmitter subsequently sends us an ACK which confirms 193 * its receipt of the ACK we sent, and we successfully receive that ACK, so 194 * we know that the transmitter knows, that we received the original 195 * packet. 196 * 197 * Only when we reach case (4) are we relieved of any need to track a given 198 * packet number we have received, because only in this case do we know for sure 199 * that the peer knows we have received the packet. Having reached case (4) we 200 * will never again need to generate an ACK containing the PN in question, but 201 * until we reach that point, we must keep track of the PN as not having been 202 * provably ACKed, as we may have to keep generating ACKs for the given PN not 203 * just until the transmitter receives one, but until we know that it has 204 * received one. This will be referred to herein as "provably ACKed". 205 * 206 * **Duplicate handling.** The above discusses the case where we have received a 207 * packet with a given PN but are at best unsure whether the sender knows we 208 * have received it or not. However, we must also handle the case where we have 209 * yet to receive a packet with a given PN in the first place. The reason for 210 * this is because of the requirement expressed by RFC 9000 s. 12.3: 211 * 212 * "A receiver MUST discard a newly unprotected packet unless it is certain 213 * that it has not processed another packet with the same packet number from 214 * the same packet number space." 215 * 216 * We must ensure we never process a duplicate PN. As such, each possible PN we 217 * can receive must exist in one of the following logical states: 218 * 219 * - We have never processed this PN before 220 * (so if we receive such a PN, it can be processed) 221 * 222 * - We have processed this PN but it has not yet been provably ACKed 223 * (and should therefore be in any future ACK frame generated; 224 * if we receive such a PN again, it must be ignored) 225 * 226 * - We have processed this PN and it has been provably ACKed 227 * (if we receive such a PN again, it must be ignored) 228 * 229 * However, if we were to track this state for every PN ever used in the history 230 * of a connection, the amount of state required would increase unboundedly as 231 * the connection goes on (for example, we would have to store a set of every PN 232 * ever received.) 233 * 234 * RFC 9000 s. 12.3 continues: 235 * 236 * "Endpoints that track all individual packets for the purposes of detecting 237 * duplicates are at risk of accumulating excessive state. The data required 238 * for detecting duplicates can be limited by maintaining a minimum packet 239 * number below which all packets are immediately dropped." 240 * 241 * Moreover, RFC 9000 s. 13.2.3 states that: 242 * 243 * "A receiver MUST retain an ACK Range unless it can ensure that it will not 244 * subsequently accept packets with numbers in that range. Maintaining a 245 * minimum packet number that increases as ranges are discarded is one way to 246 * achieve this with minimal state." 247 * 248 * This touches on a subtlety of the original requirement quoted above: the 249 * receiver MUST discard a packet unless it is certain that it has not processed 250 * another packet with the same PN. However, this does not forbid the receiver 251 * from also discarding some PNs even though it has not yet processed them. In 252 * other words, implementations must be conservative and err in the direction of 253 * assuming a packet is a duplicate, but it is acceptable for this to come at 254 * the cost of falsely identifying some packets as duplicates. 255 * 256 * This allows us to bound the amount of state we must keep, and we adopt the 257 * suggested strategy quoted above to do so. We define a watermark PN below 258 * which all PNs are in the same state. This watermark is only ever increased. 259 * Thus the PNs the state for which needs to be explicitly tracked is limited to 260 * only a small number of recent PNs, and all older PNs have an assumed state. 261 * 262 * Any given PN thus falls into one of the following states: 263 * 264 * - (A) The PN is above the watermark but we have not yet received it. 265 * 266 * If we receive such a PN, we should process it and record the PN as 267 * received. 268 * 269 * - (B) The PN is above the watermark and we have received it. 270 * 271 * The PN should be included in any future ACK frame we generate. 272 * If we receive such a PN again, we should ignore it. 273 * 274 * - (C) The PN is below the watermark. 275 * 276 * We do not know whether a packet with the given PN was received or 277 * not. To be safe, if we receive such a packet, it is not processed. 278 * 279 * Note that state (C) corresponds to both "we have processed this PN and it has 280 * been provably ACKed" logical state and a subset of the PNs in the "we have 281 * never processed this PN before" logical state (namely all PNs which were lost 282 * and never received, but which are not recent enough to be above the 283 * watermark). The reason we can merge these states and avoid tracking states 284 * for the PNs in this state is because the provably ACKed and never-received 285 * states are functionally identical in terms of how we need to handle them: we 286 * don't need to do anything for PNs in either of these states, so we don't have 287 * to care about PNs in this state nor do we have to care about distinguishing 288 * the two states for a given PN. 289 * 290 * Note that under this scheme provably ACKed PNs are by definition always below 291 * the watermark; therefore, it follows that when a PN becomes provably ACKed, 292 * the watermark must be immediately increased to exceed it (otherwise we would 293 * keep reporting it in future ACK frames). 294 * 295 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when 296 * to advance the watermark: 297 * 298 * "When a packet containing an ACK frame is sent, the Largest Acknowledged 299 * field in that frame can be saved. When a packet containing an ACK frame is 300 * acknowledged, the receiver can stop acknowledging packets less than or 301 * equal to the Largest Acknowledged field in the sent ACK frame." 302 * 303 * This is where our scheme's false positives arise. When a packet containing an 304 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably 305 * acked, and the watermark is bumped accordingly. However, the Largest 306 * Acknowledged field does not imply that all lower PNs have been received, 307 * because there may be gaps expressed in the ranges of PNs expressed by that 308 * and previous ACK frames. Thus, some unreceived PNs may be moved below the 309 * watermark, and we may subsequently reject those PNs as possibly being 310 * duplicates even though we have not actually received those PNs. Since we bump 311 * the watermark when a PN becomes provably ACKed, it follows that an unreceived 312 * PN falls below the watermark (and thus becomes a false positive for the 313 * purposes of duplicate detection) when a higher-numbered PN becomes provably 314 * ACKed. 315 * 316 * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0, 317 * n) will no longer be processed. Although datagrams may be reordered in the 318 * network, a PN we receive can only become provably ACKed after our own 319 * subsequently generated ACK frame is sent in a future TX packet, and then we 320 * receive another RX PN acknowledging that TX packet. This means that a given RX 321 * PN can only become provably ACKed at least 1 RTT after it is received; it is 322 * unlikely that any reordered datagrams will still be "in the network" (and not 323 * lost) by this time. If this does occur for whatever reason and a late PN is 324 * received, the packet will be discarded unprocessed and the PN is simply 325 * handled as though lost (a "written off" PN). 326 * 327 * **Data structure.** Our state for the RX handling side of the ACK manager, as 328 * discussed above, mainly comprises: 329 * 330 * a) a logical set of PNs, and 331 * b) a monotonically increasing PN counter (the watermark). 332 * 333 * For (a), we define a data structure which stores a logical set of PNs, which 334 * we use to keep track of which PNs we have received but which have not yet 335 * been provably ACKed, and thus will later need to generate an ACK frame for. 336 * 337 * The correspondence with the logical states discussed above is as follows. A 338 * PN is in state (C) if it is below the watermark; otherwise it is in state (B) 339 * if it is in the logical set of PNs, and in state (A) otherwise. 340 * 341 * Note that PNs are only removed from the PN set (when they become provably 342 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs 343 * from the PN set any other way would be ambiguous as it would be 344 * indistinguishable from a PN we have not yet received and risk us processing a 345 * duplicate packet. In other words, for a given PN: 346 * 347 * - State (A) can transition to state (B) or (C) 348 * - State (B) can transition to state (C) only 349 * - State (C) is the terminal state 350 * 351 * We can query the logical set data structure for PNs which have been received 352 * but which have not been provably ACKed when we want to generate ACK frames. 353 * Since ACK frames can be lost and/or we might not know that the peer has 354 * successfully received them, we might generate multiple ACK frames covering a 355 * given PN until that PN becomes provably ACKed and we finally remove it from 356 * our set (by bumping the watermark) as no longer being our concern. 357 * 358 * The data structure used is the UINT_SET structure defined in uint_set.h, 359 * which is used as a PN set. We use the following operations of the structure: 360 * 361 * Insert Range: Used when we receive a new PN. 362 * 363 * Remove Range: Used when bumping the watermark. 364 * 365 * Query: Used to determine if a PN is in the set. 366 * 367 * **Possible duplicates.** A PN is considered a possible duplicate when either: 368 * 369 * a) its PN is already in the PN set (i.e. has already been received), or 370 * b) its PN is below the watermark (i.e. was provably ACKed or written off). 371 * 372 * A packet with a given PN is considered 'processable' when that PN is not 373 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable). 374 * 375 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes 376 * provably ACKed. This occurs when an ACK frame is received by the TX side of 377 * the ACK manager; thus, there is necessary interaction between the TX and RX 378 * sides of the ACK manager. 379 * 380 * This is implemented as follows. When a packet is queued as sent in the TX 381 * side of the ACK manager, it may optionally have a Largest Acked value set on 382 * it. The user of the ACK manager should do this if the packet being 383 * transmitted contains an ACK frame, by setting the field to the Largest Acked 384 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID. 385 * When a TX packet is eventually acknowledged which has this field set, it is 386 * used to update the state of the RX side of the ACK manager by bumping the 387 * watermark accordingly. 388 */ 389 struct rx_pkt_history_st { 390 UINT_SET set; 391 392 /* 393 * Invariant: PNs below this are not in the set. 394 * Invariant: This is monotonic and only ever increases. 395 */ 396 QUIC_PN watermark; 397 }; 398 399 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 400 QUIC_PN watermark); 401 402 static void rx_pkt_history_init(struct rx_pkt_history_st *h) 403 { 404 ossl_uint_set_init(&h->set); 405 h->watermark = 0; 406 } 407 408 static void rx_pkt_history_destroy(struct rx_pkt_history_st *h) 409 { 410 ossl_uint_set_destroy(&h->set); 411 } 412 413 /* 414 * Limit the number of ACK ranges we store to prevent resource consumption DoS 415 * attacks. 416 */ 417 #define MAX_RX_ACK_RANGES 32 418 419 static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h) 420 { 421 QUIC_PN highest = QUIC_PN_INVALID; 422 423 while (ossl_list_uint_set_num(&h->set) > MAX_RX_ACK_RANGES) { 424 UINT_RANGE r = ossl_list_uint_set_head(&h->set)->range; 425 426 highest = (highest == QUIC_PN_INVALID) 427 ? r.end : ossl_quic_pn_max(highest, r.end); 428 429 ossl_uint_set_remove(&h->set, &r); 430 } 431 432 /* 433 * Bump watermark to cover all PNs we removed to avoid accidental 434 * reprocessing of packets. 435 */ 436 if (highest != QUIC_PN_INVALID) 437 rx_pkt_history_bump_watermark(h, highest + 1); 438 } 439 440 static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h, 441 QUIC_PN pn) 442 { 443 UINT_RANGE r; 444 445 r.start = pn; 446 r.end = pn; 447 448 if (pn < h->watermark) 449 return 1; /* consider this a success case */ 450 451 if (ossl_uint_set_insert(&h->set, &r) != 1) 452 return 0; 453 454 rx_pkt_history_trim_range_count(h); 455 return 1; 456 } 457 458 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 459 QUIC_PN watermark) 460 { 461 UINT_RANGE r; 462 463 if (watermark <= h->watermark) 464 return 1; 465 466 /* Remove existing PNs below the watermark. */ 467 r.start = 0; 468 r.end = watermark - 1; 469 if (ossl_uint_set_remove(&h->set, &r) != 1) 470 return 0; 471 472 h->watermark = watermark; 473 return 1; 474 } 475 476 /* 477 * ACK Manager Implementation 478 * ************************** 479 * Implementation of the ACK manager proper. 480 */ 481 482 /* Constants used by the ACK manager; see RFC 9002. */ 483 #define K_GRANULARITY (1 * OSSL_TIME_MS) 484 #define K_PKT_THRESHOLD 3 485 #define K_TIME_THRESHOLD_NUM 9 486 #define K_TIME_THRESHOLD_DEN 8 487 488 /* The maximum number of times we allow PTO to be doubled. */ 489 #define MAX_PTO_COUNT 16 490 491 /* Default maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */ 492 #define DEFAULT_TX_MAX_ACK_DELAY ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY) 493 494 struct ossl_ackm_st { 495 /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */ 496 struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM]; 497 498 /* Our list of received PNs which are not yet provably acked. */ 499 struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM]; 500 501 /* Polymorphic dependencies that we consume. */ 502 OSSL_TIME (*now)(void *arg); 503 void *now_arg; 504 OSSL_STATM *statm; 505 const OSSL_CC_METHOD *cc_method; 506 OSSL_CC_DATA *cc_data; 507 508 /* RFC 9002 variables. */ 509 uint32_t pto_count; 510 QUIC_PN largest_acked_pkt[QUIC_PN_SPACE_NUM]; 511 OSSL_TIME time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM]; 512 OSSL_TIME loss_time[QUIC_PN_SPACE_NUM]; 513 OSSL_TIME loss_detection_deadline; 514 515 /* Lowest PN which is still not known to be ACKed. */ 516 QUIC_PN lowest_unacked_pkt[QUIC_PN_SPACE_NUM]; 517 518 /* Time at which we got our first RTT sample, or 0. */ 519 OSSL_TIME first_rtt_sample; 520 521 /* 522 * A packet's num_bytes are added to this if it is inflight, 523 * and removed again once ack'd/lost/discarded. 524 */ 525 uint64_t bytes_in_flight; 526 527 /* 528 * A packet's num_bytes are added to this if it is both inflight and 529 * ack-eliciting, and removed again once ack'd/lost/discarded. 530 */ 531 uint64_t ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM]; 532 533 /* Count of ECN-CE events. */ 534 uint64_t peer_ecnce[QUIC_PN_SPACE_NUM]; 535 536 /* Set to 1 when the handshake is confirmed. */ 537 char handshake_confirmed; 538 539 /* Set to 1 when attached to server channel */ 540 char is_server; 541 542 /* Set to 1 when the peer has completed address validation. */ 543 char peer_completed_addr_validation; 544 545 /* Set to 1 when a PN space has been discarded. */ 546 char discarded[QUIC_PN_SPACE_NUM]; 547 548 /* Set to 1 when we think an ACK frame should be generated. */ 549 char rx_ack_desired[QUIC_PN_SPACE_NUM]; 550 551 /* Set to 1 if an ACK frame has ever been generated. */ 552 char rx_ack_generated[QUIC_PN_SPACE_NUM]; 553 554 /* Probe request counts for reporting to the user. */ 555 OSSL_ACKM_PROBE_INFO pending_probe; 556 557 /* Generated ACK frames for each PN space. */ 558 OSSL_QUIC_FRAME_ACK ack[QUIC_PN_SPACE_NUM]; 559 OSSL_QUIC_ACK_RANGE ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES]; 560 561 /* Other RX state. */ 562 /* Largest PN we have RX'd. */ 563 QUIC_PN rx_largest_pn[QUIC_PN_SPACE_NUM]; 564 565 /* Time at which the PN in rx_largest_pn was RX'd. */ 566 OSSL_TIME rx_largest_time[QUIC_PN_SPACE_NUM]; 567 568 /* 569 * ECN event counters. Each time we receive a packet with a given ECN label, 570 * the corresponding ECN counter here is incremented. 571 */ 572 uint64_t rx_ect0[QUIC_PN_SPACE_NUM]; 573 uint64_t rx_ect1[QUIC_PN_SPACE_NUM]; 574 uint64_t rx_ecnce[QUIC_PN_SPACE_NUM]; 575 576 /* 577 * Number of ACK-eliciting packets since last ACK. We use this to defer 578 * emitting ACK frames until a threshold number of ACK-eliciting packets 579 * have been received. 580 */ 581 uint32_t rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM]; 582 583 /* 584 * The ACK frame coalescing deadline at which we should flush any unsent ACK 585 * frames. 586 */ 587 OSSL_TIME rx_ack_flush_deadline[QUIC_PN_SPACE_NUM]; 588 589 /* 590 * The RX maximum ACK delay (the maximum amount of time our peer might 591 * wait to send us an ACK after receiving an ACK-eliciting packet). 592 */ 593 OSSL_TIME rx_max_ack_delay; 594 595 /* 596 * The TX maximum ACK delay (the maximum amount of time we allow ourselves 597 * to wait before generating an ACK after receiving an ACK-eliciting 598 * packet). 599 */ 600 OSSL_TIME tx_max_ack_delay; 601 602 /* Callbacks for deadline updates. */ 603 void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg); 604 void *loss_detection_deadline_cb_arg; 605 606 void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg); 607 void *ack_deadline_cb_arg; 608 }; 609 610 static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y) 611 { 612 return x < y ? x : y; 613 } 614 615 /* 616 * Get TX history for a given packet number space. Must not have been 617 * discarded. 618 */ 619 static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space) 620 { 621 assert(!ackm->discarded[pkt_space]); 622 623 return &ackm->tx_history[pkt_space]; 624 } 625 626 /* 627 * Get RX history for a given packet number space. Must not have been 628 * discarded. 629 */ 630 static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space) 631 { 632 assert(!ackm->discarded[pkt_space]); 633 634 return &ackm->rx_history[pkt_space]; 635 } 636 637 /* Does the newly-acknowledged list contain any ack-eliciting packet? */ 638 static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt) 639 { 640 for (; pkt != NULL; pkt = pkt->anext) 641 if (pkt->is_ack_eliciting) 642 return 1; 643 644 return 0; 645 } 646 647 /* Return number of ACK-eliciting bytes in flight across all PN spaces. */ 648 static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm) 649 { 650 int i; 651 uint64_t total = 0; 652 653 for (i = 0; i < QUIC_PN_SPACE_NUM; ++i) 654 total += ackm->ack_eliciting_bytes_in_flight[i]; 655 656 return total; 657 } 658 659 /* Return 1 if the range contains the given PN. */ 660 static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn) 661 { 662 return pn >= range->start && pn <= range->end; 663 } 664 665 /* 666 * Given a logical representation of an ACK frame 'ack', create a singly-linked 667 * list of the newly ACK'd frames; that is, of frames which are matched by the 668 * list of PN ranges contained in the ACK frame. The packet structures in the 669 * list returned are removed from the TX history list. Returns a pointer to the 670 * list head (or NULL) if empty. 671 */ 672 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm, 673 const OSSL_QUIC_FRAME_ACK *ack, 674 int pkt_space) 675 { 676 OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev; 677 struct tx_pkt_history_st *h; 678 size_t ridx = 0; 679 680 assert(ack->num_ack_ranges > 0); 681 682 /* 683 * Our history list is a list of packets sorted in ascending order 684 * by packet number. 685 * 686 * ack->ack_ranges is a list of packet number ranges in descending order. 687 * 688 * Walk through our history list from the end in order to efficiently detect 689 * membership in the specified ack ranges. As an optimization, we use our 690 * hashtable to try and skip to the first matching packet. This may fail if 691 * the ACK ranges given include nonexistent packets. 692 */ 693 h = get_tx_history(ackm, pkt_space); 694 695 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 696 if (pkt == NULL) 697 pkt = ossl_list_tx_history_tail(&h->packets); 698 699 for (; pkt != NULL; pkt = pprev) { 700 /* 701 * Save prev value as it will be zeroed if we remove the packet from the 702 * history list below. 703 */ 704 pprev = ossl_list_tx_history_prev(pkt); 705 706 for (;; ++ridx) { 707 if (ridx >= ack->num_ack_ranges) { 708 /* 709 * We have exhausted all ranges so stop here, even if there are 710 * more packets to look at. 711 */ 712 goto stop; 713 } 714 715 if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) { 716 /* We have matched this range. */ 717 tx_pkt_history_remove(h, pkt->pkt_num); 718 719 *fixup = pkt; 720 fixup = &pkt->anext; 721 *fixup = NULL; 722 break; 723 } else if (pkt->pkt_num > ack->ack_ranges[ridx].end) { 724 /* 725 * We have not reached this range yet in our list, so do not 726 * advance ridx. 727 */ 728 break; 729 } else { 730 /* 731 * We have moved beyond this range, so advance to the next range 732 * and try matching again. 733 */ 734 assert(pkt->pkt_num < ack->ack_ranges[ridx].start); 735 continue; 736 } 737 } 738 } 739 stop: 740 741 return acked_pkts; 742 } 743 744 /* 745 * Create a singly-linked list of newly detected-lost packets in the given 746 * packet number space. Returns the head of the list or NULL if no packets were 747 * detected lost. The packets in the list are removed from the TX history list. 748 */ 749 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm, 750 int pkt_space) 751 { 752 OSSL_ACKM_TX_PKT *lost_pkts = NULL, **fixup = &lost_pkts, *pkt, *pnext; 753 OSSL_TIME loss_delay, lost_send_time, now; 754 OSSL_RTT_INFO rtt; 755 struct tx_pkt_history_st *h; 756 757 assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID); 758 759 ossl_statm_get_rtt_info(ackm->statm, &rtt); 760 761 ackm->loss_time[pkt_space] = ossl_time_zero(); 762 763 loss_delay = ossl_time_multiply(ossl_time_max(rtt.latest_rtt, 764 rtt.smoothed_rtt), 765 K_TIME_THRESHOLD_NUM); 766 loss_delay = ossl_time_divide(loss_delay, K_TIME_THRESHOLD_DEN); 767 768 /* Minimum time of K_GRANULARITY before packets are deemed lost. */ 769 loss_delay = ossl_time_max(loss_delay, ossl_ticks2time(K_GRANULARITY)); 770 771 /* Packets sent before this time are deemed lost. */ 772 now = ackm->now(ackm->now_arg); 773 lost_send_time = ossl_time_subtract(now, loss_delay); 774 775 h = get_tx_history(ackm, pkt_space); 776 pkt = ossl_list_tx_history_head(&h->packets); 777 778 for (; pkt != NULL; pkt = pnext) { 779 assert(pkt_space == pkt->pkt_space); 780 781 /* 782 * Save prev value as it will be zeroed if we remove the packet from the 783 * history list below. 784 */ 785 pnext = ossl_list_tx_history_next(pkt); 786 787 if (pkt->pkt_num > ackm->largest_acked_pkt[pkt_space]) 788 continue; 789 790 /* 791 * Mark packet as lost, or set time when it should be marked. 792 */ 793 if (ossl_time_compare(pkt->time, lost_send_time) <= 0 794 || ackm->largest_acked_pkt[pkt_space] 795 >= pkt->pkt_num + K_PKT_THRESHOLD) { 796 tx_pkt_history_remove(h, pkt->pkt_num); 797 798 *fixup = pkt; 799 fixup = &pkt->lnext; 800 *fixup = NULL; 801 } else { 802 if (ossl_time_is_zero(ackm->loss_time[pkt_space])) 803 ackm->loss_time[pkt_space] = 804 ossl_time_add(pkt->time, loss_delay); 805 else 806 ackm->loss_time[pkt_space] = 807 ossl_time_min(ackm->loss_time[pkt_space], 808 ossl_time_add(pkt->time, loss_delay)); 809 } 810 } 811 812 return lost_pkts; 813 } 814 815 static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace) 816 { 817 OSSL_TIME time = ackm->loss_time[QUIC_PN_SPACE_INITIAL]; 818 int i, space = QUIC_PN_SPACE_INITIAL; 819 820 for (i = space + 1; i < QUIC_PN_SPACE_NUM; ++i) 821 if (ossl_time_is_zero(time) 822 || ossl_time_compare(ackm->loss_time[i], time) == -1) { 823 time = ackm->loss_time[i]; 824 space = i; 825 } 826 827 *pspace = space; 828 return time; 829 } 830 831 static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space) 832 { 833 OSSL_RTT_INFO rtt; 834 OSSL_TIME duration; 835 OSSL_TIME pto_timeout = ossl_time_infinite(), t; 836 int pto_space = QUIC_PN_SPACE_INITIAL, i; 837 838 ossl_statm_get_rtt_info(ackm->statm, &rtt); 839 840 duration 841 = ossl_time_add(rtt.smoothed_rtt, 842 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 843 ossl_ticks2time(K_GRANULARITY))); 844 845 duration 846 = ossl_time_multiply(duration, 847 (uint64_t)1 << min_u32(ackm->pto_count, 848 MAX_PTO_COUNT)); 849 850 /* Anti-deadlock PTO starts from the current time. */ 851 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 852 assert(!ackm->peer_completed_addr_validation); 853 854 *space = ackm->discarded[QUIC_PN_SPACE_INITIAL] 855 ? QUIC_PN_SPACE_HANDSHAKE 856 : QUIC_PN_SPACE_INITIAL; 857 return ossl_time_add(ackm->now(ackm->now_arg), duration); 858 } 859 860 for (i = QUIC_PN_SPACE_INITIAL; i < QUIC_PN_SPACE_NUM; ++i) { 861 /* 862 * RFC 9002 section 6.2.2.1 keep probe timeout armed until 863 * handshake is confirmed (client sees HANDSHAKE_DONE message 864 * from server). 865 */ 866 if (ackm->ack_eliciting_bytes_in_flight[i] == 0 && 867 (ackm->handshake_confirmed == 1 || ackm->is_server == 1)) 868 continue; 869 870 if (i == QUIC_PN_SPACE_APP) { 871 /* Skip application data until handshake confirmed. */ 872 if (!ackm->handshake_confirmed) 873 break; 874 875 /* Include max_ack_delay and backoff for app data. */ 876 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) { 877 uint64_t factor 878 = (uint64_t)1 << min_u32(ackm->pto_count, MAX_PTO_COUNT); 879 880 duration 881 = ossl_time_add(duration, 882 ossl_time_multiply(ackm->rx_max_ack_delay, 883 factor)); 884 } 885 } 886 887 /* 888 * Only re-arm timer if stack has sent at least one ACK eliciting frame. 889 * If stack has sent no ACK eliciting frame at given encryption level then 890 * particular timer is zero and we must not attempt to set it. Timer keeps 891 * time since epoch (Jan 1 1970) and we must not set timer to past. 892 */ 893 if (!ossl_time_is_zero(ackm->time_of_last_ack_eliciting_pkt[i])) { 894 t = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[i], duration); 895 if (ossl_time_compare(t, pto_timeout) < 0) { 896 pto_timeout = t; 897 pto_space = i; 898 } 899 } 900 } 901 902 *space = pto_space; 903 return pto_timeout; 904 } 905 906 static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm, 907 OSSL_TIME deadline) 908 { 909 ackm->loss_detection_deadline = deadline; 910 911 if (ackm->loss_detection_deadline_cb != NULL) 912 ackm->loss_detection_deadline_cb(deadline, 913 ackm->loss_detection_deadline_cb_arg); 914 } 915 916 static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm) 917 { 918 int space; 919 OSSL_TIME earliest_loss_time, timeout; 920 921 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &space); 922 if (!ossl_time_is_zero(earliest_loss_time)) { 923 /* Time threshold loss detection. */ 924 ackm_set_loss_detection_timer_actual(ackm, earliest_loss_time); 925 return 1; 926 } 927 928 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0 929 && ackm->peer_completed_addr_validation) { 930 /* 931 * Nothing to detect lost, so no timer is set. However, the client 932 * needs to arm the timer if the server might be blocked by the 933 * anti-amplification limit. 934 */ 935 ackm_set_loss_detection_timer_actual(ackm, ossl_time_zero()); 936 return 1; 937 } 938 939 timeout = ackm_get_pto_time_and_space(ackm, &space); 940 ackm_set_loss_detection_timer_actual(ackm, timeout); 941 return 1; 942 } 943 944 static int ackm_in_persistent_congestion(OSSL_ACKM *ackm, 945 const OSSL_ACKM_TX_PKT *lpkt) 946 { 947 /* TODO(QUIC FUTURE): Persistent congestion not currently implemented. */ 948 return 0; 949 } 950 951 static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space, 952 const OSSL_ACKM_TX_PKT *lpkt, int pseudo) 953 { 954 const OSSL_ACKM_TX_PKT *p, *pnext; 955 OSSL_RTT_INFO rtt; 956 QUIC_PN largest_pn_lost = 0; 957 OSSL_CC_LOSS_INFO loss_info = {0}; 958 uint32_t flags = 0; 959 960 for (p = lpkt; p != NULL; p = pnext) { 961 pnext = p->lnext; 962 963 if (p->is_inflight) { 964 ackm->bytes_in_flight -= p->num_bytes; 965 if (p->is_ack_eliciting) 966 ackm->ack_eliciting_bytes_in_flight[p->pkt_space] 967 -= p->num_bytes; 968 969 if (p->pkt_num > largest_pn_lost) 970 largest_pn_lost = p->pkt_num; 971 972 if (!pseudo) { 973 /* 974 * If this is pseudo-loss (e.g. during connection retry) we do not 975 * inform the CC as it is not a real loss and not reflective of 976 * network conditions. 977 */ 978 loss_info.tx_time = p->time; 979 loss_info.tx_size = p->num_bytes; 980 981 ackm->cc_method->on_data_lost(ackm->cc_data, &loss_info); 982 } 983 } 984 985 p->on_lost(p->cb_arg); 986 } 987 988 /* 989 * Persistent congestion can only be considered if we have gotten at least 990 * one RTT sample. 991 */ 992 ossl_statm_get_rtt_info(ackm->statm, &rtt); 993 if (!ossl_time_is_zero(ackm->first_rtt_sample) 994 && ackm_in_persistent_congestion(ackm, lpkt)) 995 flags |= OSSL_CC_LOST_FLAG_PERSISTENT_CONGESTION; 996 997 ackm->cc_method->on_data_lost_finished(ackm->cc_data, flags); 998 } 999 1000 static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt) 1001 { 1002 const OSSL_ACKM_TX_PKT *anext; 1003 QUIC_PN last_pn_acked = 0; 1004 OSSL_CC_ACK_INFO ainfo = {0}; 1005 1006 for (; apkt != NULL; apkt = anext) { 1007 if (apkt->is_inflight) { 1008 ackm->bytes_in_flight -= apkt->num_bytes; 1009 if (apkt->is_ack_eliciting) 1010 ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space] 1011 -= apkt->num_bytes; 1012 1013 if (apkt->pkt_num > last_pn_acked) 1014 last_pn_acked = apkt->pkt_num; 1015 1016 if (apkt->largest_acked != QUIC_PN_INVALID) 1017 /* 1018 * This can fail, but it is monotonic; worst case we try again 1019 * next time. 1020 */ 1021 rx_pkt_history_bump_watermark(get_rx_history(ackm, 1022 apkt->pkt_space), 1023 apkt->largest_acked + 1); 1024 } 1025 1026 ainfo.tx_time = apkt->time; 1027 ainfo.tx_size = apkt->num_bytes; 1028 1029 anext = apkt->anext; 1030 apkt->on_acked(apkt->cb_arg); /* may free apkt */ 1031 1032 if (apkt->is_inflight) 1033 ackm->cc_method->on_data_acked(ackm->cc_data, &ainfo); 1034 } 1035 } 1036 1037 OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg), 1038 void *now_arg, 1039 OSSL_STATM *statm, 1040 const OSSL_CC_METHOD *cc_method, 1041 OSSL_CC_DATA *cc_data, 1042 int is_server) 1043 { 1044 OSSL_ACKM *ackm; 1045 int i; 1046 1047 ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM)); 1048 if (ackm == NULL) 1049 return NULL; 1050 1051 for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) { 1052 ackm->largest_acked_pkt[i] = QUIC_PN_INVALID; 1053 ackm->rx_ack_flush_deadline[i] = ossl_time_infinite(); 1054 if (tx_pkt_history_init(&ackm->tx_history[i]) < 1) 1055 goto err; 1056 } 1057 1058 for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i) 1059 rx_pkt_history_init(&ackm->rx_history[i]); 1060 1061 ackm->now = now; 1062 ackm->now_arg = now_arg; 1063 ackm->statm = statm; 1064 ackm->cc_method = cc_method; 1065 ackm->cc_data = cc_data; 1066 ackm->is_server = (char)is_server; 1067 1068 ackm->rx_max_ack_delay = ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY); 1069 ackm->tx_max_ack_delay = DEFAULT_TX_MAX_ACK_DELAY; 1070 1071 return ackm; 1072 1073 err: 1074 while (--i >= 0) 1075 tx_pkt_history_destroy(&ackm->tx_history[i]); 1076 1077 OPENSSL_free(ackm); 1078 return NULL; 1079 } 1080 1081 void ossl_ackm_free(OSSL_ACKM *ackm) 1082 { 1083 size_t i; 1084 1085 if (ackm == NULL) 1086 return; 1087 1088 for (i = 0; i < OSSL_NELEM(ackm->tx_history); ++i) 1089 if (!ackm->discarded[i]) { 1090 tx_pkt_history_destroy(&ackm->tx_history[i]); 1091 rx_pkt_history_destroy(&ackm->rx_history[i]); 1092 } 1093 1094 OPENSSL_free(ackm); 1095 } 1096 1097 int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt) 1098 { 1099 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt->pkt_space); 1100 1101 /* Time must be set and not move backwards. */ 1102 if (ossl_time_is_zero(pkt->time) 1103 || ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space], 1104 pkt->time) > 0) 1105 return 0; 1106 1107 /* Must have non-zero number of bytes. */ 1108 if (pkt->num_bytes == 0) 1109 return 0; 1110 1111 /* Does not make any sense for a non-in-flight packet to be ACK-eliciting. */ 1112 if (!pkt->is_inflight && pkt->is_ack_eliciting) 1113 return 0; 1114 1115 if (tx_pkt_history_add(h, pkt) == 0) 1116 return 0; 1117 1118 if (pkt->is_inflight) { 1119 if (pkt->is_ack_eliciting) { 1120 ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time; 1121 ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space] 1122 += pkt->num_bytes; 1123 } 1124 1125 ackm->bytes_in_flight += pkt->num_bytes; 1126 ackm_set_loss_detection_timer(ackm); 1127 1128 ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes); 1129 } 1130 1131 return 1; 1132 } 1133 1134 int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes) 1135 { 1136 /* No-op on the client. */ 1137 return 1; 1138 } 1139 1140 static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1141 int pkt_space) 1142 { 1143 struct tx_pkt_history_st *h; 1144 OSSL_ACKM_TX_PKT *pkt; 1145 OSSL_CC_ECN_INFO ecn_info = {0}; 1146 1147 /* 1148 * If the ECN-CE counter reported by the peer has increased, this could 1149 * be a new congestion event. 1150 */ 1151 if (ack->ecnce > ackm->peer_ecnce[pkt_space]) { 1152 ackm->peer_ecnce[pkt_space] = ack->ecnce; 1153 1154 h = get_tx_history(ackm, pkt_space); 1155 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 1156 if (pkt == NULL) 1157 return; 1158 1159 ecn_info.largest_acked_time = pkt->time; 1160 ackm->cc_method->on_ecn(ackm->cc_data, &ecn_info); 1161 } 1162 } 1163 1164 int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1165 int pkt_space, OSSL_TIME rx_time) 1166 { 1167 OSSL_ACKM_TX_PKT *na_pkts, *lost_pkts; 1168 int must_set_timer = 0; 1169 1170 if (ackm->largest_acked_pkt[pkt_space] == QUIC_PN_INVALID) 1171 ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end; 1172 else 1173 ackm->largest_acked_pkt[pkt_space] 1174 = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space], 1175 ack->ack_ranges[0].end); 1176 1177 /* 1178 * If we get an ACK in the handshake space, address validation is completed. 1179 * Make sure we update the timer, even if no packets were ACK'd. 1180 */ 1181 if (!ackm->peer_completed_addr_validation 1182 && pkt_space == QUIC_PN_SPACE_HANDSHAKE) { 1183 ackm->peer_completed_addr_validation = 1; 1184 must_set_timer = 1; 1185 } 1186 1187 /* 1188 * Find packets that are newly acknowledged and remove them from the list. 1189 */ 1190 na_pkts = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space); 1191 if (na_pkts == NULL) { 1192 if (must_set_timer) 1193 ackm_set_loss_detection_timer(ackm); 1194 1195 return 1; 1196 } 1197 1198 /* 1199 * Update the RTT if the largest acknowledged is newly acked and at least 1200 * one ACK-eliciting packet was newly acked. 1201 * 1202 * First packet in the list is always the one with the largest PN. 1203 */ 1204 if (na_pkts->pkt_num == ack->ack_ranges[0].end && 1205 ack_includes_ack_eliciting(na_pkts)) { 1206 OSSL_TIME now = ackm->now(ackm->now_arg), ack_delay; 1207 if (ossl_time_is_zero(ackm->first_rtt_sample)) 1208 ackm->first_rtt_sample = now; 1209 1210 /* Enforce maximum ACK delay. */ 1211 ack_delay = ack->delay_time; 1212 if (ackm->handshake_confirmed) 1213 ack_delay = ossl_time_min(ack_delay, ackm->rx_max_ack_delay); 1214 1215 ossl_statm_update_rtt(ackm->statm, ack_delay, 1216 ossl_time_subtract(now, na_pkts->time)); 1217 } 1218 1219 /* 1220 * Process ECN information if present. 1221 * 1222 * We deliberately do most ECN processing in the ACKM rather than the 1223 * congestion controller to avoid having to give the congestion controller 1224 * access to ACKM internal state. 1225 */ 1226 if (ack->ecn_present) 1227 ackm_process_ecn(ackm, ack, pkt_space); 1228 1229 /* Handle inferred loss. */ 1230 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1231 if (lost_pkts != NULL) 1232 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1233 1234 ackm_on_pkts_acked(ackm, na_pkts); 1235 1236 /* 1237 * Reset pto_count unless the client is unsure if the server validated the 1238 * client's address. 1239 */ 1240 if (ackm->peer_completed_addr_validation) 1241 ackm->pto_count = 0; 1242 1243 ackm_set_loss_detection_timer(ackm); 1244 return 1; 1245 } 1246 1247 int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space) 1248 { 1249 OSSL_ACKM_TX_PKT *pkt, *pnext; 1250 uint64_t num_bytes_invalidated = 0; 1251 1252 if (ackm->discarded[pkt_space]) 1253 return 0; 1254 1255 if (pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1256 ackm->peer_completed_addr_validation = 1; 1257 1258 for (pkt = ossl_list_tx_history_head(&get_tx_history(ackm, pkt_space)->packets); 1259 pkt != NULL; pkt = pnext) { 1260 pnext = ossl_list_tx_history_next(pkt); 1261 if (pkt->is_inflight) { 1262 ackm->bytes_in_flight -= pkt->num_bytes; 1263 num_bytes_invalidated += pkt->num_bytes; 1264 } 1265 1266 pkt->on_discarded(pkt->cb_arg); /* may free pkt */ 1267 } 1268 1269 tx_pkt_history_destroy(&ackm->tx_history[pkt_space]); 1270 rx_pkt_history_destroy(&ackm->rx_history[pkt_space]); 1271 1272 if (num_bytes_invalidated > 0) 1273 ackm->cc_method->on_data_invalidated(ackm->cc_data, 1274 num_bytes_invalidated); 1275 1276 ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero(); 1277 ackm->loss_time[pkt_space] = ossl_time_zero(); 1278 ackm->pto_count = 0; 1279 ackm->discarded[pkt_space] = 1; 1280 ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0; 1281 ackm_set_loss_detection_timer(ackm); 1282 return 1; 1283 } 1284 1285 int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm) 1286 { 1287 ackm->handshake_confirmed = 1; 1288 ackm->peer_completed_addr_validation = 1; 1289 ackm_set_loss_detection_timer(ackm); 1290 return 1; 1291 } 1292 1293 static void ackm_queue_probe_anti_deadlock_handshake(OSSL_ACKM *ackm) 1294 { 1295 ++ackm->pending_probe.anti_deadlock_handshake; 1296 } 1297 1298 static void ackm_queue_probe_anti_deadlock_initial(OSSL_ACKM *ackm) 1299 { 1300 ++ackm->pending_probe.anti_deadlock_initial; 1301 } 1302 1303 static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space) 1304 { 1305 /* 1306 * TODO(QUIC FUTURE): We are allowed to send either one or two probe 1307 * packets here. 1308 * Determine a strategy for when we should send two probe packets. 1309 */ 1310 ++ackm->pending_probe.pto[pkt_space]; 1311 } 1312 1313 int ossl_ackm_on_timeout(OSSL_ACKM *ackm) 1314 { 1315 int pkt_space; 1316 OSSL_TIME earliest_loss_time; 1317 OSSL_ACKM_TX_PKT *lost_pkts; 1318 1319 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space); 1320 if (!ossl_time_is_zero(earliest_loss_time)) { 1321 /* Time threshold loss detection. */ 1322 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1323 if (lost_pkts != NULL) 1324 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1325 ackm_set_loss_detection_timer(ackm); 1326 return 1; 1327 } 1328 1329 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 1330 assert(!ackm->peer_completed_addr_validation); 1331 /* 1332 * Client sends an anti-deadlock packet: Initial is padded to earn more 1333 * anti-amplification credit. A handshake packet proves address 1334 * ownership. 1335 */ 1336 if (ackm->discarded[QUIC_PN_SPACE_INITIAL]) 1337 ackm_queue_probe_anti_deadlock_handshake(ackm); 1338 else 1339 ackm_queue_probe_anti_deadlock_initial(ackm); 1340 } else { 1341 /* 1342 * PTO. The user of the ACKM should send new data if available, else 1343 * retransmit old data, or if neither is available, send a single PING 1344 * frame. 1345 */ 1346 ackm_get_pto_time_and_space(ackm, &pkt_space); 1347 ackm_queue_probe(ackm, pkt_space); 1348 } 1349 1350 ++ackm->pto_count; 1351 ackm_set_loss_detection_timer(ackm); 1352 return 1; 1353 } 1354 1355 OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm) 1356 { 1357 return ackm->loss_detection_deadline; 1358 } 1359 1360 OSSL_ACKM_PROBE_INFO *ossl_ackm_get0_probe_request(OSSL_ACKM *ackm) 1361 { 1362 return &ackm->pending_probe; 1363 } 1364 1365 int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn) 1366 { 1367 struct tx_pkt_history_st *h; 1368 OSSL_ACKM_TX_PKT *p; 1369 1370 h = get_tx_history(ackm, pkt_space); 1371 p = ossl_list_tx_history_tail(&h->packets); 1372 if (p != NULL) { 1373 *pn = p->pkt_num; 1374 return 1; 1375 } 1376 1377 return 0; 1378 } 1379 1380 /* Number of ACK-eliciting packets RX'd before we always emit an ACK. */ 1381 #define PKTS_BEFORE_ACK 2 1382 1383 /* 1384 * Return 1 if emission of an ACK frame is currently desired. 1385 * 1386 * This occurs when one or more of the following conditions occurs: 1387 * 1388 * - We have flagged that we want to send an ACK frame 1389 * (for example, due to the packet threshold count being exceeded), or 1390 * 1391 * - We have exceeded the ACK flush deadline, meaning that 1392 * we have received at least one ACK-eliciting packet, but held off on 1393 * sending an ACK frame immediately in the hope that more ACK-eliciting 1394 * packets might come in, but not enough did and we are now requesting 1395 * transmission of an ACK frame anyway. 1396 * 1397 */ 1398 int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space) 1399 { 1400 return ackm->rx_ack_desired[pkt_space] 1401 || (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]) 1402 && ossl_time_compare(ackm->now(ackm->now_arg), 1403 ackm->rx_ack_flush_deadline[pkt_space]) >= 0); 1404 } 1405 1406 /* 1407 * Returns 1 if an ACK frame matches a given packet number. 1408 */ 1409 static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num) 1410 { 1411 size_t i; 1412 1413 for (i = 0; i < ack->num_ack_ranges; ++i) 1414 if (range_contains(&ack->ack_ranges[i], pkt_num)) 1415 return 1; 1416 1417 return 0; 1418 } 1419 1420 /* 1421 * Returns 1 iff a PN (which we have just received) was previously reported as 1422 * implied missing (by us, in an ACK frame we previously generated). 1423 */ 1424 static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num) 1425 { 1426 /* 1427 * A PN is implied missing if it is not greater than the highest PN in our 1428 * generated ACK frame, but is not matched by the frame. 1429 */ 1430 return ackm->ack[pkt_space].num_ack_ranges > 0 1431 && pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end 1432 && !ack_contains(&ackm->ack[pkt_space], pkt_num); 1433 } 1434 1435 /* 1436 * Returns 1 iff our RX of a PN newly establishes the implication of missing 1437 * packets. 1438 */ 1439 static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space) 1440 { 1441 struct rx_pkt_history_st *h; 1442 1443 h = get_rx_history(ackm, pkt_space); 1444 1445 if (ossl_list_uint_set_is_empty(&h->set)) 1446 return 0; 1447 1448 /* 1449 * The second condition here establishes that the highest PN range in our RX 1450 * history comprises only a single PN. If there is more than one, then this 1451 * function will have returned 1 during a previous call to 1452 * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus 1453 * we only return 1 when the missing PN condition is newly established. 1454 * 1455 * The third condition here establishes that the highest PN range in our RX 1456 * history is beyond (and does not border) the highest PN we have yet 1457 * reported in any ACK frame. Thus there is a gap of at least one PN between 1458 * the PNs we have ACK'd previously and the PN we have just received. 1459 */ 1460 return ackm->ack[pkt_space].num_ack_ranges > 0 1461 && ossl_list_uint_set_tail(&h->set)->range.start 1462 == ossl_list_uint_set_tail(&h->set)->range.end 1463 && ossl_list_uint_set_tail(&h->set)->range.start 1464 > ackm->ack[pkt_space].ack_ranges[0].end + 1; 1465 } 1466 1467 static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space, 1468 OSSL_TIME deadline) 1469 { 1470 ackm->rx_ack_flush_deadline[pkt_space] = deadline; 1471 1472 if (ackm->ack_deadline_cb != NULL) 1473 ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space), 1474 pkt_space, ackm->ack_deadline_cb_arg); 1475 } 1476 1477 /* Explicitly flags that we want to generate an ACK frame. */ 1478 static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space) 1479 { 1480 ackm->rx_ack_desired[pkt_space] = 1; 1481 1482 /* Cancel deadline. */ 1483 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1484 } 1485 1486 static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm, 1487 OSSL_TIME rx_time, int pkt_space, 1488 int was_missing) 1489 { 1490 OSSL_TIME tx_max_ack_delay; 1491 1492 if (ackm->rx_ack_desired[pkt_space]) 1493 /* ACK generation already requested so nothing to do. */ 1494 return; 1495 1496 ++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]; 1497 1498 if (!ackm->rx_ack_generated[pkt_space] 1499 || was_missing 1500 || ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] 1501 >= PKTS_BEFORE_ACK 1502 || ackm_has_newly_missing(ackm, pkt_space)) { 1503 /* 1504 * Either: 1505 * 1506 * - We have never yet generated an ACK frame, meaning that this 1507 * is the first ever packet received, which we should always 1508 * acknowledge immediately, or 1509 * 1510 * - We previously reported the PN that we have just received as 1511 * missing in a previous ACK frame (meaning that we should report 1512 * the fact that we now have it to the peer immediately), or 1513 * 1514 * - We have exceeded the ACK-eliciting packet threshold count 1515 * for the purposes of ACK coalescing, so request transmission 1516 * of an ACK frame, or 1517 * 1518 * - The PN we just received and added to our PN RX history 1519 * newly implies one or more missing PNs, in which case we should 1520 * inform the peer by sending an ACK frame immediately. 1521 * 1522 * We do not test the ACK flush deadline here because it is tested 1523 * separately in ossl_ackm_is_ack_desired. 1524 */ 1525 ackm_queue_ack(ackm, pkt_space); 1526 return; 1527 } 1528 1529 /* 1530 * Not emitting an ACK yet. 1531 * 1532 * Update the ACK flush deadline. 1533 * 1534 * RFC 9000 s. 13.2.1: "An endpoint MUST acknowledge all ack-eliciting 1535 * Initial and Handshake packets immediately"; don't delay ACK generation if 1536 * we are using the Initial or Handshake PN spaces. 1537 */ 1538 tx_max_ack_delay = ackm->tx_max_ack_delay; 1539 if (pkt_space == QUIC_PN_SPACE_INITIAL 1540 || pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1541 tx_max_ack_delay = ossl_time_zero(); 1542 1543 if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])) 1544 ackm_set_flush_deadline(ackm, pkt_space, 1545 ossl_time_add(rx_time, tx_max_ack_delay)); 1546 else 1547 ackm_set_flush_deadline(ackm, pkt_space, 1548 ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space], 1549 ossl_time_add(rx_time, 1550 tx_max_ack_delay))); 1551 } 1552 1553 int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt) 1554 { 1555 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt->pkt_space); 1556 int was_missing; 1557 1558 if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1) 1559 /* PN has already been processed or written off, no-op. */ 1560 return 1; 1561 1562 /* 1563 * Record the largest PN we have RX'd and the time we received it. 1564 * We use this to calculate the ACK delay field of ACK frames. 1565 */ 1566 if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) { 1567 ackm->rx_largest_pn[pkt->pkt_space] = pkt->pkt_num; 1568 ackm->rx_largest_time[pkt->pkt_space] = pkt->time; 1569 } 1570 1571 /* 1572 * If the PN we just received was previously implied missing by virtue of 1573 * being omitted from a previous ACK frame generated, we skip any packet 1574 * count thresholds or coalescing delays and emit a new ACK frame 1575 * immediately. 1576 */ 1577 was_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num); 1578 1579 /* 1580 * Add the packet number to our history list of PNs we have not yet provably 1581 * acked. 1582 */ 1583 if (rx_pkt_history_add_pn(h, pkt->pkt_num) != 1) 1584 return 0; 1585 1586 /* 1587 * Receiving this packet may or may not cause us to emit an ACK frame. 1588 * We may not emit an ACK frame yet if we have not yet received a threshold 1589 * number of packets. 1590 */ 1591 if (pkt->is_ack_eliciting) 1592 ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space, was_missing); 1593 1594 /* Update the ECN counters according to which ECN signal we got, if any. */ 1595 switch (pkt->ecn) { 1596 case OSSL_ACKM_ECN_ECT0: 1597 ++ackm->rx_ect0[pkt->pkt_space]; 1598 break; 1599 case OSSL_ACKM_ECN_ECT1: 1600 ++ackm->rx_ect1[pkt->pkt_space]; 1601 break; 1602 case OSSL_ACKM_ECN_ECNCE: 1603 ++ackm->rx_ecnce[pkt->pkt_space]; 1604 break; 1605 default: 1606 break; 1607 } 1608 1609 return 1; 1610 } 1611 1612 static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space, 1613 OSSL_QUIC_FRAME_ACK *ack) 1614 { 1615 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1616 UINT_SET_ITEM *x; 1617 size_t i = 0; 1618 1619 /* 1620 * Copy out ranges from the PN set, starting at the end, until we reach our 1621 * maximum number of ranges. 1622 */ 1623 for (x = ossl_list_uint_set_tail(&h->set); 1624 x != NULL && i < OSSL_NELEM(ackm->ack_ranges); 1625 x = ossl_list_uint_set_prev(x), ++i) { 1626 ackm->ack_ranges[pkt_space][i].start = x->range.start; 1627 ackm->ack_ranges[pkt_space][i].end = x->range.end; 1628 } 1629 1630 ack->ack_ranges = ackm->ack_ranges[pkt_space]; 1631 ack->num_ack_ranges = i; 1632 } 1633 1634 const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm, 1635 int pkt_space) 1636 { 1637 OSSL_QUIC_FRAME_ACK *ack = &ackm->ack[pkt_space]; 1638 OSSL_TIME now = ackm->now(ackm->now_arg); 1639 1640 ackm_fill_rx_ack_ranges(ackm, pkt_space, ack); 1641 1642 if (!ossl_time_is_zero(ackm->rx_largest_time[pkt_space]) 1643 && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0 1644 && pkt_space == QUIC_PN_SPACE_APP) 1645 ack->delay_time = 1646 ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]); 1647 else 1648 ack->delay_time = ossl_time_zero(); 1649 1650 ack->ect0 = ackm->rx_ect0[pkt_space]; 1651 ack->ect1 = ackm->rx_ect1[pkt_space]; 1652 ack->ecnce = ackm->rx_ecnce[pkt_space]; 1653 ack->ecn_present = 1; 1654 1655 ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0; 1656 1657 ackm->rx_ack_generated[pkt_space] = 1; 1658 ackm->rx_ack_desired[pkt_space] = 0; 1659 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1660 return ack; 1661 } 1662 1663 1664 OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space) 1665 { 1666 if (ackm->rx_ack_desired[pkt_space]) 1667 /* Already desired, deadline is now. */ 1668 return ossl_time_zero(); 1669 1670 return ackm->rx_ack_flush_deadline[pkt_space]; 1671 } 1672 1673 int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space) 1674 { 1675 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1676 1677 return pn >= h->watermark && ossl_uint_set_query(&h->set, pn) == 0; 1678 } 1679 1680 void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm, 1681 void (*fn)(OSSL_TIME deadline, 1682 void *arg), 1683 void *arg) 1684 { 1685 ackm->loss_detection_deadline_cb = fn; 1686 ackm->loss_detection_deadline_cb_arg = arg; 1687 } 1688 1689 void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm, 1690 void (*fn)(OSSL_TIME deadline, 1691 int pkt_space, 1692 void *arg), 1693 void *arg) 1694 { 1695 ackm->ack_deadline_cb = fn; 1696 ackm->ack_deadline_cb_arg = arg; 1697 } 1698 1699 int ossl_ackm_mark_packet_pseudo_lost(OSSL_ACKM *ackm, 1700 int pkt_space, QUIC_PN pn) 1701 { 1702 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt_space); 1703 OSSL_ACKM_TX_PKT *pkt; 1704 1705 pkt = tx_pkt_history_by_pkt_num(h, pn); 1706 if (pkt == NULL) 1707 return 0; 1708 1709 tx_pkt_history_remove(h, pkt->pkt_num); 1710 pkt->lnext = NULL; 1711 ackm_on_pkts_lost(ackm, pkt_space, pkt, /*pseudo=*/1); 1712 return 1; 1713 } 1714 1715 OSSL_TIME ossl_ackm_get_pto_duration(OSSL_ACKM *ackm) 1716 { 1717 OSSL_TIME duration; 1718 OSSL_RTT_INFO rtt; 1719 1720 ossl_statm_get_rtt_info(ackm->statm, &rtt); 1721 1722 duration = ossl_time_add(rtt.smoothed_rtt, 1723 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 1724 ossl_ticks2time(K_GRANULARITY))); 1725 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) 1726 duration = ossl_time_add(duration, ackm->rx_max_ack_delay); 1727 1728 return duration; 1729 } 1730 1731 QUIC_PN ossl_ackm_get_largest_acked(OSSL_ACKM *ackm, int pkt_space) 1732 { 1733 return ackm->largest_acked_pkt[pkt_space]; 1734 } 1735 1736 void ossl_ackm_set_rx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME rx_max_ack_delay) 1737 { 1738 ackm->rx_max_ack_delay = rx_max_ack_delay; 1739 } 1740 1741 void ossl_ackm_set_tx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME tx_max_ack_delay) 1742 { 1743 ackm->tx_max_ack_delay = tx_max_ack_delay; 1744 } 1745