1 /* 2 * Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include "internal/quic_ackm.h" 11 #include "internal/uint_set.h" 12 #include "internal/common.h" 13 #include <assert.h> 14 15 DEFINE_LIST_OF(tx_history, OSSL_ACKM_TX_PKT); 16 17 /* 18 * TX Packet History 19 * ***************** 20 * 21 * The TX Packet History object tracks information about packets which have been 22 * sent for which we later expect to receive an ACK. It is essentially a simple 23 * database keeping a list of packet information structures in packet number 24 * order which can also be looked up directly by packet number. 25 * 26 * We currently only allow packets to be appended to the list (i.e. the packet 27 * numbers of the packets appended to the list must monotonically increase), as 28 * we should not currently need more general functionality such as a sorted list 29 * insert. 30 */ 31 struct tx_pkt_history_st { 32 /* A linked list of all our packets. */ 33 OSSL_LIST(tx_history) packets; 34 35 /* 36 * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *) 37 * 38 * Invariant: A packet is in this map if and only if it is in the linked 39 * list. 40 */ 41 LHASH_OF(OSSL_ACKM_TX_PKT) *map; 42 43 /* 44 * The lowest packet number which may currently be added to the history list 45 * (inclusive). We do not allow packet numbers to be added to the history 46 * list non-monotonically, so packet numbers must be greater than or equal 47 * to this value. 48 */ 49 uint64_t watermark; 50 51 /* 52 * Packet number of the highest packet info structure we have yet appended 53 * to the list. This is usually one less than watermark, except when we have 54 * not added any packet yet. 55 */ 56 uint64_t highest_sent; 57 }; 58 59 DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT); 60 61 static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt) 62 { 63 /* Using low bits of the packet number as the hash should be enough */ 64 return (unsigned long)pkt->pkt_num; 65 } 66 67 static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a, 68 const OSSL_ACKM_TX_PKT *b) 69 { 70 if (a->pkt_num < b->pkt_num) 71 return -1; 72 if (a->pkt_num > b->pkt_num) 73 return 1; 74 return 0; 75 } 76 77 static int 78 tx_pkt_history_init(struct tx_pkt_history_st *h) 79 { 80 ossl_list_tx_history_init(&h->packets); 81 h->watermark = 0; 82 h->highest_sent = 0; 83 84 h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare); 85 if (h->map == NULL) 86 return 0; 87 88 return 1; 89 } 90 91 static void 92 tx_pkt_history_destroy(struct tx_pkt_history_st *h) 93 { 94 lh_OSSL_ACKM_TX_PKT_free(h->map); 95 h->map = NULL; 96 ossl_list_tx_history_init(&h->packets); 97 } 98 99 static int 100 tx_pkt_history_add_actual(struct tx_pkt_history_st *h, 101 OSSL_ACKM_TX_PKT *pkt) 102 { 103 OSSL_ACKM_TX_PKT *existing; 104 105 /* 106 * There should not be any existing packet with this number 107 * in our mapping. 108 */ 109 existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt); 110 if (!ossl_assert(existing == NULL)) 111 return 0; 112 113 /* Should not already be in a list. */ 114 if (!ossl_assert(ossl_list_tx_history_next(pkt) == NULL 115 && ossl_list_tx_history_prev(pkt) == NULL)) 116 return 0; 117 118 lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt); 119 120 ossl_list_tx_history_insert_tail(&h->packets, pkt); 121 return 1; 122 } 123 124 /* Adds a packet information structure to the history list. */ 125 static int 126 tx_pkt_history_add(struct tx_pkt_history_st *h, 127 OSSL_ACKM_TX_PKT *pkt) 128 { 129 if (!ossl_assert(pkt->pkt_num >= h->watermark)) 130 return 0; 131 132 if (tx_pkt_history_add_actual(h, pkt) < 1) 133 return 0; 134 135 h->watermark = pkt->pkt_num + 1; 136 h->highest_sent = pkt->pkt_num; 137 return 1; 138 } 139 140 /* Retrieve a packet information structure by packet number. */ 141 static OSSL_ACKM_TX_PKT * 142 tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num) 143 { 144 OSSL_ACKM_TX_PKT key; 145 146 key.pkt_num = pkt_num; 147 148 return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key); 149 } 150 151 /* Remove a packet information structure from the history log. */ 152 static int 153 tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num) 154 { 155 OSSL_ACKM_TX_PKT key, *pkt; 156 key.pkt_num = pkt_num; 157 158 pkt = tx_pkt_history_by_pkt_num(h, pkt_num); 159 if (pkt == NULL) 160 return 0; 161 162 ossl_list_tx_history_remove(&h->packets, pkt); 163 lh_OSSL_ACKM_TX_PKT_delete(h->map, &key); 164 return 1; 165 } 166 167 /* 168 * RX Packet Number Tracking 169 * ************************* 170 * 171 * **Background.** The RX side of the ACK manager must track packets we have 172 * received for which we have to generate ACK frames. Broadly, this means we 173 * store a set of packet numbers which we have received but which we do not know 174 * for a fact that the transmitter knows we have received. 175 * 176 * This must handle various situations: 177 * 178 * 1. We receive a packet but have not sent an ACK yet, so the transmitter 179 * does not know whether we have received it or not yet. 180 * 181 * 2. We receive a packet and send an ACK which is lost. We do not 182 * immediately know that the ACK was lost and the transmitter does not know 183 * that we have received the packet. 184 * 185 * 3. We receive a packet and send an ACK which is received by the 186 * transmitter. The transmitter does not immediately respond with an ACK, 187 * or responds with an ACK which is lost. The transmitter knows that we 188 * have received the packet, but we do not know for sure that it knows, 189 * because the ACK we sent could have been lost. 190 * 191 * 4. We receive a packet and send an ACK which is received by the 192 * transmitter. The transmitter subsequently sends us an ACK which confirms 193 * its receipt of the ACK we sent, and we successfully receive that ACK, so 194 * we know that the transmitter knows, that we received the original 195 * packet. 196 * 197 * Only when we reach case (4) are we relieved of any need to track a given 198 * packet number we have received, because only in this case do we know for sure 199 * that the peer knows we have received the packet. Having reached case (4) we 200 * will never again need to generate an ACK containing the PN in question, but 201 * until we reach that point, we must keep track of the PN as not having been 202 * provably ACKed, as we may have to keep generating ACKs for the given PN not 203 * just until the transmitter receives one, but until we know that it has 204 * received one. This will be referred to herein as "provably ACKed". 205 * 206 * **Duplicate handling.** The above discusses the case where we have received a 207 * packet with a given PN but are at best unsure whether the sender knows we 208 * have received it or not. However, we must also handle the case where we have 209 * yet to receive a packet with a given PN in the first place. The reason for 210 * this is because of the requirement expressed by RFC 9000 s. 12.3: 211 * 212 * "A receiver MUST discard a newly unprotected packet unless it is certain 213 * that it has not processed another packet with the same packet number from 214 * the same packet number space." 215 * 216 * We must ensure we never process a duplicate PN. As such, each possible PN we 217 * can receive must exist in one of the following logical states: 218 * 219 * - We have never processed this PN before 220 * (so if we receive such a PN, it can be processed) 221 * 222 * - We have processed this PN but it has not yet been provably ACKed 223 * (and should therefore be in any future ACK frame generated; 224 * if we receive such a PN again, it must be ignored) 225 * 226 * - We have processed this PN and it has been provably ACKed 227 * (if we receive such a PN again, it must be ignored) 228 * 229 * However, if we were to track this state for every PN ever used in the history 230 * of a connection, the amount of state required would increase unboundedly as 231 * the connection goes on (for example, we would have to store a set of every PN 232 * ever received.) 233 * 234 * RFC 9000 s. 12.3 continues: 235 * 236 * "Endpoints that track all individual packets for the purposes of detecting 237 * duplicates are at risk of accumulating excessive state. The data required 238 * for detecting duplicates can be limited by maintaining a minimum packet 239 * number below which all packets are immediately dropped." 240 * 241 * Moreover, RFC 9000 s. 13.2.3 states that: 242 * 243 * "A receiver MUST retain an ACK Range unless it can ensure that it will not 244 * subsequently accept packets with numbers in that range. Maintaining a 245 * minimum packet number that increases as ranges are discarded is one way to 246 * achieve this with minimal state." 247 * 248 * This touches on a subtlety of the original requirement quoted above: the 249 * receiver MUST discard a packet unless it is certain that it has not processed 250 * another packet with the same PN. However, this does not forbid the receiver 251 * from also discarding some PNs even though it has not yet processed them. In 252 * other words, implementations must be conservative and err in the direction of 253 * assuming a packet is a duplicate, but it is acceptable for this to come at 254 * the cost of falsely identifying some packets as duplicates. 255 * 256 * This allows us to bound the amount of state we must keep, and we adopt the 257 * suggested strategy quoted above to do so. We define a watermark PN below 258 * which all PNs are in the same state. This watermark is only ever increased. 259 * Thus the PNs the state for which needs to be explicitly tracked is limited to 260 * only a small number of recent PNs, and all older PNs have an assumed state. 261 * 262 * Any given PN thus falls into one of the following states: 263 * 264 * - (A) The PN is above the watermark but we have not yet received it. 265 * 266 * If we receive such a PN, we should process it and record the PN as 267 * received. 268 * 269 * - (B) The PN is above the watermark and we have received it. 270 * 271 * The PN should be included in any future ACK frame we generate. 272 * If we receive such a PN again, we should ignore it. 273 * 274 * - (C) The PN is below the watermark. 275 * 276 * We do not know whether a packet with the given PN was received or 277 * not. To be safe, if we receive such a packet, it is not processed. 278 * 279 * Note that state (C) corresponds to both "we have processed this PN and it has 280 * been provably ACKed" logical state and a subset of the PNs in the "we have 281 * never processed this PN before" logical state (namely all PNs which were lost 282 * and never received, but which are not recent enough to be above the 283 * watermark). The reason we can merge these states and avoid tracking states 284 * for the PNs in this state is because the provably ACKed and never-received 285 * states are functionally identical in terms of how we need to handle them: we 286 * don't need to do anything for PNs in either of these states, so we don't have 287 * to care about PNs in this state nor do we have to care about distinguishing 288 * the two states for a given PN. 289 * 290 * Note that under this scheme provably ACKed PNs are by definition always below 291 * the watermark; therefore, it follows that when a PN becomes provably ACKed, 292 * the watermark must be immediately increased to exceed it (otherwise we would 293 * keep reporting it in future ACK frames). 294 * 295 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when 296 * to advance the watermark: 297 * 298 * "When a packet containing an ACK frame is sent, the Largest Acknowledged 299 * field in that frame can be saved. When a packet containing an ACK frame is 300 * acknowledged, the receiver can stop acknowledging packets less than or 301 * equal to the Largest Acknowledged field in the sent ACK frame." 302 * 303 * This is where our scheme's false positives arise. When a packet containing an 304 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably 305 * acked, and the watermark is bumped accordingly. However, the Largest 306 * Acknowledged field does not imply that all lower PNs have been received, 307 * because there may be gaps expressed in the ranges of PNs expressed by that 308 * and previous ACK frames. Thus, some unreceived PNs may be moved below the 309 * watermark, and we may subsequently reject those PNs as possibly being 310 * duplicates even though we have not actually received those PNs. Since we bump 311 * the watermark when a PN becomes provably ACKed, it follows that an unreceived 312 * PN falls below the watermark (and thus becomes a false positive for the 313 * purposes of duplicate detection) when a higher-numbered PN becomes provably 314 * ACKed. 315 * 316 * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0, 317 * n) will no longer be processed. Although datagrams may be reordered in the 318 * network, a PN we receive can only become provably ACKed after our own 319 * subsequently generated ACK frame is sent in a future TX packet, and then we 320 * receive another RX PN acknowledging that TX packet. This means that a given RX 321 * PN can only become provably ACKed at least 1 RTT after it is received; it is 322 * unlikely that any reordered datagrams will still be "in the network" (and not 323 * lost) by this time. If this does occur for whatever reason and a late PN is 324 * received, the packet will be discarded unprocessed and the PN is simply 325 * handled as though lost (a "written off" PN). 326 * 327 * **Data structure.** Our state for the RX handling side of the ACK manager, as 328 * discussed above, mainly comprises: 329 * 330 * a) a logical set of PNs, and 331 * b) a monotonically increasing PN counter (the watermark). 332 * 333 * For (a), we define a data structure which stores a logical set of PNs, which 334 * we use to keep track of which PNs we have received but which have not yet 335 * been provably ACKed, and thus will later need to generate an ACK frame for. 336 * 337 * The correspondence with the logical states discussed above is as follows. A 338 * PN is in state (C) if it is below the watermark; otherwise it is in state (B) 339 * if it is in the logical set of PNs, and in state (A) otherwise. 340 * 341 * Note that PNs are only removed from the PN set (when they become provably 342 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs 343 * from the PN set any other way would be ambiguous as it would be 344 * indistinguishable from a PN we have not yet received and risk us processing a 345 * duplicate packet. In other words, for a given PN: 346 * 347 * - State (A) can transition to state (B) or (C) 348 * - State (B) can transition to state (C) only 349 * - State (C) is the terminal state 350 * 351 * We can query the logical set data structure for PNs which have been received 352 * but which have not been provably ACKed when we want to generate ACK frames. 353 * Since ACK frames can be lost and/or we might not know that the peer has 354 * successfully received them, we might generate multiple ACK frames covering a 355 * given PN until that PN becomes provably ACKed and we finally remove it from 356 * our set (by bumping the watermark) as no longer being our concern. 357 * 358 * The data structure used is the UINT_SET structure defined in uint_set.h, 359 * which is used as a PN set. We use the following operations of the structure: 360 * 361 * Insert Range: Used when we receive a new PN. 362 * 363 * Remove Range: Used when bumping the watermark. 364 * 365 * Query: Used to determine if a PN is in the set. 366 * 367 * **Possible duplicates.** A PN is considered a possible duplicate when either: 368 * 369 * a) its PN is already in the PN set (i.e. has already been received), or 370 * b) its PN is below the watermark (i.e. was provably ACKed or written off). 371 * 372 * A packet with a given PN is considered 'processable' when that PN is not 373 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable). 374 * 375 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes 376 * provably ACKed. This occurs when an ACK frame is received by the TX side of 377 * the ACK manager; thus, there is necessary interaction between the TX and RX 378 * sides of the ACK manager. 379 * 380 * This is implemented as follows. When a packet is queued as sent in the TX 381 * side of the ACK manager, it may optionally have a Largest Acked value set on 382 * it. The user of the ACK manager should do this if the packet being 383 * transmitted contains an ACK frame, by setting the field to the Largest Acked 384 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID. 385 * When a TX packet is eventually acknowledged which has this field set, it is 386 * used to update the state of the RX side of the ACK manager by bumping the 387 * watermark accordingly. 388 */ 389 struct rx_pkt_history_st { 390 UINT_SET set; 391 392 /* 393 * Invariant: PNs below this are not in the set. 394 * Invariant: This is monotonic and only ever increases. 395 */ 396 QUIC_PN watermark; 397 }; 398 399 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 400 QUIC_PN watermark); 401 402 static void rx_pkt_history_init(struct rx_pkt_history_st *h) 403 { 404 ossl_uint_set_init(&h->set); 405 h->watermark = 0; 406 } 407 408 static void rx_pkt_history_destroy(struct rx_pkt_history_st *h) 409 { 410 ossl_uint_set_destroy(&h->set); 411 } 412 413 /* 414 * Limit the number of ACK ranges we store to prevent resource consumption DoS 415 * attacks. 416 */ 417 #define MAX_RX_ACK_RANGES 32 418 419 static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h) 420 { 421 QUIC_PN highest = QUIC_PN_INVALID; 422 423 while (ossl_list_uint_set_num(&h->set) > MAX_RX_ACK_RANGES) { 424 UINT_RANGE r = ossl_list_uint_set_head(&h->set)->range; 425 426 highest = (highest == QUIC_PN_INVALID) 427 ? r.end : ossl_quic_pn_max(highest, r.end); 428 429 ossl_uint_set_remove(&h->set, &r); 430 } 431 432 /* 433 * Bump watermark to cover all PNs we removed to avoid accidental 434 * reprocessing of packets. 435 */ 436 if (highest != QUIC_PN_INVALID) 437 rx_pkt_history_bump_watermark(h, highest + 1); 438 } 439 440 static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h, 441 QUIC_PN pn) 442 { 443 UINT_RANGE r; 444 445 r.start = pn; 446 r.end = pn; 447 448 if (pn < h->watermark) 449 return 1; /* consider this a success case */ 450 451 if (ossl_uint_set_insert(&h->set, &r) != 1) 452 return 0; 453 454 rx_pkt_history_trim_range_count(h); 455 return 1; 456 } 457 458 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 459 QUIC_PN watermark) 460 { 461 UINT_RANGE r; 462 463 if (watermark <= h->watermark) 464 return 1; 465 466 /* Remove existing PNs below the watermark. */ 467 r.start = 0; 468 r.end = watermark - 1; 469 if (ossl_uint_set_remove(&h->set, &r) != 1) 470 return 0; 471 472 h->watermark = watermark; 473 return 1; 474 } 475 476 /* 477 * ACK Manager Implementation 478 * ************************** 479 * Implementation of the ACK manager proper. 480 */ 481 482 /* Constants used by the ACK manager; see RFC 9002. */ 483 #define K_GRANULARITY (1 * OSSL_TIME_MS) 484 #define K_PKT_THRESHOLD 3 485 #define K_TIME_THRESHOLD_NUM 9 486 #define K_TIME_THRESHOLD_DEN 8 487 488 /* The maximum number of times we allow PTO to be doubled. */ 489 #define MAX_PTO_COUNT 16 490 491 /* Default maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */ 492 #define DEFAULT_TX_MAX_ACK_DELAY ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY) 493 494 struct ossl_ackm_st { 495 /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */ 496 struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM]; 497 498 /* Our list of received PNs which are not yet provably acked. */ 499 struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM]; 500 501 /* Polymorphic dependencies that we consume. */ 502 OSSL_TIME (*now)(void *arg); 503 void *now_arg; 504 OSSL_STATM *statm; 505 const OSSL_CC_METHOD *cc_method; 506 OSSL_CC_DATA *cc_data; 507 508 /* RFC 9002 variables. */ 509 uint32_t pto_count; 510 QUIC_PN largest_acked_pkt[QUIC_PN_SPACE_NUM]; 511 OSSL_TIME time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM]; 512 OSSL_TIME loss_time[QUIC_PN_SPACE_NUM]; 513 OSSL_TIME loss_detection_deadline; 514 515 /* Lowest PN which is still not known to be ACKed. */ 516 QUIC_PN lowest_unacked_pkt[QUIC_PN_SPACE_NUM]; 517 518 /* Time at which we got our first RTT sample, or 0. */ 519 OSSL_TIME first_rtt_sample; 520 521 /* 522 * A packet's num_bytes are added to this if it is inflight, 523 * and removed again once ack'd/lost/discarded. 524 */ 525 uint64_t bytes_in_flight; 526 527 /* 528 * A packet's num_bytes are added to this if it is both inflight and 529 * ack-eliciting, and removed again once ack'd/lost/discarded. 530 */ 531 uint64_t ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM]; 532 533 /* Count of ECN-CE events. */ 534 uint64_t peer_ecnce[QUIC_PN_SPACE_NUM]; 535 536 /* Set to 1 when the handshake is confirmed. */ 537 char handshake_confirmed; 538 539 /* Set to 1 when the peer has completed address validation. */ 540 char peer_completed_addr_validation; 541 542 /* Set to 1 when a PN space has been discarded. */ 543 char discarded[QUIC_PN_SPACE_NUM]; 544 545 /* Set to 1 when we think an ACK frame should be generated. */ 546 char rx_ack_desired[QUIC_PN_SPACE_NUM]; 547 548 /* Set to 1 if an ACK frame has ever been generated. */ 549 char rx_ack_generated[QUIC_PN_SPACE_NUM]; 550 551 /* Probe request counts for reporting to the user. */ 552 OSSL_ACKM_PROBE_INFO pending_probe; 553 554 /* Generated ACK frames for each PN space. */ 555 OSSL_QUIC_FRAME_ACK ack[QUIC_PN_SPACE_NUM]; 556 OSSL_QUIC_ACK_RANGE ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES]; 557 558 /* Other RX state. */ 559 /* Largest PN we have RX'd. */ 560 QUIC_PN rx_largest_pn[QUIC_PN_SPACE_NUM]; 561 562 /* Time at which the PN in rx_largest_pn was RX'd. */ 563 OSSL_TIME rx_largest_time[QUIC_PN_SPACE_NUM]; 564 565 /* 566 * ECN event counters. Each time we receive a packet with a given ECN label, 567 * the corresponding ECN counter here is incremented. 568 */ 569 uint64_t rx_ect0[QUIC_PN_SPACE_NUM]; 570 uint64_t rx_ect1[QUIC_PN_SPACE_NUM]; 571 uint64_t rx_ecnce[QUIC_PN_SPACE_NUM]; 572 573 /* 574 * Number of ACK-eliciting packets since last ACK. We use this to defer 575 * emitting ACK frames until a threshold number of ACK-eliciting packets 576 * have been received. 577 */ 578 uint32_t rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM]; 579 580 /* 581 * The ACK frame coalescing deadline at which we should flush any unsent ACK 582 * frames. 583 */ 584 OSSL_TIME rx_ack_flush_deadline[QUIC_PN_SPACE_NUM]; 585 586 /* 587 * The RX maximum ACK delay (the maximum amount of time our peer might 588 * wait to send us an ACK after receiving an ACK-eliciting packet). 589 */ 590 OSSL_TIME rx_max_ack_delay; 591 592 /* 593 * The TX maximum ACK delay (the maximum amount of time we allow ourselves 594 * to wait before generating an ACK after receiving an ACK-eliciting 595 * packet). 596 */ 597 OSSL_TIME tx_max_ack_delay; 598 599 /* Callbacks for deadline updates. */ 600 void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg); 601 void *loss_detection_deadline_cb_arg; 602 603 void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg); 604 void *ack_deadline_cb_arg; 605 }; 606 607 static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y) 608 { 609 return x < y ? x : y; 610 } 611 612 /* 613 * Get TX history for a given packet number space. Must not have been 614 * discarded. 615 */ 616 static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space) 617 { 618 assert(!ackm->discarded[pkt_space]); 619 620 return &ackm->tx_history[pkt_space]; 621 } 622 623 /* 624 * Get RX history for a given packet number space. Must not have been 625 * discarded. 626 */ 627 static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space) 628 { 629 assert(!ackm->discarded[pkt_space]); 630 631 return &ackm->rx_history[pkt_space]; 632 } 633 634 /* Does the newly-acknowledged list contain any ack-eliciting packet? */ 635 static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt) 636 { 637 for (; pkt != NULL; pkt = pkt->anext) 638 if (pkt->is_ack_eliciting) 639 return 1; 640 641 return 0; 642 } 643 644 /* Return number of ACK-eliciting bytes in flight across all PN spaces. */ 645 static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm) 646 { 647 int i; 648 uint64_t total = 0; 649 650 for (i = 0; i < QUIC_PN_SPACE_NUM; ++i) 651 total += ackm->ack_eliciting_bytes_in_flight[i]; 652 653 return total; 654 } 655 656 /* Return 1 if the range contains the given PN. */ 657 static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn) 658 { 659 return pn >= range->start && pn <= range->end; 660 } 661 662 /* 663 * Given a logical representation of an ACK frame 'ack', create a singly-linked 664 * list of the newly ACK'd frames; that is, of frames which are matched by the 665 * list of PN ranges contained in the ACK frame. The packet structures in the 666 * list returned are removed from the TX history list. Returns a pointer to the 667 * list head (or NULL) if empty. 668 */ 669 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm, 670 const OSSL_QUIC_FRAME_ACK *ack, 671 int pkt_space) 672 { 673 OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev; 674 struct tx_pkt_history_st *h; 675 size_t ridx = 0; 676 677 assert(ack->num_ack_ranges > 0); 678 679 /* 680 * Our history list is a list of packets sorted in ascending order 681 * by packet number. 682 * 683 * ack->ack_ranges is a list of packet number ranges in descending order. 684 * 685 * Walk through our history list from the end in order to efficiently detect 686 * membership in the specified ack ranges. As an optimization, we use our 687 * hashtable to try and skip to the first matching packet. This may fail if 688 * the ACK ranges given include nonexistent packets. 689 */ 690 h = get_tx_history(ackm, pkt_space); 691 692 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 693 if (pkt == NULL) 694 pkt = ossl_list_tx_history_tail(&h->packets); 695 696 for (; pkt != NULL; pkt = pprev) { 697 /* 698 * Save prev value as it will be zeroed if we remove the packet from the 699 * history list below. 700 */ 701 pprev = ossl_list_tx_history_prev(pkt); 702 703 for (;; ++ridx) { 704 if (ridx >= ack->num_ack_ranges) { 705 /* 706 * We have exhausted all ranges so stop here, even if there are 707 * more packets to look at. 708 */ 709 goto stop; 710 } 711 712 if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) { 713 /* We have matched this range. */ 714 tx_pkt_history_remove(h, pkt->pkt_num); 715 716 *fixup = pkt; 717 fixup = &pkt->anext; 718 *fixup = NULL; 719 break; 720 } else if (pkt->pkt_num > ack->ack_ranges[ridx].end) { 721 /* 722 * We have not reached this range yet in our list, so do not 723 * advance ridx. 724 */ 725 break; 726 } else { 727 /* 728 * We have moved beyond this range, so advance to the next range 729 * and try matching again. 730 */ 731 assert(pkt->pkt_num < ack->ack_ranges[ridx].start); 732 continue; 733 } 734 } 735 } 736 stop: 737 738 return acked_pkts; 739 } 740 741 /* 742 * Create a singly-linked list of newly detected-lost packets in the given 743 * packet number space. Returns the head of the list or NULL if no packets were 744 * detected lost. The packets in the list are removed from the TX history list. 745 */ 746 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm, 747 int pkt_space) 748 { 749 OSSL_ACKM_TX_PKT *lost_pkts = NULL, **fixup = &lost_pkts, *pkt, *pnext; 750 OSSL_TIME loss_delay, lost_send_time, now; 751 OSSL_RTT_INFO rtt; 752 struct tx_pkt_history_st *h; 753 754 assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID); 755 756 ossl_statm_get_rtt_info(ackm->statm, &rtt); 757 758 ackm->loss_time[pkt_space] = ossl_time_zero(); 759 760 loss_delay = ossl_time_multiply(ossl_time_max(rtt.latest_rtt, 761 rtt.smoothed_rtt), 762 K_TIME_THRESHOLD_NUM); 763 loss_delay = ossl_time_divide(loss_delay, K_TIME_THRESHOLD_DEN); 764 765 /* Minimum time of K_GRANULARITY before packets are deemed lost. */ 766 loss_delay = ossl_time_max(loss_delay, ossl_ticks2time(K_GRANULARITY)); 767 768 /* Packets sent before this time are deemed lost. */ 769 now = ackm->now(ackm->now_arg); 770 lost_send_time = ossl_time_subtract(now, loss_delay); 771 772 h = get_tx_history(ackm, pkt_space); 773 pkt = ossl_list_tx_history_head(&h->packets); 774 775 for (; pkt != NULL; pkt = pnext) { 776 assert(pkt_space == pkt->pkt_space); 777 778 /* 779 * Save prev value as it will be zeroed if we remove the packet from the 780 * history list below. 781 */ 782 pnext = ossl_list_tx_history_next(pkt); 783 784 if (pkt->pkt_num > ackm->largest_acked_pkt[pkt_space]) 785 continue; 786 787 /* 788 * Mark packet as lost, or set time when it should be marked. 789 */ 790 if (ossl_time_compare(pkt->time, lost_send_time) <= 0 791 || ackm->largest_acked_pkt[pkt_space] 792 >= pkt->pkt_num + K_PKT_THRESHOLD) { 793 tx_pkt_history_remove(h, pkt->pkt_num); 794 795 *fixup = pkt; 796 fixup = &pkt->lnext; 797 *fixup = NULL; 798 } else { 799 if (ossl_time_is_zero(ackm->loss_time[pkt_space])) 800 ackm->loss_time[pkt_space] = 801 ossl_time_add(pkt->time, loss_delay); 802 else 803 ackm->loss_time[pkt_space] = 804 ossl_time_min(ackm->loss_time[pkt_space], 805 ossl_time_add(pkt->time, loss_delay)); 806 } 807 } 808 809 return lost_pkts; 810 } 811 812 static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace) 813 { 814 OSSL_TIME time = ackm->loss_time[QUIC_PN_SPACE_INITIAL]; 815 int i, space = QUIC_PN_SPACE_INITIAL; 816 817 for (i = space + 1; i < QUIC_PN_SPACE_NUM; ++i) 818 if (ossl_time_is_zero(time) 819 || ossl_time_compare(ackm->loss_time[i], time) == -1) { 820 time = ackm->loss_time[i]; 821 space = i; 822 } 823 824 *pspace = space; 825 return time; 826 } 827 828 static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space) 829 { 830 OSSL_RTT_INFO rtt; 831 OSSL_TIME duration; 832 OSSL_TIME pto_timeout = ossl_time_infinite(), t; 833 int pto_space = QUIC_PN_SPACE_INITIAL, i; 834 835 ossl_statm_get_rtt_info(ackm->statm, &rtt); 836 837 duration 838 = ossl_time_add(rtt.smoothed_rtt, 839 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 840 ossl_ticks2time(K_GRANULARITY))); 841 842 duration 843 = ossl_time_multiply(duration, 844 (uint64_t)1 << min_u32(ackm->pto_count, 845 MAX_PTO_COUNT)); 846 847 /* Anti-deadlock PTO starts from the current time. */ 848 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 849 assert(!ackm->peer_completed_addr_validation); 850 851 *space = ackm->discarded[QUIC_PN_SPACE_INITIAL] 852 ? QUIC_PN_SPACE_HANDSHAKE 853 : QUIC_PN_SPACE_INITIAL; 854 return ossl_time_add(ackm->now(ackm->now_arg), duration); 855 } 856 857 for (i = QUIC_PN_SPACE_INITIAL; i < QUIC_PN_SPACE_NUM; ++i) { 858 if (ackm->ack_eliciting_bytes_in_flight[i] == 0) 859 continue; 860 861 if (i == QUIC_PN_SPACE_APP) { 862 /* Skip application data until handshake confirmed. */ 863 if (!ackm->handshake_confirmed) 864 break; 865 866 /* Include max_ack_delay and backoff for app data. */ 867 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) { 868 uint64_t factor 869 = (uint64_t)1 << min_u32(ackm->pto_count, MAX_PTO_COUNT); 870 871 duration 872 = ossl_time_add(duration, 873 ossl_time_multiply(ackm->rx_max_ack_delay, 874 factor)); 875 } 876 } 877 878 t = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[i], duration); 879 if (ossl_time_compare(t, pto_timeout) < 0) { 880 pto_timeout = t; 881 pto_space = i; 882 } 883 } 884 885 *space = pto_space; 886 return pto_timeout; 887 } 888 889 static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm, 890 OSSL_TIME deadline) 891 { 892 ackm->loss_detection_deadline = deadline; 893 894 if (ackm->loss_detection_deadline_cb != NULL) 895 ackm->loss_detection_deadline_cb(deadline, 896 ackm->loss_detection_deadline_cb_arg); 897 } 898 899 static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm) 900 { 901 int space; 902 OSSL_TIME earliest_loss_time, timeout; 903 904 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &space); 905 if (!ossl_time_is_zero(earliest_loss_time)) { 906 /* Time threshold loss detection. */ 907 ackm_set_loss_detection_timer_actual(ackm, earliest_loss_time); 908 return 1; 909 } 910 911 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0 912 && ackm->peer_completed_addr_validation) { 913 /* 914 * Nothing to detect lost, so no timer is set. However, the client 915 * needs to arm the timer if the server might be blocked by the 916 * anti-amplification limit. 917 */ 918 ackm_set_loss_detection_timer_actual(ackm, ossl_time_zero()); 919 return 1; 920 } 921 922 timeout = ackm_get_pto_time_and_space(ackm, &space); 923 ackm_set_loss_detection_timer_actual(ackm, timeout); 924 return 1; 925 } 926 927 static int ackm_in_persistent_congestion(OSSL_ACKM *ackm, 928 const OSSL_ACKM_TX_PKT *lpkt) 929 { 930 /* TODO(QUIC FUTURE): Persistent congestion not currently implemented. */ 931 return 0; 932 } 933 934 static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space, 935 const OSSL_ACKM_TX_PKT *lpkt, int pseudo) 936 { 937 const OSSL_ACKM_TX_PKT *p, *pnext; 938 OSSL_RTT_INFO rtt; 939 QUIC_PN largest_pn_lost = 0; 940 OSSL_CC_LOSS_INFO loss_info = {0}; 941 uint32_t flags = 0; 942 943 for (p = lpkt; p != NULL; p = pnext) { 944 pnext = p->lnext; 945 946 if (p->is_inflight) { 947 ackm->bytes_in_flight -= p->num_bytes; 948 if (p->is_ack_eliciting) 949 ackm->ack_eliciting_bytes_in_flight[p->pkt_space] 950 -= p->num_bytes; 951 952 if (p->pkt_num > largest_pn_lost) 953 largest_pn_lost = p->pkt_num; 954 955 if (!pseudo) { 956 /* 957 * If this is pseudo-loss (e.g. during connection retry) we do not 958 * inform the CC as it is not a real loss and not reflective of 959 * network conditions. 960 */ 961 loss_info.tx_time = p->time; 962 loss_info.tx_size = p->num_bytes; 963 964 ackm->cc_method->on_data_lost(ackm->cc_data, &loss_info); 965 } 966 } 967 968 p->on_lost(p->cb_arg); 969 } 970 971 /* 972 * Persistent congestion can only be considered if we have gotten at least 973 * one RTT sample. 974 */ 975 ossl_statm_get_rtt_info(ackm->statm, &rtt); 976 if (!ossl_time_is_zero(ackm->first_rtt_sample) 977 && ackm_in_persistent_congestion(ackm, lpkt)) 978 flags |= OSSL_CC_LOST_FLAG_PERSISTENT_CONGESTION; 979 980 ackm->cc_method->on_data_lost_finished(ackm->cc_data, flags); 981 } 982 983 static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt) 984 { 985 const OSSL_ACKM_TX_PKT *anext; 986 QUIC_PN last_pn_acked = 0; 987 OSSL_CC_ACK_INFO ainfo = {0}; 988 989 for (; apkt != NULL; apkt = anext) { 990 if (apkt->is_inflight) { 991 ackm->bytes_in_flight -= apkt->num_bytes; 992 if (apkt->is_ack_eliciting) 993 ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space] 994 -= apkt->num_bytes; 995 996 if (apkt->pkt_num > last_pn_acked) 997 last_pn_acked = apkt->pkt_num; 998 999 if (apkt->largest_acked != QUIC_PN_INVALID) 1000 /* 1001 * This can fail, but it is monotonic; worst case we try again 1002 * next time. 1003 */ 1004 rx_pkt_history_bump_watermark(get_rx_history(ackm, 1005 apkt->pkt_space), 1006 apkt->largest_acked + 1); 1007 } 1008 1009 ainfo.tx_time = apkt->time; 1010 ainfo.tx_size = apkt->num_bytes; 1011 1012 anext = apkt->anext; 1013 apkt->on_acked(apkt->cb_arg); /* may free apkt */ 1014 1015 if (apkt->is_inflight) 1016 ackm->cc_method->on_data_acked(ackm->cc_data, &ainfo); 1017 } 1018 } 1019 1020 OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg), 1021 void *now_arg, 1022 OSSL_STATM *statm, 1023 const OSSL_CC_METHOD *cc_method, 1024 OSSL_CC_DATA *cc_data) 1025 { 1026 OSSL_ACKM *ackm; 1027 int i; 1028 1029 ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM)); 1030 if (ackm == NULL) 1031 return NULL; 1032 1033 for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) { 1034 ackm->largest_acked_pkt[i] = QUIC_PN_INVALID; 1035 ackm->rx_ack_flush_deadline[i] = ossl_time_infinite(); 1036 if (tx_pkt_history_init(&ackm->tx_history[i]) < 1) 1037 goto err; 1038 } 1039 1040 for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i) 1041 rx_pkt_history_init(&ackm->rx_history[i]); 1042 1043 ackm->now = now; 1044 ackm->now_arg = now_arg; 1045 ackm->statm = statm; 1046 ackm->cc_method = cc_method; 1047 ackm->cc_data = cc_data; 1048 1049 ackm->rx_max_ack_delay = ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY); 1050 ackm->tx_max_ack_delay = DEFAULT_TX_MAX_ACK_DELAY; 1051 1052 return ackm; 1053 1054 err: 1055 while (--i >= 0) 1056 tx_pkt_history_destroy(&ackm->tx_history[i]); 1057 1058 OPENSSL_free(ackm); 1059 return NULL; 1060 } 1061 1062 void ossl_ackm_free(OSSL_ACKM *ackm) 1063 { 1064 size_t i; 1065 1066 if (ackm == NULL) 1067 return; 1068 1069 for (i = 0; i < OSSL_NELEM(ackm->tx_history); ++i) 1070 if (!ackm->discarded[i]) { 1071 tx_pkt_history_destroy(&ackm->tx_history[i]); 1072 rx_pkt_history_destroy(&ackm->rx_history[i]); 1073 } 1074 1075 OPENSSL_free(ackm); 1076 } 1077 1078 int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt) 1079 { 1080 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt->pkt_space); 1081 1082 /* Time must be set and not move backwards. */ 1083 if (ossl_time_is_zero(pkt->time) 1084 || ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space], 1085 pkt->time) > 0) 1086 return 0; 1087 1088 /* Must have non-zero number of bytes. */ 1089 if (pkt->num_bytes == 0) 1090 return 0; 1091 1092 /* Does not make any sense for a non-in-flight packet to be ACK-eliciting. */ 1093 if (!pkt->is_inflight && pkt->is_ack_eliciting) 1094 return 0; 1095 1096 if (tx_pkt_history_add(h, pkt) == 0) 1097 return 0; 1098 1099 if (pkt->is_inflight) { 1100 if (pkt->is_ack_eliciting) { 1101 ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time; 1102 ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space] 1103 += pkt->num_bytes; 1104 } 1105 1106 ackm->bytes_in_flight += pkt->num_bytes; 1107 ackm_set_loss_detection_timer(ackm); 1108 1109 ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes); 1110 } 1111 1112 return 1; 1113 } 1114 1115 int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes) 1116 { 1117 /* No-op on the client. */ 1118 return 1; 1119 } 1120 1121 static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1122 int pkt_space) 1123 { 1124 struct tx_pkt_history_st *h; 1125 OSSL_ACKM_TX_PKT *pkt; 1126 OSSL_CC_ECN_INFO ecn_info = {0}; 1127 1128 /* 1129 * If the ECN-CE counter reported by the peer has increased, this could 1130 * be a new congestion event. 1131 */ 1132 if (ack->ecnce > ackm->peer_ecnce[pkt_space]) { 1133 ackm->peer_ecnce[pkt_space] = ack->ecnce; 1134 1135 h = get_tx_history(ackm, pkt_space); 1136 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 1137 if (pkt == NULL) 1138 return; 1139 1140 ecn_info.largest_acked_time = pkt->time; 1141 ackm->cc_method->on_ecn(ackm->cc_data, &ecn_info); 1142 } 1143 } 1144 1145 int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1146 int pkt_space, OSSL_TIME rx_time) 1147 { 1148 OSSL_ACKM_TX_PKT *na_pkts, *lost_pkts; 1149 int must_set_timer = 0; 1150 1151 if (ackm->largest_acked_pkt[pkt_space] == QUIC_PN_INVALID) 1152 ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end; 1153 else 1154 ackm->largest_acked_pkt[pkt_space] 1155 = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space], 1156 ack->ack_ranges[0].end); 1157 1158 /* 1159 * If we get an ACK in the handshake space, address validation is completed. 1160 * Make sure we update the timer, even if no packets were ACK'd. 1161 */ 1162 if (!ackm->peer_completed_addr_validation 1163 && pkt_space == QUIC_PN_SPACE_HANDSHAKE) { 1164 ackm->peer_completed_addr_validation = 1; 1165 must_set_timer = 1; 1166 } 1167 1168 /* 1169 * Find packets that are newly acknowledged and remove them from the list. 1170 */ 1171 na_pkts = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space); 1172 if (na_pkts == NULL) { 1173 if (must_set_timer) 1174 ackm_set_loss_detection_timer(ackm); 1175 1176 return 1; 1177 } 1178 1179 /* 1180 * Update the RTT if the largest acknowledged is newly acked and at least 1181 * one ACK-eliciting packet was newly acked. 1182 * 1183 * First packet in the list is always the one with the largest PN. 1184 */ 1185 if (na_pkts->pkt_num == ack->ack_ranges[0].end && 1186 ack_includes_ack_eliciting(na_pkts)) { 1187 OSSL_TIME now = ackm->now(ackm->now_arg), ack_delay; 1188 if (ossl_time_is_zero(ackm->first_rtt_sample)) 1189 ackm->first_rtt_sample = now; 1190 1191 /* Enforce maximum ACK delay. */ 1192 ack_delay = ack->delay_time; 1193 if (ackm->handshake_confirmed) 1194 ack_delay = ossl_time_min(ack_delay, ackm->rx_max_ack_delay); 1195 1196 ossl_statm_update_rtt(ackm->statm, ack_delay, 1197 ossl_time_subtract(now, na_pkts->time)); 1198 } 1199 1200 /* 1201 * Process ECN information if present. 1202 * 1203 * We deliberately do most ECN processing in the ACKM rather than the 1204 * congestion controller to avoid having to give the congestion controller 1205 * access to ACKM internal state. 1206 */ 1207 if (ack->ecn_present) 1208 ackm_process_ecn(ackm, ack, pkt_space); 1209 1210 /* Handle inferred loss. */ 1211 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1212 if (lost_pkts != NULL) 1213 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1214 1215 ackm_on_pkts_acked(ackm, na_pkts); 1216 1217 /* 1218 * Reset pto_count unless the client is unsure if the server validated the 1219 * client's address. 1220 */ 1221 if (ackm->peer_completed_addr_validation) 1222 ackm->pto_count = 0; 1223 1224 ackm_set_loss_detection_timer(ackm); 1225 return 1; 1226 } 1227 1228 int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space) 1229 { 1230 OSSL_ACKM_TX_PKT *pkt, *pnext; 1231 uint64_t num_bytes_invalidated = 0; 1232 1233 if (ackm->discarded[pkt_space]) 1234 return 0; 1235 1236 if (pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1237 ackm->peer_completed_addr_validation = 1; 1238 1239 for (pkt = ossl_list_tx_history_head(&get_tx_history(ackm, pkt_space)->packets); 1240 pkt != NULL; pkt = pnext) { 1241 pnext = ossl_list_tx_history_next(pkt); 1242 if (pkt->is_inflight) { 1243 ackm->bytes_in_flight -= pkt->num_bytes; 1244 num_bytes_invalidated += pkt->num_bytes; 1245 } 1246 1247 pkt->on_discarded(pkt->cb_arg); /* may free pkt */ 1248 } 1249 1250 tx_pkt_history_destroy(&ackm->tx_history[pkt_space]); 1251 rx_pkt_history_destroy(&ackm->rx_history[pkt_space]); 1252 1253 if (num_bytes_invalidated > 0) 1254 ackm->cc_method->on_data_invalidated(ackm->cc_data, 1255 num_bytes_invalidated); 1256 1257 ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero(); 1258 ackm->loss_time[pkt_space] = ossl_time_zero(); 1259 ackm->pto_count = 0; 1260 ackm->discarded[pkt_space] = 1; 1261 ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0; 1262 ackm_set_loss_detection_timer(ackm); 1263 return 1; 1264 } 1265 1266 int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm) 1267 { 1268 ackm->handshake_confirmed = 1; 1269 ackm->peer_completed_addr_validation = 1; 1270 ackm_set_loss_detection_timer(ackm); 1271 return 1; 1272 } 1273 1274 static void ackm_queue_probe_anti_deadlock_handshake(OSSL_ACKM *ackm) 1275 { 1276 ++ackm->pending_probe.anti_deadlock_handshake; 1277 } 1278 1279 static void ackm_queue_probe_anti_deadlock_initial(OSSL_ACKM *ackm) 1280 { 1281 ++ackm->pending_probe.anti_deadlock_initial; 1282 } 1283 1284 static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space) 1285 { 1286 /* 1287 * TODO(QUIC FUTURE): We are allowed to send either one or two probe 1288 * packets here. 1289 * Determine a strategy for when we should send two probe packets. 1290 */ 1291 ++ackm->pending_probe.pto[pkt_space]; 1292 } 1293 1294 int ossl_ackm_on_timeout(OSSL_ACKM *ackm) 1295 { 1296 int pkt_space; 1297 OSSL_TIME earliest_loss_time; 1298 OSSL_ACKM_TX_PKT *lost_pkts; 1299 1300 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space); 1301 if (!ossl_time_is_zero(earliest_loss_time)) { 1302 /* Time threshold loss detection. */ 1303 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1304 if (lost_pkts != NULL) 1305 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1306 ackm_set_loss_detection_timer(ackm); 1307 return 1; 1308 } 1309 1310 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 1311 assert(!ackm->peer_completed_addr_validation); 1312 /* 1313 * Client sends an anti-deadlock packet: Initial is padded to earn more 1314 * anti-amplification credit. A handshake packet proves address 1315 * ownership. 1316 */ 1317 if (ackm->discarded[QUIC_PN_SPACE_INITIAL]) 1318 ackm_queue_probe_anti_deadlock_handshake(ackm); 1319 else 1320 ackm_queue_probe_anti_deadlock_initial(ackm); 1321 } else { 1322 /* 1323 * PTO. The user of the ACKM should send new data if available, else 1324 * retransmit old data, or if neither is available, send a single PING 1325 * frame. 1326 */ 1327 ackm_get_pto_time_and_space(ackm, &pkt_space); 1328 ackm_queue_probe(ackm, pkt_space); 1329 } 1330 1331 ++ackm->pto_count; 1332 ackm_set_loss_detection_timer(ackm); 1333 return 1; 1334 } 1335 1336 OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm) 1337 { 1338 return ackm->loss_detection_deadline; 1339 } 1340 1341 OSSL_ACKM_PROBE_INFO *ossl_ackm_get0_probe_request(OSSL_ACKM *ackm) 1342 { 1343 return &ackm->pending_probe; 1344 } 1345 1346 int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn) 1347 { 1348 struct tx_pkt_history_st *h; 1349 OSSL_ACKM_TX_PKT *p; 1350 1351 h = get_tx_history(ackm, pkt_space); 1352 p = ossl_list_tx_history_tail(&h->packets); 1353 if (p != NULL) { 1354 *pn = p->pkt_num; 1355 return 1; 1356 } 1357 1358 return 0; 1359 } 1360 1361 /* Number of ACK-eliciting packets RX'd before we always emit an ACK. */ 1362 #define PKTS_BEFORE_ACK 2 1363 1364 /* 1365 * Return 1 if emission of an ACK frame is currently desired. 1366 * 1367 * This occurs when one or more of the following conditions occurs: 1368 * 1369 * - We have flagged that we want to send an ACK frame 1370 * (for example, due to the packet threshold count being exceeded), or 1371 * 1372 * - We have exceeded the ACK flush deadline, meaning that 1373 * we have received at least one ACK-eliciting packet, but held off on 1374 * sending an ACK frame immediately in the hope that more ACK-eliciting 1375 * packets might come in, but not enough did and we are now requesting 1376 * transmission of an ACK frame anyway. 1377 * 1378 */ 1379 int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space) 1380 { 1381 return ackm->rx_ack_desired[pkt_space] 1382 || (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]) 1383 && ossl_time_compare(ackm->now(ackm->now_arg), 1384 ackm->rx_ack_flush_deadline[pkt_space]) >= 0); 1385 } 1386 1387 /* 1388 * Returns 1 if an ACK frame matches a given packet number. 1389 */ 1390 static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num) 1391 { 1392 size_t i; 1393 1394 for (i = 0; i < ack->num_ack_ranges; ++i) 1395 if (range_contains(&ack->ack_ranges[i], pkt_num)) 1396 return 1; 1397 1398 return 0; 1399 } 1400 1401 /* 1402 * Returns 1 iff a PN (which we have just received) was previously reported as 1403 * implied missing (by us, in an ACK frame we previously generated). 1404 */ 1405 static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num) 1406 { 1407 /* 1408 * A PN is implied missing if it is not greater than the highest PN in our 1409 * generated ACK frame, but is not matched by the frame. 1410 */ 1411 return ackm->ack[pkt_space].num_ack_ranges > 0 1412 && pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end 1413 && !ack_contains(&ackm->ack[pkt_space], pkt_num); 1414 } 1415 1416 /* 1417 * Returns 1 iff our RX of a PN newly establishes the implication of missing 1418 * packets. 1419 */ 1420 static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space) 1421 { 1422 struct rx_pkt_history_st *h; 1423 1424 h = get_rx_history(ackm, pkt_space); 1425 1426 if (ossl_list_uint_set_is_empty(&h->set)) 1427 return 0; 1428 1429 /* 1430 * The second condition here establishes that the highest PN range in our RX 1431 * history comprises only a single PN. If there is more than one, then this 1432 * function will have returned 1 during a previous call to 1433 * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus 1434 * we only return 1 when the missing PN condition is newly established. 1435 * 1436 * The third condition here establishes that the highest PN range in our RX 1437 * history is beyond (and does not border) the highest PN we have yet 1438 * reported in any ACK frame. Thus there is a gap of at least one PN between 1439 * the PNs we have ACK'd previously and the PN we have just received. 1440 */ 1441 return ackm->ack[pkt_space].num_ack_ranges > 0 1442 && ossl_list_uint_set_tail(&h->set)->range.start 1443 == ossl_list_uint_set_tail(&h->set)->range.end 1444 && ossl_list_uint_set_tail(&h->set)->range.start 1445 > ackm->ack[pkt_space].ack_ranges[0].end + 1; 1446 } 1447 1448 static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space, 1449 OSSL_TIME deadline) 1450 { 1451 ackm->rx_ack_flush_deadline[pkt_space] = deadline; 1452 1453 if (ackm->ack_deadline_cb != NULL) 1454 ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space), 1455 pkt_space, ackm->ack_deadline_cb_arg); 1456 } 1457 1458 /* Explicitly flags that we want to generate an ACK frame. */ 1459 static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space) 1460 { 1461 ackm->rx_ack_desired[pkt_space] = 1; 1462 1463 /* Cancel deadline. */ 1464 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1465 } 1466 1467 static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm, 1468 OSSL_TIME rx_time, int pkt_space, 1469 int was_missing) 1470 { 1471 OSSL_TIME tx_max_ack_delay; 1472 1473 if (ackm->rx_ack_desired[pkt_space]) 1474 /* ACK generation already requested so nothing to do. */ 1475 return; 1476 1477 ++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]; 1478 1479 if (!ackm->rx_ack_generated[pkt_space] 1480 || was_missing 1481 || ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] 1482 >= PKTS_BEFORE_ACK 1483 || ackm_has_newly_missing(ackm, pkt_space)) { 1484 /* 1485 * Either: 1486 * 1487 * - We have never yet generated an ACK frame, meaning that this 1488 * is the first ever packet received, which we should always 1489 * acknowledge immediately, or 1490 * 1491 * - We previously reported the PN that we have just received as 1492 * missing in a previous ACK frame (meaning that we should report 1493 * the fact that we now have it to the peer immediately), or 1494 * 1495 * - We have exceeded the ACK-eliciting packet threshold count 1496 * for the purposes of ACK coalescing, so request transmission 1497 * of an ACK frame, or 1498 * 1499 * - The PN we just received and added to our PN RX history 1500 * newly implies one or more missing PNs, in which case we should 1501 * inform the peer by sending an ACK frame immediately. 1502 * 1503 * We do not test the ACK flush deadline here because it is tested 1504 * separately in ossl_ackm_is_ack_desired. 1505 */ 1506 ackm_queue_ack(ackm, pkt_space); 1507 return; 1508 } 1509 1510 /* 1511 * Not emitting an ACK yet. 1512 * 1513 * Update the ACK flush deadline. 1514 * 1515 * RFC 9000 s. 13.2.1: "An endpoint MUST acknowledge all ack-eliciting 1516 * Initial and Handshake packets immediately"; don't delay ACK generation if 1517 * we are using the Initial or Handshake PN spaces. 1518 */ 1519 tx_max_ack_delay = ackm->tx_max_ack_delay; 1520 if (pkt_space == QUIC_PN_SPACE_INITIAL 1521 || pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1522 tx_max_ack_delay = ossl_time_zero(); 1523 1524 if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])) 1525 ackm_set_flush_deadline(ackm, pkt_space, 1526 ossl_time_add(rx_time, tx_max_ack_delay)); 1527 else 1528 ackm_set_flush_deadline(ackm, pkt_space, 1529 ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space], 1530 ossl_time_add(rx_time, 1531 tx_max_ack_delay))); 1532 } 1533 1534 int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt) 1535 { 1536 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt->pkt_space); 1537 int was_missing; 1538 1539 if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1) 1540 /* PN has already been processed or written off, no-op. */ 1541 return 1; 1542 1543 /* 1544 * Record the largest PN we have RX'd and the time we received it. 1545 * We use this to calculate the ACK delay field of ACK frames. 1546 */ 1547 if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) { 1548 ackm->rx_largest_pn[pkt->pkt_space] = pkt->pkt_num; 1549 ackm->rx_largest_time[pkt->pkt_space] = pkt->time; 1550 } 1551 1552 /* 1553 * If the PN we just received was previously implied missing by virtue of 1554 * being omitted from a previous ACK frame generated, we skip any packet 1555 * count thresholds or coalescing delays and emit a new ACK frame 1556 * immediately. 1557 */ 1558 was_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num); 1559 1560 /* 1561 * Add the packet number to our history list of PNs we have not yet provably 1562 * acked. 1563 */ 1564 if (rx_pkt_history_add_pn(h, pkt->pkt_num) != 1) 1565 return 0; 1566 1567 /* 1568 * Receiving this packet may or may not cause us to emit an ACK frame. 1569 * We may not emit an ACK frame yet if we have not yet received a threshold 1570 * number of packets. 1571 */ 1572 if (pkt->is_ack_eliciting) 1573 ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space, was_missing); 1574 1575 /* Update the ECN counters according to which ECN signal we got, if any. */ 1576 switch (pkt->ecn) { 1577 case OSSL_ACKM_ECN_ECT0: 1578 ++ackm->rx_ect0[pkt->pkt_space]; 1579 break; 1580 case OSSL_ACKM_ECN_ECT1: 1581 ++ackm->rx_ect1[pkt->pkt_space]; 1582 break; 1583 case OSSL_ACKM_ECN_ECNCE: 1584 ++ackm->rx_ecnce[pkt->pkt_space]; 1585 break; 1586 default: 1587 break; 1588 } 1589 1590 return 1; 1591 } 1592 1593 static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space, 1594 OSSL_QUIC_FRAME_ACK *ack) 1595 { 1596 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1597 UINT_SET_ITEM *x; 1598 size_t i = 0; 1599 1600 /* 1601 * Copy out ranges from the PN set, starting at the end, until we reach our 1602 * maximum number of ranges. 1603 */ 1604 for (x = ossl_list_uint_set_tail(&h->set); 1605 x != NULL && i < OSSL_NELEM(ackm->ack_ranges); 1606 x = ossl_list_uint_set_prev(x), ++i) { 1607 ackm->ack_ranges[pkt_space][i].start = x->range.start; 1608 ackm->ack_ranges[pkt_space][i].end = x->range.end; 1609 } 1610 1611 ack->ack_ranges = ackm->ack_ranges[pkt_space]; 1612 ack->num_ack_ranges = i; 1613 } 1614 1615 const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm, 1616 int pkt_space) 1617 { 1618 OSSL_QUIC_FRAME_ACK *ack = &ackm->ack[pkt_space]; 1619 OSSL_TIME now = ackm->now(ackm->now_arg); 1620 1621 ackm_fill_rx_ack_ranges(ackm, pkt_space, ack); 1622 1623 if (!ossl_time_is_zero(ackm->rx_largest_time[pkt_space]) 1624 && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0 1625 && pkt_space == QUIC_PN_SPACE_APP) 1626 ack->delay_time = 1627 ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]); 1628 else 1629 ack->delay_time = ossl_time_zero(); 1630 1631 ack->ect0 = ackm->rx_ect0[pkt_space]; 1632 ack->ect1 = ackm->rx_ect1[pkt_space]; 1633 ack->ecnce = ackm->rx_ecnce[pkt_space]; 1634 ack->ecn_present = 1; 1635 1636 ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0; 1637 1638 ackm->rx_ack_generated[pkt_space] = 1; 1639 ackm->rx_ack_desired[pkt_space] = 0; 1640 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1641 return ack; 1642 } 1643 1644 1645 OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space) 1646 { 1647 if (ackm->rx_ack_desired[pkt_space]) 1648 /* Already desired, deadline is now. */ 1649 return ossl_time_zero(); 1650 1651 return ackm->rx_ack_flush_deadline[pkt_space]; 1652 } 1653 1654 int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space) 1655 { 1656 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1657 1658 return pn >= h->watermark && ossl_uint_set_query(&h->set, pn) == 0; 1659 } 1660 1661 void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm, 1662 void (*fn)(OSSL_TIME deadline, 1663 void *arg), 1664 void *arg) 1665 { 1666 ackm->loss_detection_deadline_cb = fn; 1667 ackm->loss_detection_deadline_cb_arg = arg; 1668 } 1669 1670 void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm, 1671 void (*fn)(OSSL_TIME deadline, 1672 int pkt_space, 1673 void *arg), 1674 void *arg) 1675 { 1676 ackm->ack_deadline_cb = fn; 1677 ackm->ack_deadline_cb_arg = arg; 1678 } 1679 1680 int ossl_ackm_mark_packet_pseudo_lost(OSSL_ACKM *ackm, 1681 int pkt_space, QUIC_PN pn) 1682 { 1683 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt_space); 1684 OSSL_ACKM_TX_PKT *pkt; 1685 1686 pkt = tx_pkt_history_by_pkt_num(h, pn); 1687 if (pkt == NULL) 1688 return 0; 1689 1690 tx_pkt_history_remove(h, pkt->pkt_num); 1691 pkt->lnext = NULL; 1692 ackm_on_pkts_lost(ackm, pkt_space, pkt, /*pseudo=*/1); 1693 return 1; 1694 } 1695 1696 OSSL_TIME ossl_ackm_get_pto_duration(OSSL_ACKM *ackm) 1697 { 1698 OSSL_TIME duration; 1699 OSSL_RTT_INFO rtt; 1700 1701 ossl_statm_get_rtt_info(ackm->statm, &rtt); 1702 1703 duration = ossl_time_add(rtt.smoothed_rtt, 1704 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 1705 ossl_ticks2time(K_GRANULARITY))); 1706 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) 1707 duration = ossl_time_add(duration, ackm->rx_max_ack_delay); 1708 1709 return duration; 1710 } 1711 1712 QUIC_PN ossl_ackm_get_largest_acked(OSSL_ACKM *ackm, int pkt_space) 1713 { 1714 return ackm->largest_acked_pkt[pkt_space]; 1715 } 1716 1717 void ossl_ackm_set_rx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME rx_max_ack_delay) 1718 { 1719 ackm->rx_max_ack_delay = rx_max_ack_delay; 1720 } 1721 1722 void ossl_ackm_set_tx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME tx_max_ack_delay) 1723 { 1724 ackm->tx_max_ack_delay = tx_max_ack_delay; 1725 } 1726