1 /* 2 * Copyright 2022-2026 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include "internal/quic_ackm.h" 11 #include "internal/uint_set.h" 12 #include "internal/common.h" 13 #include <assert.h> 14 15 DEFINE_LIST_OF(tx_history, OSSL_ACKM_TX_PKT); 16 17 /* 18 * TX Packet History 19 * ***************** 20 * 21 * The TX Packet History object tracks information about packets which have been 22 * sent for which we later expect to receive an ACK. It is essentially a simple 23 * database keeping a list of packet information structures in packet number 24 * order which can also be looked up directly by packet number. 25 * 26 * We currently only allow packets to be appended to the list (i.e. the packet 27 * numbers of the packets appended to the list must monotonically increase), as 28 * we should not currently need more general functionality such as a sorted list 29 * insert. 30 */ 31 struct tx_pkt_history_st { 32 /* A linked list of all our packets. */ 33 OSSL_LIST(tx_history) 34 packets; 35 36 /* 37 * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *) 38 * 39 * Invariant: A packet is in this map if and only if it is in the linked 40 * list. 41 */ 42 LHASH_OF(OSSL_ACKM_TX_PKT) *map; 43 44 /* 45 * The lowest packet number which may currently be added to the history list 46 * (inclusive). We do not allow packet numbers to be added to the history 47 * list non-monotonically, so packet numbers must be greater than or equal 48 * to this value. 49 */ 50 uint64_t watermark; 51 52 /* 53 * Packet number of the highest packet info structure we have yet appended 54 * to the list. This is usually one less than watermark, except when we have 55 * not added any packet yet. 56 */ 57 uint64_t highest_sent; 58 }; 59 60 DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT); 61 62 static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt) 63 { 64 /* Using low bits of the packet number as the hash should be enough */ 65 return (unsigned long)pkt->pkt_num; 66 } 67 68 static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a, 69 const OSSL_ACKM_TX_PKT *b) 70 { 71 if (a->pkt_num < b->pkt_num) 72 return -1; 73 if (a->pkt_num > b->pkt_num) 74 return 1; 75 return 0; 76 } 77 78 static int 79 tx_pkt_history_init(struct tx_pkt_history_st *h) 80 { 81 ossl_list_tx_history_init(&h->packets); 82 h->watermark = 0; 83 h->highest_sent = 0; 84 85 h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare); 86 if (h->map == NULL) 87 return 0; 88 89 return 1; 90 } 91 92 static void 93 tx_pkt_history_destroy(struct tx_pkt_history_st *h) 94 { 95 lh_OSSL_ACKM_TX_PKT_free(h->map); 96 h->map = NULL; 97 ossl_list_tx_history_init(&h->packets); 98 } 99 100 static int 101 tx_pkt_history_add_actual(struct tx_pkt_history_st *h, 102 OSSL_ACKM_TX_PKT *pkt) 103 { 104 OSSL_ACKM_TX_PKT *existing; 105 106 /* 107 * There should not be any existing packet with this number 108 * in our mapping. 109 */ 110 existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt); 111 if (!ossl_assert(existing == NULL)) 112 return 0; 113 114 /* Should not already be in a list. */ 115 if (!ossl_assert(ossl_list_tx_history_next(pkt) == NULL 116 && ossl_list_tx_history_prev(pkt) == NULL)) 117 return 0; 118 119 lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt); 120 if (lh_OSSL_ACKM_TX_PKT_error(h->map)) 121 return 0; 122 123 ossl_list_tx_history_insert_tail(&h->packets, pkt); 124 return 1; 125 } 126 127 /* Adds a packet information structure to the history list. */ 128 static int 129 tx_pkt_history_add(struct tx_pkt_history_st *h, 130 OSSL_ACKM_TX_PKT *pkt) 131 { 132 if (!ossl_assert(pkt->pkt_num >= h->watermark)) 133 return 0; 134 135 if (tx_pkt_history_add_actual(h, pkt) < 1) 136 return 0; 137 138 h->watermark = pkt->pkt_num + 1; 139 h->highest_sent = pkt->pkt_num; 140 return 1; 141 } 142 143 /* Retrieve a packet information structure by packet number. */ 144 static OSSL_ACKM_TX_PKT * 145 tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num) 146 { 147 OSSL_ACKM_TX_PKT key; 148 149 key.pkt_num = pkt_num; 150 151 return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key); 152 } 153 154 /* Remove a packet information structure from the history log. */ 155 static int 156 tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num) 157 { 158 OSSL_ACKM_TX_PKT key, *pkt; 159 key.pkt_num = pkt_num; 160 161 pkt = tx_pkt_history_by_pkt_num(h, pkt_num); 162 if (pkt == NULL) 163 return 0; 164 165 ossl_list_tx_history_remove(&h->packets, pkt); 166 lh_OSSL_ACKM_TX_PKT_delete(h->map, &key); 167 return 1; 168 } 169 170 /* 171 * RX Packet Number Tracking 172 * ************************* 173 * 174 * **Background.** The RX side of the ACK manager must track packets we have 175 * received for which we have to generate ACK frames. Broadly, this means we 176 * store a set of packet numbers which we have received but which we do not know 177 * for a fact that the transmitter knows we have received. 178 * 179 * This must handle various situations: 180 * 181 * 1. We receive a packet but have not sent an ACK yet, so the transmitter 182 * does not know whether we have received it or not yet. 183 * 184 * 2. We receive a packet and send an ACK which is lost. We do not 185 * immediately know that the ACK was lost and the transmitter does not know 186 * that we have received the packet. 187 * 188 * 3. We receive a packet and send an ACK which is received by the 189 * transmitter. The transmitter does not immediately respond with an ACK, 190 * or responds with an ACK which is lost. The transmitter knows that we 191 * have received the packet, but we do not know for sure that it knows, 192 * because the ACK we sent could have been lost. 193 * 194 * 4. We receive a packet and send an ACK which is received by the 195 * transmitter. The transmitter subsequently sends us an ACK which confirms 196 * its receipt of the ACK we sent, and we successfully receive that ACK, so 197 * we know that the transmitter knows, that we received the original 198 * packet. 199 * 200 * Only when we reach case (4) are we relieved of any need to track a given 201 * packet number we have received, because only in this case do we know for sure 202 * that the peer knows we have received the packet. Having reached case (4) we 203 * will never again need to generate an ACK containing the PN in question, but 204 * until we reach that point, we must keep track of the PN as not having been 205 * provably ACKed, as we may have to keep generating ACKs for the given PN not 206 * just until the transmitter receives one, but until we know that it has 207 * received one. This will be referred to herein as "provably ACKed". 208 * 209 * **Duplicate handling.** The above discusses the case where we have received a 210 * packet with a given PN but are at best unsure whether the sender knows we 211 * have received it or not. However, we must also handle the case where we have 212 * yet to receive a packet with a given PN in the first place. The reason for 213 * this is because of the requirement expressed by RFC 9000 s. 12.3: 214 * 215 * "A receiver MUST discard a newly unprotected packet unless it is certain 216 * that it has not processed another packet with the same packet number from 217 * the same packet number space." 218 * 219 * We must ensure we never process a duplicate PN. As such, each possible PN we 220 * can receive must exist in one of the following logical states: 221 * 222 * - We have never processed this PN before 223 * (so if we receive such a PN, it can be processed) 224 * 225 * - We have processed this PN but it has not yet been provably ACKed 226 * (and should therefore be in any future ACK frame generated; 227 * if we receive such a PN again, it must be ignored) 228 * 229 * - We have processed this PN and it has been provably ACKed 230 * (if we receive such a PN again, it must be ignored) 231 * 232 * However, if we were to track this state for every PN ever used in the history 233 * of a connection, the amount of state required would increase unboundedly as 234 * the connection goes on (for example, we would have to store a set of every PN 235 * ever received.) 236 * 237 * RFC 9000 s. 12.3 continues: 238 * 239 * "Endpoints that track all individual packets for the purposes of detecting 240 * duplicates are at risk of accumulating excessive state. The data required 241 * for detecting duplicates can be limited by maintaining a minimum packet 242 * number below which all packets are immediately dropped." 243 * 244 * Moreover, RFC 9000 s. 13.2.3 states that: 245 * 246 * "A receiver MUST retain an ACK Range unless it can ensure that it will not 247 * subsequently accept packets with numbers in that range. Maintaining a 248 * minimum packet number that increases as ranges are discarded is one way to 249 * achieve this with minimal state." 250 * 251 * This touches on a subtlety of the original requirement quoted above: the 252 * receiver MUST discard a packet unless it is certain that it has not processed 253 * another packet with the same PN. However, this does not forbid the receiver 254 * from also discarding some PNs even though it has not yet processed them. In 255 * other words, implementations must be conservative and err in the direction of 256 * assuming a packet is a duplicate, but it is acceptable for this to come at 257 * the cost of falsely identifying some packets as duplicates. 258 * 259 * This allows us to bound the amount of state we must keep, and we adopt the 260 * suggested strategy quoted above to do so. We define a watermark PN below 261 * which all PNs are in the same state. This watermark is only ever increased. 262 * Thus the PNs the state for which needs to be explicitly tracked is limited to 263 * only a small number of recent PNs, and all older PNs have an assumed state. 264 * 265 * Any given PN thus falls into one of the following states: 266 * 267 * - (A) The PN is above the watermark but we have not yet received it. 268 * 269 * If we receive such a PN, we should process it and record the PN as 270 * received. 271 * 272 * - (B) The PN is above the watermark and we have received it. 273 * 274 * The PN should be included in any future ACK frame we generate. 275 * If we receive such a PN again, we should ignore it. 276 * 277 * - (C) The PN is below the watermark. 278 * 279 * We do not know whether a packet with the given PN was received or 280 * not. To be safe, if we receive such a packet, it is not processed. 281 * 282 * Note that state (C) corresponds to both "we have processed this PN and it has 283 * been provably ACKed" logical state and a subset of the PNs in the "we have 284 * never processed this PN before" logical state (namely all PNs which were lost 285 * and never received, but which are not recent enough to be above the 286 * watermark). The reason we can merge these states and avoid tracking states 287 * for the PNs in this state is because the provably ACKed and never-received 288 * states are functionally identical in terms of how we need to handle them: we 289 * don't need to do anything for PNs in either of these states, so we don't have 290 * to care about PNs in this state nor do we have to care about distinguishing 291 * the two states for a given PN. 292 * 293 * Note that under this scheme provably ACKed PNs are by definition always below 294 * the watermark; therefore, it follows that when a PN becomes provably ACKed, 295 * the watermark must be immediately increased to exceed it (otherwise we would 296 * keep reporting it in future ACK frames). 297 * 298 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when 299 * to advance the watermark: 300 * 301 * "When a packet containing an ACK frame is sent, the Largest Acknowledged 302 * field in that frame can be saved. When a packet containing an ACK frame is 303 * acknowledged, the receiver can stop acknowledging packets less than or 304 * equal to the Largest Acknowledged field in the sent ACK frame." 305 * 306 * This is where our scheme's false positives arise. When a packet containing an 307 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably 308 * acked, and the watermark is bumped accordingly. However, the Largest 309 * Acknowledged field does not imply that all lower PNs have been received, 310 * because there may be gaps expressed in the ranges of PNs expressed by that 311 * and previous ACK frames. Thus, some unreceived PNs may be moved below the 312 * watermark, and we may subsequently reject those PNs as possibly being 313 * duplicates even though we have not actually received those PNs. Since we bump 314 * the watermark when a PN becomes provably ACKed, it follows that an unreceived 315 * PN falls below the watermark (and thus becomes a false positive for the 316 * purposes of duplicate detection) when a higher-numbered PN becomes provably 317 * ACKed. 318 * 319 * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0, 320 * n) will no longer be processed. Although datagrams may be reordered in the 321 * network, a PN we receive can only become provably ACKed after our own 322 * subsequently generated ACK frame is sent in a future TX packet, and then we 323 * receive another RX PN acknowledging that TX packet. This means that a given RX 324 * PN can only become provably ACKed at least 1 RTT after it is received; it is 325 * unlikely that any reordered datagrams will still be "in the network" (and not 326 * lost) by this time. If this does occur for whatever reason and a late PN is 327 * received, the packet will be discarded unprocessed and the PN is simply 328 * handled as though lost (a "written off" PN). 329 * 330 * **Data structure.** Our state for the RX handling side of the ACK manager, as 331 * discussed above, mainly comprises: 332 * 333 * a) a logical set of PNs, and 334 * b) a monotonically increasing PN counter (the watermark). 335 * 336 * For (a), we define a data structure which stores a logical set of PNs, which 337 * we use to keep track of which PNs we have received but which have not yet 338 * been provably ACKed, and thus will later need to generate an ACK frame for. 339 * 340 * The correspondence with the logical states discussed above is as follows. A 341 * PN is in state (C) if it is below the watermark; otherwise it is in state (B) 342 * if it is in the logical set of PNs, and in state (A) otherwise. 343 * 344 * Note that PNs are only removed from the PN set (when they become provably 345 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs 346 * from the PN set any other way would be ambiguous as it would be 347 * indistinguishable from a PN we have not yet received and risk us processing a 348 * duplicate packet. In other words, for a given PN: 349 * 350 * - State (A) can transition to state (B) or (C) 351 * - State (B) can transition to state (C) only 352 * - State (C) is the terminal state 353 * 354 * We can query the logical set data structure for PNs which have been received 355 * but which have not been provably ACKed when we want to generate ACK frames. 356 * Since ACK frames can be lost and/or we might not know that the peer has 357 * successfully received them, we might generate multiple ACK frames covering a 358 * given PN until that PN becomes provably ACKed and we finally remove it from 359 * our set (by bumping the watermark) as no longer being our concern. 360 * 361 * The data structure used is the UINT_SET structure defined in uint_set.h, 362 * which is used as a PN set. We use the following operations of the structure: 363 * 364 * Insert Range: Used when we receive a new PN. 365 * 366 * Remove Range: Used when bumping the watermark. 367 * 368 * Query: Used to determine if a PN is in the set. 369 * 370 * **Possible duplicates.** A PN is considered a possible duplicate when either: 371 * 372 * a) its PN is already in the PN set (i.e. has already been received), or 373 * b) its PN is below the watermark (i.e. was provably ACKed or written off). 374 * 375 * A packet with a given PN is considered 'processable' when that PN is not 376 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable). 377 * 378 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes 379 * provably ACKed. This occurs when an ACK frame is received by the TX side of 380 * the ACK manager; thus, there is necessary interaction between the TX and RX 381 * sides of the ACK manager. 382 * 383 * This is implemented as follows. When a packet is queued as sent in the TX 384 * side of the ACK manager, it may optionally have a Largest Acked value set on 385 * it. The user of the ACK manager should do this if the packet being 386 * transmitted contains an ACK frame, by setting the field to the Largest Acked 387 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID. 388 * When a TX packet is eventually acknowledged which has this field set, it is 389 * used to update the state of the RX side of the ACK manager by bumping the 390 * watermark accordingly. 391 */ 392 struct rx_pkt_history_st { 393 UINT_SET set; 394 395 /* 396 * Invariant: PNs below this are not in the set. 397 * Invariant: This is monotonic and only ever increases. 398 */ 399 QUIC_PN watermark; 400 }; 401 402 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 403 QUIC_PN watermark); 404 405 static void rx_pkt_history_init(struct rx_pkt_history_st *h) 406 { 407 ossl_uint_set_init(&h->set); 408 h->watermark = 0; 409 } 410 411 static void rx_pkt_history_destroy(struct rx_pkt_history_st *h) 412 { 413 ossl_uint_set_destroy(&h->set); 414 } 415 416 /* 417 * Limit the number of ACK ranges we store to prevent resource consumption DoS 418 * attacks. 419 */ 420 #define MAX_RX_ACK_RANGES 32 421 422 static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h) 423 { 424 QUIC_PN highest = QUIC_PN_INVALID; 425 426 while (ossl_list_uint_set_num(&h->set) > MAX_RX_ACK_RANGES) { 427 UINT_RANGE r = ossl_list_uint_set_head(&h->set)->range; 428 429 highest = (highest == QUIC_PN_INVALID) 430 ? r.end 431 : ossl_quic_pn_max(highest, r.end); 432 433 ossl_uint_set_remove(&h->set, &r); 434 } 435 436 /* 437 * Bump watermark to cover all PNs we removed to avoid accidental 438 * reprocessing of packets. 439 */ 440 if (highest != QUIC_PN_INVALID) 441 rx_pkt_history_bump_watermark(h, highest + 1); 442 } 443 444 static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h, 445 QUIC_PN pn) 446 { 447 UINT_RANGE r; 448 449 r.start = pn; 450 r.end = pn; 451 452 if (pn < h->watermark) 453 return 1; /* consider this a success case */ 454 455 if (ossl_uint_set_insert(&h->set, &r) != 1) 456 return 0; 457 458 rx_pkt_history_trim_range_count(h); 459 return 1; 460 } 461 462 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h, 463 QUIC_PN watermark) 464 { 465 UINT_RANGE r; 466 467 if (watermark <= h->watermark) 468 return 1; 469 470 /* Remove existing PNs below the watermark. */ 471 r.start = 0; 472 r.end = watermark - 1; 473 if (ossl_uint_set_remove(&h->set, &r) != 1) 474 return 0; 475 476 h->watermark = watermark; 477 return 1; 478 } 479 480 /* 481 * ACK Manager Implementation 482 * ************************** 483 * Implementation of the ACK manager proper. 484 */ 485 486 /* Constants used by the ACK manager; see RFC 9002. */ 487 #define K_GRANULARITY (1 * OSSL_TIME_MS) 488 #define K_PKT_THRESHOLD 3 489 #define K_TIME_THRESHOLD_NUM 9 490 #define K_TIME_THRESHOLD_DEN 8 491 492 /* The maximum number of times we allow PTO to be doubled. */ 493 #define MAX_PTO_COUNT 16 494 495 /* Default maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */ 496 #define DEFAULT_TX_MAX_ACK_DELAY ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY) 497 498 struct ossl_ackm_st { 499 /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */ 500 struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM]; 501 502 /* Our list of received PNs which are not yet provably acked. */ 503 struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM]; 504 505 /* Polymorphic dependencies that we consume. */ 506 OSSL_TIME (*now)(void *arg); 507 void *now_arg; 508 OSSL_STATM *statm; 509 const OSSL_CC_METHOD *cc_method; 510 OSSL_CC_DATA *cc_data; 511 512 /* RFC 9002 variables. */ 513 uint32_t pto_count; 514 QUIC_PN largest_acked_pkt[QUIC_PN_SPACE_NUM]; 515 OSSL_TIME time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM]; 516 OSSL_TIME loss_time[QUIC_PN_SPACE_NUM]; 517 OSSL_TIME loss_detection_deadline; 518 519 /* Lowest PN which is still not known to be ACKed. */ 520 QUIC_PN lowest_unacked_pkt[QUIC_PN_SPACE_NUM]; 521 522 /* Time at which we got our first RTT sample, or 0. */ 523 OSSL_TIME first_rtt_sample; 524 525 /* 526 * A packet's num_bytes are added to this if it is inflight, 527 * and removed again once ack'd/lost/discarded. 528 */ 529 uint64_t bytes_in_flight; 530 531 /* 532 * A packet's num_bytes are added to this if it is both inflight and 533 * ack-eliciting, and removed again once ack'd/lost/discarded. 534 */ 535 uint64_t ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM]; 536 537 /* Count of ECN-CE events. */ 538 uint64_t peer_ecnce[QUIC_PN_SPACE_NUM]; 539 540 /* Set to 1 when the handshake is confirmed. */ 541 char handshake_confirmed; 542 543 /* Set to 1 when attached to server channel */ 544 char is_server; 545 546 /* Set to 1 when the peer has completed address validation. */ 547 char peer_completed_addr_validation; 548 549 /* Set to 1 when a PN space has been discarded. */ 550 char discarded[QUIC_PN_SPACE_NUM]; 551 552 /* Set to 1 when we think an ACK frame should be generated. */ 553 char rx_ack_desired[QUIC_PN_SPACE_NUM]; 554 555 /* Set to 1 if an ACK frame has ever been generated. */ 556 char rx_ack_generated[QUIC_PN_SPACE_NUM]; 557 558 /* Probe request counts for reporting to the user. */ 559 OSSL_ACKM_PROBE_INFO pending_probe; 560 561 /* Generated ACK frames for each PN space. */ 562 OSSL_QUIC_FRAME_ACK ack[QUIC_PN_SPACE_NUM]; 563 OSSL_QUIC_ACK_RANGE ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES]; 564 565 /* Other RX state. */ 566 /* Largest PN we have RX'd. */ 567 QUIC_PN rx_largest_pn[QUIC_PN_SPACE_NUM]; 568 569 /* Time at which the PN in rx_largest_pn was RX'd. */ 570 OSSL_TIME rx_largest_time[QUIC_PN_SPACE_NUM]; 571 572 /* 573 * ECN event counters. Each time we receive a packet with a given ECN label, 574 * the corresponding ECN counter here is incremented. 575 */ 576 uint64_t rx_ect0[QUIC_PN_SPACE_NUM]; 577 uint64_t rx_ect1[QUIC_PN_SPACE_NUM]; 578 uint64_t rx_ecnce[QUIC_PN_SPACE_NUM]; 579 580 /* 581 * Number of ACK-eliciting packets since last ACK. We use this to defer 582 * emitting ACK frames until a threshold number of ACK-eliciting packets 583 * have been received. 584 */ 585 uint32_t rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM]; 586 587 /* 588 * The ACK frame coalescing deadline at which we should flush any unsent ACK 589 * frames. 590 */ 591 OSSL_TIME rx_ack_flush_deadline[QUIC_PN_SPACE_NUM]; 592 593 /* 594 * The RX maximum ACK delay (the maximum amount of time our peer might 595 * wait to send us an ACK after receiving an ACK-eliciting packet). 596 */ 597 OSSL_TIME rx_max_ack_delay; 598 599 /* 600 * The TX maximum ACK delay (the maximum amount of time we allow ourselves 601 * to wait before generating an ACK after receiving an ACK-eliciting 602 * packet). 603 */ 604 OSSL_TIME tx_max_ack_delay; 605 606 /* Callbacks for deadline updates. */ 607 void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg); 608 void *loss_detection_deadline_cb_arg; 609 610 void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg); 611 void *ack_deadline_cb_arg; 612 }; 613 614 static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y) 615 { 616 return x < y ? x : y; 617 } 618 619 /* 620 * Get TX history for a given packet number space. Must not have been 621 * discarded. 622 */ 623 static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space) 624 { 625 assert(!ackm->discarded[pkt_space]); 626 627 return &ackm->tx_history[pkt_space]; 628 } 629 630 /* 631 * Get RX history for a given packet number space. Must not have been 632 * discarded. 633 */ 634 static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space) 635 { 636 assert(!ackm->discarded[pkt_space]); 637 638 return &ackm->rx_history[pkt_space]; 639 } 640 641 /* Does the newly-acknowledged list contain any ack-eliciting packet? */ 642 static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt) 643 { 644 for (; pkt != NULL; pkt = pkt->anext) 645 if (pkt->is_ack_eliciting) 646 return 1; 647 648 return 0; 649 } 650 651 /* Return number of ACK-eliciting bytes in flight across all PN spaces. */ 652 static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm) 653 { 654 int i; 655 uint64_t total = 0; 656 657 for (i = 0; i < QUIC_PN_SPACE_NUM; ++i) 658 total += ackm->ack_eliciting_bytes_in_flight[i]; 659 660 return total; 661 } 662 663 /* Return 1 if the range contains the given PN. */ 664 static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn) 665 { 666 return pn >= range->start && pn <= range->end; 667 } 668 669 /* 670 * Given a logical representation of an ACK frame 'ack', create a singly-linked 671 * list of the newly ACK'd frames; that is, of frames which are matched by the 672 * list of PN ranges contained in the ACK frame. The packet structures in the 673 * list returned are removed from the TX history list. Returns a pointer to the 674 * list head (or NULL) if empty. 675 */ 676 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm, 677 const OSSL_QUIC_FRAME_ACK *ack, 678 int pkt_space) 679 { 680 OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev; 681 struct tx_pkt_history_st *h; 682 size_t ridx = 0; 683 684 assert(ack->num_ack_ranges > 0); 685 686 /* 687 * Our history list is a list of packets sorted in ascending order 688 * by packet number. 689 * 690 * ack->ack_ranges is a list of packet number ranges in descending order. 691 * 692 * Walk through our history list from the end in order to efficiently detect 693 * membership in the specified ack ranges. As an optimization, we use our 694 * hashtable to try and skip to the first matching packet. This may fail if 695 * the ACK ranges given include nonexistent packets. 696 */ 697 h = get_tx_history(ackm, pkt_space); 698 699 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 700 if (pkt == NULL) 701 pkt = ossl_list_tx_history_tail(&h->packets); 702 703 for (; pkt != NULL; pkt = pprev) { 704 /* 705 * Save prev value as it will be zeroed if we remove the packet from the 706 * history list below. 707 */ 708 pprev = ossl_list_tx_history_prev(pkt); 709 710 for (;; ++ridx) { 711 if (ridx >= ack->num_ack_ranges) { 712 /* 713 * We have exhausted all ranges so stop here, even if there are 714 * more packets to look at. 715 */ 716 goto stop; 717 } 718 719 if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) { 720 /* We have matched this range. */ 721 tx_pkt_history_remove(h, pkt->pkt_num); 722 723 *fixup = pkt; 724 fixup = &pkt->anext; 725 *fixup = NULL; 726 break; 727 } else if (pkt->pkt_num > ack->ack_ranges[ridx].end) { 728 /* 729 * We have not reached this range yet in our list, so do not 730 * advance ridx. 731 */ 732 break; 733 } else { 734 /* 735 * We have moved beyond this range, so advance to the next range 736 * and try matching again. 737 */ 738 assert(pkt->pkt_num < ack->ack_ranges[ridx].start); 739 continue; 740 } 741 } 742 } 743 stop: 744 745 return acked_pkts; 746 } 747 748 /* 749 * Create a singly-linked list of newly detected-lost packets in the given 750 * packet number space. Returns the head of the list or NULL if no packets were 751 * detected lost. The packets in the list are removed from the TX history list. 752 */ 753 static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm, 754 int pkt_space) 755 { 756 OSSL_ACKM_TX_PKT *lost_pkts = NULL, **fixup = &lost_pkts, *pkt, *pnext; 757 OSSL_TIME loss_delay, lost_send_time, now; 758 OSSL_RTT_INFO rtt; 759 struct tx_pkt_history_st *h; 760 761 assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID); 762 763 ossl_statm_get_rtt_info(ackm->statm, &rtt); 764 765 ackm->loss_time[pkt_space] = ossl_time_zero(); 766 767 loss_delay = ossl_time_multiply(ossl_time_max(rtt.latest_rtt, 768 rtt.smoothed_rtt), 769 K_TIME_THRESHOLD_NUM); 770 loss_delay = ossl_time_divide(loss_delay, K_TIME_THRESHOLD_DEN); 771 772 /* Minimum time of K_GRANULARITY before packets are deemed lost. */ 773 loss_delay = ossl_time_max(loss_delay, ossl_ticks2time(K_GRANULARITY)); 774 775 /* Packets sent before this time are deemed lost. */ 776 now = ackm->now(ackm->now_arg); 777 lost_send_time = ossl_time_subtract(now, loss_delay); 778 779 h = get_tx_history(ackm, pkt_space); 780 pkt = ossl_list_tx_history_head(&h->packets); 781 782 for (; pkt != NULL; pkt = pnext) { 783 assert(pkt_space == pkt->pkt_space); 784 785 /* 786 * Save prev value as it will be zeroed if we remove the packet from the 787 * history list below. 788 */ 789 pnext = ossl_list_tx_history_next(pkt); 790 791 if (pkt->pkt_num > ackm->largest_acked_pkt[pkt_space]) 792 continue; 793 794 /* 795 * Mark packet as lost, or set time when it should be marked. 796 */ 797 if (ossl_time_compare(pkt->time, lost_send_time) <= 0 798 || ackm->largest_acked_pkt[pkt_space] 799 >= pkt->pkt_num + K_PKT_THRESHOLD) { 800 tx_pkt_history_remove(h, pkt->pkt_num); 801 802 *fixup = pkt; 803 fixup = &pkt->lnext; 804 *fixup = NULL; 805 } else { 806 if (ossl_time_is_zero(ackm->loss_time[pkt_space])) 807 ackm->loss_time[pkt_space] = ossl_time_add(pkt->time, loss_delay); 808 else 809 ackm->loss_time[pkt_space] = ossl_time_min(ackm->loss_time[pkt_space], 810 ossl_time_add(pkt->time, loss_delay)); 811 } 812 } 813 814 return lost_pkts; 815 } 816 817 static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace) 818 { 819 OSSL_TIME time = ackm->loss_time[QUIC_PN_SPACE_INITIAL]; 820 int i, space = QUIC_PN_SPACE_INITIAL; 821 822 for (i = space + 1; i < QUIC_PN_SPACE_NUM; ++i) 823 if (ossl_time_is_zero(time) 824 || ossl_time_compare(ackm->loss_time[i], time) == -1) { 825 time = ackm->loss_time[i]; 826 space = i; 827 } 828 829 *pspace = space; 830 return time; 831 } 832 833 static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space) 834 { 835 OSSL_RTT_INFO rtt; 836 OSSL_TIME duration; 837 OSSL_TIME pto_timeout = ossl_time_infinite(), t; 838 int pto_space = QUIC_PN_SPACE_INITIAL, i; 839 840 ossl_statm_get_rtt_info(ackm->statm, &rtt); 841 842 duration 843 = ossl_time_add(rtt.smoothed_rtt, 844 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 845 ossl_ticks2time(K_GRANULARITY))); 846 847 duration 848 = ossl_time_multiply(duration, 849 (uint64_t)1 << min_u32(ackm->pto_count, 850 MAX_PTO_COUNT)); 851 852 /* Anti-deadlock PTO starts from the current time. */ 853 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 854 assert(!ackm->peer_completed_addr_validation); 855 856 *space = ackm->discarded[QUIC_PN_SPACE_INITIAL] 857 ? QUIC_PN_SPACE_HANDSHAKE 858 : QUIC_PN_SPACE_INITIAL; 859 return ossl_time_add(ackm->now(ackm->now_arg), duration); 860 } 861 862 for (i = QUIC_PN_SPACE_INITIAL; i < QUIC_PN_SPACE_NUM; ++i) { 863 /* 864 * RFC 9002 section 6.2.2.1 keep probe timeout armed until 865 * handshake is confirmed (client sees HANDSHAKE_DONE message 866 * from server). 867 */ 868 if (ackm->ack_eliciting_bytes_in_flight[i] == 0 && (ackm->handshake_confirmed == 1 || ackm->is_server == 1)) 869 continue; 870 871 if (i == QUIC_PN_SPACE_APP) { 872 /* Skip application data until handshake confirmed. */ 873 if (!ackm->handshake_confirmed) 874 break; 875 876 /* Include max_ack_delay and backoff for app data. */ 877 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) { 878 uint64_t factor 879 = (uint64_t)1 << min_u32(ackm->pto_count, MAX_PTO_COUNT); 880 881 duration 882 = ossl_time_add(duration, 883 ossl_time_multiply(ackm->rx_max_ack_delay, 884 factor)); 885 } 886 } 887 888 /* 889 * Only re-arm timer if stack has sent at least one ACK eliciting frame. 890 * If stack has sent no ACK eliciting frame at given encryption level then 891 * particular timer is zero and we must not attempt to set it. Timer keeps 892 * time since epoch (Jan 1 1970) and we must not set timer to past. 893 */ 894 if (!ossl_time_is_zero(ackm->time_of_last_ack_eliciting_pkt[i])) { 895 t = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[i], duration); 896 if (ossl_time_compare(t, pto_timeout) < 0) { 897 pto_timeout = t; 898 pto_space = i; 899 } 900 } 901 } 902 903 *space = pto_space; 904 return pto_timeout; 905 } 906 907 static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm, 908 OSSL_TIME deadline) 909 { 910 ackm->loss_detection_deadline = deadline; 911 912 if (ackm->loss_detection_deadline_cb != NULL) 913 ackm->loss_detection_deadline_cb(deadline, 914 ackm->loss_detection_deadline_cb_arg); 915 } 916 917 static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm) 918 { 919 int space; 920 OSSL_TIME earliest_loss_time, timeout; 921 922 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &space); 923 if (!ossl_time_is_zero(earliest_loss_time)) { 924 /* Time threshold loss detection. */ 925 ackm_set_loss_detection_timer_actual(ackm, earliest_loss_time); 926 return 1; 927 } 928 929 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0 930 && ackm->peer_completed_addr_validation) { 931 /* 932 * Nothing to detect lost, so no timer is set. However, the client 933 * needs to arm the timer if the server might be blocked by the 934 * anti-amplification limit. 935 */ 936 ackm_set_loss_detection_timer_actual(ackm, ossl_time_zero()); 937 return 1; 938 } 939 940 timeout = ackm_get_pto_time_and_space(ackm, &space); 941 ackm_set_loss_detection_timer_actual(ackm, timeout); 942 return 1; 943 } 944 945 static int ackm_in_persistent_congestion(OSSL_ACKM *ackm, 946 const OSSL_ACKM_TX_PKT *lpkt) 947 { 948 /* TODO(QUIC FUTURE): Persistent congestion not currently implemented. */ 949 return 0; 950 } 951 952 static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space, 953 const OSSL_ACKM_TX_PKT *lpkt, int pseudo) 954 { 955 const OSSL_ACKM_TX_PKT *p, *pnext; 956 OSSL_RTT_INFO rtt; 957 QUIC_PN largest_pn_lost = 0; 958 OSSL_CC_LOSS_INFO loss_info = { 0 }; 959 uint32_t flags = 0; 960 961 for (p = lpkt; p != NULL; p = pnext) { 962 pnext = p->lnext; 963 964 if (p->is_inflight) { 965 ackm->bytes_in_flight -= p->num_bytes; 966 if (p->is_ack_eliciting) 967 ackm->ack_eliciting_bytes_in_flight[p->pkt_space] 968 -= p->num_bytes; 969 970 if (p->pkt_num > largest_pn_lost) 971 largest_pn_lost = p->pkt_num; 972 973 if (!pseudo) { 974 /* 975 * If this is pseudo-loss (e.g. during connection retry) we do not 976 * inform the CC as it is not a real loss and not reflective of 977 * network conditions. 978 */ 979 loss_info.tx_time = p->time; 980 loss_info.tx_size = p->num_bytes; 981 982 ackm->cc_method->on_data_lost(ackm->cc_data, &loss_info); 983 } 984 } 985 986 p->on_lost(p->cb_arg); 987 } 988 989 /* 990 * Persistent congestion can only be considered if we have gotten at least 991 * one RTT sample. 992 */ 993 ossl_statm_get_rtt_info(ackm->statm, &rtt); 994 if (!ossl_time_is_zero(ackm->first_rtt_sample) 995 && ackm_in_persistent_congestion(ackm, lpkt)) 996 flags |= OSSL_CC_LOST_FLAG_PERSISTENT_CONGESTION; 997 998 ackm->cc_method->on_data_lost_finished(ackm->cc_data, flags); 999 } 1000 1001 static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt) 1002 { 1003 const OSSL_ACKM_TX_PKT *anext; 1004 QUIC_PN last_pn_acked = 0; 1005 OSSL_CC_ACK_INFO ainfo = { 0 }; 1006 1007 for (; apkt != NULL; apkt = anext) { 1008 if (apkt->is_inflight) { 1009 ackm->bytes_in_flight -= apkt->num_bytes; 1010 if (apkt->is_ack_eliciting) 1011 ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space] 1012 -= apkt->num_bytes; 1013 1014 if (apkt->pkt_num > last_pn_acked) 1015 last_pn_acked = apkt->pkt_num; 1016 1017 if (apkt->largest_acked != QUIC_PN_INVALID) 1018 /* 1019 * This can fail, but it is monotonic; worst case we try again 1020 * next time. 1021 */ 1022 rx_pkt_history_bump_watermark(get_rx_history(ackm, 1023 apkt->pkt_space), 1024 apkt->largest_acked + 1); 1025 } 1026 1027 ainfo.tx_time = apkt->time; 1028 ainfo.tx_size = apkt->num_bytes; 1029 1030 anext = apkt->anext; 1031 apkt->on_acked(apkt->cb_arg); /* may free apkt */ 1032 1033 if (apkt->is_inflight) 1034 ackm->cc_method->on_data_acked(ackm->cc_data, &ainfo); 1035 } 1036 } 1037 1038 OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg), 1039 void *now_arg, 1040 OSSL_STATM *statm, 1041 const OSSL_CC_METHOD *cc_method, 1042 OSSL_CC_DATA *cc_data, 1043 int is_server) 1044 { 1045 OSSL_ACKM *ackm; 1046 int i; 1047 1048 ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM)); 1049 if (ackm == NULL) 1050 return NULL; 1051 1052 for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) { 1053 ackm->largest_acked_pkt[i] = QUIC_PN_INVALID; 1054 ackm->rx_ack_flush_deadline[i] = ossl_time_infinite(); 1055 if (tx_pkt_history_init(&ackm->tx_history[i]) < 1) 1056 goto err; 1057 } 1058 1059 for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i) 1060 rx_pkt_history_init(&ackm->rx_history[i]); 1061 1062 ackm->now = now; 1063 ackm->now_arg = now_arg; 1064 ackm->statm = statm; 1065 ackm->cc_method = cc_method; 1066 ackm->cc_data = cc_data; 1067 ackm->is_server = (char)is_server; 1068 1069 ackm->rx_max_ack_delay = ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY); 1070 ackm->tx_max_ack_delay = DEFAULT_TX_MAX_ACK_DELAY; 1071 1072 return ackm; 1073 1074 err: 1075 while (--i >= 0) 1076 tx_pkt_history_destroy(&ackm->tx_history[i]); 1077 1078 OPENSSL_free(ackm); 1079 return NULL; 1080 } 1081 1082 void ossl_ackm_free(OSSL_ACKM *ackm) 1083 { 1084 size_t i; 1085 1086 if (ackm == NULL) 1087 return; 1088 1089 for (i = 0; i < OSSL_NELEM(ackm->tx_history); ++i) 1090 if (!ackm->discarded[i]) { 1091 tx_pkt_history_destroy(&ackm->tx_history[i]); 1092 rx_pkt_history_destroy(&ackm->rx_history[i]); 1093 } 1094 1095 OPENSSL_free(ackm); 1096 } 1097 1098 int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt) 1099 { 1100 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt->pkt_space); 1101 1102 /* Time must be set and not move backwards. */ 1103 if (ossl_time_is_zero(pkt->time) 1104 || ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space], 1105 pkt->time) 1106 > 0) 1107 return 0; 1108 1109 /* Must have non-zero number of bytes. */ 1110 if (pkt->num_bytes == 0) 1111 return 0; 1112 1113 /* Does not make any sense for a non-in-flight packet to be ACK-eliciting. */ 1114 if (!pkt->is_inflight && pkt->is_ack_eliciting) 1115 return 0; 1116 1117 if (tx_pkt_history_add(h, pkt) == 0) 1118 return 0; 1119 1120 if (pkt->is_inflight) { 1121 if (pkt->is_ack_eliciting) { 1122 ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time; 1123 ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space] 1124 += pkt->num_bytes; 1125 } 1126 1127 ackm->bytes_in_flight += pkt->num_bytes; 1128 ackm_set_loss_detection_timer(ackm); 1129 1130 ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes); 1131 } 1132 1133 return 1; 1134 } 1135 1136 int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes) 1137 { 1138 /* No-op on the client. */ 1139 return 1; 1140 } 1141 1142 static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1143 int pkt_space) 1144 { 1145 struct tx_pkt_history_st *h; 1146 OSSL_ACKM_TX_PKT *pkt; 1147 OSSL_CC_ECN_INFO ecn_info = { 0 }; 1148 1149 /* 1150 * If the ECN-CE counter reported by the peer has increased, this could 1151 * be a new congestion event. 1152 */ 1153 if (ack->ecnce > ackm->peer_ecnce[pkt_space]) { 1154 ackm->peer_ecnce[pkt_space] = ack->ecnce; 1155 1156 h = get_tx_history(ackm, pkt_space); 1157 pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end); 1158 if (pkt == NULL) 1159 return; 1160 1161 ecn_info.largest_acked_time = pkt->time; 1162 ackm->cc_method->on_ecn(ackm->cc_data, &ecn_info); 1163 } 1164 } 1165 1166 int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack, 1167 int pkt_space, OSSL_TIME rx_time) 1168 { 1169 OSSL_ACKM_TX_PKT *na_pkts, *lost_pkts; 1170 int must_set_timer = 0; 1171 1172 if (ackm->largest_acked_pkt[pkt_space] == QUIC_PN_INVALID) 1173 ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end; 1174 else 1175 ackm->largest_acked_pkt[pkt_space] 1176 = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space], 1177 ack->ack_ranges[0].end); 1178 1179 /* 1180 * If we get an ACK in the handshake space, address validation is completed. 1181 * Make sure we update the timer, even if no packets were ACK'd. 1182 */ 1183 if (!ackm->peer_completed_addr_validation 1184 && pkt_space == QUIC_PN_SPACE_HANDSHAKE) { 1185 ackm->peer_completed_addr_validation = 1; 1186 must_set_timer = 1; 1187 } 1188 1189 /* 1190 * Find packets that are newly acknowledged and remove them from the list. 1191 */ 1192 na_pkts = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space); 1193 if (na_pkts == NULL) { 1194 if (must_set_timer) 1195 ackm_set_loss_detection_timer(ackm); 1196 1197 return 1; 1198 } 1199 1200 /* 1201 * Update the RTT if the largest acknowledged is newly acked and at least 1202 * one ACK-eliciting packet was newly acked. 1203 * 1204 * First packet in the list is always the one with the largest PN. 1205 */ 1206 if (na_pkts->pkt_num == ack->ack_ranges[0].end && ack_includes_ack_eliciting(na_pkts)) { 1207 OSSL_TIME now = ackm->now(ackm->now_arg), ack_delay; 1208 if (ossl_time_is_zero(ackm->first_rtt_sample)) 1209 ackm->first_rtt_sample = now; 1210 1211 /* Enforce maximum ACK delay. */ 1212 ack_delay = ack->delay_time; 1213 if (ackm->handshake_confirmed) 1214 ack_delay = ossl_time_min(ack_delay, ackm->rx_max_ack_delay); 1215 1216 ossl_statm_update_rtt(ackm->statm, ack_delay, 1217 ossl_time_subtract(now, na_pkts->time)); 1218 } 1219 1220 /* 1221 * Process ECN information if present. 1222 * 1223 * We deliberately do most ECN processing in the ACKM rather than the 1224 * congestion controller to avoid having to give the congestion controller 1225 * access to ACKM internal state. 1226 */ 1227 if (ack->ecn_present) 1228 ackm_process_ecn(ackm, ack, pkt_space); 1229 1230 /* Handle inferred loss. */ 1231 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1232 if (lost_pkts != NULL) 1233 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1234 1235 ackm_on_pkts_acked(ackm, na_pkts); 1236 1237 /* 1238 * Reset pto_count unless the client is unsure if the server validated the 1239 * client's address. 1240 */ 1241 if (ackm->peer_completed_addr_validation) 1242 ackm->pto_count = 0; 1243 1244 ackm_set_loss_detection_timer(ackm); 1245 return 1; 1246 } 1247 1248 int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space) 1249 { 1250 OSSL_ACKM_TX_PKT *pkt, *pnext; 1251 uint64_t num_bytes_invalidated = 0; 1252 1253 if (ackm->discarded[pkt_space]) 1254 return 0; 1255 1256 if (pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1257 ackm->peer_completed_addr_validation = 1; 1258 1259 for (pkt = ossl_list_tx_history_head(&get_tx_history(ackm, pkt_space)->packets); 1260 pkt != NULL; pkt = pnext) { 1261 pnext = ossl_list_tx_history_next(pkt); 1262 if (pkt->is_inflight) { 1263 ackm->bytes_in_flight -= pkt->num_bytes; 1264 num_bytes_invalidated += pkt->num_bytes; 1265 } 1266 1267 pkt->on_discarded(pkt->cb_arg); /* may free pkt */ 1268 } 1269 1270 tx_pkt_history_destroy(&ackm->tx_history[pkt_space]); 1271 rx_pkt_history_destroy(&ackm->rx_history[pkt_space]); 1272 1273 if (num_bytes_invalidated > 0) 1274 ackm->cc_method->on_data_invalidated(ackm->cc_data, 1275 num_bytes_invalidated); 1276 1277 ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero(); 1278 ackm->loss_time[pkt_space] = ossl_time_zero(); 1279 ackm->pto_count = 0; 1280 ackm->discarded[pkt_space] = 1; 1281 ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0; 1282 ackm_set_loss_detection_timer(ackm); 1283 return 1; 1284 } 1285 1286 int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm) 1287 { 1288 ackm->handshake_confirmed = 1; 1289 ackm->peer_completed_addr_validation = 1; 1290 ackm_set_loss_detection_timer(ackm); 1291 return 1; 1292 } 1293 1294 static void ackm_queue_probe_anti_deadlock_handshake(OSSL_ACKM *ackm) 1295 { 1296 ++ackm->pending_probe.anti_deadlock_handshake; 1297 } 1298 1299 static void ackm_queue_probe_anti_deadlock_initial(OSSL_ACKM *ackm) 1300 { 1301 ++ackm->pending_probe.anti_deadlock_initial; 1302 } 1303 1304 static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space) 1305 { 1306 /* 1307 * TODO(QUIC FUTURE): We are allowed to send either one or two probe 1308 * packets here. 1309 * Determine a strategy for when we should send two probe packets. 1310 */ 1311 ++ackm->pending_probe.pto[pkt_space]; 1312 } 1313 1314 int ossl_ackm_on_timeout(OSSL_ACKM *ackm) 1315 { 1316 int pkt_space; 1317 OSSL_TIME earliest_loss_time; 1318 OSSL_ACKM_TX_PKT *lost_pkts; 1319 1320 earliest_loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space); 1321 if (!ossl_time_is_zero(earliest_loss_time)) { 1322 /* Time threshold loss detection. */ 1323 lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space); 1324 if (lost_pkts != NULL) 1325 ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0); 1326 ackm_set_loss_detection_timer(ackm); 1327 return 1; 1328 } 1329 1330 if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) { 1331 assert(!ackm->peer_completed_addr_validation); 1332 /* 1333 * Client sends an anti-deadlock packet: Initial is padded to earn more 1334 * anti-amplification credit. A handshake packet proves address 1335 * ownership. 1336 */ 1337 if (ackm->discarded[QUIC_PN_SPACE_INITIAL]) 1338 ackm_queue_probe_anti_deadlock_handshake(ackm); 1339 else 1340 ackm_queue_probe_anti_deadlock_initial(ackm); 1341 } else { 1342 /* 1343 * PTO. The user of the ACKM should send new data if available, else 1344 * retransmit old data, or if neither is available, send a single PING 1345 * frame. 1346 */ 1347 ackm_get_pto_time_and_space(ackm, &pkt_space); 1348 ackm_queue_probe(ackm, pkt_space); 1349 } 1350 1351 ++ackm->pto_count; 1352 ackm_set_loss_detection_timer(ackm); 1353 return 1; 1354 } 1355 1356 OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm) 1357 { 1358 return ackm->loss_detection_deadline; 1359 } 1360 1361 OSSL_ACKM_PROBE_INFO *ossl_ackm_get0_probe_request(OSSL_ACKM *ackm) 1362 { 1363 return &ackm->pending_probe; 1364 } 1365 1366 int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn) 1367 { 1368 struct tx_pkt_history_st *h; 1369 OSSL_ACKM_TX_PKT *p; 1370 1371 h = get_tx_history(ackm, pkt_space); 1372 p = ossl_list_tx_history_tail(&h->packets); 1373 if (p != NULL) { 1374 *pn = p->pkt_num; 1375 return 1; 1376 } 1377 1378 return 0; 1379 } 1380 1381 /* Number of ACK-eliciting packets RX'd before we always emit an ACK. */ 1382 #define PKTS_BEFORE_ACK 2 1383 1384 /* 1385 * Return 1 if emission of an ACK frame is currently desired. 1386 * 1387 * This occurs when one or more of the following conditions occurs: 1388 * 1389 * - We have flagged that we want to send an ACK frame 1390 * (for example, due to the packet threshold count being exceeded), or 1391 * 1392 * - We have exceeded the ACK flush deadline, meaning that 1393 * we have received at least one ACK-eliciting packet, but held off on 1394 * sending an ACK frame immediately in the hope that more ACK-eliciting 1395 * packets might come in, but not enough did and we are now requesting 1396 * transmission of an ACK frame anyway. 1397 * 1398 */ 1399 int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space) 1400 { 1401 return ackm->rx_ack_desired[pkt_space] 1402 || (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]) 1403 && ossl_time_compare(ackm->now(ackm->now_arg), 1404 ackm->rx_ack_flush_deadline[pkt_space]) 1405 >= 0); 1406 } 1407 1408 /* 1409 * Returns 1 if an ACK frame matches a given packet number. 1410 */ 1411 static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num) 1412 { 1413 size_t i; 1414 1415 for (i = 0; i < ack->num_ack_ranges; ++i) 1416 if (range_contains(&ack->ack_ranges[i], pkt_num)) 1417 return 1; 1418 1419 return 0; 1420 } 1421 1422 /* 1423 * Returns 1 iff a PN (which we have just received) was previously reported as 1424 * implied missing (by us, in an ACK frame we previously generated). 1425 */ 1426 static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num) 1427 { 1428 /* 1429 * A PN is implied missing if it is not greater than the highest PN in our 1430 * generated ACK frame, but is not matched by the frame. 1431 */ 1432 return ackm->ack[pkt_space].num_ack_ranges > 0 1433 && pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end 1434 && !ack_contains(&ackm->ack[pkt_space], pkt_num); 1435 } 1436 1437 /* 1438 * Returns 1 iff our RX of a PN newly establishes the implication of missing 1439 * packets. 1440 */ 1441 static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space) 1442 { 1443 struct rx_pkt_history_st *h; 1444 1445 h = get_rx_history(ackm, pkt_space); 1446 1447 if (ossl_list_uint_set_is_empty(&h->set)) 1448 return 0; 1449 1450 /* 1451 * The second condition here establishes that the highest PN range in our RX 1452 * history comprises only a single PN. If there is more than one, then this 1453 * function will have returned 1 during a previous call to 1454 * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus 1455 * we only return 1 when the missing PN condition is newly established. 1456 * 1457 * The third condition here establishes that the highest PN range in our RX 1458 * history is beyond (and does not border) the highest PN we have yet 1459 * reported in any ACK frame. Thus there is a gap of at least one PN between 1460 * the PNs we have ACK'd previously and the PN we have just received. 1461 */ 1462 return ackm->ack[pkt_space].num_ack_ranges > 0 1463 && ossl_list_uint_set_tail(&h->set)->range.start 1464 == ossl_list_uint_set_tail(&h->set)->range.end 1465 && ossl_list_uint_set_tail(&h->set)->range.start 1466 > ackm->ack[pkt_space].ack_ranges[0].end + 1; 1467 } 1468 1469 static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space, 1470 OSSL_TIME deadline) 1471 { 1472 ackm->rx_ack_flush_deadline[pkt_space] = deadline; 1473 1474 if (ackm->ack_deadline_cb != NULL) 1475 ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space), 1476 pkt_space, ackm->ack_deadline_cb_arg); 1477 } 1478 1479 /* Explicitly flags that we want to generate an ACK frame. */ 1480 static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space) 1481 { 1482 ackm->rx_ack_desired[pkt_space] = 1; 1483 1484 /* Cancel deadline. */ 1485 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1486 } 1487 1488 static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm, 1489 OSSL_TIME rx_time, int pkt_space, 1490 int was_missing) 1491 { 1492 OSSL_TIME tx_max_ack_delay; 1493 1494 if (ackm->rx_ack_desired[pkt_space]) 1495 /* ACK generation already requested so nothing to do. */ 1496 return; 1497 1498 ++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]; 1499 1500 if (!ackm->rx_ack_generated[pkt_space] 1501 || was_missing 1502 || ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] 1503 >= PKTS_BEFORE_ACK 1504 || ackm_has_newly_missing(ackm, pkt_space)) { 1505 /* 1506 * Either: 1507 * 1508 * - We have never yet generated an ACK frame, meaning that this 1509 * is the first ever packet received, which we should always 1510 * acknowledge immediately, or 1511 * 1512 * - We previously reported the PN that we have just received as 1513 * missing in a previous ACK frame (meaning that we should report 1514 * the fact that we now have it to the peer immediately), or 1515 * 1516 * - We have exceeded the ACK-eliciting packet threshold count 1517 * for the purposes of ACK coalescing, so request transmission 1518 * of an ACK frame, or 1519 * 1520 * - The PN we just received and added to our PN RX history 1521 * newly implies one or more missing PNs, in which case we should 1522 * inform the peer by sending an ACK frame immediately. 1523 * 1524 * We do not test the ACK flush deadline here because it is tested 1525 * separately in ossl_ackm_is_ack_desired. 1526 */ 1527 ackm_queue_ack(ackm, pkt_space); 1528 return; 1529 } 1530 1531 /* 1532 * Not emitting an ACK yet. 1533 * 1534 * Update the ACK flush deadline. 1535 * 1536 * RFC 9000 s. 13.2.1: "An endpoint MUST acknowledge all ack-eliciting 1537 * Initial and Handshake packets immediately"; don't delay ACK generation if 1538 * we are using the Initial or Handshake PN spaces. 1539 */ 1540 tx_max_ack_delay = ackm->tx_max_ack_delay; 1541 if (pkt_space == QUIC_PN_SPACE_INITIAL 1542 || pkt_space == QUIC_PN_SPACE_HANDSHAKE) 1543 tx_max_ack_delay = ossl_time_zero(); 1544 1545 if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])) 1546 ackm_set_flush_deadline(ackm, pkt_space, 1547 ossl_time_add(rx_time, tx_max_ack_delay)); 1548 else 1549 ackm_set_flush_deadline(ackm, pkt_space, 1550 ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space], 1551 ossl_time_add(rx_time, 1552 tx_max_ack_delay))); 1553 } 1554 1555 int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt) 1556 { 1557 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt->pkt_space); 1558 int was_missing; 1559 1560 if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1) 1561 /* PN has already been processed or written off, no-op. */ 1562 return 1; 1563 1564 /* 1565 * Record the largest PN we have RX'd and the time we received it. 1566 * We use this to calculate the ACK delay field of ACK frames. 1567 */ 1568 if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) { 1569 ackm->rx_largest_pn[pkt->pkt_space] = pkt->pkt_num; 1570 ackm->rx_largest_time[pkt->pkt_space] = pkt->time; 1571 } 1572 1573 /* 1574 * If the PN we just received was previously implied missing by virtue of 1575 * being omitted from a previous ACK frame generated, we skip any packet 1576 * count thresholds or coalescing delays and emit a new ACK frame 1577 * immediately. 1578 */ 1579 was_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num); 1580 1581 /* 1582 * Add the packet number to our history list of PNs we have not yet provably 1583 * acked. 1584 */ 1585 if (rx_pkt_history_add_pn(h, pkt->pkt_num) != 1) 1586 return 0; 1587 1588 /* 1589 * Receiving this packet may or may not cause us to emit an ACK frame. 1590 * We may not emit an ACK frame yet if we have not yet received a threshold 1591 * number of packets. 1592 */ 1593 if (pkt->is_ack_eliciting) 1594 ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space, was_missing); 1595 1596 /* Update the ECN counters according to which ECN signal we got, if any. */ 1597 switch (pkt->ecn) { 1598 case OSSL_ACKM_ECN_ECT0: 1599 ++ackm->rx_ect0[pkt->pkt_space]; 1600 break; 1601 case OSSL_ACKM_ECN_ECT1: 1602 ++ackm->rx_ect1[pkt->pkt_space]; 1603 break; 1604 case OSSL_ACKM_ECN_ECNCE: 1605 ++ackm->rx_ecnce[pkt->pkt_space]; 1606 break; 1607 default: 1608 break; 1609 } 1610 1611 return 1; 1612 } 1613 1614 static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space, 1615 OSSL_QUIC_FRAME_ACK *ack) 1616 { 1617 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1618 UINT_SET_ITEM *x; 1619 size_t i = 0; 1620 1621 /* 1622 * Copy out ranges from the PN set, starting at the end, until we reach our 1623 * maximum number of ranges. 1624 */ 1625 for (x = ossl_list_uint_set_tail(&h->set); 1626 x != NULL && i < OSSL_NELEM(ackm->ack_ranges); 1627 x = ossl_list_uint_set_prev(x), ++i) { 1628 ackm->ack_ranges[pkt_space][i].start = x->range.start; 1629 ackm->ack_ranges[pkt_space][i].end = x->range.end; 1630 } 1631 1632 ack->ack_ranges = ackm->ack_ranges[pkt_space]; 1633 ack->num_ack_ranges = i; 1634 } 1635 1636 const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm, 1637 int pkt_space) 1638 { 1639 OSSL_QUIC_FRAME_ACK *ack = &ackm->ack[pkt_space]; 1640 OSSL_TIME now = ackm->now(ackm->now_arg); 1641 1642 ackm_fill_rx_ack_ranges(ackm, pkt_space, ack); 1643 1644 if (!ossl_time_is_zero(ackm->rx_largest_time[pkt_space]) 1645 && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0 1646 && pkt_space == QUIC_PN_SPACE_APP) 1647 ack->delay_time = ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]); 1648 else 1649 ack->delay_time = ossl_time_zero(); 1650 1651 ack->ect0 = ackm->rx_ect0[pkt_space]; 1652 ack->ect1 = ackm->rx_ect1[pkt_space]; 1653 ack->ecnce = ackm->rx_ecnce[pkt_space]; 1654 ack->ecn_present = 1; 1655 1656 ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0; 1657 1658 ackm->rx_ack_generated[pkt_space] = 1; 1659 ackm->rx_ack_desired[pkt_space] = 0; 1660 ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite()); 1661 return ack; 1662 } 1663 1664 OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space) 1665 { 1666 if (ackm->rx_ack_desired[pkt_space]) 1667 /* Already desired, deadline is now. */ 1668 return ossl_time_zero(); 1669 1670 return ackm->rx_ack_flush_deadline[pkt_space]; 1671 } 1672 1673 int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space) 1674 { 1675 struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space); 1676 1677 return pn >= h->watermark && ossl_uint_set_query(&h->set, pn) == 0; 1678 } 1679 1680 void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm, 1681 void (*fn)(OSSL_TIME deadline, 1682 void *arg), 1683 void *arg) 1684 { 1685 ackm->loss_detection_deadline_cb = fn; 1686 ackm->loss_detection_deadline_cb_arg = arg; 1687 } 1688 1689 void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm, 1690 void (*fn)(OSSL_TIME deadline, 1691 int pkt_space, 1692 void *arg), 1693 void *arg) 1694 { 1695 ackm->ack_deadline_cb = fn; 1696 ackm->ack_deadline_cb_arg = arg; 1697 } 1698 1699 int ossl_ackm_mark_packet_pseudo_lost(OSSL_ACKM *ackm, 1700 int pkt_space, QUIC_PN pn) 1701 { 1702 struct tx_pkt_history_st *h = get_tx_history(ackm, pkt_space); 1703 OSSL_ACKM_TX_PKT *pkt; 1704 1705 pkt = tx_pkt_history_by_pkt_num(h, pn); 1706 if (pkt == NULL) 1707 return 0; 1708 1709 tx_pkt_history_remove(h, pkt->pkt_num); 1710 pkt->lnext = NULL; 1711 ackm_on_pkts_lost(ackm, pkt_space, pkt, /*pseudo=*/1); 1712 return 1; 1713 } 1714 1715 OSSL_TIME ossl_ackm_get_pto_duration(OSSL_ACKM *ackm) 1716 { 1717 OSSL_TIME duration; 1718 OSSL_RTT_INFO rtt; 1719 1720 ossl_statm_get_rtt_info(ackm->statm, &rtt); 1721 1722 duration = ossl_time_add(rtt.smoothed_rtt, 1723 ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4), 1724 ossl_ticks2time(K_GRANULARITY))); 1725 if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) 1726 duration = ossl_time_add(duration, ackm->rx_max_ack_delay); 1727 1728 return duration; 1729 } 1730 1731 QUIC_PN ossl_ackm_get_largest_acked(OSSL_ACKM *ackm, int pkt_space) 1732 { 1733 return ackm->largest_acked_pkt[pkt_space]; 1734 } 1735 1736 void ossl_ackm_set_rx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME rx_max_ack_delay) 1737 { 1738 ackm->rx_max_ack_delay = rx_max_ack_delay; 1739 } 1740 1741 void ossl_ackm_set_tx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME tx_max_ack_delay) 1742 { 1743 ackm->tx_max_ack_delay = tx_max_ack_delay; 1744 } 1745