1 /* 2 * Copyright 2022-2026 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 #include "internal/quic_reactor.h" 10 #include "internal/common.h" 11 #include "internal/thread_arch.h" 12 #include <assert.h> 13 14 #if defined(OPENSSL_SYS_WINDOWS) 15 #include <winsock2.h> 16 #include <mstcpip.h> 17 #include <mswsock.h> 18 #endif 19 20 /* 21 * Core I/O Reactor Framework 22 * ========================== 23 */ 24 static void rtor_notify_other_threads(QUIC_REACTOR *rtor); 25 26 int ossl_quic_reactor_init(QUIC_REACTOR *rtor, 27 void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg, 28 uint32_t flags), 29 void *tick_cb_arg, 30 CRYPTO_MUTEX *mutex, 31 OSSL_TIME initial_tick_deadline, 32 uint64_t flags) 33 { 34 rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 35 rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 36 rtor->net_read_desired = 0; 37 rtor->net_write_desired = 0; 38 rtor->can_poll_r = 0; 39 rtor->can_poll_w = 0; 40 rtor->tick_deadline = initial_tick_deadline; 41 42 rtor->tick_cb = tick_cb; 43 rtor->tick_cb_arg = tick_cb_arg; 44 rtor->mutex = mutex; 45 46 rtor->cur_blocking_waiters = 0; 47 48 if ((flags & QUIC_REACTOR_FLAG_USE_NOTIFIER) != 0) { 49 if (!ossl_rio_notifier_init(&rtor->notifier)) 50 return 0; 51 52 if ((rtor->notifier_cv = ossl_crypto_condvar_new()) == NULL) { 53 ossl_rio_notifier_cleanup(&rtor->notifier); 54 return 0; 55 } 56 57 rtor->have_notifier = 1; 58 } else { 59 rtor->have_notifier = 0; 60 } 61 62 return 1; 63 } 64 65 void ossl_quic_reactor_cleanup(QUIC_REACTOR *rtor) 66 { 67 if (rtor == NULL) 68 return; 69 70 if (rtor->have_notifier) { 71 ossl_rio_notifier_cleanup(&rtor->notifier); 72 rtor->have_notifier = 0; 73 74 ossl_crypto_condvar_free(&rtor->notifier_cv); 75 } 76 } 77 78 #if defined(OPENSSL_SYS_WINDOWS) 79 80 /* Work around for MinGW builds. */ 81 #if defined(__MINGW32__) && !defined(SIO_UDP_NETRESET) 82 #define SIO_UDP_NETRESET _WSAIOW(IOC_VENDOR, 15) 83 #endif 84 85 /* 86 * On Windows recvfrom() may return WSAECONNRESET when destination port 87 * used in preceding call to sendto() is no longer reachable. The reset 88 * error received from UDP socket takes the whole port down. This behavior 89 * must be suppressed for QUIC protocol so QUIC applications may rely on 90 * QUIC protocol itself to detect network failures. 91 */ 92 static void rtor_configure_winsock(BIO_POLL_DESCRIPTOR *bpd) 93 { 94 BOOL bNewBehavior = FALSE; 95 DWORD dwBytesReturned = 0; 96 97 if (bpd->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD) { 98 WSAIoctl(bpd->value.fd, SIO_UDP_CONNRESET, &bNewBehavior, 99 sizeof(bNewBehavior), NULL, 0, &dwBytesReturned, NULL, NULL); 100 WSAIoctl(bpd->value.fd, SIO_UDP_NETRESET, &bNewBehavior, 101 sizeof(bNewBehavior), NULL, 0, &dwBytesReturned, NULL, NULL); 102 } 103 } 104 #endif 105 106 void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r) 107 { 108 if (r == NULL) 109 rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 110 else 111 rtor->poll_r = *r; 112 113 #if defined(OPENSSL_SYS_WINDOWS) 114 rtor_configure_winsock(&rtor->poll_r); 115 #endif 116 117 rtor->can_poll_r 118 = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_r); 119 } 120 121 void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w) 122 { 123 if (w == NULL) 124 rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 125 else 126 rtor->poll_w = *w; 127 128 #if defined(OPENSSL_SYS_WINDOWS) 129 rtor_configure_winsock(&rtor->poll_w); 130 #endif 131 132 rtor->can_poll_w 133 = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_w); 134 } 135 136 const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(const QUIC_REACTOR *rtor) 137 { 138 return &rtor->poll_r; 139 } 140 141 const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(const QUIC_REACTOR *rtor) 142 { 143 return &rtor->poll_w; 144 } 145 146 int ossl_quic_reactor_can_support_poll_descriptor(const QUIC_REACTOR *rtor, 147 const BIO_POLL_DESCRIPTOR *d) 148 { 149 return d->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD; 150 } 151 152 int ossl_quic_reactor_can_poll_r(const QUIC_REACTOR *rtor) 153 { 154 return rtor->can_poll_r; 155 } 156 157 int ossl_quic_reactor_can_poll_w(const QUIC_REACTOR *rtor) 158 { 159 return rtor->can_poll_w; 160 } 161 162 int ossl_quic_reactor_net_read_desired(QUIC_REACTOR *rtor) 163 { 164 return rtor->net_read_desired; 165 } 166 167 int ossl_quic_reactor_net_write_desired(QUIC_REACTOR *rtor) 168 { 169 return rtor->net_write_desired; 170 } 171 172 OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor) 173 { 174 return rtor->tick_deadline; 175 } 176 177 int ossl_quic_reactor_tick(QUIC_REACTOR *rtor, uint32_t flags) 178 { 179 QUIC_TICK_RESULT res = { 0 }; 180 181 /* 182 * Note that the tick callback cannot fail; this is intentional. Arguably it 183 * does not make that much sense for ticking to 'fail' (in the sense of an 184 * explicit error indicated to the user) because ticking is by its nature 185 * best effort. If something fatal happens with a connection we can report 186 * it on the next actual application I/O call. 187 */ 188 rtor->tick_cb(&res, rtor->tick_cb_arg, flags); 189 190 rtor->net_read_desired = res.net_read_desired; 191 rtor->net_write_desired = res.net_write_desired; 192 rtor->tick_deadline = res.tick_deadline; 193 if (res.notify_other_threads) 194 rtor_notify_other_threads(rtor); 195 196 return 1; 197 } 198 199 RIO_NOTIFIER *ossl_quic_reactor_get0_notifier(QUIC_REACTOR *rtor) 200 { 201 return rtor->have_notifier ? &rtor->notifier : NULL; 202 } 203 204 /* 205 * Blocking I/O Adaptation Layer 206 * ============================= 207 */ 208 209 /* 210 * Utility which can be used to poll on up to two FDs. This is designed to 211 * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where 212 * different FDs are used for read and write). 213 * 214 * Generally use of poll(2) is preferred where available. Windows, however, 215 * hasn't traditionally offered poll(2), only select(2). WSAPoll() was 216 * introduced in Vista but has seemingly been buggy until relatively recent 217 * versions of Windows 10. Moreover we support XP so this is not a suitable 218 * target anyway. However, the traditional issues with select(2) turn out not to 219 * be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of 220 * FDs (and thus is limited in the magnitude of the FDs expressible), Windows 221 * select(2) is very different. In Windows, socket handles are not allocated 222 * contiguously from zero and thus this bitmap approach was infeasible. Thus in 223 * adapting the Berkeley sockets API to Windows a different approach was taken 224 * whereby the fd_set contains a fixed length array of socket handles and an 225 * integer indicating how many entries are valid; thus Windows select() 226 * ironically is actually much more like *NIX poll(2) than *NIX select(2). In 227 * any case, this means that the relevant limit for Windows select() is the 228 * number of FDs being polled, not the magnitude of those FDs. Since we only 229 * poll for two FDs here, this limit does not concern us. 230 * 231 * Usage: rfd and wfd may be the same or different. Either or both may also be 232 * -1. If rfd_want_read is 1, rfd is polled for readability, and if 233 * wfd_want_write is 1, wfd is polled for writability. Note that since any 234 * passed FD is always polled for error conditions, setting rfd_want_read=0 and 235 * wfd_want_write=0 is not the same as passing -1 for both FDs. 236 * 237 * deadline is a timestamp to return at. If it is ossl_time_infinite(), the call 238 * never times out. 239 * 240 * Returns 0 on error and 1 on success. Timeout expiry is considered a success 241 * condition. We don't elaborate our return values here because the way we are 242 * actually using this doesn't currently care. 243 * 244 * If mutex is non-NULL, it is assumed to be held for write and is unlocked for 245 * the duration of the call. 246 * 247 * Precondition: mutex is NULL or is held for write (unchecked) 248 * Postcondition: mutex is NULL or is held for write (unless 249 * CRYPTO_THREAD_write_lock fails) 250 */ 251 static int poll_two_fds(int rfd, int rfd_want_read, 252 int wfd, int wfd_want_write, 253 int notify_rfd, 254 OSSL_TIME deadline, 255 CRYPTO_MUTEX *mutex) 256 { 257 #if defined(OPENSSL_SYS_WINDOWS) || !defined(POLLIN) 258 fd_set rfd_set, wfd_set, efd_set; 259 OSSL_TIME now, timeout; 260 struct timeval tv, *ptv; 261 int maxfd, pres; 262 263 #ifndef OPENSSL_SYS_WINDOWS 264 /* 265 * On Windows there is no relevant limit to the magnitude of a fd value (see 266 * above). On *NIX the fd_set uses a bitmap and we must check the limit. 267 */ 268 if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE) 269 return 0; 270 #endif 271 272 FD_ZERO(&rfd_set); 273 FD_ZERO(&wfd_set); 274 FD_ZERO(&efd_set); 275 276 if (rfd != INVALID_SOCKET && rfd_want_read) 277 openssl_fdset(rfd, &rfd_set); 278 if (wfd != INVALID_SOCKET && wfd_want_write) 279 openssl_fdset(wfd, &wfd_set); 280 281 /* Always check for error conditions. */ 282 if (rfd != INVALID_SOCKET) 283 openssl_fdset(rfd, &efd_set); 284 if (wfd != INVALID_SOCKET) 285 openssl_fdset(wfd, &efd_set); 286 287 /* Check for notifier FD readability. */ 288 if (notify_rfd != INVALID_SOCKET) { 289 openssl_fdset(notify_rfd, &rfd_set); 290 openssl_fdset(notify_rfd, &efd_set); 291 } 292 293 maxfd = rfd; 294 if (wfd > maxfd) 295 maxfd = wfd; 296 if (notify_rfd > maxfd) 297 maxfd = notify_rfd; 298 299 if (!ossl_assert(rfd != INVALID_SOCKET || wfd != INVALID_SOCKET 300 || !ossl_time_is_infinite(deadline))) 301 /* Do not block forever; should not happen. */ 302 return 0; 303 304 /* 305 * The mutex dance (unlock/re-locak after poll/seclect) is 306 * potentially problematic. This may create a situation when 307 * two threads arrive to select/poll with the same file 308 * descriptors. We just need to be aware of this. 309 */ 310 #if defined(OPENSSL_THREADS) 311 if (mutex != NULL) 312 ossl_crypto_mutex_unlock(mutex); 313 #endif 314 315 do { 316 /* 317 * select expects a timeout, not a deadline, so do the conversion. 318 * Update for each call to ensure the correct value is used if we repeat 319 * due to EINTR. 320 */ 321 if (ossl_time_is_infinite(deadline)) { 322 ptv = NULL; 323 } else { 324 now = ossl_time_now(); 325 /* 326 * ossl_time_subtract saturates to zero so we don't need to check if 327 * now > deadline. 328 */ 329 timeout = ossl_time_subtract(deadline, now); 330 tv = ossl_time_to_timeval(timeout); 331 ptv = &tv; 332 } 333 334 pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv); 335 } while (pres == -1 && get_last_socket_error_is_eintr()); 336 337 #if defined(OPENSSL_THREADS) 338 if (mutex != NULL) 339 ossl_crypto_mutex_lock(mutex); 340 #endif 341 342 return pres < 0 ? 0 : 1; 343 #else 344 int pres, timeout_ms; 345 OSSL_TIME now, timeout; 346 struct pollfd pfds[3] = { 0 }; 347 size_t npfd = 0; 348 349 if (rfd == wfd) { 350 pfds[npfd].fd = rfd; 351 pfds[npfd].events = (rfd_want_read ? POLLIN : 0) 352 | (wfd_want_write ? POLLOUT : 0); 353 if (rfd >= 0 && pfds[npfd].events != 0) 354 ++npfd; 355 } else { 356 pfds[npfd].fd = rfd; 357 pfds[npfd].events = (rfd_want_read ? POLLIN : 0); 358 if (rfd >= 0 && pfds[npfd].events != 0) 359 ++npfd; 360 361 pfds[npfd].fd = wfd; 362 pfds[npfd].events = (wfd_want_write ? POLLOUT : 0); 363 if (wfd >= 0 && pfds[npfd].events != 0) 364 ++npfd; 365 } 366 367 if (notify_rfd >= 0) { 368 pfds[npfd].fd = notify_rfd; 369 pfds[npfd].events = POLLIN; 370 ++npfd; 371 } 372 373 if (!ossl_assert(npfd != 0 || !ossl_time_is_infinite(deadline))) 374 /* Do not block forever; should not happen. */ 375 return 0; 376 377 #if defined(OPENSSL_THREADS) 378 if (mutex != NULL) 379 ossl_crypto_mutex_unlock(mutex); 380 #endif 381 382 do { 383 if (ossl_time_is_infinite(deadline)) { 384 timeout_ms = -1; 385 } else { 386 now = ossl_time_now(); 387 timeout = ossl_time_subtract(deadline, now); 388 timeout_ms = ossl_time2ms(timeout); 389 } 390 391 pres = poll(pfds, npfd, timeout_ms); 392 } while (pres == -1 && get_last_socket_error_is_eintr()); 393 394 #if defined(OPENSSL_THREADS) 395 if (mutex != NULL) 396 ossl_crypto_mutex_lock(mutex); 397 #endif 398 399 return pres < 0 ? 0 : 1; 400 #endif 401 } 402 403 static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd) 404 { 405 if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) { 406 *fd = INVALID_SOCKET; 407 return 1; 408 } 409 410 if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD 411 || d->value.fd == INVALID_SOCKET) 412 return 0; 413 414 *fd = d->value.fd; 415 return 1; 416 } 417 418 /* 419 * Poll up to two abstract poll descriptors, as well as an optional notify FD. 420 * Currently we only support poll descriptors which represent FDs. 421 * 422 * If mutex is non-NULL, it is assumed be a lock currently held for write and is 423 * unlocked for the duration of any wait. 424 * 425 * Precondition: mutex is NULL or is held for write (unchecked) 426 * Postcondition: mutex is NULL or is held for write (unless 427 * CRYPTO_THREAD_write_lock fails) 428 */ 429 static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read, 430 const BIO_POLL_DESCRIPTOR *w, int w_want_write, 431 int notify_rfd, 432 OSSL_TIME deadline, 433 CRYPTO_MUTEX *mutex) 434 { 435 int rfd, wfd; 436 437 if (!poll_descriptor_to_fd(r, &rfd) 438 || !poll_descriptor_to_fd(w, &wfd)) 439 return 0; 440 441 return poll_two_fds(rfd, r_want_read, wfd, w_want_write, 442 notify_rfd, deadline, mutex); 443 } 444 445 /* 446 * Notify other threads currently blocking in 447 * ossl_quic_reactor_block_until_pred() calls that a predicate they are using 448 * might now be met due to state changes. 449 * 450 * This function must be called after state changes which might cause a 451 * predicate in another thread to now be met (i.e., ticking). It is a no-op if 452 * inter-thread notification is not being used. 453 * 454 * The reactor mutex must be held while calling this function. 455 */ 456 static void rtor_notify_other_threads(QUIC_REACTOR *rtor) 457 { 458 if (!rtor->have_notifier) 459 return; 460 461 /* 462 * This function is called when we have done anything on this thread which 463 * might allow a predicate for a block_until_pred call on another thread to 464 * now be met. 465 * 466 * When this happens, we need to wake those threads using the notifier. 467 * However, we do not want to wake *this* thread (if/when it subsequently 468 * enters block_until_pred) due to the notifier FD becoming readable. 469 * Therefore, signal the notifier, and use a CV to detect when all other 470 * threads have woken. 471 */ 472 473 if (rtor->cur_blocking_waiters == 0) 474 /* Nothing to do in this case. */ 475 return; 476 477 /* Signal the notifier to wake up all threads. */ 478 if (!rtor->signalled_notifier) { 479 ossl_rio_notifier_signal(&rtor->notifier); 480 rtor->signalled_notifier = 1; 481 } 482 483 /* 484 * Wait on the CV until all threads have finished the first phase of the 485 * wakeup process and the last thread out has taken responsibility for 486 * unsignalling the notifier. 487 */ 488 while (rtor->signalled_notifier) 489 ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex); 490 } 491 492 /* 493 * Block until a predicate function evaluates to true. 494 * 495 * If mutex is non-NULL, it is assumed be a lock currently held for write and is 496 * unlocked for the duration of any wait. 497 * 498 * Precondition: Must hold channel write lock (unchecked) 499 * Precondition: mutex is NULL or is held for write (unchecked) 500 * Postcondition: mutex is NULL or is held for write (unless 501 * CRYPTO_THREAD_write_lock fails) 502 */ 503 int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor, 504 int (*pred)(void *arg), void *pred_arg, 505 uint32_t flags) 506 { 507 int res, net_read_desired, net_write_desired, notifier_fd; 508 OSSL_TIME tick_deadline; 509 510 notifier_fd 511 = (rtor->have_notifier ? ossl_rio_notifier_as_fd(&rtor->notifier) 512 : INVALID_SOCKET); 513 514 for (;;) { 515 if ((flags & SKIP_FIRST_TICK) != 0) 516 flags &= ~SKIP_FIRST_TICK; 517 else 518 /* best effort */ 519 ossl_quic_reactor_tick(rtor, 0); 520 521 if ((res = pred(pred_arg)) != 0) 522 return res; 523 524 net_read_desired = ossl_quic_reactor_net_read_desired(rtor); 525 net_write_desired = ossl_quic_reactor_net_write_desired(rtor); 526 tick_deadline = ossl_quic_reactor_get_tick_deadline(rtor); 527 if (!net_read_desired && !net_write_desired 528 && ossl_time_is_infinite(tick_deadline)) 529 /* Can't wait if there is nothing to wait for. */ 530 return 0; 531 532 ossl_quic_reactor_enter_blocking_section(rtor); 533 534 res = poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor), 535 net_read_desired, 536 ossl_quic_reactor_get_poll_w(rtor), 537 net_write_desired, 538 notifier_fd, 539 tick_deadline, 540 rtor->mutex); 541 542 /* 543 * We have now exited the OS poller call. We may have 544 * (rtor->signalled_notifier), and other threads may still be blocking. 545 * This means that cur_blocking_waiters may still be non-zero. As such, 546 * we cannot unsignal the notifier until all threads have had an 547 * opportunity to wake up. 548 * 549 * At the same time, we cannot unsignal in the case where 550 * cur_blocking_waiters is now zero because this condition may not occur 551 * reliably. Consider the following scenario: 552 * 553 * T1 enters block_until_pred, cur_blocking_waiters -> 1 554 * T2 enters block_until_pred, cur_blocking_waiters -> 2 555 * T3 enters block_until_pred, cur_blocking_waiters -> 3 556 * 557 * T4 enters block_until_pred, does not block, ticks, 558 * sees that cur_blocking_waiters > 0 and signals the notifier 559 * 560 * T3 wakes, cur_blocking_waiters -> 2 561 * T3 predicate is not satisfied, cur_blocking_waiters -> 3, block again 562 * 563 * Notifier is still signalled, so T3 immediately wakes again 564 * and is stuck repeating the above steps. 565 * 566 * T1, T2 are also woken by the notifier but never see 567 * cur_blocking_waiters drop to 0, so never unsignal the notifier. 568 * 569 * As such, a two phase approach is chosen when designalling the 570 * notifier: 571 * 572 * First, all of the poll_two_descriptor calls on all threads are 573 * allowed to exit due to the notifier being signalled. 574 * 575 * Second, the thread which happened to be the one which decremented 576 * cur_blocking_waiters to 0 unsignals the notifier and is then 577 * responsible for broadcasting to a CV to indicate to the other 578 * threads that the synchronised wakeup has been completed. Other 579 * threads wait for this CV to be signalled. 580 * 581 */ 582 ossl_quic_reactor_leave_blocking_section(rtor); 583 584 if (!res) 585 /* 586 * We don't actually care why the call succeeded (timeout, FD 587 * readiness), we just call reactor_tick and start trying to do I/O 588 * things again. If poll_two_fds returns 0, this is some other 589 * non-timeout failure and we should stop here. 590 * 591 * TODO(QUIC FUTURE): In the future we could avoid unnecessary 592 * syscalls by not retrying network I/O that isn't ready based 593 * on the result of the poll call. However this might be difficult 594 * because it requires we do the call to poll(2) or equivalent 595 * syscall ourselves, whereas in the general case the application 596 * does the polling and just calls SSL_handle_events(). 597 * Implementing this optimisation in the future will probably 598 * therefore require API changes. 599 */ 600 return 0; 601 } 602 603 return res; 604 } 605 606 void ossl_quic_reactor_enter_blocking_section(QUIC_REACTOR *rtor) 607 { 608 ++rtor->cur_blocking_waiters; 609 } 610 611 void ossl_quic_reactor_leave_blocking_section(QUIC_REACTOR *rtor) 612 { 613 assert(rtor->cur_blocking_waiters > 0); 614 --rtor->cur_blocking_waiters; 615 616 if (rtor->have_notifier && rtor->signalled_notifier) { 617 if (rtor->cur_blocking_waiters == 0) { 618 ossl_rio_notifier_unsignal(&rtor->notifier); 619 rtor->signalled_notifier = 0; 620 621 /* 622 * Release the other threads which have woken up (and possibly 623 * rtor_notify_other_threads as well). 624 */ 625 ossl_crypto_condvar_broadcast(rtor->notifier_cv); 626 } else { 627 /* We are not the last waiter out - so wait for that one. */ 628 while (rtor->signalled_notifier) 629 ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex); 630 } 631 } 632 } 633