1 /* 2 * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 #include "internal/quic_reactor.h" 10 #include "internal/common.h" 11 #include "internal/thread_arch.h" 12 #include <assert.h> 13 14 /* 15 * Core I/O Reactor Framework 16 * ========================== 17 */ 18 static void rtor_notify_other_threads(QUIC_REACTOR *rtor); 19 20 int ossl_quic_reactor_init(QUIC_REACTOR *rtor, 21 void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg, 22 uint32_t flags), 23 void *tick_cb_arg, 24 CRYPTO_MUTEX *mutex, 25 OSSL_TIME initial_tick_deadline, 26 uint64_t flags) 27 { 28 rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 29 rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 30 rtor->net_read_desired = 0; 31 rtor->net_write_desired = 0; 32 rtor->can_poll_r = 0; 33 rtor->can_poll_w = 0; 34 rtor->tick_deadline = initial_tick_deadline; 35 36 rtor->tick_cb = tick_cb; 37 rtor->tick_cb_arg = tick_cb_arg; 38 rtor->mutex = mutex; 39 40 rtor->cur_blocking_waiters = 0; 41 42 if ((flags & QUIC_REACTOR_FLAG_USE_NOTIFIER) != 0) { 43 if (!ossl_rio_notifier_init(&rtor->notifier)) 44 return 0; 45 46 if ((rtor->notifier_cv = ossl_crypto_condvar_new()) == NULL) { 47 ossl_rio_notifier_cleanup(&rtor->notifier); 48 return 0; 49 } 50 51 rtor->have_notifier = 1; 52 } else { 53 rtor->have_notifier = 0; 54 } 55 56 return 1; 57 } 58 59 void ossl_quic_reactor_cleanup(QUIC_REACTOR *rtor) 60 { 61 if (rtor == NULL) 62 return; 63 64 if (rtor->have_notifier) { 65 ossl_rio_notifier_cleanup(&rtor->notifier); 66 rtor->have_notifier = 0; 67 68 ossl_crypto_condvar_free(&rtor->notifier_cv); 69 } 70 } 71 72 void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r) 73 { 74 if (r == NULL) 75 rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 76 else 77 rtor->poll_r = *r; 78 79 rtor->can_poll_r 80 = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_r); 81 } 82 83 void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w) 84 { 85 if (w == NULL) 86 rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE; 87 else 88 rtor->poll_w = *w; 89 90 rtor->can_poll_w 91 = ossl_quic_reactor_can_support_poll_descriptor(rtor, &rtor->poll_w); 92 } 93 94 const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(const QUIC_REACTOR *rtor) 95 { 96 return &rtor->poll_r; 97 } 98 99 const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(const QUIC_REACTOR *rtor) 100 { 101 return &rtor->poll_w; 102 } 103 104 int ossl_quic_reactor_can_support_poll_descriptor(const QUIC_REACTOR *rtor, 105 const BIO_POLL_DESCRIPTOR *d) 106 { 107 return d->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD; 108 } 109 110 int ossl_quic_reactor_can_poll_r(const QUIC_REACTOR *rtor) 111 { 112 return rtor->can_poll_r; 113 } 114 115 int ossl_quic_reactor_can_poll_w(const QUIC_REACTOR *rtor) 116 { 117 return rtor->can_poll_w; 118 } 119 120 int ossl_quic_reactor_net_read_desired(QUIC_REACTOR *rtor) 121 { 122 return rtor->net_read_desired; 123 } 124 125 int ossl_quic_reactor_net_write_desired(QUIC_REACTOR *rtor) 126 { 127 return rtor->net_write_desired; 128 } 129 130 OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor) 131 { 132 return rtor->tick_deadline; 133 } 134 135 int ossl_quic_reactor_tick(QUIC_REACTOR *rtor, uint32_t flags) 136 { 137 QUIC_TICK_RESULT res = {0}; 138 139 /* 140 * Note that the tick callback cannot fail; this is intentional. Arguably it 141 * does not make that much sense for ticking to 'fail' (in the sense of an 142 * explicit error indicated to the user) because ticking is by its nature 143 * best effort. If something fatal happens with a connection we can report 144 * it on the next actual application I/O call. 145 */ 146 rtor->tick_cb(&res, rtor->tick_cb_arg, flags); 147 148 rtor->net_read_desired = res.net_read_desired; 149 rtor->net_write_desired = res.net_write_desired; 150 rtor->tick_deadline = res.tick_deadline; 151 if (res.notify_other_threads) 152 rtor_notify_other_threads(rtor); 153 154 return 1; 155 } 156 157 RIO_NOTIFIER *ossl_quic_reactor_get0_notifier(QUIC_REACTOR *rtor) 158 { 159 return rtor->have_notifier ? &rtor->notifier : NULL; 160 } 161 162 /* 163 * Blocking I/O Adaptation Layer 164 * ============================= 165 */ 166 167 /* 168 * Utility which can be used to poll on up to two FDs. This is designed to 169 * support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where 170 * different FDs are used for read and write). 171 * 172 * Generally use of poll(2) is preferred where available. Windows, however, 173 * hasn't traditionally offered poll(2), only select(2). WSAPoll() was 174 * introduced in Vista but has seemingly been buggy until relatively recent 175 * versions of Windows 10. Moreover we support XP so this is not a suitable 176 * target anyway. However, the traditional issues with select(2) turn out not to 177 * be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of 178 * FDs (and thus is limited in the magnitude of the FDs expressible), Windows 179 * select(2) is very different. In Windows, socket handles are not allocated 180 * contiguously from zero and thus this bitmap approach was infeasible. Thus in 181 * adapting the Berkeley sockets API to Windows a different approach was taken 182 * whereby the fd_set contains a fixed length array of socket handles and an 183 * integer indicating how many entries are valid; thus Windows select() 184 * ironically is actually much more like *NIX poll(2) than *NIX select(2). In 185 * any case, this means that the relevant limit for Windows select() is the 186 * number of FDs being polled, not the magnitude of those FDs. Since we only 187 * poll for two FDs here, this limit does not concern us. 188 * 189 * Usage: rfd and wfd may be the same or different. Either or both may also be 190 * -1. If rfd_want_read is 1, rfd is polled for readability, and if 191 * wfd_want_write is 1, wfd is polled for writability. Note that since any 192 * passed FD is always polled for error conditions, setting rfd_want_read=0 and 193 * wfd_want_write=0 is not the same as passing -1 for both FDs. 194 * 195 * deadline is a timestamp to return at. If it is ossl_time_infinite(), the call 196 * never times out. 197 * 198 * Returns 0 on error and 1 on success. Timeout expiry is considered a success 199 * condition. We don't elaborate our return values here because the way we are 200 * actually using this doesn't currently care. 201 * 202 * If mutex is non-NULL, it is assumed to be held for write and is unlocked for 203 * the duration of the call. 204 * 205 * Precondition: mutex is NULL or is held for write (unchecked) 206 * Postcondition: mutex is NULL or is held for write (unless 207 * CRYPTO_THREAD_write_lock fails) 208 */ 209 static int poll_two_fds(int rfd, int rfd_want_read, 210 int wfd, int wfd_want_write, 211 int notify_rfd, 212 OSSL_TIME deadline, 213 CRYPTO_MUTEX *mutex) 214 { 215 #if defined(OPENSSL_SYS_WINDOWS) || !defined(POLLIN) 216 fd_set rfd_set, wfd_set, efd_set; 217 OSSL_TIME now, timeout; 218 struct timeval tv, *ptv; 219 int maxfd, pres; 220 221 # ifndef OPENSSL_SYS_WINDOWS 222 /* 223 * On Windows there is no relevant limit to the magnitude of a fd value (see 224 * above). On *NIX the fd_set uses a bitmap and we must check the limit. 225 */ 226 if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE) 227 return 0; 228 # endif 229 230 FD_ZERO(&rfd_set); 231 FD_ZERO(&wfd_set); 232 FD_ZERO(&efd_set); 233 234 if (rfd != INVALID_SOCKET && rfd_want_read) 235 openssl_fdset(rfd, &rfd_set); 236 if (wfd != INVALID_SOCKET && wfd_want_write) 237 openssl_fdset(wfd, &wfd_set); 238 239 /* Always check for error conditions. */ 240 if (rfd != INVALID_SOCKET) 241 openssl_fdset(rfd, &efd_set); 242 if (wfd != INVALID_SOCKET) 243 openssl_fdset(wfd, &efd_set); 244 245 /* Check for notifier FD readability. */ 246 if (notify_rfd != INVALID_SOCKET) { 247 openssl_fdset(notify_rfd, &rfd_set); 248 openssl_fdset(notify_rfd, &efd_set); 249 } 250 251 maxfd = rfd; 252 if (wfd > maxfd) 253 maxfd = wfd; 254 if (notify_rfd > maxfd) 255 maxfd = notify_rfd; 256 257 if (!ossl_assert(rfd != INVALID_SOCKET || wfd != INVALID_SOCKET 258 || !ossl_time_is_infinite(deadline))) 259 /* Do not block forever; should not happen. */ 260 return 0; 261 262 /* 263 * The mutex dance (unlock/re-locak after poll/seclect) is 264 * potentially problematic. This may create a situation when 265 * two threads arrive to select/poll with the same file 266 * descriptors. We just need to be aware of this. 267 */ 268 # if defined(OPENSSL_THREADS) 269 if (mutex != NULL) 270 ossl_crypto_mutex_unlock(mutex); 271 # endif 272 273 do { 274 /* 275 * select expects a timeout, not a deadline, so do the conversion. 276 * Update for each call to ensure the correct value is used if we repeat 277 * due to EINTR. 278 */ 279 if (ossl_time_is_infinite(deadline)) { 280 ptv = NULL; 281 } else { 282 now = ossl_time_now(); 283 /* 284 * ossl_time_subtract saturates to zero so we don't need to check if 285 * now > deadline. 286 */ 287 timeout = ossl_time_subtract(deadline, now); 288 tv = ossl_time_to_timeval(timeout); 289 ptv = &tv; 290 } 291 292 pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv); 293 } while (pres == -1 && get_last_socket_error_is_eintr()); 294 295 # if defined(OPENSSL_THREADS) 296 if (mutex != NULL) 297 ossl_crypto_mutex_lock(mutex); 298 # endif 299 300 return pres < 0 ? 0 : 1; 301 #else 302 int pres, timeout_ms; 303 OSSL_TIME now, timeout; 304 struct pollfd pfds[3] = {0}; 305 size_t npfd = 0; 306 307 if (rfd == wfd) { 308 pfds[npfd].fd = rfd; 309 pfds[npfd].events = (rfd_want_read ? POLLIN : 0) 310 | (wfd_want_write ? POLLOUT : 0); 311 if (rfd >= 0 && pfds[npfd].events != 0) 312 ++npfd; 313 } else { 314 pfds[npfd].fd = rfd; 315 pfds[npfd].events = (rfd_want_read ? POLLIN : 0); 316 if (rfd >= 0 && pfds[npfd].events != 0) 317 ++npfd; 318 319 pfds[npfd].fd = wfd; 320 pfds[npfd].events = (wfd_want_write ? POLLOUT : 0); 321 if (wfd >= 0 && pfds[npfd].events != 0) 322 ++npfd; 323 } 324 325 if (notify_rfd >= 0) { 326 pfds[npfd].fd = notify_rfd; 327 pfds[npfd].events = POLLIN; 328 ++npfd; 329 } 330 331 if (!ossl_assert(npfd != 0 || !ossl_time_is_infinite(deadline))) 332 /* Do not block forever; should not happen. */ 333 return 0; 334 335 # if defined(OPENSSL_THREADS) 336 if (mutex != NULL) 337 ossl_crypto_mutex_unlock(mutex); 338 # endif 339 340 do { 341 if (ossl_time_is_infinite(deadline)) { 342 timeout_ms = -1; 343 } else { 344 now = ossl_time_now(); 345 timeout = ossl_time_subtract(deadline, now); 346 timeout_ms = ossl_time2ms(timeout); 347 } 348 349 pres = poll(pfds, npfd, timeout_ms); 350 } while (pres == -1 && get_last_socket_error_is_eintr()); 351 352 # if defined(OPENSSL_THREADS) 353 if (mutex != NULL) 354 ossl_crypto_mutex_lock(mutex); 355 # endif 356 357 return pres < 0 ? 0 : 1; 358 #endif 359 } 360 361 static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd) 362 { 363 if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) { 364 *fd = INVALID_SOCKET; 365 return 1; 366 } 367 368 if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD 369 || d->value.fd == INVALID_SOCKET) 370 return 0; 371 372 *fd = d->value.fd; 373 return 1; 374 } 375 376 /* 377 * Poll up to two abstract poll descriptors, as well as an optional notify FD. 378 * Currently we only support poll descriptors which represent FDs. 379 * 380 * If mutex is non-NULL, it is assumed be a lock currently held for write and is 381 * unlocked for the duration of any wait. 382 * 383 * Precondition: mutex is NULL or is held for write (unchecked) 384 * Postcondition: mutex is NULL or is held for write (unless 385 * CRYPTO_THREAD_write_lock fails) 386 */ 387 static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read, 388 const BIO_POLL_DESCRIPTOR *w, int w_want_write, 389 int notify_rfd, 390 OSSL_TIME deadline, 391 CRYPTO_MUTEX *mutex) 392 { 393 int rfd, wfd; 394 395 if (!poll_descriptor_to_fd(r, &rfd) 396 || !poll_descriptor_to_fd(w, &wfd)) 397 return 0; 398 399 return poll_two_fds(rfd, r_want_read, wfd, w_want_write, 400 notify_rfd, deadline, mutex); 401 } 402 403 /* 404 * Notify other threads currently blocking in 405 * ossl_quic_reactor_block_until_pred() calls that a predicate they are using 406 * might now be met due to state changes. 407 * 408 * This function must be called after state changes which might cause a 409 * predicate in another thread to now be met (i.e., ticking). It is a no-op if 410 * inter-thread notification is not being used. 411 * 412 * The reactor mutex must be held while calling this function. 413 */ 414 static void rtor_notify_other_threads(QUIC_REACTOR *rtor) 415 { 416 if (!rtor->have_notifier) 417 return; 418 419 /* 420 * This function is called when we have done anything on this thread which 421 * might allow a predicate for a block_until_pred call on another thread to 422 * now be met. 423 * 424 * When this happens, we need to wake those threads using the notifier. 425 * However, we do not want to wake *this* thread (if/when it subsequently 426 * enters block_until_pred) due to the notifier FD becoming readable. 427 * Therefore, signal the notifier, and use a CV to detect when all other 428 * threads have woken. 429 */ 430 431 if (rtor->cur_blocking_waiters == 0) 432 /* Nothing to do in this case. */ 433 return; 434 435 /* Signal the notifier to wake up all threads. */ 436 if (!rtor->signalled_notifier) { 437 ossl_rio_notifier_signal(&rtor->notifier); 438 rtor->signalled_notifier = 1; 439 } 440 441 /* 442 * Wait on the CV until all threads have finished the first phase of the 443 * wakeup process and the last thread out has taken responsibility for 444 * unsignalling the notifier. 445 */ 446 while (rtor->signalled_notifier) 447 ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex); 448 } 449 450 /* 451 * Block until a predicate function evaluates to true. 452 * 453 * If mutex is non-NULL, it is assumed be a lock currently held for write and is 454 * unlocked for the duration of any wait. 455 * 456 * Precondition: Must hold channel write lock (unchecked) 457 * Precondition: mutex is NULL or is held for write (unchecked) 458 * Postcondition: mutex is NULL or is held for write (unless 459 * CRYPTO_THREAD_write_lock fails) 460 */ 461 int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor, 462 int (*pred)(void *arg), void *pred_arg, 463 uint32_t flags) 464 { 465 int res, net_read_desired, net_write_desired, notifier_fd; 466 OSSL_TIME tick_deadline; 467 468 notifier_fd 469 = (rtor->have_notifier ? ossl_rio_notifier_as_fd(&rtor->notifier) 470 : INVALID_SOCKET); 471 472 for (;;) { 473 if ((flags & SKIP_FIRST_TICK) != 0) 474 flags &= ~SKIP_FIRST_TICK; 475 else 476 /* best effort */ 477 ossl_quic_reactor_tick(rtor, 0); 478 479 if ((res = pred(pred_arg)) != 0) 480 return res; 481 482 net_read_desired = ossl_quic_reactor_net_read_desired(rtor); 483 net_write_desired = ossl_quic_reactor_net_write_desired(rtor); 484 tick_deadline = ossl_quic_reactor_get_tick_deadline(rtor); 485 if (!net_read_desired && !net_write_desired 486 && ossl_time_is_infinite(tick_deadline)) 487 /* Can't wait if there is nothing to wait for. */ 488 return 0; 489 490 ossl_quic_reactor_enter_blocking_section(rtor); 491 492 res = poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor), 493 net_read_desired, 494 ossl_quic_reactor_get_poll_w(rtor), 495 net_write_desired, 496 notifier_fd, 497 tick_deadline, 498 rtor->mutex); 499 500 /* 501 * We have now exited the OS poller call. We may have 502 * (rtor->signalled_notifier), and other threads may still be blocking. 503 * This means that cur_blocking_waiters may still be non-zero. As such, 504 * we cannot unsignal the notifier until all threads have had an 505 * opportunity to wake up. 506 * 507 * At the same time, we cannot unsignal in the case where 508 * cur_blocking_waiters is now zero because this condition may not occur 509 * reliably. Consider the following scenario: 510 * 511 * T1 enters block_until_pred, cur_blocking_waiters -> 1 512 * T2 enters block_until_pred, cur_blocking_waiters -> 2 513 * T3 enters block_until_pred, cur_blocking_waiters -> 3 514 * 515 * T4 enters block_until_pred, does not block, ticks, 516 * sees that cur_blocking_waiters > 0 and signals the notifier 517 * 518 * T3 wakes, cur_blocking_waiters -> 2 519 * T3 predicate is not satisfied, cur_blocking_waiters -> 3, block again 520 * 521 * Notifier is still signalled, so T3 immediately wakes again 522 * and is stuck repeating the above steps. 523 * 524 * T1, T2 are also woken by the notifier but never see 525 * cur_blocking_waiters drop to 0, so never unsignal the notifier. 526 * 527 * As such, a two phase approach is chosen when designalling the 528 * notifier: 529 * 530 * First, all of the poll_two_descriptor calls on all threads are 531 * allowed to exit due to the notifier being signalled. 532 * 533 * Second, the thread which happened to be the one which decremented 534 * cur_blocking_waiters to 0 unsignals the notifier and is then 535 * responsible for broadcasting to a CV to indicate to the other 536 * threads that the synchronised wakeup has been completed. Other 537 * threads wait for this CV to be signalled. 538 * 539 */ 540 ossl_quic_reactor_leave_blocking_section(rtor); 541 542 if (!res) 543 /* 544 * We don't actually care why the call succeeded (timeout, FD 545 * readiness), we just call reactor_tick and start trying to do I/O 546 * things again. If poll_two_fds returns 0, this is some other 547 * non-timeout failure and we should stop here. 548 * 549 * TODO(QUIC FUTURE): In the future we could avoid unnecessary 550 * syscalls by not retrying network I/O that isn't ready based 551 * on the result of the poll call. However this might be difficult 552 * because it requires we do the call to poll(2) or equivalent 553 * syscall ourselves, whereas in the general case the application 554 * does the polling and just calls SSL_handle_events(). 555 * Implementing this optimisation in the future will probably 556 * therefore require API changes. 557 */ 558 return 0; 559 } 560 561 return res; 562 } 563 564 void ossl_quic_reactor_enter_blocking_section(QUIC_REACTOR *rtor) 565 { 566 ++rtor->cur_blocking_waiters; 567 } 568 569 void ossl_quic_reactor_leave_blocking_section(QUIC_REACTOR *rtor) 570 { 571 assert(rtor->cur_blocking_waiters > 0); 572 --rtor->cur_blocking_waiters; 573 574 if (rtor->have_notifier && rtor->signalled_notifier) { 575 if (rtor->cur_blocking_waiters == 0) { 576 ossl_rio_notifier_unsignal(&rtor->notifier); 577 rtor->signalled_notifier = 0; 578 579 /* 580 * Release the other threads which have woken up (and possibly 581 * rtor_notify_other_threads as well). 582 */ 583 ossl_crypto_condvar_broadcast(rtor->notifier_cv); 584 } else { 585 /* We are not the last waiter out - so wait for that one. */ 586 while (rtor->signalled_notifier) 587 ossl_crypto_condvar_wait(rtor->notifier_cv, rtor->mutex); 588 } 589 } 590 } 591