QUIC: Add QUIC reactor

Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/19703)
This commit is contained in:
Hugo Landau 2022-11-17 14:59:18 +00:00
parent 68801bcb76
commit 69523214ee
3 changed files with 464 additions and 0 deletions

View File

@ -0,0 +1,162 @@
/*
* Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#ifndef OSSL_QUIC_REACTOR_H
# define OSSL_QUIC_REACTOR_H
# include "internal/time.h"
# include "internal/sockets.h"
# include <openssl/bio.h>
/*
* Core I/O Reactor Framework
* ==========================
*
* Manages use of async network I/O which the QUIC stack is built on. The core
* mechanic looks like this:
*
* - There is a pollable FD for both the read and write side respectively.
* Readability and writeability of these FDs respectively determines when
* network I/O is available.
*
* - The reactor can export these FDs to the user, as well as flags indicating
* whether the user should listen for readability, writeability, or neither.
*
* - The reactor can export a timeout indication to the user, indicating when
* the reactor should be called (via libssl APIs) regardless of whether
* the network socket has become ready.
*
* The reactor is based around a tick callback which is essentially the mutator
* function. The mutator attempts to do whatever it can, attempting to perform
* network I/O to the extent currently feasible. When done, the mutator returns
* information to the reactor indicating when it should be woken up again:
*
* - Should it be woken up when network RX is possible?
* - Should it be woken up when network TX is possible?
* - Should it be woken up no later than some deadline X?
*
* The intention is that ALL I/O-related SSL_* functions with side effects (e.g.
* SSL_read/SSL_write) consist of three phases:
*
* - Optionally mutate the QUIC machine's state.
* - Optionally tick the QUIC reactor.
* - Optionally mutate the QUIC machine's state.
*
* For example, SSL_write is a mutation (appending to a stream buffer) followed
* by an optional tick (generally expected as we may want to send the data
* immediately, though not strictly needed if transmission is being deferred due
* to Nagle's algorithm, etc.).
*
* SSL_read is also a mutation and in principle does not need to tick the
* reactor, but it generally will anyway to ensure that the reactor is regularly
* ticked by an application which is only reading and not writing.
*
* If the SSL object is being used in blocking mode, SSL_read may need to block
* if no data is available yet, and SSL_write may need to block if buffers
* are full.
*
* The internals of the QUIC I/O engine always use asynchronous I/O. If the
* application desires blocking semantics, we handle this by adding a blocking
* adaptation layer on top of our internal asynchronous I/O API as exposed by
* the reactor interface.
*/
# ifndef OPENSSL_NO_QUIC
/*
 * Output of the tick callback: which network readiness events the QUIC state
 * machine wants to be woken up for, and the deadline by which it must be
 * ticked again in any case.
 */
typedef struct quic_tick_result_st {
    /* Nonzero if we should be woken when the network becomes readable. */
    char        want_net_read;
    /* Nonzero if we should be woken when the network becomes writable. */
    char        want_net_write;
    /* Wake no later than this; ossl_time_infinite() if no deadline. */
    OSSL_TIME   tick_deadline;
} QUIC_TICK_RESULT;
/*
 * A reactor tracks the I/O readiness conditions and deadline under which the
 * QUIC state machine next needs servicing, and knows how to tick that state
 * machine via a callback.
 */
typedef struct quic_reactor_st {
    /*
     * BIO poll descriptors which can be polled. poll_r is a poll descriptor
     * which becomes readable when the QUIC state machine can potentially do
     * work, and poll_w is a poll descriptor which becomes writable when the
     * QUIC state machine can potentially do work. Generally, either of these
     * conditions means that SSL_tick() should be called, or another SSL
     * function which implicitly calls SSL_tick() (e.g. SSL_read/SSL_write()).
     */
    BIO_POLL_DESCRIPTOR poll_r, poll_w;

    /* ossl_time_infinite() if none currently applicable */
    OSSL_TIME tick_deadline;

    /* Mutator invoked by ossl_quic_reactor_tick(), and its opaque argument. */
    void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg);
    void *tick_cb_arg;

    /*
     * These are true if we would like to know when we can read or write from
     * the network respectively.
     */
    unsigned int want_net_read  : 1;
    unsigned int want_net_write : 1;
} QUIC_REACTOR;
/*
 * Initialises a reactor with the given tick callback and initial deadline.
 * The poll descriptors are set to BIO_POLL_DESCRIPTOR_TYPE_NONE and the
 * wakeup flags are cleared.
 */
void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
                            void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
                            void *tick_cb_arg,
                            OSSL_TIME initial_tick_deadline);

/* Set the read-side/write-side poll descriptor (copied by value). */
void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor,
                                  const BIO_POLL_DESCRIPTOR *r);
void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor,
                                  const BIO_POLL_DESCRIPTOR *w);

/* Get a pointer to the reactor's stored read-side/write-side descriptor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor);
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor);

/*
 * Returns 1 if the reactor wants to be woken when the network becomes
 * readable (respectively, writable), as reported by the most recent tick
 * (0 before any tick has occurred).
 */
int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor);
int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor);

/*
 * Returns the deadline by which the reactor should next be ticked, or
 * ossl_time_infinite() if none is currently applicable.
 */
OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor);

/*
 * Do whatever work can be done, and as much work as can be done. This involves
 * e.g. seeing if we can read anything from the network (if we want to), seeing
 * if we can write anything to the network (if we want to), etc.
 */
int ossl_quic_reactor_tick(QUIC_REACTOR *rtor);
/*
* Blocking I/O Adaptation Layer
* =============================
*
* The blocking I/O adaptation layer implements blocking I/O on top of our
* asynchronous core.
*
* The core mechanism is block_until_pred(), which does not return until pred()
* returns a value other than 0. The blocker uses OS I/O synchronisation
* primitives (e.g. poll(2)) and ticks the reactor until the predicate is
* satisfied. The blocker is not required to call pred() more than once between
* tick calls.
*
* When pred returns a non-zero value, that value is returned by this function.
* This can be used to allow pred() to indicate error conditions and short
* circuit the blocking process.
*
* A return value of -1 is reserved for network polling errors. Therefore this
* return value should not be used by pred() if ambiguity is not desired. Note
* that the predicate function can always arrange its own output mechanism, for
* example by passing a structure of its own as the argument.
*
* If the SKIP_FIRST_TICK flag is set, the first call to reactor_tick() before
* the first call to pred() is skipped. This is useful if it is known that
* ticking the reactor again will not be useful (e.g. because it has already
* been done).
*/
/* Flags for ossl_quic_reactor_block_until_pred(). */
#define SKIP_FIRST_TICK (1U << 0)

int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
                                       int (*pred)(void *arg), void *pred_arg,
                                       uint32_t flags);
# endif
#endif

View File

@ -9,3 +9,4 @@ SOURCE[$LIBSSL]=quic_cfq.c quic_txpim.c quic_fifd.c quic_txp.c
SOURCE[$LIBSSL]=quic_stream_map.c
SOURCE[$LIBSSL]=quic_sf_list.c quic_rstream.c quic_sstream.c
SOURCE[$LIBSSL]=quic_dummy_handshake.c
SOURCE[$LIBSSL]=quic_reactor.c

301
ssl/quic/quic_reactor.c Normal file
View File

@ -0,0 +1,301 @@
/*
* Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#include "internal/quic_reactor.h"
/*
* Core I/O Reactor Framework
* ==========================
*/
/*
 * Initialises a reactor: stores the tick callback and the initial tick
 * deadline, clears both network wakeup flags, and marks both poll
 * descriptors as absent (BIO_POLL_DESCRIPTOR_TYPE_NONE).
 */
void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
                            void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
                            void *tick_cb_arg,
                            OSSL_TIME initial_tick_deadline)
{
    rtor->tick_cb       = tick_cb;
    rtor->tick_cb_arg   = tick_cb_arg;
    rtor->tick_deadline = initial_tick_deadline;

    rtor->want_net_read  = 0;
    rtor->want_net_write = 0;

    rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
    rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
}
/* Sets the reactor's read-side poll descriptor (copied by value). */
void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
{
    rtor->poll_r = *r;
}
/* Sets the reactor's write-side poll descriptor (copied by value). */
void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
{
    rtor->poll_w = *w;
}
/* Returns a pointer to the reactor's stored read-side poll descriptor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor)
{
    return &rtor->poll_r;
}
/* Returns a pointer to the reactor's stored write-side poll descriptor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor)
{
    return &rtor->poll_w;
}
/* Returns 1 if the last tick requested a wakeup on network readability. */
int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor)
{
    return rtor->want_net_read;
}
/* Returns 1 if the last tick requested a wakeup on network writability. */
int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor)
{
    return rtor->want_net_write;
}
/*
 * Returns the deadline by which the reactor should next be ticked;
 * ossl_time_infinite() if none is currently applicable.
 */
OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
{
    return rtor->tick_deadline;
}
/*
 * Runs the tick callback once and records its wakeup requirements (read/write
 * interest and next deadline) in the reactor. Always returns 1.
 */
int ossl_quic_reactor_tick(QUIC_REACTOR *rtor)
{
    QUIC_TICK_RESULT result = {0};

    /*
     * The tick callback deliberately has no failure path; ticking is by
     * nature best-effort, and anything fatal that happens to a connection
     * can be reported on the next actual application I/O call instead of
     * surfacing an explicit error to the user here.
     */
    rtor->tick_cb(&result, rtor->tick_cb_arg);

    rtor->want_net_read  = result.want_net_read;
    rtor->want_net_write = result.want_net_write;
    rtor->tick_deadline  = result.tick_deadline;
    return 1;
}
/*
* Blocking I/O Adaptation Layer
* =============================
*/
/*
* Utility which can be used to poll on up to two FDs. This is designed to
* support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
* different FDs are used for read and write).
*
* Generally use of poll(2) is preferred where available. Windows, however,
* hasn't traditionally offered poll(2), only select(2). WSAPoll() was
* introduced in Vista but has seemingly been buggy until relatively recent
* versions of Windows 10. Moreover we support XP so this is not a suitable
* target anyway. However, the traditional issues with select(2) turn out not to
* be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of
* FDs (and thus is limited in the magnitude of the FDs expressible), Windows
* select(2) is very different. In Windows, socket handles are not allocated
* contiguously from zero and thus this bitmap approach was infeasible. Thus in
* adapting the Berkeley sockets API to Windows a different approach was taken
* whereby the fd_set contains a fixed length array of socket handles and an
* integer indicating how many entries are valid; thus Windows select()
* ironically is actually much more like *NIX poll(2) than *NIX select(2). In
* any case, this means that the relevant limit for Windows select() is the
* number of FDs being polled, not the magnitude of those FDs. Since we only
* poll for two FDs here, this limit does not concern us.
*
* Usage: rfd and wfd may be the same or different. Either or both may also be
* -1. If rfd_want_read is 1, rfd is polled for readability, and if
* wfd_want_write is 1, wfd is polled for writability. Note that since any
* passed FD is always polled for error conditions, setting rfd_want_read=0 and
* wfd_want_write=0 is not the same as passing -1 for both FDs.
*
* deadline is a timestamp to return at. If it is ossl_time_infinite(), the call
* never times out.
*
* Returns 0 on error and 1 on success. Timeout expiry is considered a success
* condition. We don't elaborate our return values here because the way we are
* actually using this doesn't currently care.
*/
/*
 * Block until rfd is readable (if rfd_want_read), wfd is writable (if
 * wfd_want_write), either fd reports an error condition, or the deadline
 * expires. Returns 0 on polling error, 1 otherwise (timeout expiry included).
 * See the usage notes in the comment above for the fd/flag conventions.
 */
static int poll_two_fds(int rfd, int rfd_want_read,
                        int wfd, int wfd_want_write,
                        OSSL_TIME deadline)
{
#if defined(OSSL_SYS_WINDOWS) || !defined(POLLIN)
    /* select(2)-based path: Windows, or platforms without poll(2). */
    fd_set rfd_set, wfd_set, efd_set;
    OSSL_TIME now, timeout;
    struct timeval tv, *ptv;
    int maxfd, pres;

#ifndef OSSL_SYS_WINDOWS
    /*
     * On Windows there is no relevant limit to the magnitude of a fd value (see
     * above). On *NIX the fd_set uses a bitmap and we must check the limit.
     */
    if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
        return 0;
#endif

    FD_ZERO(&rfd_set);
    FD_ZERO(&wfd_set);
    FD_ZERO(&efd_set);

    if (rfd != -1 && rfd_want_read)
        openssl_fdset(rfd, &rfd_set);
    if (wfd != -1 && wfd_want_write)
        openssl_fdset(wfd, &wfd_set);

    /* Always check for error conditions. */
    if (rfd != -1)
        openssl_fdset(rfd, &efd_set);
    if (wfd != -1)
        openssl_fdset(wfd, &efd_set);

    /* select() wants the highest-numbered fd plus one. */
    maxfd = rfd;
    if (wfd > maxfd)
        maxfd = wfd;

    if (rfd == -1 && wfd == -1 && ossl_time_is_infinite(deadline))
        /* Do not block forever; should not happen. */
        return 0;

    do {
        /*
         * select expects a timeout, not a deadline, so do the conversion.
         * Update for each call to ensure the correct value is used if we repeat
         * due to EINTR.
         *
         * NOTE(review): the fd_sets are not re-initialised before a retry;
         * this relies on select() leaving them unmodified when it fails with
         * EINTR — confirm that holds on all supported platforms.
         */
        if (ossl_time_is_infinite(deadline)) {
            ptv = NULL;
        } else {
            now = ossl_time_now();
            /*
             * ossl_time_subtract saturates to zero so we don't need to check if
             * now > deadline.
             */
            timeout = ossl_time_subtract(deadline, now);
            tv = ossl_time_to_timeval(timeout);
            ptv = &tv;
        }

        pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
    } while (pres == -1 && get_last_socket_error_is_eintr());

    /* Timeout expiry (pres == 0) counts as success. */
    return pres < 0 ? 0 : 1;
#else
    /* poll(2)-based path. */
    int pres, timeout_ms;
    OSSL_TIME now, timeout;
    struct pollfd pfds[2] = {0};
    size_t npfd = 0;

    if (rfd == wfd) {
        /* Single fd used for both directions; merge the event mask. */
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
            | (wfd_want_write ? POLLOUT : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    } else {
        /* Separate read and write fds; one pollfd entry each (if wanted). */
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;

        pfds[npfd].fd = wfd;
        pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
        if (wfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    }

    if (npfd == 0 && ossl_time_is_infinite(deadline))
        /* Do not block forever; should not happen. */
        return 0;

    do {
        /* Convert the absolute deadline to a relative timeout on every retry. */
        if (ossl_time_is_infinite(deadline)) {
            timeout_ms = -1; /* poll(2): negative timeout blocks indefinitely */
        } else {
            now = ossl_time_now();
            timeout = ossl_time_subtract(deadline, now);
            /*
             * NOTE(review): if ossl_time2ms truncates rather than rounding up,
             * a sub-millisecond remainder yields a zero timeout and thus a
             * busy-wait until the deadline — confirm its rounding behaviour.
             */
            timeout_ms = ossl_time2ms(timeout);
        }

        pres = poll(pfds, npfd, timeout_ms);
    } while (pres == -1 && get_last_socket_error_is_eintr());

    /* Timeout expiry (pres == 0) counts as success. */
    return pres < 0 ? 0 : 1;
#endif
}
/*
 * Translate an abstract poll descriptor into a raw fd. An absent descriptor
 * (NULL or type NONE) maps to -1, which callers treat as "nothing to poll".
 * Returns 1 on success, 0 if the descriptor is of an unsupported type or
 * carries a negative fd.
 */
static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
{
    if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
        *fd = -1;
        return 1;
    }

    /* Only socket-fd descriptors are currently supported. */
    if (d->type == BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD && d->value.fd >= 0) {
        *fd = d->value.fd;
        return 1;
    }

    return 0;
}
/*
* Poll up to two abstract poll descriptors. Currently we only support
* poll descriptors which represent FDs.
*/
/*
 * Poll up to two abstract poll descriptors via poll_two_fds(). Currently only
 * descriptors representing FDs (or absent descriptors) are supported; returns
 * 0 if either descriptor cannot be translated or polling fails, 1 otherwise.
 */
static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
                                const BIO_POLL_DESCRIPTOR *w, int w_want_write,
                                OSSL_TIME deadline)
{
    int rfd = -1, wfd = -1;

    if (poll_descriptor_to_fd(r, &rfd) && poll_descriptor_to_fd(w, &wfd))
        return poll_two_fds(rfd, r_want_read, wfd, w_want_write, deadline);

    return 0;
}
/*
 * Tick the reactor and block on the network until pred() returns a nonzero
 * value, which is then returned to the caller. Returns 0 if polling the
 * network fails. If SKIP_FIRST_TICK is set in flags, the tick preceding the
 * first predicate check is omitted (useful when ticking has just been done).
 */
int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
                                       int (*pred)(void *arg), void *pred_arg,
                                       uint32_t flags)
{
    int res, skip_tick = ((flags & SKIP_FIRST_TICK) != 0);

    for (;;) {
        if (skip_tick)
            skip_tick = 0; /* only the very first tick may be skipped */
        else
            ossl_quic_reactor_tick(rtor); /* best effort */

        res = pred(pred_arg);
        if (res != 0)
            return res;

        /*
         * We don't actually care why the wait ended (timeout or FD
         * readiness): either way we simply loop, tick again and recheck the
         * predicate. A zero return from the poll helper, however, indicates
         * a genuine non-timeout polling failure, so we give up.
         *
         * TODO(QUIC): In the future we could avoid unnecessary syscalls by
         * not retrying network I/O that isn't ready based on the result of
         * the poll call. However this might be difficult because it
         * requires we do the call to poll(2) or equivalent syscall
         * ourselves, whereas in the general case the application does the
         * polling and just calls SSL_tick(). Implementing this optimisation
         * in the future will probably therefore require API changes.
         */
        if (!poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
                                  ossl_quic_reactor_want_net_read(rtor),
                                  ossl_quic_reactor_get_poll_w(rtor),
                                  ossl_quic_reactor_want_net_write(rtor),
                                  ossl_quic_reactor_get_tick_deadline(rtor)))
            return 0;
    }
}