mirror of
https://github.com/openssl/openssl.git
synced 2025-02-17 14:32:04 +08:00
QUIC: Add QUIC reactor
Reviewed-by: Tomas Mraz <tomas@openssl.org> Reviewed-by: Matt Caswell <matt@openssl.org> (Merged from https://github.com/openssl/openssl/pull/19703)
This commit is contained in:
parent
68801bcb76
commit
69523214ee
162
include/internal/quic_reactor.h
Normal file
162
include/internal/quic_reactor.h
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
#ifndef OSSL_QUIC_REACTOR_H
|
||||
# define OSSL_QUIC_REACTOR_H
|
||||
|
||||
# include "internal/time.h"
|
||||
# include "internal/sockets.h"
|
||||
# include <openssl/bio.h>
|
||||
|
||||
/*
|
||||
* Core I/O Reactor Framework
|
||||
* ==========================
|
||||
*
|
||||
* Manages use of async network I/O which the QUIC stack is built on. The core
|
||||
* mechanic looks like this:
|
||||
*
|
||||
* - There is a pollable FD for both the read and write side respectively.
|
||||
* Readability and writeability of these FDs respectively determines when
|
||||
* network I/O is available.
|
||||
*
|
||||
* - The reactor can export these FDs to the user, as well as flags indicating
|
||||
* whether the user should listen for readability, writeability, or neither.
|
||||
*
|
||||
* - The reactor can export a timeout indication to the user, indicating when
|
||||
* the reactor should be called (via libssl APIs) regardless of whether
|
||||
* the network socket has become ready.
|
||||
*
|
||||
* The reactor is based around a tick callback which is essentially the mutator
|
||||
* function. The mutator attempts to do whatever it can, attempting to perform
|
||||
* network I/O to the extent currently feasible. When done, the mutator returns
|
||||
* information to the reactor indicating when it should be woken up again:
|
||||
*
|
||||
* - Should it be woken up when network RX is possible?
|
||||
* - Should it be woken up when network TX is possible?
|
||||
* - Should it be woken up no later than some deadline X?
|
||||
*
|
||||
* The intention is that ALL I/O-related SSL_* functions with side effects (e.g.
|
||||
* SSL_read/SSL_write) consist of three phases:
|
||||
*
|
||||
* - Optionally mutate the QUIC machine's state.
|
||||
* - Optionally tick the QUIC reactor.
|
||||
* - Optionally mutate the QUIC machine's state.
|
||||
*
|
||||
* For example, SSL_write is a mutation (appending to a stream buffer) followed
|
||||
* by an optional tick (generally expected as we may want to send the data
|
||||
* immediately, though not strictly needed if transmission is being deferred due
|
||||
* to Nagle's algorithm, etc.).
|
||||
*
|
||||
* SSL_read is also a mutation and in principle does not need to tick the
|
||||
* reactor, but it generally will anyway to ensure that the reactor is regularly
|
||||
* ticked by an application which is only reading and not writing.
|
||||
*
|
||||
* If the SSL object is being used in blocking mode, SSL_read may need to block
|
||||
* if no data is available yet, and SSL_write may need to block if buffers
|
||||
* are full.
|
||||
*
|
||||
* The internals of the QUIC I/O engine always use asynchronous I/O. If the
|
||||
* application desires blocking semantics, we handle this by adding a blocking
|
||||
* adaptation layer on top of our internal asynchronous I/O API as exposed by
|
||||
* the reactor interface.
|
||||
*/
|
||||
# ifndef OPENSSL_NO_QUIC
|
||||
|
||||
/*
 * Result reported by the tick callback: under which conditions, and by when,
 * the reactor should next be woken. ossl_quic_reactor_tick() copies these
 * fields directly into the reactor's state.
 */
typedef struct quic_tick_result_st {
    /* Nonzero if the state machine should be woken when network RX is possible. */
    char want_net_read;
    /* Nonzero if the state machine should be woken when network TX is possible. */
    char want_net_write;
    /*
     * Deadline by which the reactor should be ticked again regardless of
     * network readiness; ossl_time_infinite() if none currently applicable.
     */
    OSSL_TIME tick_deadline;
} QUIC_TICK_RESULT;
|
||||
|
||||
/*
 * Core reactor state: the pollable descriptors, the current wakeup
 * requirements as reported by the last tick, and the tick (mutator) callback.
 */
typedef struct quic_reactor_st {
    /*
     * BIO poll descriptors which can be polled. poll_r is a poll descriptor
     * which becomes readable when the QUIC state machine can potentially do
     * work, and poll_w is a poll descriptor which becomes writable when the
     * QUIC state machine can potentially do work. Generally, either of these
     * conditions means that SSL_tick() should be called, or another SSL
     * function which implicitly calls SSL_tick() (e.g. SSL_read/SSL_write()).
     */
    BIO_POLL_DESCRIPTOR poll_r, poll_w;
    OSSL_TIME tick_deadline; /* ossl_time_infinite() if none currently applicable */

    /* Tick callback (the mutator) and the opaque argument passed to it. */
    void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg);
    void *tick_cb_arg;

    /*
     * These are true if we would like to know when we can read or write from
     * the network respectively.
     */
    unsigned int want_net_read : 1;
    unsigned int want_net_write : 1;
} QUIC_REACTOR;
|
||||
|
||||
/*
 * Initialises a reactor. tick_cb is the tick (mutator) callback invoked by
 * ossl_quic_reactor_tick() with tick_cb_arg on each call.
 * initial_tick_deadline is the deadline in force before the first tick
 * (ossl_time_infinite() if none).
 */
void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
                            void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
                            void *tick_cb_arg,
                            OSSL_TIME initial_tick_deadline);

/* Sets the poll descriptor polled for readability. Copied by value. */
void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor,
                                  const BIO_POLL_DESCRIPTOR *r);

/* Sets the poll descriptor polled for writability. Copied by value. */
void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor,
                                  const BIO_POLL_DESCRIPTOR *w);

/* Returns the read poll descriptor last set on the reactor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor);

/* Returns the write poll descriptor last set on the reactor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor);

/*
 * Returns whether the reactor wants to be woken when network RX is possible,
 * as reported by the most recent tick.
 */
int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor);

/*
 * Returns whether the reactor wants to be woken when network TX is possible,
 * as reported by the most recent tick.
 */
int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor);

/*
 * Returns the deadline by which the reactor should next be ticked;
 * ossl_time_infinite() if none is currently applicable.
 */
OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor);

/*
 * Do whatever work can be done, and as much work as can be done. This involves
 * e.g. seeing if we can read anything from the network (if we want to), seeing
 * if we can write anything to the network (if we want to), etc.
 */
int ossl_quic_reactor_tick(QUIC_REACTOR *rtor);
|
||||
|
||||
/*
|
||||
* Blocking I/O Adaptation Layer
|
||||
* =============================
|
||||
*
|
||||
* The blocking I/O adaptation layer implements blocking I/O on top of our
|
||||
* asynchronous core.
|
||||
*
|
||||
* The core mechanism is block_until_pred(), which does not return until pred()
|
||||
* returns a value other than 0. The blocker uses OS I/O synchronisation
|
||||
* primitives (e.g. poll(2)) and ticks the reactor until the predicate is
|
||||
* satisfied. The blocker is not required to call pred() more than once between
|
||||
* tick calls.
|
||||
*
|
||||
* When pred returns a non-zero value, that value is returned by this function.
|
||||
* This can be used to allow pred() to indicate error conditions and short
|
||||
* circuit the blocking process.
|
||||
*
|
||||
* A return value of -1 is reserved for network polling errors. Therefore this
|
||||
* return value should not be used by pred() if ambiguity is not desired. Note
|
||||
* that the predicate function can always arrange its own output mechanism, for
|
||||
* example by passing a structure of its own as the argument.
|
||||
*
|
||||
* If the SKIP_FIRST_TICK flag is set, the first call to reactor_tick() before
|
||||
* the first call to pred() is skipped. This is useful if it is known that
|
||||
* ticking the reactor again will not be useful (e.g. because it has already
|
||||
* been done).
|
||||
*/
|
||||
/* If set, the first reactor tick before the first pred() call is skipped. */
#define SKIP_FIRST_TICK (1U << 0)

/*
 * Blocks until pred(pred_arg) returns nonzero, ticking the reactor and
 * waiting on the poll descriptors between calls. Returns pred's nonzero
 * value, or -1 on a network polling error (see block comment above).
 */
int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
                                       int (*pred)(void *arg), void *pred_arg,
                                       uint32_t flags);
|
||||
|
||||
# endif
|
||||
|
||||
#endif
|
@@ -9,3 +9,4 @@ SOURCE[$LIBSSL]=quic_cfq.c quic_txpim.c quic_fifd.c quic_txp.c
|
||||
SOURCE[$LIBSSL]=quic_stream_map.c
|
||||
SOURCE[$LIBSSL]=quic_sf_list.c quic_rstream.c quic_sstream.c
|
||||
SOURCE[$LIBSSL]=quic_dummy_handshake.c
|
||||
SOURCE[$LIBSSL]=quic_reactor.c
|
||||
|
301
ssl/quic/quic_reactor.c
Normal file
301
ssl/quic/quic_reactor.c
Normal file
@@ -0,0 +1,301 @@
|
||||
/*
|
||||
* Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
|
||||
*
|
||||
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
||||
* this file except in compliance with the License. You can obtain a copy
|
||||
* in the file LICENSE in the source distribution or at
|
||||
* https://www.openssl.org/source/license.html
|
||||
*/
|
||||
#include "internal/quic_reactor.h"
|
||||
|
||||
/*
|
||||
* Core I/O Reactor Framework
|
||||
* ==========================
|
||||
*/
|
||||
void ossl_quic_reactor_init(QUIC_REACTOR *rtor,
|
||||
void (*tick_cb)(QUIC_TICK_RESULT *res, void *arg),
|
||||
void *tick_cb_arg,
|
||||
OSSL_TIME initial_tick_deadline)
|
||||
{
|
||||
rtor->poll_r.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
|
||||
rtor->poll_w.type = BIO_POLL_DESCRIPTOR_TYPE_NONE;
|
||||
rtor->want_net_read = 0;
|
||||
rtor->want_net_write = 0;
|
||||
rtor->tick_deadline = initial_tick_deadline;
|
||||
|
||||
rtor->tick_cb = tick_cb;
|
||||
rtor->tick_cb_arg = tick_cb_arg;
|
||||
}
|
||||
|
||||
/* Sets the poll descriptor polled for readability (copied by value). */
void ossl_quic_reactor_set_poll_r(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *r)
{
    /* Struct copy; the caller's descriptor need not outlive this call. */
    rtor->poll_r = *r;
}
|
||||
|
||||
/* Sets the poll descriptor polled for writability (copied by value). */
void ossl_quic_reactor_set_poll_w(QUIC_REACTOR *rtor, const BIO_POLL_DESCRIPTOR *w)
{
    /* Struct copy; the caller's descriptor need not outlive this call. */
    rtor->poll_w = *w;
}
|
||||
|
||||
/* Returns the read poll descriptor last set on the reactor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_r(QUIC_REACTOR *rtor)
{
    /* Pointer into the reactor itself; valid while the reactor lives. */
    return &rtor->poll_r;
}
|
||||
|
||||
/* Returns the write poll descriptor last set on the reactor. */
const BIO_POLL_DESCRIPTOR *ossl_quic_reactor_get_poll_w(QUIC_REACTOR *rtor)
{
    /* Pointer into the reactor itself; valid while the reactor lives. */
    return &rtor->poll_w;
}
|
||||
|
||||
/*
 * Returns whether the reactor wants to be woken when the network is readable,
 * as reported by the most recent tick callback.
 */
int ossl_quic_reactor_want_net_read(QUIC_REACTOR *rtor)
{
    return rtor->want_net_read;
}
|
||||
|
||||
/*
 * Returns whether the reactor wants to be woken when the network is writable,
 * as reported by the most recent tick callback.
 */
int ossl_quic_reactor_want_net_write(QUIC_REACTOR *rtor)
{
    return rtor->want_net_write;
}
|
||||
|
||||
/*
 * Returns the deadline by which the reactor should next be ticked;
 * ossl_time_infinite() if none is currently applicable.
 */
OSSL_TIME ossl_quic_reactor_get_tick_deadline(QUIC_REACTOR *rtor)
{
    return rtor->tick_deadline;
}
|
||||
|
||||
/*
 * Runs the tick callback once and latches its wakeup requirements into the
 * reactor. Always returns 1.
 */
int ossl_quic_reactor_tick(QUIC_REACTOR *rtor)
{
    QUIC_TICK_RESULT result = {0};

    /*
     * The tick callback deliberately has no failure path: ticking is by its
     * nature best effort, so an explicit error indication to the user would
     * not be very meaningful here. If something fatal happens to a
     * connection, it is reported on the next actual application I/O call.
     */
    rtor->tick_cb(&result, rtor->tick_cb_arg);

    /* Record what the mutator wants to be woken up for, and by when. */
    rtor->want_net_read  = result.want_net_read;
    rtor->want_net_write = result.want_net_write;
    rtor->tick_deadline  = result.tick_deadline;
    return 1;
}
|
||||
|
||||
/*
|
||||
* Blocking I/O Adaptation Layer
|
||||
* =============================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Utility which can be used to poll on up to two FDs. This is designed to
|
||||
* support use of split FDs (e.g. with SSL_set_rfd and SSL_set_wfd where
|
||||
* different FDs are used for read and write).
|
||||
*
|
||||
* Generally use of poll(2) is preferred where available. Windows, however,
|
||||
* hasn't traditionally offered poll(2), only select(2). WSAPoll() was
|
||||
* introduced in Vista but has seemingly been buggy until relatively recent
|
||||
* versions of Windows 10. Moreover we support XP so this is not a suitable
|
||||
* target anyway. However, the traditional issues with select(2) turn out not to
|
||||
* be an issue on Windows; whereas traditional *NIX select(2) uses a bitmap of
|
||||
* FDs (and thus is limited in the magnitude of the FDs expressible), Windows
|
||||
* select(2) is very different. In Windows, socket handles are not allocated
|
||||
* contiguously from zero and thus this bitmap approach was infeasible. Thus in
|
||||
* adapting the Berkeley sockets API to Windows a different approach was taken
|
||||
* whereby the fd_set contains a fixed length array of socket handles and an
|
||||
* integer indicating how many entries are valid; thus Windows select()
|
||||
* ironically is actually much more like *NIX poll(2) than *NIX select(2). In
|
||||
* any case, this means that the relevant limit for Windows select() is the
|
||||
* number of FDs being polled, not the magnitude of those FDs. Since we only
|
||||
* poll for two FDs here, this limit does not concern us.
|
||||
*
|
||||
* Usage: rfd and wfd may be the same or different. Either or both may also be
|
||||
* -1. If rfd_want_read is 1, rfd is polled for readability, and if
|
||||
* wfd_want_write is 1, wfd is polled for writability. Note that since any
|
||||
* passed FD is always polled for error conditions, setting rfd_want_read=0 and
|
||||
* wfd_want_write=0 is not the same as passing -1 for both FDs.
|
||||
*
|
||||
* deadline is a timestamp to return at. If it is ossl_time_infinite(), the call
|
||||
* never times out.
|
||||
*
|
||||
* Returns 0 on error and 1 on success. Timeout expiry is considered a success
|
||||
* condition. We don't elaborate our return values here because the way we are
|
||||
* actually using this doesn't currently care.
|
||||
*/
|
||||
static int poll_two_fds(int rfd, int rfd_want_read,
                        int wfd, int wfd_want_write,
                        OSSL_TIME deadline)
{
#if defined(OSSL_SYS_WINDOWS) || !defined(POLLIN)
    /* select(2) path: Windows, or platforms without poll(2). */
    fd_set rfd_set, wfd_set, efd_set;
    OSSL_TIME now, timeout;
    struct timeval tv, *ptv;
    int maxfd, pres;

#ifndef OSSL_SYS_WINDOWS
    /*
     * On Windows there is no relevant limit to the magnitude of a fd value (see
     * above). On *NIX the fd_set uses a bitmap and we must check the limit.
     */
    if (rfd >= FD_SETSIZE || wfd >= FD_SETSIZE)
        return 0;
#endif

    FD_ZERO(&rfd_set);
    FD_ZERO(&wfd_set);
    FD_ZERO(&efd_set);

    /* Only register each FD for the events the caller asked about. */
    if (rfd != -1 && rfd_want_read)
        openssl_fdset(rfd, &rfd_set);
    if (wfd != -1 && wfd_want_write)
        openssl_fdset(wfd, &wfd_set);

    /* Always check for error conditions. */
    if (rfd != -1)
        openssl_fdset(rfd, &efd_set);
    if (wfd != -1)
        openssl_fdset(wfd, &efd_set);

    /* select() wants the highest-numbered FD plus one. */
    maxfd = rfd;
    if (wfd > maxfd)
        maxfd = wfd;

    if (rfd == -1 && wfd == -1 && ossl_time_is_infinite(deadline))
        /* Do not block forever; should not happen. */
        return 0;

    do {
        /*
         * select expects a timeout, not a deadline, so do the conversion.
         * Update for each call to ensure the correct value is used if we repeat
         * due to EINTR.
         */
        if (ossl_time_is_infinite(deadline)) {
            ptv = NULL;
        } else {
            now = ossl_time_now();
            /*
             * ossl_time_subtract saturates to zero so we don't need to check if
             * now > deadline.
             */
            timeout = ossl_time_subtract(deadline, now);
            tv = ossl_time_to_timeval(timeout);
            ptv = &tv;
        }

        pres = select(maxfd + 1, &rfd_set, &wfd_set, &efd_set, ptv);
    } while (pres == -1 && get_last_socket_error_is_eintr());

    /* Timeout expiry (pres == 0) counts as success; see function comment. */
    return pres < 0 ? 0 : 1;
#else
    /* poll(2) path: preferred where available. */
    int pres, timeout_ms;
    OSSL_TIME now, timeout;
    struct pollfd pfds[2] = {0};
    size_t npfd = 0;

    if (rfd == wfd) {
        /* Same FD for both directions: merge into a single pollfd entry. */
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0)
                            | (wfd_want_write ? POLLOUT : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    } else {
        pfds[npfd].fd = rfd;
        pfds[npfd].events = (rfd_want_read ? POLLIN : 0);
        if (rfd >= 0 && pfds[npfd].events != 0)
            ++npfd;

        pfds[npfd].fd = wfd;
        pfds[npfd].events = (wfd_want_write ? POLLOUT : 0);
        if (wfd >= 0 && pfds[npfd].events != 0)
            ++npfd;
    }

    if (npfd == 0 && ossl_time_is_infinite(deadline))
        /* Do not block forever; should not happen. */
        return 0;

    do {
        /* Recompute the relative timeout each iteration in case of EINTR. */
        if (ossl_time_is_infinite(deadline)) {
            timeout_ms = -1;
        } else {
            now = ossl_time_now();
            timeout = ossl_time_subtract(deadline, now);
            timeout_ms = ossl_time2ms(timeout);
        }

        pres = poll(pfds, npfd, timeout_ms);
    } while (pres == -1 && get_last_socket_error_is_eintr());

    /* Timeout expiry (pres == 0) counts as success; see function comment. */
    return pres < 0 ? 0 : 1;
#endif
}
|
||||
|
||||
static int poll_descriptor_to_fd(const BIO_POLL_DESCRIPTOR *d, int *fd)
|
||||
{
|
||||
if (d == NULL || d->type == BIO_POLL_DESCRIPTOR_TYPE_NONE) {
|
||||
*fd = -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (d->type != BIO_POLL_DESCRIPTOR_TYPE_SOCK_FD || d->value.fd < 0)
|
||||
return 0;
|
||||
|
||||
*fd = d->value.fd;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Poll up to two abstract poll descriptors. Currently we only support
|
||||
* poll descriptors which represent FDs.
|
||||
*/
|
||||
static int poll_two_descriptors(const BIO_POLL_DESCRIPTOR *r, int r_want_read,
|
||||
const BIO_POLL_DESCRIPTOR *w, int w_want_write,
|
||||
OSSL_TIME deadline)
|
||||
{
|
||||
int rfd, wfd;
|
||||
|
||||
if (!poll_descriptor_to_fd(r, &rfd)
|
||||
|| !poll_descriptor_to_fd(w, &wfd))
|
||||
return 0;
|
||||
|
||||
return poll_two_fds(rfd, r_want_read, wfd, w_want_write, deadline);
|
||||
}
|
||||
|
||||
int ossl_quic_reactor_block_until_pred(QUIC_REACTOR *rtor,
|
||||
int (*pred)(void *arg), void *pred_arg,
|
||||
uint32_t flags)
|
||||
{
|
||||
int res;
|
||||
|
||||
for (;;) {
|
||||
if ((flags & SKIP_FIRST_TICK) != 0)
|
||||
flags &= ~SKIP_FIRST_TICK;
|
||||
else
|
||||
/* best effort */
|
||||
ossl_quic_reactor_tick(rtor);
|
||||
|
||||
if ((res = pred(pred_arg)) != 0)
|
||||
return res;
|
||||
|
||||
if (!poll_two_descriptors(ossl_quic_reactor_get_poll_r(rtor),
|
||||
ossl_quic_reactor_want_net_read(rtor),
|
||||
ossl_quic_reactor_get_poll_w(rtor),
|
||||
ossl_quic_reactor_want_net_write(rtor),
|
||||
ossl_quic_reactor_get_tick_deadline(rtor)))
|
||||
/*
|
||||
* We don't actually care why the call succeeded (timeout, FD
|
||||
* readiness), we just call reactor_tick and start trying to do I/O
|
||||
* things again. If poll_two_fds returns 0, this is some other
|
||||
* non-timeout failure and we should stop here.
|
||||
*
|
||||
* TODO(QUIC): In the future we could avoid unnecessary syscalls by
|
||||
* not retrying network I/O that isn't ready based on the result of
|
||||
* the poll call. However this might be difficult because it
|
||||
* requires we do the call to poll(2) or equivalent syscall
|
||||
* ourselves, whereas in the general case the application does the
|
||||
* polling and just calls SSL_tick(). Implementing this optimisation
|
||||
* in the future will probably therefore require API changes.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user