openssl/ssl/quic/quic_stream.c
Hugo Landau 8302259013 QUIC Send Stream Management
Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/19159)
2022-10-05 16:15:06 +02:00

538 lines
15 KiB
C

/*
* Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#include "internal/quic_stream.h"
#include "internal/uint_set.h"
#include "internal/common.h"
/*
* ==================================================================
* Byte-wise ring buffer which supports pushing and popping blocks of multiple
* bytes at a time. The logical offset of each byte for the purposes of a QUIC
* stream is tracked. Bytes can be popped from the ring buffer in two stages;
* first they are popped, and then they are culled. Bytes which have been popped
* but not yet culled will not be overwritten, and can be restored.
*/
struct ring_buf {
void *start;
size_t alloc; /* size of buffer allocation in bytes */
/*
* Logical offset of the head (where we append to). This is the current size
* of the QUIC stream. This increases monotonically.
*/
uint64_t head_offset;
/*
* Logical offset of the cull tail. Data is no longer needed and is
* deallocated as the cull tail advances, which occurs as data is
* acknowledged. This increases monotonically.
*/
uint64_t ctail_offset;
};
static int ring_buf_init(struct ring_buf *r)
{
r->start = NULL;
r->alloc = 0;
r->head_offset = r->ctail_offset = 0;
return 1;
}
static void ring_buf_destroy(struct ring_buf *r)
{
OPENSSL_free(r->start);
r->start = NULL;
r->alloc = 0;
}
static size_t ring_buf_used(struct ring_buf *r)
{
return r->head_offset - r->ctail_offset;
}
static size_t ring_buf_avail(struct ring_buf *r)
{
return r->alloc - ring_buf_used(r);
}
static size_t ring_buf_push(struct ring_buf *r,
const unsigned char *buf, size_t buf_len)
{
size_t pushed = 0, avail, idx, l, i;
unsigned char *start = r->start;
for (i = 0;; ++i) {
avail = ring_buf_avail(r);
if (buf_len > avail)
buf_len = avail;
if (buf_len == 0)
break;
assert(i < 2);
idx = r->head_offset % r->alloc;
l = r->alloc - idx;
if (buf_len < l)
l = buf_len;
memcpy(start + idx, buf, l);
r->head_offset += l;
buf += l;
buf_len -= l;
pushed += l;
}
return pushed;
}
/*
* Retrieves data out of the read size of the ring buffer starting at the given
* logical offset. *buf is set to point to a contiguous span of bytes and
* *buf_len is set to the number of contiguous bytes. After this function
* returns, there may or may not be more bytes available at the logical offset
* of (logical_offset + *buf_len) by calling this function again. If the logical
* offset is out of the range retained by the ring buffer, returns 0, else
* returns 1. A logical offset at the end of the range retained by the ring
* buffer is not considered an error and is returned with a *buf_len of 0.
*
* The ring buffer state is not changed.
*/
static int ring_buf_get_buf_at(const struct ring_buf *r,
uint64_t logical_offset,
const unsigned char **buf, size_t *buf_len)
{
const unsigned char *start = r->start;
size_t idx, l;
if (logical_offset > r->head_offset || logical_offset < r->ctail_offset)
return 0;
if (r->alloc == 0) {
*buf = NULL;
*buf_len = 0;
return 1;
}
idx = logical_offset % r->alloc;
l = r->head_offset - logical_offset;
if (l > r->alloc - idx)
l = r->alloc - idx;
*buf = start + idx;
*buf_len = l;
return 1;
}
static void ring_buf_cpop_range(struct ring_buf *r,
uint64_t start, uint64_t end)
{
assert(end >= start);
if (start > r->ctail_offset)
return;
r->ctail_offset = end + 1;
}
static int ring_buf_resize(struct ring_buf *r, size_t num_bytes)
{
struct ring_buf rnew = {0};
const unsigned char *src = NULL;
size_t src_len = 0, copied = 0;
if (num_bytes == r->alloc)
return 1;
if (num_bytes < ring_buf_used(r))
return 0;
rnew.start = OPENSSL_malloc(num_bytes);
if (rnew.start == NULL)
return 0;
rnew.alloc = num_bytes;
rnew.head_offset = r->head_offset - ring_buf_used(r);
rnew.ctail_offset = rnew.head_offset;
for (;;) {
if (!ring_buf_get_buf_at(r, r->ctail_offset + copied, &src, &src_len)) {
OPENSSL_free(rnew.start);
return 0;
}
if (src_len == 0)
break;
if (ring_buf_push(&rnew, src, src_len) != src_len) {
OPENSSL_free(rnew.start);
return 0;
}
copied += src_len;
}
assert(rnew.head_offset == r->head_offset);
rnew.ctail_offset = r->ctail_offset;
OPENSSL_free(r->start);
memcpy(r, &rnew, sizeof(*r));
return 1;
}
/*
* ==================================================================
* QUIC Send Stream
*/
struct quic_sstream_st {
struct ring_buf ring_buf;
/*
* Any logical byte in the stream is in one of these states:
*
* - NEW: The byte has not yet been transmitted, or has been lost and is
* in need of retransmission.
*
* - IN_FLIGHT: The byte has been transmitted but is awaiting
* acknowledgement. We continue to store the data in case we return
* to the NEW state.
*
* - ACKED: The byte has been acknowledged and we can cease storing it.
* We do not necessarily cull it immediately, so there may be a delay
* between reaching the ACKED state and the buffer space actually being
* recycled.
*
* A logical byte in the stream is
*
* - in the NEW state if it is in new_set;
* - is in the ACKED state if it is in acked_set
* (and may or may not have been culled);
* - is in the IN_FLIGHT state otherwise.
*
* Invariant: No logical byte is ever in both new_set and acked_set.
*/
UINT_SET new_set, acked_set;
/*
* The current size of the stream is ring_buf.head_offset. If
* have_final_size is true, this is also the final size of the stream.
*/
unsigned int have_final_size : 1;
unsigned int sent_final_size : 1;
unsigned int acked_final_size : 1;
};
static void qss_cull(QUIC_SSTREAM *qss);
QUIC_SSTREAM *ossl_quic_sstream_new(size_t init_buf_size)
{
QUIC_SSTREAM *qss;
qss = OPENSSL_zalloc(sizeof(QUIC_SSTREAM));
if (qss == NULL)
return NULL;
ring_buf_init(&qss->ring_buf);
if (!ring_buf_resize(&qss->ring_buf, init_buf_size)) {
ring_buf_destroy(&qss->ring_buf);
OPENSSL_free(qss);
return NULL;
}
ossl_uint_set_init(&qss->new_set);
ossl_uint_set_init(&qss->acked_set);
return qss;
}
void ossl_quic_sstream_free(QUIC_SSTREAM *qss)
{
if (qss == NULL)
return;
ossl_uint_set_destroy(&qss->new_set);
ossl_uint_set_destroy(&qss->acked_set);
ring_buf_destroy(&qss->ring_buf);
OPENSSL_free(qss);
}
int ossl_quic_sstream_get_stream_frame(QUIC_SSTREAM *qss,
size_t skip,
OSSL_QUIC_FRAME_STREAM *hdr,
OSSL_QTX_IOVEC *iov,
size_t *num_iov)
{
size_t num_iov_ = 0, src_len = 0, total_len = 0, i;
uint64_t max_len;
const unsigned char *src = NULL;
UINT_SET_ITEM *range = qss->new_set.head;
if (*num_iov < 2)
return 0;
for (i = 0; i < skip && range != NULL; ++i)
range = range->next;
if (range == NULL) {
/* No new bytes to send, but we might have a FIN */
if (!qss->have_final_size || qss->sent_final_size)
return 0;
hdr->offset = qss->ring_buf.head_offset;
hdr->len = 0;
hdr->is_fin = 1;
*num_iov = 0;
return 1;
}
/*
* We can only send a contiguous range of logical bytes in a single
* stream frame, so limit ourselves to the range of the first set entry.
*
* Set entries never have 'adjacent' entries so we don't have to worry
* about them here.
*/
max_len = range->range.end - range->range.start + 1;
for (i = 0;; ++i) {
if (total_len >= max_len)
break;
if (!ring_buf_get_buf_at(&qss->ring_buf,
range->range.start + total_len,
&src, &src_len))
return 0;
if (src_len == 0)
break;
assert(i < 2);
if (total_len + src_len > max_len)
src_len = max_len - total_len;
iov[num_iov_].buf = src;
iov[num_iov_].buf_len = src_len;
total_len += src_len;
++num_iov_;
}
hdr->offset = range->range.start;
hdr->len = total_len;
hdr->is_fin = qss->have_final_size
&& hdr->offset + hdr->len == qss->ring_buf.head_offset;
*num_iov = num_iov_;
return 1;
}
int ossl_quic_sstream_mark_transmitted(QUIC_SSTREAM *qss,
uint64_t start,
uint64_t end)
{
UINT_RANGE r;
r.start = start;
r.end = end;
if (!ossl_uint_set_remove(&qss->new_set, &r))
return 0;
return 1;
}
int ossl_quic_sstream_mark_transmitted_fin(QUIC_SSTREAM *qss,
uint64_t final_size)
{
/*
* We do not really need final_size since we already know the size of the
* stream, but this serves as a sanity check.
*/
if (!qss->have_final_size || final_size != qss->ring_buf.head_offset)
return 0;
qss->sent_final_size = 1;
return 1;
}
int ossl_quic_sstream_mark_lost(QUIC_SSTREAM *qss,
uint64_t start,
uint64_t end)
{
UINT_RANGE r;
r.start = start;
r.end = end;
/*
* We lost a range of stream data bytes, so reinsert them into the new set,
* so that they are returned once more by ossl_quic_sstream_get_stream_frame.
*/
if (!ossl_uint_set_insert(&qss->new_set, &r))
return 0;
return 1;
}
int ossl_quic_sstream_mark_lost_fin(QUIC_SSTREAM *qss)
{
if (qss->acked_final_size)
/* Does not make sense to lose a FIN after it has been ACKed */
return 0;
/* FIN was lost, so we need to transmit it again. */
qss->sent_final_size = 0;
return 1;
}
int ossl_quic_sstream_mark_acked(QUIC_SSTREAM *qss,
uint64_t start,
uint64_t end)
{
UINT_RANGE r;
r.start = start;
r.end = end;
if (!ossl_uint_set_insert(&qss->acked_set, &r))
return 0;
qss_cull(qss);
return 1;
}
int ossl_quic_sstream_mark_acked_fin(QUIC_SSTREAM *qss)
{
if (!qss->have_final_size)
/* Cannot ack final size before we have a final size */
return 0;
qss->acked_final_size = 1;
return 1;
}
void ossl_quic_sstream_fin(QUIC_SSTREAM *qss)
{
if (qss->have_final_size)
return;
qss->have_final_size = 1;
}
int ossl_quic_sstream_append(QUIC_SSTREAM *qss,
const unsigned char *buf,
size_t buf_len,
size_t *consumed)
{
size_t l, consumed_ = 0;
UINT_RANGE r;
struct ring_buf old_ring_buf = qss->ring_buf;
if (qss->have_final_size) {
*consumed = 0;
return 0;
}
/*
* Note: It is assumed that ossl_quic_sstream_append will be called during a
* call to e.g. SSL_write and this function is therefore designed to support
* such semantics. In particular, the buffer pointed to by buf is only
* assumed to be valid for the duration of this call, therefore we must copy
* the data here. We will later copy-and-encrypt the data during packet
* encryption, so this is a two-copy design. Supporting a one-copy design in
* the future will require applications to use a different kind of API.
* Supporting such changes in future will require corresponding enhancements
* to this code.
*/
while (buf_len > 0) {
l = ring_buf_push(&qss->ring_buf, buf, buf_len);
if (l == 0)
break;
buf += l;
buf_len -= l;
consumed_ += l;
}
if (consumed_ > 0) {
r.start = old_ring_buf.head_offset;
r.end = r.start + consumed_ - 1;
assert(r.end + 1 == qss->ring_buf.head_offset);
if (!ossl_uint_set_insert(&qss->new_set, &r)) {
qss->ring_buf = old_ring_buf;
*consumed = 0;
return 0;
}
}
*consumed = consumed_;
return 1;
}
static void qss_cull(QUIC_SSTREAM *qss)
{
/*
* Potentially cull data from our ring buffer. This can happen once data has
* been ACKed and we know we are never going to have to transmit it again.
*
* Since we use a ring buffer design for simplicity, we cannot cull byte n +
* k (for k > 0) from the ring buffer until byte n has also been culled.
* This means if parts of the stream get acknowledged out of order we might
* keep around some data we technically don't need to for a while. The
* impact of this is likely to be small and limited to quite a short
* duration, and doesn't justify the use of a more complex design.
*/
/*
* We only need to check the first range entry in the integer set because we
* can only cull contiguous areas at the start of the ring buffer anyway.
*/
if (qss->acked_set.head != NULL)
ring_buf_cpop_range(&qss->ring_buf,
qss->acked_set.head->range.start,
qss->acked_set.head->range.end);
}
int ossl_quic_sstream_set_buffer_size(QUIC_SSTREAM *qss, size_t num_bytes)
{
return ring_buf_resize(&qss->ring_buf, num_bytes);
}
size_t ossl_quic_sstream_get_buffer_size(QUIC_SSTREAM *qss)
{
return qss->ring_buf.alloc;
}
size_t ossl_quic_sstream_get_buffer_used(QUIC_SSTREAM *qss)
{
return ring_buf_used(&qss->ring_buf);
}
size_t ossl_quic_sstream_get_buffer_avail(QUIC_SSTREAM *qss)
{
return ring_buf_avail(&qss->ring_buf);
}
void ossl_quic_sstream_adjust_iov(size_t len,
OSSL_QTX_IOVEC *iov,
size_t num_iov)
{
size_t running = 0, i, iovlen;
for (i = 0, running = 0; i < num_iov; ++i) {
iovlen = iov[i].buf_len;
if (running >= len)
iov[i].buf_len = 0;
else if (running + iovlen > len)
iov[i].buf_len = len - running;
running += iovlen;
}
}