427 lines
14 KiB
C
427 lines
14 KiB
C
/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */
|
|
/*
|
|
* lsquic_di_nocopy.c -- The "no-copy" data in stream.
|
|
*
|
|
* Data from packets is not copied: the packets are referenced by stream
|
|
* frames. When all data from stream frame is read, the frame is released
|
|
* and packet reference count is decremented, which possibly results in
|
|
* packet being released as well.
|
|
*
|
|
* This approach works well in regular circumstances; there are two scenarios
|
|
* when it does not:
|
|
*
|
|
* A. If we have many out-of-order frames, insertion into the list becomes
|
|
* expensive. In the degenerate case, we'd have to traverse the whole
|
|
* list to find appropriate position.
|
|
*
|
|
* B. Having many frames ties up resources, as each frame keeps a reference
|
|
* to the packet that contains it. This is a possible attack vector:
|
|
* send many one-byte packets; a single hole at the beginning will stop
|
|
* the server from being able to read the stream, thus tying up resources.
|
|
*
|
|
* If we detect that either (A) or (B) is true, we request that the stream
|
|
* switch to a more robust incoming stream frame handler by setting
|
|
* DI_SWITCH_IMPL flag.
|
|
*
|
|
* For a small number of elements, (A) and (B) do not matter and the checks
|
|
* are not performed. This number is defined by EFF_CHECK_THRESH_LOW. On
|
|
* the other side of the spectrum, if the number of frames grows very high,
|
|
* we want to switch to a more memory-efficient implementation even if (A)
|
|
* and (B) are not true. EFF_CHECK_THRESH_HIGH defines this threshold.
|
|
*
|
|
* Between the low and high thresholds, we detect efficiency problems as
|
|
* follows.
|
|
*
|
|
* To detect (A), we count how many elements we have to traverse during
|
|
* insertion. If we have to traverse at least half the list
|
|
* more than EFF_FAR_TRAVERSE_COUNT times in a row, DI_SWITCH_IMPL is issued.
|
|
*
|
|
* If average stream frame size is smaller than EFF_TINY_FRAME_SZ bytes,
|
|
* (B) condition is true. In addition, if there are more than EFF_MAX_HOLES
|
|
* in the stream, this is also indicative of (B).
|
|
*/
|
|
|
|
|
|
#include <assert.h>
|
|
#include <inttypes.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <sys/queue.h>
|
|
|
|
#include "lsquic.h"
|
|
#include "lsquic_types.h"
|
|
#include "lsquic_int_types.h"
|
|
#include "lsquic_conn_flow.h"
|
|
#include "lsquic_packet_in.h"
|
|
#include "lsquic_rtt.h"
|
|
#include "lsquic_sfcw.h"
|
|
#include "lsquic_stream.h"
|
|
#include "lsquic_mm.h"
|
|
#include "lsquic_malo.h"
|
|
#include "lsquic_conn.h"
|
|
#include "lsquic_conn_public.h"
|
|
#include "lsquic_data_in_if.h"
|
|
|
|
|
|
#define LSQUIC_LOGGER_MODULE LSQLM_DI
|
|
#define LSQUIC_LOG_CONN_ID ncdi->ncdi_conn_pub->lconn->cn_cid
|
|
#define LSQUIC_LOG_STREAM_ID ncdi->ncdi_stream_id
|
|
#include "lsquic_logger.h"
|
|
|
|
|
|
/* Tunables used by the efficiency checks. */
enum
{
    /* With this many frames or fewer on the list, efficiency conditions
     * are not checked at all.
     */
    EFF_CHECK_THRESH_LOW    = 10,

    /* With more frames than this on the list, the efficiency alert is
     * issued unconditionally.
     */
    EFF_CHECK_THRESH_HIGH   = 1000,

    /* Maximum number of consecutive far traversals that is tolerated */
    EFF_FAR_TRAVERSE_COUNT  = 4,

    /* Maximum number of holes that is not deemed suspicious */
    EFF_MAX_HOLES           = 5,

    /* What is deemed a tiny frame, in bytes.  If it is a power of two,
     * calculation is cheaper.
     */
    EFF_TINY_FRAME_SZ       = 64
};
|
|
|
|
|
|
TAILQ_HEAD(stream_frames_tailq, stream_frame);
|
|
|
|
|
|
struct nocopy_data_in
|
|
{
|
|
struct stream_frames_tailq ncdi_frames_in;
|
|
struct data_in ncdi_data_in;
|
|
struct lsquic_conn_public *ncdi_conn_pub;
|
|
uint64_t ncdi_byteage;
|
|
uint32_t ncdi_stream_id;
|
|
unsigned ncdi_n_frames;
|
|
unsigned ncdi_n_holes;
|
|
unsigned ncdi_cons_far;
|
|
};
|
|
|
|
|
|
/* Recover the enclosing `struct nocopy_data_in' from a pointer to its
 * embedded `ncdi_data_in' member.
 *
 * The expansion is fully parenthesized so that the result can be followed
 * by `->' or used inside a larger expression.
 */
#define NCDI_PTR(di_ptr) ((struct nocopy_data_in *) \
    ((unsigned char *) (di_ptr) - offsetof(struct nocopy_data_in, ncdi_data_in)))

/* Recover the enclosing `struct stream_frame' from a pointer to its embedded
 * `data_frame' member.
 *
 * The macro parameter is deliberately NOT named `data_frame': a parameter
 * with that name would also replace the member designator inside offsetof(),
 * making the macro usable only with an argument spelled `data_frame'.
 */
#define STREAM_FRAME_PTR(df_ptr) ((struct stream_frame *) \
    ((unsigned char *) (df_ptr) - offsetof(struct stream_frame, data_frame)))
|
|
|
|
|
|
/* Forward declaration: the interface table is defined at the bottom of the
 * file, but data_in_nocopy_new() needs its address.
 */
static const struct data_in_iface di_if_nocopy;
|
|
|
|
|
|
struct data_in *
|
|
data_in_nocopy_new (struct lsquic_conn_public *conn_pub, uint32_t stream_id)
|
|
{
|
|
struct nocopy_data_in *ncdi;
|
|
|
|
ncdi = malloc(sizeof(*ncdi));
|
|
if (!ncdi)
|
|
return NULL;
|
|
|
|
TAILQ_INIT(&ncdi->ncdi_frames_in);
|
|
ncdi->ncdi_data_in.di_if = &di_if_nocopy;
|
|
ncdi->ncdi_data_in.di_flags = 0;
|
|
ncdi->ncdi_conn_pub = conn_pub;
|
|
ncdi->ncdi_stream_id = stream_id;
|
|
ncdi->ncdi_byteage = 0;
|
|
ncdi->ncdi_n_frames = 0;
|
|
ncdi->ncdi_n_holes = 0;
|
|
ncdi->ncdi_cons_far = 0;
|
|
return &ncdi->ncdi_data_in;
|
|
}
|
|
|
|
|
|
static void
|
|
nocopy_di_destroy (struct data_in *data_in)
|
|
{
|
|
struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
|
|
stream_frame_t *frame;
|
|
while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
|
|
{
|
|
TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
|
|
lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
|
|
lsquic_malo_put(frame);
|
|
}
|
|
free(ncdi);
|
|
}
|
|
|
|
|
|
/* Debugging aid: CHECK_ORDER() asserts that the frame list is sorted by
 * offset.  The check is compiled out while the condition below is `#if 1',
 * in which case CHECK_ORDER() expands to nothing.
 */
#if 1
#define CHECK_ORDER(ncdi)
#else
/* Return 1 if frame offsets on the list never decrease, 0 otherwise */
static int
ordered (const struct nocopy_data_in *ncdi)
{
    const stream_frame_t *frame;
    uint64_t prev_off = 0;

    TAILQ_FOREACH(frame, &ncdi->ncdi_frames_in, next_frame)
    {
        if (frame->data_frame.df_offset < prev_off)
            return 0;
        prev_off = frame->data_frame.df_offset;
    }

    return 1;
}
#define CHECK_ORDER(ncdi) assert(ordered(ncdi))
#endif
|
|
|
|
|
|
/* To reduce the number of conditionals, logical operators have been replaced
|
|
* with arithmetic operators. Return value is an integer in range [0, 3].
|
|
* Bit 0 is set due to FIN in previous frame. If bit 1 is set, it means that
|
|
* it's a dup.
|
|
*/
|
|
static int
|
|
insert_frame (struct nocopy_data_in *ncdi, struct stream_frame *new_frame,
|
|
uint64_t read_offset, unsigned *p_n_frames)
|
|
{
|
|
int ins;
|
|
unsigned count;
|
|
stream_frame_t *prev_frame, *next_frame;
|
|
|
|
/* Find position in the list, going backwards. We go backwards because
|
|
* that is the most likely scenario.
|
|
*/
|
|
next_frame = TAILQ_LAST(&ncdi->ncdi_frames_in, stream_frames_tailq);
|
|
if (next_frame && new_frame->data_frame.df_offset < next_frame->data_frame.df_offset)
|
|
{
|
|
count = 1;
|
|
prev_frame = TAILQ_PREV(next_frame, stream_frames_tailq, next_frame);
|
|
for ( ; prev_frame &&
|
|
new_frame->data_frame.df_offset < next_frame->data_frame.df_offset;
|
|
next_frame = prev_frame,
|
|
prev_frame = TAILQ_PREV(prev_frame, stream_frames_tailq, next_frame))
|
|
{
|
|
if (new_frame->data_frame.df_offset >= prev_frame->data_frame.df_offset)
|
|
break;
|
|
++count;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
count = 0;
|
|
prev_frame = NULL;
|
|
}
|
|
|
|
if (!prev_frame && next_frame && new_frame->data_frame.df_offset >=
|
|
next_frame->data_frame.df_offset)
|
|
{
|
|
prev_frame = next_frame;
|
|
next_frame = TAILQ_NEXT(next_frame, next_frame);
|
|
}
|
|
|
|
/* Perform checks */
|
|
if (prev_frame)
|
|
ins =
|
|
(((prev_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
|
|
(prev_frame->data_frame.df_size == new_frame->data_frame.df_size) &
|
|
(prev_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */
|
|
| prev_frame->data_frame.df_fin /* FIN in the middle or dup */
|
|
| (prev_frame->data_frame.df_offset + prev_frame->data_frame.df_size
|
|
> new_frame->data_frame.df_offset) /* Overlap */
|
|
;
|
|
else
|
|
ins = 0;
|
|
|
|
if (next_frame)
|
|
ins |=
|
|
(((next_frame->data_frame.df_offset == new_frame->data_frame.df_offset) &
|
|
(next_frame->data_frame.df_size == new_frame->data_frame.df_size) &
|
|
(next_frame->data_frame.df_fin == new_frame->data_frame.df_fin)) << 1) /* Duplicate */
|
|
| (new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */
|
|
| new_frame->data_frame.df_fin /* FIN in the middle or dup */
|
|
| (new_frame->data_frame.df_offset + new_frame->data_frame.df_size
|
|
> next_frame->data_frame.df_offset) /* Overlap */
|
|
;
|
|
else
|
|
ins |=
|
|
(new_frame->data_frame.df_offset < read_offset) << 1 /* Duplicate */
|
|
;
|
|
|
|
if (ins)
|
|
return ins;
|
|
|
|
if (prev_frame)
|
|
{
|
|
TAILQ_INSERT_AFTER(&ncdi->ncdi_frames_in, prev_frame, new_frame, next_frame);
|
|
ncdi->ncdi_n_holes += prev_frame->data_frame.df_offset +
|
|
prev_frame->data_frame.df_size != new_frame->data_frame.df_offset;
|
|
if (next_frame)
|
|
{
|
|
ncdi->ncdi_n_holes += new_frame->data_frame.df_offset +
|
|
new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
|
|
--ncdi->ncdi_n_holes;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ncdi->ncdi_n_holes += next_frame && new_frame->data_frame.df_offset +
|
|
new_frame->data_frame.df_size != next_frame->data_frame.df_offset;
|
|
TAILQ_INSERT_HEAD(&ncdi->ncdi_frames_in, new_frame, next_frame);
|
|
}
|
|
CHECK_ORDER(ncdi);
|
|
|
|
++ncdi->ncdi_n_frames;
|
|
ncdi->ncdi_byteage += new_frame->data_frame.df_size;
|
|
*p_n_frames = count;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
check_efficiency (struct nocopy_data_in *ncdi, unsigned count)
|
|
{
|
|
if (ncdi->ncdi_n_frames <= EFF_CHECK_THRESH_LOW)
|
|
{
|
|
ncdi->ncdi_cons_far = 0;
|
|
return 0;
|
|
}
|
|
if (ncdi->ncdi_n_frames > EFF_CHECK_THRESH_HIGH)
|
|
return 1;
|
|
if (count >= ncdi->ncdi_n_frames / 2)
|
|
{
|
|
++ncdi->ncdi_cons_far;
|
|
if (ncdi->ncdi_cons_far > EFF_FAR_TRAVERSE_COUNT)
|
|
return 1;
|
|
}
|
|
else
|
|
ncdi->ncdi_cons_far = 0;
|
|
if (ncdi->ncdi_n_holes > EFF_MAX_HOLES)
|
|
return 1;
|
|
if (ncdi->ncdi_byteage / EFF_TINY_FRAME_SZ < ncdi->ncdi_n_frames)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void
|
|
set_eff_alert (struct nocopy_data_in *ncdi)
|
|
{
|
|
LSQ_DEBUG("low efficiency: n_frames: %u; n_holes: %u; cons_far: %u; "
|
|
"byteage: %"PRIu64, ncdi->ncdi_n_frames, ncdi->ncdi_n_holes,
|
|
ncdi->ncdi_cons_far, ncdi->ncdi_byteage);
|
|
ncdi->ncdi_data_in.di_flags |= DI_SWITCH_IMPL;
|
|
}
|
|
|
|
|
|
static enum ins_frame
|
|
nocopy_di_insert_frame (struct data_in *data_in,
|
|
struct stream_frame *new_frame, uint64_t read_offset)
|
|
{
|
|
struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
|
|
unsigned count;
|
|
int ins;
|
|
|
|
assert(0 == (new_frame->data_frame.df_fin & ~1));
|
|
ins = insert_frame(ncdi, new_frame, read_offset, &count);
|
|
switch (ins)
|
|
{
|
|
case 0:
|
|
if (check_efficiency(ncdi, count))
|
|
set_eff_alert(ncdi);
|
|
return INS_FRAME_OK;
|
|
case 2:
|
|
case 3:
|
|
lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
|
|
lsquic_malo_put(new_frame);
|
|
return INS_FRAME_DUP;
|
|
default:
|
|
assert(1 == ins);
|
|
lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, new_frame->packet_in);
|
|
lsquic_malo_put(new_frame);
|
|
return INS_FRAME_ERR;
|
|
}
|
|
}
|
|
|
|
|
|
static struct data_frame *
|
|
nocopy_di_get_frame (struct data_in *data_in, uint64_t read_offset)
|
|
{
|
|
struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
|
|
struct stream_frame *frame = TAILQ_FIRST(&ncdi->ncdi_frames_in);
|
|
if (frame && frame->data_frame.df_offset +
|
|
frame->data_frame.df_read_off == read_offset)
|
|
return &frame->data_frame;
|
|
else
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static void
|
|
nocopy_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
|
|
{
|
|
struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
|
|
struct stream_frame *const frame = STREAM_FRAME_PTR(data_frame), *first;
|
|
assert(data_frame->df_read_off == data_frame->df_size);
|
|
TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
|
|
first = TAILQ_FIRST(&ncdi->ncdi_frames_in);
|
|
ncdi->ncdi_n_holes -= first && frame->data_frame.df_offset +
|
|
frame->data_frame.df_size != first->data_frame.df_offset;
|
|
--ncdi->ncdi_n_frames;
|
|
ncdi->ncdi_byteage -= frame->data_frame.df_size;
|
|
lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
|
|
lsquic_malo_put(frame);
|
|
}
|
|
|
|
|
|
static int
|
|
nocopy_di_empty (struct data_in *data_in)
|
|
{
|
|
struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
|
|
return TAILQ_EMPTY(&ncdi->ncdi_frames_in);
|
|
}
|
|
|
|
|
|
struct data_in *
|
|
nocopy_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
|
|
{
|
|
struct nocopy_data_in *const ncdi = NCDI_PTR(data_in);
|
|
struct data_in *new_data_in;
|
|
stream_frame_t *frame;
|
|
enum ins_frame ins;
|
|
|
|
new_data_in = data_in_hash_new(ncdi->ncdi_conn_pub, ncdi->ncdi_stream_id,
|
|
ncdi->ncdi_byteage);
|
|
if (!new_data_in)
|
|
goto end;
|
|
|
|
while ((frame = TAILQ_FIRST(&ncdi->ncdi_frames_in)))
|
|
{
|
|
TAILQ_REMOVE(&ncdi->ncdi_frames_in, frame, next_frame);
|
|
ins = data_in_hash_insert_data_frame(new_data_in, &frame->data_frame,
|
|
read_offset);
|
|
lsquic_packet_in_put(ncdi->ncdi_conn_pub->mm, frame->packet_in);
|
|
lsquic_malo_put(frame);
|
|
if (INS_FRAME_ERR == ins)
|
|
{
|
|
new_data_in->di_if->di_destroy(new_data_in);
|
|
new_data_in = NULL;
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
end:
|
|
data_in->di_if->di_destroy(data_in);
|
|
return new_data_in;
|
|
}
|
|
|
|
|
|
static const struct data_in_iface di_if_nocopy = {
|
|
.di_destroy = nocopy_di_destroy,
|
|
.di_empty = nocopy_di_empty,
|
|
.di_frame_done = nocopy_di_frame_done,
|
|
.di_get_frame = nocopy_di_get_frame,
|
|
.di_insert_frame = nocopy_di_insert_frame,
|
|
.di_switch_impl = nocopy_di_switch_impl,
|
|
};
|