litespeed-quic/src/liblsquic/lsquic_di_hash.c

/* Copyright (c) 2017 - 2022 LiteSpeed Technologies Inc.  See LICENSE. */
/*
 * lsquic_di_hash.c -- Copy incoming data into a hash
 *
 * While this implementation copies the data, its memory use is limited,
 * which makes it a good choice when we have a lot of stream frames
 * coming in.
 *
 * Another difference is that incoming STREAM frames are allowed to overlap.
 */


#include <assert.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

#include "lsquic.h"
#include "lsquic_int_types.h"
#include "lsquic_types.h"
#include "lsquic_conn_flow.h"
#include "lsquic_packet_common.h"
#include "lsquic_packet_in.h"
#include "lsquic_rtt.h"
#include "lsquic_sfcw.h"
#include "lsquic_varint.h"
#include "lsquic_hq.h"
#include "lsquic_hash.h"
#include "lsquic_stream.h"
#include "lsquic_mm.h"
#include "lsquic_malo.h"
#include "lsquic_conn.h"
#include "lsquic_conn_public.h"
#include "lsquic_data_in_if.h"


#define LSQUIC_LOGGER_MODULE LSQLM_DI
#define LSQUIC_LOG_CONN_ID lsquic_conn_log_cid(hdi->hdi_conn_pub->lconn)
#define LSQUIC_LOG_STREAM_ID hdi->hdi_stream_id
#include "lsquic_logger.h"


/* Number of 64-bit words in each block's validity bitmap (db_set). */
#define N_DB_SETS 57

/* Payload bytes per block: whatever is left of 0x1000 bytes after the
 * list linkage, the block offset and the validity bitmap.
 */
#define DB_DATA_SIZE (0x1000 - sizeof(TAILQ_ENTRY(data_block)) - \
                                sizeof(uint64_t) - N_DB_SETS * sizeof(uint64_t))

/* One fixed-size chunk of buffered stream data.  Blocks live in the hash
 * keyed by `db_off'.  Only bytes whose bit is set in `db_set' hold data;
 * the rest of `db_data' is left uninitialized.
 */
struct data_block
{
    TAILQ_ENTRY(data_block) db_next;    /* linkage within a hash bucket */
    uint64_t                db_off;     /* stream offset of db_data[0] */
    uint64_t                db_set[N_DB_SETS];  /* bit for each valid byte */
    unsigned char           db_data[DB_DATA_SIZE];
};

/* Compile-time checks: each typedef declares an array of size -1, which is
 * a compilation error, when its condition is false.
 *
 *  - the bitmap has at least one bit for every payload byte;
 *  - the bitmap does not carry a whole unused 64-bit word;
 *  - the block is exactly 0x1000 bytes.
 */
typedef char db_set_covers_all_db_data[(N_DB_SETS * 64 >= DB_DATA_SIZE) ?1: - 1];
typedef char db_set_no_waste[(N_DB_SETS * 64 - 64 <= DB_DATA_SIZE)?1: - 1];
typedef char db_block_is_4K[(sizeof(struct data_block) == 0x1000) ?1:- 1];


/* List head type used for each hash bucket: a tail queue of data blocks. */
TAILQ_HEAD(dblock_head, data_block);


/* Tentative definition; the pointer is given its value at the bottom of
 * the file, after the interface table it points to has been defined.
 * lsquic_data_in_hash_new() stores it in every new object.
 */
static const struct data_in_iface *di_if_hash_ptr;


/* Hash-based data_in implementation.  Callers hold a pointer to the
 * embedded `hdi_data_in'; HDI_PTR() converts it back to this structure.
 */
struct hash_data_in
{
    /* Generic part handed out to callers (interface pointer and flags) */
    struct data_in              hdi_data_in;
    /* Connection: used for logging and for returning packets */
    struct lsquic_conn_public  *hdi_conn_pub;
    /* Final stream offset; meaningful only when HDI_FIN is set */
    uint64_t                    hdi_fin_off;
    /* Array of N_BUCKETS(hdi_nbits) bucket list heads */
    struct dblock_head         *hdi_buckets;
    /* Block behind the frame most recently returned by the get-frame
     * callback; NULL when that frame was a FIN-only frame with no block.
     */
    struct data_block          *hdi_last_block;
    /* Storage for the frame returned by the get-frame callback */
    struct data_frame           hdi_data_frame;
    /* Stream ID: used for logging and when switching implementations */
    lsquic_stream_id_t          hdi_stream_id;
    /* Number of blocks currently in the hash */
    unsigned                    hdi_count;
    /* log2 of the number of buckets */
    unsigned                    hdi_nbits;
    enum {
            HDI_FIN = (1 << 0),     /* hdi_fin_off has been recorded */
    }                           hdi_flags;
};


/* Convert a pointer to the embedded `hdi_data_in' member back to a pointer
 * to the enclosing struct hash_data_in.  The expansion is fully
 * parenthesized so that it can be followed by `->' safely.
 */
#define HDI_PTR(data_in) ((struct hash_data_in *) \
    ((unsigned char *) (data_in) - offsetof(struct hash_data_in, hdi_data_in)))


/* Number of buckets in a table of `n_bits' bits */
#define N_BUCKETS(n_bits) (1U << (n_bits))
/* Bucket index for the block that contains stream offset `off': the block
 * number masked down to the table size.  `off' is parenthesized so that an
 * expression argument is divided as a whole.
 */
#define BUCKNO(n_bits, off) (((off) / DB_DATA_SIZE) & (N_BUCKETS(n_bits) - 1))


/*
 * Return the number of significant bits in `sz', that is,
 * floor(log2(sz)) + 1:  1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3, and so on.
 *
 * `sz' must be non-zero: __builtin_clz() is undefined for a zero argument.
 * Both branches assume that `unsigned' is 32 bits wide.
 */
static unsigned
my_log2 /* silly name to suppress compiler warning */ (unsigned sz)
{
#if __GNUC__
    unsigned clz = __builtin_clz(sz);
    return 32 - clz;
#else
    /* Portable binary search for the highest set bit.  Each step that finds
     * bits in the upper half discards the lower half and lowers `clz' by
     * the number of bits discarded.
     */
    unsigned clz;
    unsigned y;
    clz = 32;
    y = sz >> 16;   if (y) { clz -= 16; sz = y; }
    y = sz >>  8;   if (y) { clz -=  8; sz = y; }
    y = sz >>  4;   if (y) { clz -=  4; sz = y; }
    y = sz >>  2;   if (y) { clz -=  2; sz = y; }
    /* At most two bits are left.  If the upper one is set, two more bits
     * are significant; this keeps the result identical to the builtin
     * branch (e.g. sz = 2 yields 2).
     */
    y = sz >>  1;   if (y) return 32 - clz + 2;
    return 32 - clz + sz;
#endif
}


/*
 * Allocate and initialize a hash-based data_in object for `stream_id'.
 *
 * `byteage' is used only to pick the initial number of buckets: values
 * smaller than one block get 2^3 buckets, larger values get a table sized
 * from the number of blocks `byteage' spans.
 *
 * Returns a pointer to the embedded data_in, or NULL if either allocation
 * fails.
 */
struct data_in *
lsquic_data_in_hash_new (struct lsquic_conn_public *conn_pub,
                        lsquic_stream_id_t stream_id, uint64_t byteage)
{
    struct hash_data_in *hdi;
    struct dblock_head *buckets;
    unsigned nbits, i;

    /* my_log2() must not be called with zero, hence the explicit default */
    if (byteage < DB_DATA_SIZE)
        nbits = 3;
    else
        nbits = my_log2(byteage / DB_DATA_SIZE) + 2;

    hdi = malloc(sizeof(*hdi));
    if (!hdi)
        return NULL;

    buckets = malloc(sizeof(buckets[0]) * N_BUCKETS(nbits));
    if (!buckets)
    {
        free(hdi);
        return NULL;
    }
    for (i = 0; i < N_BUCKETS(nbits); ++i)
        TAILQ_INIT(&buckets[i]);

    hdi->hdi_data_in.di_if    = di_if_hash_ptr;
    hdi->hdi_data_in.di_flags = 0;
    hdi->hdi_conn_pub         = conn_pub;
    hdi->hdi_stream_id        = stream_id;
    hdi->hdi_fin_off          = 0;
    hdi->hdi_flags            = 0;
    hdi->hdi_last_block       = NULL;
    hdi->hdi_nbits            = nbits;
    hdi->hdi_count            = 0;
    hdi->hdi_buckets          = buckets;

    return &hdi->hdi_data_in;
}


static void
hash_di_destroy (struct data_in *data_in)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    struct data_block *block;
    unsigned n;

    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
    {
        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
        {
            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
            free(block);
        }
    }
    free(hdi->hdi_buckets);
    free(hdi);
}


static int
hash_grow (struct hash_data_in *hdi)
{
    struct dblock_head *new_buckets, *new[2];
    struct data_block *block;
    unsigned n, old_nbits;
    int idx;

    old_nbits = hdi->hdi_nbits;
    LSQ_DEBUG("doubling number of buckets to %u", N_BUCKETS(old_nbits + 1));
    new_buckets = malloc(sizeof(hdi->hdi_buckets[0])
                                                * N_BUCKETS(old_nbits + 1));
    if (!new_buckets)
    {
        LSQ_WARN("malloc failed: potential trouble ahead");
        return -1;
    }

    for (n = 0; n < N_BUCKETS(old_nbits); ++n)
    {
        new[0] = &new_buckets[n];
        new[1] = &new_buckets[n + N_BUCKETS(old_nbits)];
        TAILQ_INIT(new[0]);
        TAILQ_INIT(new[1]);
        while ((block = TAILQ_FIRST(&hdi->hdi_buckets[n])))
        {
            TAILQ_REMOVE(&hdi->hdi_buckets[n], block, db_next);
            idx = (BUCKNO(old_nbits + 1, block->db_off) >> old_nbits) & 1;
            TAILQ_INSERT_TAIL(new[idx], block, db_next);
        }
    }
    free(hdi->hdi_buckets);
    hdi->hdi_nbits   = old_nbits + 1;
    hdi->hdi_buckets = new_buckets;
    return 0;
}


static int
hash_insert (struct hash_data_in *hdi, struct data_block *block)
{
    unsigned buckno;

    if (hdi->hdi_count >= N_BUCKETS(hdi->hdi_nbits) / 2 && 0 != hash_grow(hdi))
        return -1;

    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
    TAILQ_INSERT_TAIL(&hdi->hdi_buckets[buckno], block, db_next);
    ++hdi->hdi_count;
    return 0;
}


static struct data_block *
hash_find (const struct hash_data_in *hdi, uint64_t off)
{
    struct data_block *block;
    unsigned buckno;

    buckno = BUCKNO(hdi->hdi_nbits, off);
    TAILQ_FOREACH(block, &hdi->hdi_buckets[buckno], db_next)
        if (off == block->db_off)
            return block;
    return NULL;
}


static void
hash_remove (struct hash_data_in *hdi, struct data_block *block)
{
    unsigned buckno;

    buckno = BUCKNO(hdi->hdi_nbits, block->db_off);
    TAILQ_REMOVE(&hdi->hdi_buckets[buckno], block, db_next);
    --hdi->hdi_count;
}


/*
 * Allocate an empty block (no valid bytes) for stream offset `off', which
 * must be a multiple of DB_DATA_SIZE, and insert it into the hash.
 *
 * Returns the new block, or NULL if allocation or insertion fails.
 */
static struct data_block *
new_block (struct hash_data_in *hdi, uint64_t off)
{
    struct data_block *fresh;

    assert(0 == off % DB_DATA_SIZE);

    fresh = malloc(sizeof(*fresh));
    if (fresh)
    {
        fresh->db_off = off;
        /* Only the bitmap is cleared; db_data is covered by the bitmap */
        memset(fresh->db_set, 0, sizeof(fresh->db_set));
        if (0 != hash_insert(hdi, fresh))
        {
            free(fresh);
            fresh = NULL;
        }
    }

    return fresh;
}


/*
 * Copy bytes from `data' into `block' starting at in-block offset
 * `block_off' and mark them valid in the block's bitmap.
 *
 * At most `data_sz' bytes are written; the write is cut short at the end
 * of the block.  Bytes that are already present are simply overwritten.
 *
 * Returns the number of bytes consumed from `data'.
 */
static unsigned
block_write (struct data_block *block, unsigned block_off,
                             const unsigned char *data, unsigned data_sz)
{
    const unsigned char *begin, *end;
    unsigned set, bit, n_full_sets, n;
    uint64_t mask;

    assert(block_off < DB_DATA_SIZE);
    /* Clamp the write to the space left in this block */
    if (data_sz > DB_DATA_SIZE - block_off)
        data_sz = DB_DATA_SIZE - block_off;

    begin = data;
    end = begin + data_sz;
    set = block_off >> 6;       /* index of the 64-bit bitmap word */
    bit = block_off & 0x3F;     /* bit position within that word */

    assert(set < N_DB_SETS);

    /* Step 1: the write starts in the middle of a bitmap word.  Fill it up
     * to the word boundary or to the end of the data, whichever is first.
     */
    if (bit)
    {
        n = 64 - bit;
        if (n > data_sz)
            n = data_sz;
        /* Bits [bit, bit + n - 1]: everything at or above `bit', ANDed
         * with everything at or below `bit + n - 1'.  The upper bound is
         * built in two parts to avoid shifting by 64.  When n is zero the
         * two halves do not intersect and the mask is empty.
         */
        mask = ~((1ULL <<  bit     ) - 1)
             &  ((1ULL << (bit + n - 1)) | ((1ULL << (bit + n - 1)) - 1));
        block->db_set[ set ] |= mask;
        memcpy(block->db_data + block_off, data, n);
        data      += n;
        block_off += n;
        /* Advanced even if the data ended inside this word: in that case
         * nothing is left for the steps below to write.
         */
        ++set;
    }

    /* Step 2: whole 64-byte runs, each marking one full bitmap word */
    n_full_sets = (end - data) >> 6;
    if (n_full_sets)
    {
        memcpy(block->db_data + block_off, data, n_full_sets * 64);
        data      += n_full_sets * 64;
        block_off += n_full_sets * 64;
        memset(&block->db_set[ set ], 0xFF, n_full_sets * 8);
        set += n_full_sets;
    }

    /* Step 3: the remaining tail of fewer than 64 bytes occupies the low
     * bits of the next bitmap word.
     */
    if (data < end)
    {
        assert(end - data < 64);
        block->db_set[ set ] |= ((1ULL << (end - data)) - 1);
        memcpy(block->db_data + block_off, data, end - data);
        data = end;
    }

    assert(set <= N_DB_SETS);

    return data - begin;
}


/* Return 1 if `block' has any valid byte at in-block offset `off' or
 * beyond, 0 otherwise.
 */
static int
has_bytes_after (const struct data_block *block, unsigned off)
{
    unsigned idx = off >> 6;

    /* Remainder of the word that contains `off' */
    if (block->db_set[ idx ] >> (off & 0x3F))
        return 1;

    /* All subsequent words */
    while (++idx < N_DB_SETS)
        if (block->db_set[ idx ])
            return 1;

    return 0;
}


enum ins_frame
lsquic_data_in_hash_insert_data_frame (struct data_in *data_in,
                const struct data_frame *data_frame, uint64_t read_offset)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    struct data_block *block;
    uint64_t key, off, diff, fin_off;
    const unsigned char *data;
    unsigned size, nw;

    if (data_frame->df_offset + data_frame->df_size < read_offset)
    {
        if (data_frame->df_fin)
            return INS_FRAME_ERR;
        else
            return INS_FRAME_DUP;
    }

    if ((hdi->hdi_flags & HDI_FIN) &&
         (
          (data_frame->df_fin &&
             data_frame->df_offset + data_frame->df_size != hdi->hdi_fin_off)
          ||
          data_frame->df_offset + data_frame->df_size > hdi->hdi_fin_off
         )
       )
    {
        return INS_FRAME_ERR;
    }

    if (data_frame->df_offset < read_offset)
    {
        diff = read_offset - data_frame->df_offset;
        assert(diff <= data_frame->df_size);
        size = data_frame->df_size   - diff;
        off  = data_frame->df_offset + diff;
        data = data_frame->df_data   + diff;
    }
    else
    {
        size = data_frame->df_size;
        off  = data_frame->df_offset;
        data = data_frame->df_data;
    }

    key = off - (off % DB_DATA_SIZE);
    do
    {
        block = hash_find(hdi, key);
        if (!block)
        {
            block = new_block(hdi, key);
            if (!block)
                return INS_FRAME_ERR;
        }
        nw = block_write(block, off % DB_DATA_SIZE, data, size);
        size -= nw;
        off  += nw;
        data += nw;
        key  += DB_DATA_SIZE;
    }
    while (size > 0);

    if (data_frame->df_fin)
    {
        fin_off = data_frame->df_offset + data_frame->df_size;
        if (has_bytes_after(block, fin_off - block->db_off) ||
                                                        hash_find(hdi, key))
        {
            return INS_FRAME_ERR;
        }
        hdi->hdi_flags  |= HDI_FIN;
        hdi->hdi_fin_off = fin_off;
    }

    return INS_FRAME_OK;
}


/*
 * Insert-frame callback: copy the frame's data into the hash and release
 * the packet the frame refers to.
 *
 * The packet reference is dropped regardless of the outcome.  The frame
 * object itself is released here only when the result is not
 * INS_FRAME_OK.
 */
static enum ins_frame
hash_di_insert_frame (struct data_in *data_in,
                        struct stream_frame *new_frame, uint64_t read_offset)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    enum ins_frame ins;

    ins = lsquic_data_in_hash_insert_data_frame(data_in,
                                        &new_frame->data_frame, read_offset);
    assert(ins != INS_FRAME_OVERLAP);

    /* Must precede the put below, which gives `new_frame' away */
    lsquic_packet_in_put(hdi->hdi_conn_pub->mm, new_frame->packet_in);
    if (INS_FRAME_OK == ins)
        return ins;

    lsquic_malo_put(new_frame);
    return ins;
}


/* ctz(x): number of trailing zero bits in a 64-bit value.  Callers must
 * pass a non-zero argument: the builtin is undefined for zero.
 */
#if __GNUC__
#   define ctz __builtin_ctzll
#else
static unsigned
ctz (unsigned long long x)
{
    unsigned width, n;

    /* Binary search: whenever the low `width' bits are all zero, count
     * them and shift them out.  Widths go 32, 16, 8, 4, 2, 1.
     */
    n = 0;
    for (width = 32; width > 0; width >>= 1)
    {
        if (0 == (x & ((1ULL << width) - 1)))
        {
            n += width;
            x >>= width;
        }
    }
    return n;
}
#endif


/*
 * Count the run of consecutive valid bytes in `block' that starts at bit
 * `bit' of bitmap word `set'.
 *
 * Returns 0 if the starting byte itself is not valid.
 */
static unsigned
n_avail_bytes (const struct data_block *block, unsigned set, unsigned bit)
{
    unsigned count;
    uint64_t part;

    /* Shift the starting bit down to position 0 and invert: the first
     * missing byte is now the lowest set bit.  The zeroes shifted in at
     * the top read as missing, so `part' can be zero only when `bit' is 0
     * and the whole word is valid.
     */
    part = ~(block->db_set[ set ] >> bit);
    if (part)
    {
        count = ctz(part);
        /* The run ends inside the first word */
        if (count < 64 - bit)
            return count;
        /* Otherwise count is 64 - bit: the run reaches the word boundary */
    }
    else
        count = 64;
    ++set;

    /* Whole words in which every byte is valid */
    for ( ; set < N_DB_SETS && ~0ULL == block->db_set[ set ]; ++set)
        count += 64;

    /* The word where the run ends, if there is one */
    if (set < N_DB_SETS)
    {
        part = ~block->db_set[ set ];
        if (part)
            count += ctz(part);
        else
            /* NOTE(review): looks unreachable, since the loop above stops
             * only at a word that is not all ones.
             */
            count += 64;
    }

    return count;
}


/* Data block is readable if there is at least one readable byte at
 * `read_offset' or there is FIN at that offset.
 *
 * On success, hdi_data_frame is filled in to describe `block' and 1 is
 * returned: df_offset is the stream offset of the block, df_read_off is
 * the position of `read_offset' inside the block, and df_size is the
 * in-block position where the contiguous run of valid bytes ends.
 * hdi_last_block is set to `block'.
 *
 * Returns 0, leaving hdi untouched, if the block is not readable.
 */
static int
setup_data_frame (struct hash_data_in *hdi, const uint64_t read_offset,
                                                    struct data_block *block)
{
    unsigned set, bit;
    uint64_t offset;

    offset = read_offset % DB_DATA_SIZE;
    set = offset >> 6;
    bit = offset & 0x3F;

    if (block->db_set[ set ] & (1ULL << bit))
    {
        hdi->hdi_last_block             = block;
        hdi->hdi_data_frame.df_data     = block->db_data;
        hdi->hdi_data_frame.df_offset   = block->db_off;
        hdi->hdi_data_frame.df_read_off = offset;
        hdi->hdi_data_frame.df_size     = offset +
                                                n_avail_bytes(block, set, bit);
        /* FIN is reached when the frame ends at the final offset.  Both
         * sides of the comparison must be absolute stream offsets: the end
         * of the frame is the block's offset plus df_size.  (Adding
         * df_read_off to df_size, both in-block positions, could signal
         * FIN while bytes before the final offset were still missing.)
         */
        hdi->hdi_data_frame.df_fin      =
            (hdi->hdi_flags & HDI_FIN) &&
                block->db_off +
                    hdi->hdi_data_frame.df_size == hdi->hdi_fin_off;
        return 1;
    }
    else if ((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off)
    {
        /* No data at `read_offset', but the stream ends right here:
         * return an empty frame that carries only FIN.
         */
        hdi->hdi_last_block             = block;
        hdi->hdi_data_frame.df_data     = NULL;
        hdi->hdi_data_frame.df_offset   = block->db_off;
        hdi->hdi_data_frame.df_read_off = offset;
        hdi->hdi_data_frame.df_size     = offset;
        hdi->hdi_data_frame.df_fin      = 1;
        return 1;
    }
    else
        return 0;
}


/*
 * Get-frame callback: return a frame describing what can be read at
 * `read_offset', or NULL if nothing is readable there.
 *
 * The returned frame is hdi_data_frame, so it is overwritten by the next
 * successful call.  When the block for `read_offset' does not exist but
 * FIN is recorded at exactly that offset, an empty FIN-only frame is
 * returned and hdi_last_block is set to NULL.
 */
static struct data_frame *
hash_di_get_frame (struct data_in *data_in, uint64_t read_offset)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    const uint64_t key = read_offset - (read_offset % DB_DATA_SIZE);
    struct data_block *const block = hash_find(hdi, key);

    if (block)
    {
        if (setup_data_frame(hdi, read_offset, block))
            return &hdi->hdi_data_frame;
        return NULL;
    }

    if (!((hdi->hdi_flags & HDI_FIN) && read_offset == hdi->hdi_fin_off))
        return NULL;

    hdi->hdi_last_block             = NULL;
    hdi->hdi_data_frame.df_data     = NULL;
    hdi->hdi_data_frame.df_offset   = key;
    hdi->hdi_data_frame.df_read_off = 0;
    hdi->hdi_data_frame.df_size     = 0;
    hdi->hdi_data_frame.df_fin      = 1;
    return &hdi->hdi_data_frame;
}


/*
 * Frame-done callback: the reader is finished with the frame that was
 * last handed out.
 *
 * The backing block (hdi_last_block) is removed and freed when it has
 * been read to its very end or when no valid bytes remain at or after the
 * frame's read position.  If that leaves the hash empty and no FIN has
 * been recorded, DI_SWITCH_IMPL is set in the data_in flags.
 */
static void
hash_di_frame_done (struct data_in *data_in, struct data_frame *data_frame)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    struct data_block *const block = hdi->hdi_last_block;

    if (!block)
    {
        /* Only the block-less FIN frame has no block behind it */
        assert(data_frame->df_fin && data_frame->df_size == 0);
        return;
    }

    /* Keep the block while it still holds unread bytes */
    if (data_frame->df_read_off != DB_DATA_SIZE
                    && has_bytes_after(block, data_frame->df_read_off))
        return;

    hash_remove(hdi, block);
    free(block);

    if (0 != hdi->hdi_count || (hdi->hdi_flags & HDI_FIN))
        return;

    LSQ_DEBUG("hash empty, want to switch");
    hdi->hdi_data_in.di_flags |= DI_SWITCH_IMPL;
}


static int
hash_di_empty (struct data_in *data_in)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    return hdi->hdi_count == 0;
}


/*
 * Switch-implementation callback: create a no-copy data_in object for the
 * same connection and stream, then destroy this (empty) hash object.
 *
 * `read_offset' is not used.  This object is destroyed even when creating
 * the replacement fails; the result of the creation call is returned
 * as is.
 */
static struct data_in *
hash_di_switch_impl (struct data_in *data_in, uint64_t read_offset)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    struct data_in *replacement;

    assert(hdi->hdi_count == 0);

    /* `hdi' must be read before it is destroyed */
    replacement = lsquic_data_in_nocopy_new(hdi->hdi_conn_pub,
                                                    hdi->hdi_stream_id);
    data_in->di_if->di_destroy(data_in);

    return replacement;
}


static size_t
hash_di_mem_used (struct data_in *data_in)
{
    struct hash_data_in *const hdi = HDI_PTR(data_in);
    const struct data_block *block;
    size_t size;
    unsigned n;

    size = sizeof(*data_in);

    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
            size += sizeof(*block);

    size += N_BUCKETS(hdi->hdi_nbits) * sizeof(hdi->hdi_buckets[0]);

    return size;
}


static void
hash_di_dump_state (struct data_in *data_in)
{
    const struct hash_data_in *const hdi = HDI_PTR(data_in);
    const struct data_block *block;
    unsigned n;

    LSQ_DEBUG("hash state: flags: %X; fin off: %"PRIu64"; count: %u",
        hdi->hdi_flags, hdi->hdi_fin_off, hdi->hdi_count);
    for (n = 0; n < N_BUCKETS(hdi->hdi_nbits); ++n)
        TAILQ_FOREACH(block, &hdi->hdi_buckets[n], db_next)
            LSQ_DEBUG("block: off: %"PRIu64, block->db_off);
}


/*
 * Readable-bytes callback: return the number of contiguous bytes that can
 * be read starting at `read_offset'.
 *
 * Frames are fetched one after another until there is no frame or the
 * frame has nothing left to read.  Nothing is consumed, but each fetch
 * overwrites the object's current frame and last-block bookkeeping.
 */
static uint64_t
hash_di_readable_bytes (struct data_in *data_in, uint64_t read_offset)
{
    const struct data_frame *frame;
    uint64_t cursor;

    cursor = read_offset;
    for (;;)
    {
        frame = hash_di_get_frame(data_in, cursor);
        if (!frame || 0 == frame->df_size - frame->df_read_off)
            break;
        cursor += frame->df_size - frame->df_read_off;
    }

    return cursor - read_offset;
}


/* Interface table that plugs the hash implementation's callbacks into the
 * generic data_in interface.
 */
static const struct data_in_iface di_if_hash = {
    .di_destroy      = hash_di_destroy,
    .di_dump_state   = hash_di_dump_state,
    .di_empty        = hash_di_empty,
    .di_frame_done   = hash_di_frame_done,
    .di_get_frame    = hash_di_get_frame,
    .di_insert_frame = hash_di_insert_frame,
    .di_mem_used     = hash_di_mem_used,
    /* NOTE(review): presumably tells the caller that a successfully
     * inserted frame is not kept by this implementation (the data is
     * copied) -- confirm against the interface definition.
     */
    .di_own_on_ok    = 0,
    .di_readable_bytes
                     = hash_di_readable_bytes,
    .di_switch_impl  = hash_di_switch_impl,
};

/* Completes the tentative definition near the top of the file */
static const struct data_in_iface *di_if_hash_ptr = &di_if_hash;