litespeed-quic/src/liblsquic/lsquic_malo.c

290 lines
8.1 KiB
C

/* Copyright (c) 2017 LiteSpeed Technologies Inc. See LICENSE. */
/*
* lsquic_malo.c -- malo allocator implementation.
*
* The malo allocator is a pool of objects of fixed size. It tries to
* allocate and deallocate objects as fast as possible. To do so, it
* does the following:
*
* 1. Allocations occur 4 KB at a time.
* 2. No division or multiplication operations are performed.
*
* (In recent testing, malo was about 2.7 times faster than malloc for
* 64-byte objects.)
*
* Besides speed, two other important characteristics distinguish it
* from other pool allocators:
*
* 1. To free (put) an object, one does not need a pointer to the malo
* object. This makes this allocator easy to use.
* 2. A built-in iterator is provided to iterate over all allocated
* objects (with ability to safely release objects while iterator
* is active). This may be useful in some circumstances.
*
* To gain all these advantages, there are trade-offs:
*
* 1. There are two memory penalties:
* a. Per object overhead. To avoid division and multiplication,
* the object size is rounded up to the nearest power of two,
* starting with 64 bytes (minimum) and up to 2 KB (maximum).
* Thus, a 104-byte object will have a 24-byte overhead; a
* 130-byte object will have 126-byte overhead. This is
* something to keep in mind.
* b. Per page overhead. Page links occupy some bytes in the
* page. To keep things fast, at least one slot per page is
* always occupied, independent of object size. Thus, for a
* 1 KB object size, 25% of the page is used for the page
* header.
* 2. 4 KB pages are not freed until the malo allocator is destroyed.
* This is something to keep in mind.
*
* P.S. In Russian, "malo" (мало) means "little" or "few". Thus, the
* malo allocator aims to perform its job in as few CPU cycles as
* possible.
*/
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/queue.h>
#include "fiu-local.h"
#include "lsquic_malo.h"
/* Object slots are powers of two in size.  Slot occupancy of a 4 KB page
 * is tracked in a 64-bit bitmap, so a page holds at most 64 slots, which
 * makes the smallest slot 64 bytes (1 << 6).  The largest slot is 2 KB
 * (1 << 11).
 */
#define MALO_MIN_NBITS  6
#define MALO_MAX_NBITS 11

/* A "free page" is a page with free slots available.
 */

/* Bit-twiddling helpers; their definitions are at the end of this file. */
static unsigned find_free_slot (uint64_t slots);
static unsigned size_in_bits (size_t sz);

/* Header placed at the very beginning of each 4 KB page. */
struct malo_page {
    SLIST_ENTRY(malo_page)  next_page;      /* Link in malo->all_pages */
    LIST_ENTRY(malo_page)   next_free_page; /* Link in malo->free_pages */
    struct malo            *malo;           /* Allocator this page belongs to */
    uint64_t                slots,          /* Bit N is set when slot N is taken */
                            full_slot_mask; /* Value of `slots' when page is full */
    unsigned                nbits;          /* Slot size is (1 << nbits) bytes */
    unsigned                initial_slot;   /* First slot that can hold an object */
};

/* Compile-time check that the page header fits into a single slot.  Pages
 * reserve only slot 0 for the header no matter what the slot size is, so
 * the header has to fit into the *smallest* slot.  The array size is -1
 * (a compile error) when it does not fit and 1 otherwise; a zero-length
 * array is avoided because it is not valid ISO C.
 */
typedef char malo_header_fits_in_one_slot
    [(sizeof(struct malo_page) > (1 << MALO_MIN_NBITS)) ? -1 : 1];
/* The allocator object.  lsquic_malo_create() places it at the start of
 * the first 4 KB page, where it doubles as that page's header: this is
 * why `page_header' has to remain the first member (lsquic_malo_put()
 * finds the page header by masking the object address down to the page
 * boundary).
 */
struct malo {
struct malo_page page_header; /* Header of the first page; must be first */
SLIST_HEAD(, malo_page) all_pages; /* Every page; new pages go to the head */
LIST_HEAD(, malo_page) free_pages; /* Pages that still have a free slot */
/* State of the built-in iterator */
struct {
struct malo_page *cur_page; /* Page being iterated; NULL once iteration is over */
unsigned next_slot; /* Slot in cur_page at which lsquic_malo_next() resumes */
} iter;
};
/* Create an allocator for objects of `obj_size' bytes.  The slot size is
 * obj_size rounded up to a power of two, but never less than
 * (1 << MALO_MIN_NBITS) bytes.
 *
 * Returns the new allocator, or NULL on failure:
 *  - errno is EOVERFLOW if obj_size is zero or larger than
 *    (1 << MALO_MAX_NBITS) bytes;
 *  - errno is the error code reported by posix_memalign() if the first
 *    page could not be allocated.
 *
 * Release the allocator with lsquic_malo_destroy().
 */
struct malo *
lsquic_malo_create (size_t obj_size)
{
    struct malo *malo;
    struct malo_page *page;
    unsigned nbits, n_slots;
    void *mem;
    int rc;

    /* Validate the size before taking its logarithm so that
     * size_in_bits() is only ever called with a value in the supported
     * range.  Zero keeps being rejected with EOVERFLOW, as before.
     */
    if (0 == obj_size || obj_size > ((size_t) 1 << MALO_MAX_NBITS))
    {
        errno = EOVERFLOW;
        return NULL;
    }

    if (obj_size <= ((size_t) 1 << MALO_MIN_NBITS))
        nbits = MALO_MIN_NBITS;
    else
        nbits = size_in_bits(obj_size);

    /* posix_memalign() reports failure via its return value and does not
     * set errno; propagate the code so that callers can inspect errno.
     */
    rc = posix_memalign(&mem, 0x1000, 0x1000);
    if (0 != rc)
    {
        errno = rc;
        return NULL;
    }
    malo = mem;

    SLIST_INIT(&malo->all_pages);
    LIST_INIT(&malo->free_pages);

    /* The allocator object lives in the first page and takes up as many
     * leading slots as it needs (size rounded up to whole slots).
     */
    n_slots = (sizeof(*malo) + ((size_t) 1 << nbits) - 1) >> nbits;

    page = &malo->page_header;
    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
    page->malo = malo;
    if (nbits == MALO_MIN_NBITS)
        /* 64 slots: all bits set.  Shifting by 64 would be undefined. */
        page->full_slot_mask = ~0ULL;
    else
        page->full_slot_mask = (1ULL << (1 << (12 - nbits))) - 1;
    page->slots = (1ULL << n_slots) - 1;
    page->nbits = nbits;
    page->initial_slot = n_slots;

    /* Point the iterator past the slots occupied by the allocator object
     * so that lsquic_malo_next() can never hand out the header, even if
     * it is called without lsquic_malo_first().
     */
    malo->iter.cur_page = page;
    malo->iter.next_slot = n_slots;

    return malo;
}
/* Allocate one more 4 KB page for `malo' and link it at the head of both
 * the all-pages and the free-pages lists.  Slot 0 of the new page is
 * marked as taken because it holds the page header.
 *
 * Returns the new page, or NULL with errno set to the error code reported
 * by posix_memalign().
 */
static struct malo_page *
allocate_page (struct malo *malo)
{
    struct malo_page *page;
    void *mem;
    int rc;

    /* posix_memalign() returns the error code instead of setting errno */
    rc = posix_memalign(&mem, 0x1000, 0x1000);
    if (0 != rc)
    {
        errno = rc;
        return NULL;
    }
    page = mem;

    SLIST_INSERT_HEAD(&malo->all_pages, page, next_page);
    LIST_INSERT_HEAD(&malo->free_pages, page, next_free_page);
    page->malo = malo;
    /* Slot geometry is the same for all pages of an allocator */
    page->nbits = malo->page_header.nbits;
    page->full_slot_mask = malo->page_header.full_slot_mask;
    page->slots = 1;            /* Slot 0 is the page header */
    page->initial_slot = 1;     /* Objects start at slot 1 */
    return page;
}
#define FAIL_NOMEM do { errno = ENOMEM; return NULL; } while (0)

/* Hand out one object slot.  The slot comes from the first page on the
 * free list; a new page is allocated when the free list is empty.
 * Returns NULL if a page was needed and could not be allocated (or when
 * the "malo/get" fault-injection point fires).
 */
void *
lsquic_malo_get (struct malo *malo)
{
    struct malo_page *pg;
    unsigned idx;

    fiu_do_on("malo/get", FAIL_NOMEM);

    pg = LIST_FIRST(&malo->free_pages);
    if (NULL == pg && NULL == (pg = allocate_page(malo)))
        return NULL;

    /* Claim the lowest free slot */
    idx = find_free_slot(pg->slots);
    pg->slots |= 1ULL << idx;

    /* A page with no free slots left does not belong on the free list */
    if (pg->slots == pg->full_slot_mask)
        LIST_REMOVE(pg, next_free_page);

    return (char *) pg + (idx << pg->nbits);
}
/* Give `obj' back to its pool.  No allocator pointer is needed: the page
 * header is found by rounding the object address down to the 4 KB page
 * boundary, and the header points to the owning allocator.
 */
void
lsquic_malo_put (void *obj)
{
    const uintptr_t addr = (uintptr_t) obj;
    const uintptr_t base = addr & ~((1 << 12) - 1);
    struct malo_page *const pg = (void *) base;
    const unsigned idx = (addr - base) >> pg->nbits;

    /* A full page is not on the free list; put it back there before
     * releasing the slot.
     */
    if (pg->slots == pg->full_slot_mask)
        LIST_INSERT_HEAD(&pg->malo->free_pages, pg, next_free_page);

    pg->slots &= ~(1ULL << idx);
}
/* Free all pages of the allocator.  The page embedded in the allocator
 * object is the last element of the all-pages list (every other page was
 * inserted at the head after it), so it is freed last; freeing it
 * releases the allocator object itself.
 */
void
lsquic_malo_destroy (struct malo *malo)
{
    struct malo_page *cur, *following;

    for (cur = SLIST_FIRST(&malo->all_pages);
                            cur != &malo->page_header; cur = following)
    {
        /* Read the link before the page goes away */
        following = SLIST_NEXT(cur, next_page);
        free(cur);
    }

    /* `cur' is now the first page, i.e. the allocator's own memory */
    free(cur);
}
/* The iterator is built-in. Usage:
* void *obj;
* for (obj = lsquic_malo_first(malo); obj; obj = lsquic_malo_next(malo))
* do_stuff(obj);
*/
void *
lsquic_malo_first (struct malo *malo)
{
malo->iter.cur_page = SLIST_FIRST(&malo->all_pages);
malo->iter.next_slot = malo->iter.cur_page->initial_slot;
return lsquic_malo_next(malo);
}
/* Advance the built-in iterator and return the next allocated object.
 * Returns NULL when all pages have been examined; after that the
 * iterator stays stopped until lsquic_malo_first() is called again.
 */
void *
lsquic_malo_next (struct malo *malo)
{
    struct malo_page *pg = malo->iter.cur_page;
    unsigned idx, n_slots;

    if (NULL == pg)
        return NULL;                /* Iterator has been stopped */

    n_slots = 1 << (12 - pg->nbits);    /* Same for all pages */
    idx = malo->iter.next_slot;

    do
    {
        /* Look for the next occupied slot in the current page */
        while (idx < n_slots)
        {
            if (pg->slots & (1ULL << idx))
            {
                /* Remember where to resume on the next call */
                malo->iter.cur_page = pg;
                malo->iter.next_slot = idx + 1;
                return (char *) pg + (idx << pg->nbits);
            }
            ++idx;
        }

        /* Page exhausted: move on, skipping the next page's header slots */
        pg = SLIST_NEXT(pg, next_page);
        if (pg)
            idx = pg->initial_slot;
    }
    while (pg);

    malo->iter.cur_page = NULL;     /* Stop iterator */
    return NULL;
}
/* Return the smallest n such that (1 << n) >= sz, that is, log2 of sz
 * rounded up.  This equals the number of significant bits in (sz - 1):
 *
 *      sz = 1    ->  0
 *      sz = 64   ->  6
 *      sz = 65   ->  7
 *      sz = 200  ->  8
 *
 * sz == 0 is not a meaningful size: (sz - 1) wraps around and the full
 * bit width of size_t is returned, which is larger than MALO_MAX_NBITS
 * and so is treated as "too large" by the size check.
 *
 * The whole width of size_t is examined, so large values are not
 * truncated, and no compiler builtin with an undefined result for zero
 * is involved.  A plain loop is good enough: within this file the
 * function is used only when an allocator is created.
 */
static unsigned
size_in_bits (size_t sz)
{
    size_t v = sz - 1;
    unsigned nbits = 0;

    while (v)
    {
        ++nbits;
        v >>= 1;
    }

    return nbits;
}
/* Return the index of the lowest clear bit in `slots', i.e. the first
 * free slot of a page.  The bitmap is expected to have at least one
 * clear bit; lsquic_malo_get() only passes bitmaps of pages taken from
 * the free list.
 */
static unsigned
find_free_slot (uint64_t slots)
{
    uint64_t avail = ~slots;        /* Set bits now mark free slots */
#if __GNUC__
    return __builtin_ffsll(avail) - 1;
#else
    unsigned pos = 0, width;

    /* Binary search for the lowest set bit: whenever the low `width'
     * bits are all zero, skip over them.
     */
    for (width = 32; width > 0; width >>= 1)
        if (0 == (avail & ((1ULL << width) - 1)))
        {
            pos += width;
            avail >>= width;
        }

    return pos;
#endif
}