author     Qi Wang <interwq@gwu.edu>                    2015-10-27 15:12:10 -0700
committer  Jason Evans <je@fb.com>                      2015-11-10 14:28:34 -0800
commit     f4a0f32d340985de477bbe329ecdaecd69ed1055 (patch)
tree       a148610f4d2253186c59e671dcb065ce3647d2f5 /src
parent     710ca112e31e8621177d08162f60158c27dd2974 (diff)
download   jemalloc-f4a0f32d340985de477bbe329ecdaecd69ed1055.tar.gz
Fast-path improvement: reduce # of branches and unnecessary operations.
- Combine multiple runtime branches into a single malloc_slow check.
- Avoid calling arena_choose / size2index / index2size on fast path.
- A few micro optimizations.
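
The key idea behind the malloc_slow check: instead of testing opt_junk, opt_zero, opt_utrace, opt_quarantine, opt_xmalloc and in_valgrind individually on every allocation, the options are folded into a single malloc_slow boolean once after option parsing, so the common case pays for exactly one well-predicted branch. Below is a minimal standalone sketch of that pattern, not jemalloc's actual fast path; the opt_* globals, alloc_slow_checked() and my_malloc() are illustrative stand-ins, and only a few of the options are shown.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical runtime options standing in for jemalloc's opt_* globals. */
static bool opt_junk_alloc = false;
static bool opt_zero = false;
static bool opt_utrace = false;

/* One flag bit per option that forces the slow path (kept for sanity checks). */
enum {
	flag_opt_junk_alloc = (1U),
	flag_opt_zero       = (1U << 1),
	flag_opt_utrace     = (1U << 2)
};

static uint8_t malloc_slow_flags;
static bool malloc_slow = true;	/* Conservative default until options are read. */

/* Fold the per-option checks into one boolean, once, after option parsing. */
static void
malloc_slow_flag_init(void)
{
	malloc_slow_flags = (opt_junk_alloc ? flag_opt_junk_alloc : 0)
	    | (opt_zero ? flag_opt_zero : 0)
	    | (opt_utrace ? flag_opt_utrace : 0);
	malloc_slow = (malloc_slow_flags != 0);
}

/* Hypothetical slow-path allocator that honors the debug options. */
static void *
alloc_slow_checked(size_t size)
{
	void *p = malloc(size);
	if (p != NULL && opt_zero)
		memset(p, 0, size);
	return p;
}

/* Fast path: a single well-predicted branch instead of one test per option. */
static void *
my_malloc(size_t size)
{
	if (__builtin_expect(!malloc_slow, 1))
		return malloc(size);		/* no per-option checks here */
	return alloc_slow_checked(size);
}

int
main(void)
{
	malloc_slow_flag_init();
	free(my_malloc(32));
	return 0;
}
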
Diffstat (limited to 'src')
-rw-r--r--  src/arena.c        26
-rw-r--r--  src/ckh.c          10
-rw-r--r--  src/huge.c          6
-rw-r--r--  src/jemalloc.c    186
-rw-r--r--  src/prof.c         37
-rw-r--r--  src/quarantine.c   20
-rw-r--r--  src/tcache.c       33
7 files changed, 204 insertions, 114 deletions
diff --git a/src/arena.c b/src/arena.c
index 844d721..143afb9 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1990,11 +1990,10 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind,
/*
* OOM. tbin->avail isn't yet filled down to its first
* element, so the successful allocations (if any) must
- * be moved to the base of tbin->avail before bailing
- * out.
+ * be moved just before tbin->avail before bailing out.
*/
if (i > 0) {
- memmove(tbin->avail, &tbin->avail[nfill - i],
+ memmove(tbin->avail - i, tbin->avail - nfill,
i * sizeof(void *));
}
break;
@@ -2004,7 +2003,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind,
true);
}
/* Insert such that low regions get used first. */
- tbin->avail[nfill - 1 - i] = ptr;
+ *(tbin->avail - nfill + i) = ptr;
}
if (config_stats) {
bin->stats.nmalloc += i;
@@ -2125,14 +2124,12 @@ arena_quarantine_junk_small(void *ptr, size_t usize)
}
void *
-arena_malloc_small(arena_t *arena, size_t size, bool zero)
+arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero)
{
void *ret;
arena_bin_t *bin;
arena_run_t *run;
- szind_t binind;
- binind = size2index(size);
assert(binind < NBINS);
bin = &arena->bins[binind];
size = index2size(binind);
@@ -2179,7 +2176,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
}
void *
-arena_malloc_large(arena_t *arena, size_t size, bool zero)
+arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero)
{
void *ret;
size_t usize;
@@ -2189,7 +2186,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
UNUSED bool idump;
/* Large allocation. */
- usize = s2u(size);
+ usize = index2size(binind);
malloc_mutex_lock(&arena->lock);
if (config_cache_oblivious) {
uint64_t r;
@@ -2214,7 +2211,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) +
random_offset);
if (config_stats) {
- szind_t index = size2index(usize) - NBINS;
+ szind_t index = binind - NBINS;
arena->stats.nmalloc_large++;
arena->stats.nrequests_large++;
@@ -2336,7 +2333,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE
&& (usize & PAGE_MASK) == 0))) {
/* Small; alignment doesn't require special run placement. */
- ret = arena_malloc(tsd, arena, usize, zero, tcache);
+ ret = arena_malloc(tsd, arena, usize, size2index(usize), zero,
+ tcache, true);
} else if (usize <= large_maxclass && alignment <= PAGE) {
/*
* Large; alignment doesn't require special run placement.
@@ -2344,7 +2342,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment,
* the base of the run, so do some bit manipulation to retrieve
* the base.
*/
- ret = arena_malloc(tsd, arena, usize, zero, tcache);
+ ret = arena_malloc(tsd, arena, usize, size2index(usize), zero,
+ tcache, true);
if (config_cache_oblivious)
ret = (void *)((uintptr_t)ret & ~PAGE_MASK);
} else {
@@ -2823,7 +2822,8 @@ arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize,
{
if (alignment == 0)
- return (arena_malloc(tsd, arena, usize, zero, tcache));
+ return (arena_malloc(tsd, arena, usize, size2index(usize), zero,
+ tcache, true));
usize = sa2u(usize, alignment);
if (usize == 0)
return (NULL);
diff --git a/src/ckh.c b/src/ckh.c
index 53a1c1e..e4328d2 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh)
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (!ckh_rebuild(ckh, tab)) {
- idalloctm(tsd, tab, tcache_get(tsd, false), true);
+ idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
break;
}
/* Rebuilding failed, so back out partially rebuilt table. */
- idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true);
+ idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
}
@@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh)
ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;
if (!ckh_rebuild(ckh, tab)) {
- idalloctm(tsd, tab, tcache_get(tsd, false), true);
+ idalloctm(tsd, tab, tcache_get(tsd, false), true, true);
#ifdef CKH_COUNT
ckh->nshrinks++;
#endif
@@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh)
}
/* Rebuilding failed, so back out partially rebuilt table. */
- idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true);
+ idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
ckh->tab = tab;
ckh->lg_curbuckets = lg_prevbuckets;
#ifdef CKH_COUNT
@@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh)
(unsigned long long)ckh->nrelocs);
#endif
- idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true);
+ idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true);
if (config_debug)
memset(ckh, 0x5a, sizeof(ckh_t));
}
diff --git a/src/huge.c b/src/huge.c
index 1e9a665..c1fa379 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -75,7 +75,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
arena = arena_choose(tsd, arena);
if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena,
size, alignment, &is_zeroed)) == NULL) {
- idalloctm(tsd, node, tcache, true);
+ idalloctm(tsd, node, tcache, true, true);
return (NULL);
}
@@ -83,7 +83,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment,
if (huge_node_set(ret, node)) {
arena_chunk_dalloc_huge(arena, ret, size);
- idalloctm(tsd, node, tcache, true);
+ idalloctm(tsd, node, tcache, true, true);
return (NULL);
}
@@ -372,7 +372,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
extent_node_size_get(node));
arena_chunk_dalloc_huge(extent_node_arena_get(node),
extent_node_addr_get(node), extent_node_size_get(node));
- idalloctm(tsd, node, tcache, true);
+ idalloctm(tsd, node, tcache, true, true);
}
arena_t *
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 5a2d324..eed6331 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -70,12 +70,29 @@ typedef enum {
} malloc_init_t;
static malloc_init_t malloc_init_state = malloc_init_uninitialized;
+/* 0 should be the common case. Set to true to trigger initialization. */
+static bool malloc_slow = true;
+
+/* When malloc_slow != 0, set the corresponding bits for sanity check. */
+enum {
+ flag_opt_junk_alloc = (1U),
+ flag_opt_junk_free = (1U << 1),
+ flag_opt_quarantine = (1U << 2),
+ flag_opt_zero = (1U << 3),
+ flag_opt_utrace = (1U << 4),
+ flag_in_valgrind = (1U << 5),
+ flag_opt_xmalloc = (1U << 6)
+};
+static uint8_t malloc_slow_flags;
+
+/* Last entry for overflow detection only. */
JEMALLOC_ALIGNED(CACHELINE)
-const size_t index2size_tab[NSIZES] = {
+const size_t index2size_tab[NSIZES+1] = {
#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \
((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
SIZE_CLASSES
#undef SC
+ ZU(0)
};
JEMALLOC_ALIGNED(CACHELINE)
@@ -309,14 +326,15 @@ a0ialloc(size_t size, bool zero, bool is_metadata)
if (unlikely(malloc_init_a0()))
return (NULL);
- return (iallocztm(NULL, size, zero, false, is_metadata, a0get()));
+ return (iallocztm(NULL, size, size2index(size), zero, false,
+ is_metadata, a0get(), true));
}
static void
a0idalloc(void *ptr, bool is_metadata)
{
- idalloctm(NULL, ptr, false, is_metadata);
+ idalloctm(NULL, ptr, false, is_metadata, true);
}
void *
@@ -839,6 +857,26 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
}
static void
+malloc_slow_flag_init(void)
+{
+ /*
+ * Combine the runtime options into malloc_slow for fast path. Called
+ * after processing all the options.
+ */
+ malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0)
+ | (opt_junk_free ? flag_opt_junk_free : 0)
+ | (opt_quarantine ? flag_opt_quarantine : 0)
+ | (opt_zero ? flag_opt_zero : 0)
+ | (opt_utrace ? flag_opt_utrace : 0)
+ | (opt_xmalloc ? flag_opt_xmalloc : 0);
+
+ if (config_valgrind)
+ malloc_slow_flags |= (in_valgrind ? flag_in_valgrind : 0);
+
+ malloc_slow = (malloc_slow_flags != 0);
+}
+
+static void
malloc_conf_init(void)
{
unsigned i;
@@ -1304,6 +1342,8 @@ malloc_init_hard_finish(void)
arenas[0] = a0;
malloc_init_state = malloc_init_initialized;
+ malloc_slow_flag_init();
+
return (false);
}
@@ -1355,34 +1395,36 @@ malloc_init_hard(void)
*/
static void *
-imalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
+imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind,
+ prof_tctx_t *tctx, bool slow_path)
{
void *p;
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- p = imalloc(tsd, LARGE_MINCLASS);
+ szind_t ind_large = size2index(LARGE_MINCLASS);
+ p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = imalloc(tsd, usize);
+ p = imalloc(tsd, usize, ind, slow_path);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_prof(tsd_t *tsd, size_t usize)
+imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path)
{
void *p;
prof_tctx_t *tctx;
tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
- p = imalloc_prof_sample(tsd, usize, tctx);
+ p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path);
else
- p = imalloc(tsd, usize);
+ p = imalloc(tsd, usize, ind, slow_path);
if (unlikely(p == NULL)) {
prof_alloc_rollback(tsd, tctx, true);
return (NULL);
@@ -1393,23 +1435,45 @@ imalloc_prof(tsd_t *tsd, size_t usize)
}
JEMALLOC_ALWAYS_INLINE_C void *
-imalloc_body(size_t size, tsd_t **tsd, size_t *usize)
+imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path)
{
+ szind_t ind;
- if (unlikely(malloc_init()))
+ if (slow_path && unlikely(malloc_init()))
return (NULL);
*tsd = tsd_fetch();
+ ind = size2index(size);
+
+ if (config_stats ||
+ (config_prof && opt_prof) ||
+ (slow_path && config_valgrind && unlikely(in_valgrind))) {
+ *usize = index2size(ind);
+ }
if (config_prof && opt_prof) {
- *usize = s2u(size);
if (unlikely(*usize == 0))
return (NULL);
- return (imalloc_prof(*tsd, *usize));
+ return (imalloc_prof(*tsd, *usize, ind, slow_path));
}
- if (config_stats || (config_valgrind && unlikely(in_valgrind)))
- *usize = s2u(size);
- return (imalloc(*tsd, size));
+ return (imalloc(*tsd, size, ind, slow_path));
+}
+
+JEMALLOC_ALWAYS_INLINE_C void
+imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path)
+{
+ if (unlikely(ret == NULL)) {
+ if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) {
+ malloc_write("<jemalloc>: Error in malloc(): "
+ "out of memory\n");
+ abort();
+ }
+ set_errno(ENOMEM);
+ }
+ if (config_stats && likely(ret != NULL)) {
+ assert(usize == isalloc(ret, config_prof));
+ *tsd_thread_allocatedp_get(tsd) += usize;
+ }
}
JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN
@@ -1424,21 +1488,20 @@ je_malloc(size_t size)
if (size == 0)
size = 1;
- ret = imalloc_body(size, &tsd, &usize);
- if (unlikely(ret == NULL)) {
- if (config_xmalloc && unlikely(opt_xmalloc)) {
- malloc_write("<jemalloc>: Error in malloc(): "
- "out of memory\n");
- abort();
- }
- set_errno(ENOMEM);
- }
- if (config_stats && likely(ret != NULL)) {
- assert(usize == isalloc(ret, config_prof));
- *tsd_thread_allocatedp_get(tsd) += usize;
+ if (likely(!malloc_slow)) {
+ /*
+ * imalloc_body() is inlined so that fast and slow paths are
+ * generated separately with statically known slow_path.
+ */
+ ret = imalloc_body(size, &tsd, &usize, false);
+ imalloc_post_check(ret, tsd, usize, false);
+ } else {
+ ret = imalloc_body(size, &tsd, &usize, true);
+ imalloc_post_check(ret, tsd, usize, true);
+ UTRACE(0, size, ret);
+ JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
}
- UTRACE(0, size, ret);
- JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
+
return (ret);
}
@@ -1576,34 +1639,35 @@ je_aligned_alloc(size_t alignment, size_t size)
}
static void *
-icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx)
+icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx)
{
void *p;
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- p = icalloc(tsd, LARGE_MINCLASS);
+ szind_t ind_large = size2index(LARGE_MINCLASS);
+ p = icalloc(tsd, LARGE_MINCLASS, ind_large);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = icalloc(tsd, usize);
+ p = icalloc(tsd, usize, ind);
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-icalloc_prof(tsd_t *tsd, size_t usize)
+icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind)
{
void *p;
prof_tctx_t *tctx;
tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true);
if (unlikely((uintptr_t)tctx != (uintptr_t)1U))
- p = icalloc_prof_sample(tsd, usize, tctx);
+ p = icalloc_prof_sample(tsd, usize, ind, tctx);
else
- p = icalloc(tsd, usize);
+ p = icalloc(tsd, usize, ind);
if (unlikely(p == NULL)) {
prof_alloc_rollback(tsd, tctx, true);
return (NULL);
@@ -1621,6 +1685,7 @@ je_calloc(size_t num, size_t size)
void *ret;
tsd_t *tsd;
size_t num_size;
+ szind_t ind;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
if (unlikely(malloc_init())) {
@@ -1650,17 +1715,18 @@ je_calloc(size_t num, size_t size)
goto label_return;
}
+ ind = size2index(num_size);
if (config_prof && opt_prof) {
- usize = s2u(num_size);
+ usize = index2size(ind);
if (unlikely(usize == 0)) {
ret = NULL;
goto label_return;
}
- ret = icalloc_prof(tsd, usize);
+ ret = icalloc_prof(tsd, usize, ind);
} else {
if (config_stats || (config_valgrind && unlikely(in_valgrind)))
- usize = s2u(num_size);
- ret = icalloc(tsd, num_size);
+ usize = index2size(ind);
+ ret = icalloc(tsd, num_size, ind);
}
label_return:
@@ -1725,7 +1791,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize)
}
JEMALLOC_INLINE_C void
-ifree(tsd_t *tsd, void *ptr, tcache_t *tcache)
+ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path)
{
size_t usize;
UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
@@ -1740,10 +1806,15 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache)
usize = isalloc(ptr, config_prof);
if (config_stats)
*tsd_thread_deallocatedp_get(tsd) += usize;
- if (config_valgrind && unlikely(in_valgrind))
- rzsize = p2rz(ptr);
- iqalloc(tsd, ptr, tcache);
- JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+
+ if (likely(!slow_path))
+ iqalloc(tsd, ptr, tcache, false);
+ else {
+ if (config_valgrind && unlikely(in_valgrind))
+ rzsize = p2rz(ptr);
+ iqalloc(tsd, ptr, tcache, true);
+ JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+ }
}
JEMALLOC_INLINE_C void
@@ -1780,7 +1851,7 @@ je_realloc(void *ptr, size_t size)
/* realloc(ptr, 0) is equivalent to free(ptr). */
UTRACE(ptr, 0, 0);
tsd = tsd_fetch();
- ifree(tsd, ptr, tcache_get(tsd, false));
+ ifree(tsd, ptr, tcache_get(tsd, false), true);
return (NULL);
}
size = 1;
@@ -1807,7 +1878,10 @@ je_realloc(void *ptr, size_t size)
}
} else {
/* realloc(NULL, size) is equivalent to malloc(size). */
- ret = imalloc_body(size, &tsd, &usize);
+ if (likely(!malloc_slow))
+ ret = imalloc_body(size, &tsd, &usize, false);
+ else
+ ret = imalloc_body(size, &tsd, &usize, true);
}
if (unlikely(ret == NULL)) {
@@ -1836,7 +1910,10 @@ je_free(void *ptr)
UTRACE(ptr, 0, 0);
if (likely(ptr != NULL)) {
tsd_t *tsd = tsd_fetch();
- ifree(tsd, ptr, tcache_get(tsd, false));
+ if (likely(!malloc_slow))
+ ifree(tsd, ptr, tcache_get(tsd, false), false);
+ else
+ ifree(tsd, ptr, tcache_get(tsd, false), true);
}
}
@@ -1965,12 +2042,14 @@ JEMALLOC_ALWAYS_INLINE_C void *
imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero,
tcache_t *tcache, arena_t *arena)
{
+ szind_t ind;
+ ind = size2index(usize);
if (unlikely(alignment != 0))
return (ipalloct(tsd, usize, alignment, zero, tcache, arena));
if (unlikely(zero))
- return (icalloct(tsd, usize, tcache, arena));
- return (imalloct(tsd, usize, tcache, arena));
+ return (icalloct(tsd, usize, ind, tcache, arena));
+ return (imalloct(tsd, usize, ind, tcache, arena));
}
static void *
@@ -2034,9 +2113,10 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize)
arena_t *arena;
if (likely(flags == 0)) {
+ szind_t ind = size2index(size);
if (config_stats || (config_valgrind && unlikely(in_valgrind)))
- *usize = s2u(size);
- return (imalloc(tsd, size));
+ *usize = index2size(ind);
+ return (imalloc(tsd, size, ind, true));
}
if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize,
@@ -2375,7 +2455,7 @@ je_dallocx(void *ptr, int flags)
tcache = tcache_get(tsd, false);
UTRACE(ptr, 0, 0);
- ifree(tsd_fetch(), ptr, tcache);
+ ifree(tsd_fetch(), ptr, tcache, true);
}
JEMALLOC_ALWAYS_INLINE_C size_t
diff --git a/src/prof.c b/src/prof.c
index 5d2b959..199e63e 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -551,9 +551,9 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt)
/*
* Create a single allocation that has space for vec of length bt->len.
*/
- prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t,
- vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true),
- true, NULL);
+ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
+ prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size,
+ size2index(size), false, tcache_get(tsd, true), true, NULL, true);
if (gctx == NULL)
return (NULL);
gctx->lock = prof_gctx_mutex_choose();
@@ -594,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
prof_leave(tsd, tdata_self);
/* Destroy gctx. */
malloc_mutex_unlock(gctx->lock);
- idalloctm(tsd, gctx, tcache_get(tsd, false), true);
+ idalloctm(tsd, gctx, tcache_get(tsd, false), true, true);
} else {
/*
* Compensate for increment in prof_tctx_destroy() or
@@ -701,7 +701,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx)
prof_tdata_destroy(tsd, tdata, false);
if (destroy_tctx)
- idalloctm(tsd, tctx, tcache_get(tsd, false), true);
+ idalloctm(tsd, tctx, tcache_get(tsd, false), true, true);
}
static bool
@@ -730,7 +730,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
/* OOM. */
prof_leave(tsd, tdata);
- idalloctm(tsd, gctx.v, tcache_get(tsd, false), true);
+ idalloctm(tsd, gctx.v, tcache_get(tsd, false), true,
+ true);
return (true);
}
new_gctx = true;
@@ -789,8 +790,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
/* Link a prof_tctx_t into gctx for this thread. */
tcache = tcache_get(tsd, true);
- ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true,
- NULL);
+ ret.v = iallocztm(tsd, sizeof(prof_tctx_t),
+ size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL,
+ true);
if (ret.p == NULL) {
if (new_gctx)
prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
@@ -810,7 +812,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt)
if (error) {
if (new_gctx)
prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
- idalloctm(tsd, ret.v, tcache, true);
+ idalloctm(tsd, ret.v, tcache, true, true);
return (NULL);
}
malloc_mutex_lock(gctx->lock);
@@ -1211,7 +1213,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs)
tctx_tree_remove(&gctx->tctxs,
to_destroy);
idalloctm(tsd, to_destroy,
- tcache_get(tsd, false), true);
+ tcache_get(tsd, false), true, true);
} else
next = NULL;
} while (next != NULL);
@@ -1714,8 +1716,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
/* Initialize an empty cache for this thread. */
tcache = tcache_get(tsd, true);
- tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false,
- tcache, true, NULL);
+ tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t),
+ size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true);
if (tdata == NULL)
return (NULL);
@@ -1729,7 +1731,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS,
prof_bt_hash, prof_bt_keycomp)) {
- idalloctm(tsd, tdata, tcache, true);
+ idalloctm(tsd, tdata, tcache, true, true);
return (NULL);
}
@@ -1784,9 +1786,9 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
tcache = tcache_get(tsd, false);
if (tdata->thread_name != NULL)
- idalloctm(tsd, tdata->thread_name, tcache, true);
+ idalloctm(tsd, tdata->thread_name, tcache, true, true);
ckh_delete(tsd, &tdata->bt2tctx);
- idalloctm(tsd, tdata, tcache, true);
+ idalloctm(tsd, tdata, tcache, true, true);
}
static void
@@ -1947,7 +1949,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name)
if (size == 1)
return ("");
- ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL);
+ ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd,
+ true), true, NULL, true);
if (ret == NULL)
return (NULL);
memcpy(ret, thread_name, size);
@@ -1980,7 +1983,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name)
if (tdata->thread_name != NULL) {
idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false),
- true);
+ true, true);
tdata->thread_name = NULL;
}
if (strlen(s) > 0)
diff --git a/src/quarantine.c b/src/quarantine.c
index 6c43dfc..ff8801c 100644
--- a/src/quarantine.c
+++ b/src/quarantine.c
@@ -23,12 +23,14 @@ static quarantine_t *
quarantine_init(tsd_t *tsd, size_t lg_maxobjs)
{
quarantine_t *quarantine;
+ size_t size;
assert(tsd_nominal(tsd));
- quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs)
- + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false,
- tcache_get(tsd, true), true, NULL);
+ size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) *
+ sizeof(quarantine_obj_t));
+ quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size),
+ false, tcache_get(tsd, true), true, NULL, true);
if (quarantine == NULL)
return (NULL);
quarantine->curbytes = 0;
@@ -55,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd)
if (tsd_quarantine_get(tsd) == NULL)
tsd_quarantine_set(tsd, quarantine);
else
- idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
+ idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
}
static quarantine_t *
@@ -87,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine)
memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
sizeof(quarantine_obj_t));
}
- idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
+ idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
tsd_quarantine_set(tsd, ret);
return (ret);
@@ -98,7 +100,7 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine)
{
quarantine_obj_t *obj = &quarantine->objs[quarantine->first];
assert(obj->usize == isalloc(obj->ptr, config_prof));
- idalloctm(tsd, obj->ptr, NULL, false);
+ idalloctm(tsd, obj->ptr, NULL, false, true);
quarantine->curbytes -= obj->usize;
quarantine->curobjs--;
quarantine->first = (quarantine->first + 1) & ((ZU(1) <<
@@ -123,7 +125,7 @@ quarantine(tsd_t *tsd, void *ptr)
assert(opt_quarantine);
if ((quarantine = tsd_quarantine_get(tsd)) == NULL) {
- idalloctm(tsd, ptr, NULL, false);
+ idalloctm(tsd, ptr, NULL, false, true);
return;
}
/*
@@ -162,7 +164,7 @@ quarantine(tsd_t *tsd, void *ptr)
}
} else {
assert(quarantine->curbytes == 0);
- idalloctm(tsd, ptr, NULL, false);
+ idalloctm(tsd, ptr, NULL, false, true);
}
}
@@ -177,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd)
quarantine = tsd_quarantine_get(tsd);
if (quarantine != NULL) {
quarantine_drain(tsd, quarantine, 0);
- idalloctm(tsd, quarantine, tcache_get(tsd, false), true);
+ idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true);
tsd_quarantine_set(tsd, NULL);
}
}
diff --git a/src/tcache.c b/src/tcache.c
index fdafd0c..78c6230 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
void *
tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
- tcache_bin_t *tbin, szind_t binind)
+ tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
{
void *ret;
@@ -80,7 +80,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
tcache->prof_accumbytes : 0);
if (config_prof)
tcache->prof_accumbytes = 0;
- ret = tcache_alloc_easy(tbin);
+ ret = tcache_alloc_easy(tbin, tcache_success);
return (ret);
}
@@ -102,7 +102,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena bin associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
- tbin->avail[0]);
+ *(tbin->avail - 1));
arena_t *bin_arena = extent_node_arena_get(&chunk->node);
arena_bin_t *bin = &bin_arena->bins[binind];
@@ -122,7 +122,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
}
ndeferred = 0;
for (i = 0; i < nflush; i++) {
- ptr = tbin->avail[i];
+ ptr = *(tbin->avail - 1 - i);
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (extent_node_arena_get(&chunk->node) == bin_arena) {
@@ -139,7 +139,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
* locked. Stash the object, so that it can be
* handled in a future pass.
*/
- tbin->avail[ndeferred] = ptr;
+ *(tbin->avail - 1 - ndeferred) = ptr;
ndeferred++;
}
}
@@ -158,8 +158,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
malloc_mutex_unlock(&bin->lock);
}
- memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
- rem * sizeof(void *));
+ memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+ sizeof(void *));
tbin->ncached = rem;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
@@ -182,7 +182,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
/* Lock the arena associated with the first object. */
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
- tbin->avail[0]);
+ *(tbin->avail - 1));
arena_t *locked_arena = extent_node_arena_get(&chunk->node);
UNUSED bool idump;
@@ -206,7 +206,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
}
ndeferred = 0;
for (i = 0; i < nflush; i++) {
- ptr = tbin->avail[i];
+ ptr = *(tbin->avail - 1 - i);
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (extent_node_arena_get(&chunk->node) ==
@@ -220,7 +220,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
* Stash the object, so that it can be handled
* in a future pass.
*/
- tbin->avail[ndeferred] = ptr;
+ *(tbin->avail - 1 - ndeferred) = ptr;
ndeferred++;
}
}
@@ -241,8 +241,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
malloc_mutex_unlock(&arena->lock);
}
- memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
- rem * sizeof(void *));
+ memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+ sizeof(void *));
tbin->ncached = rem;
if ((int)tbin->ncached < tbin->low_water)
tbin->low_water = tbin->ncached;
@@ -333,9 +333,14 @@ tcache_create(tsd_t *tsd, arena_t *arena)
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
for (i = 0; i < nhbins; i++) {
tcache->tbins[i].lg_fill_div = 1;
+ stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+ /*
+ * avail points past the available space. Allocations will
+ * access the slots toward higher addresses (for the benefit of
+ * prefetch).
+ */
tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
(uintptr_t)stack_offset);
- stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
}
return (tcache);
@@ -379,7 +384,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache)
arena_prof_accum(arena, tcache->prof_accumbytes))
prof_idump();
- idalloctm(tsd, tcache, false, true);
+ idalloctm(tsd, tcache, false, true, true);
}
void
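
A footnote on the je_malloc() change in src/jemalloc.c above: imalloc_body() is declared JEMALLOC_ALWAYS_INLINE_C and is called with a literal slow_path argument, so the compiler specializes the body twice and strips the slow-only work (valgrind, utrace, xmalloc error reporting) out of the fast copy. A minimal standalone sketch of that pattern follows; the names malloc_body() and my_malloc() are hypothetical, and the GCC/Clang always_inline attribute stands in for jemalloc's macro.

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

static bool malloc_slow = false;	/* set once after option parsing */

/* Always-inline body whose slow_path argument is a compile-time constant. */
static inline __attribute__((always_inline)) void *
malloc_body(size_t size, bool slow_path)
{
	void *p = malloc(size);
	if (slow_path && p == NULL) {
		/*
		 * Stand-in for the xmalloc/utrace/valgrind work that only the
		 * slow variant needs; the fast variant compiles this away.
		 */
		errno = ENOMEM;
	}
	return p;
}

void *
my_malloc(size_t size)
{
	/* One runtime branch selects a fully specialized variant. */
	if (__builtin_expect(!malloc_slow, 1))
		return malloc_body(size, false);
	return malloc_body(size, true);
}

int
main(void)
{
	free(my_malloc(1));
	return 0;
}
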