author    Christopher Ferris <cferris@google.com>  2015-04-22 06:59:28 +0000
committer Christopher Ferris <cferris@google.com>  2015-04-22 00:05:52 -0700
commit    83e5767ee9a8c68150cca06ae0d27a13ba4fcaf8 (patch)
tree      9c72852322473822033addd4f1d902e4b33338d4 /include/jemalloc/internal/prof.h
parent    cfef1c84fffb9f781fccb0df4598e0dcdc022182 (diff)
download  jemalloc-83e5767ee9a8c68150cca06ae0d27a13ba4fcaf8.tar.gz
Revert "Revert "Merge remote-tracking branch 'aosp/upstream-dev' into merge""
This reverts commit 75929a97332565c3b987986f35652b6d5d275d3c. The failure for
which this was originally reverted appears to have been caused by a bug
elsewhere.

Change-Id: Ib29ba03b1b967f940dc19eceac2aa1d2923be1eb
Diffstat (limited to 'include/jemalloc/internal/prof.h')
-rw-r--r--  include/jemalloc/internal/prof.h  588
1 file changed, 293 insertions(+), 295 deletions(-)
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index d82fbc4..2e22711 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -3,8 +3,8 @@
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_thr_cnt_s prof_thr_cnt_t;
-typedef struct prof_ctx_s prof_ctx_t;
+typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
@@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t;
*/
#define PROF_BT_MAX 128
-/* Maximum number of backtraces to store in each per thread LRU cache. */
-#define PROF_TCMAX 1024
-
/* Initial hash table size. */
#define PROF_CKH_MINITEMS 64
@@ -36,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_PRINTF_BUFSIZE 128
/*
- * Number of mutexes shared among all ctx's. No space is allocated for these
+ * Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
+ * Number of mutexes shared among all tdata's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NTDATA_LOCKS 256
+
+/*
* prof_tdata pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown.
*/
@@ -68,111 +71,141 @@ typedef struct {
#endif
struct prof_cnt_s {
- /*
- * Profiling counters. An allocation/deallocation pair can operate on
- * different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
- * negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require something
- * like 128-bit counters; this implementation doesn't bother to solve
- * that problem.
- */
- int64_t curobjs;
- int64_t curbytes;
+ /* Profiling counters. */
+ uint64_t curobjs;
+ uint64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
-struct prof_thr_cnt_s {
- /* Linkage into prof_ctx_t's cnts_ql. */
- ql_elm(prof_thr_cnt_t) cnts_link;
+typedef enum {
+ prof_tctx_state_initializing,
+ prof_tctx_state_nominal,
+ prof_tctx_state_dumping,
+ prof_tctx_state_purgatory /* Dumper must finish destroying. */
+} prof_tctx_state_t;
- /* Linkage into thread's LRU. */
- ql_elm(prof_thr_cnt_t) lru_link;
+struct prof_tctx_s {
+ /* Thread data for thread that performed the allocation. */
+ prof_tdata_t *tdata;
/*
- * Associated context. If a thread frees an object that it did not
- * allocate, it is possible that the context is not cached in the
- * thread's hash table, in which case it must be able to look up the
- * context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's cnts_ql.
+ * Copy of tdata->thr_uid, necessary because tdata may be defunct during
+ * teardown.
*/
- prof_ctx_t *ctx;
+ uint64_t thr_uid;
+
+ /* Profiling counters, protected by tdata->lock. */
+ prof_cnt_t cnts;
+
+ /* Associated global context. */
+ prof_gctx_t *gctx;
/*
- * Threads use memory barriers to update the counters. Since there is
- * only ever one writer, the only challenge is for the reader to get a
- * consistent read of the counters.
- *
- * The writer uses this series of operations:
- *
- * 1) Increment epoch to an odd number.
- * 2) Update counters.
- * 3) Increment epoch to an even number.
- *
- * The reader must assure 1) that the epoch is even while it reads the
- * counters, and 2) that the epoch doesn't change between the time it
- * starts and finishes reading the counters.
+ * UID that distinguishes multiple tctx's created by the same thread,
+ * but coexisting in gctx->tctxs. There are two ways that such
+ * coexistence can occur:
+ * - A dumper thread can cause a tctx to be retained in the purgatory
+ * state.
+ * - Although a single "producer" thread must create all tctx's which
+ * share the same thr_uid, multiple "consumers" can each concurrently
+ * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only
+ * gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+ * threshold can be hit again before the first consumer finishes
+ * executing prof_tctx_destroy().
*/
- unsigned epoch;
+ uint64_t tctx_uid;
- /* Profiling counters. */
- prof_cnt_t cnts;
-};
+ /* Linkage into gctx's tctxs. */
+ rb_node(prof_tctx_t) tctx_link;
-struct prof_ctx_s {
- /* Associated backtrace. */
- prof_bt_t *bt;
+ /*
+ * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
+ * sample vs destroy race.
+ */
+ bool prepared;
+
+ /* Current dump-related state, protected by gctx->lock. */
+ prof_tctx_state_t state;
- /* Protects nlimbo, cnt_merged, and cnts_ql. */
+ /*
+ * Copy of cnts snapshotted during early dump phase, protected by
+ * dump_mtx.
+ */
+ prof_cnt_t dump_cnts;
+};
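
The epoch machinery removed above was a seqlock: the writer bumps epoch to an
odd value, mutates the counters, then bumps epoch back to even. A hedged
sketch of the reader side that comment describes, using a hypothetical
mb_read() barrier as a stand-in (only mb_write() appears in this header):

    prof_cnt_t snapshot;
    unsigned e0, e1;
    do {
        e0 = cnt->epoch;      /* Retry until an even epoch is seen... */
        mb_read();            /* Hypothetical read barrier. */
        snapshot = cnt->cnts; /* ...and the counters are copied... */
        mb_read();
        e1 = cnt->epoch;      /* ...without the epoch moving underneath. */
    } while ((e0 & 1) != 0 || e0 != e1);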
+typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
+
+struct prof_gctx_s {
+ /* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock;
/*
- * Number of threads that currently cause this ctx to be in a state of
+ * Number of threads that currently cause this gctx to be in a state of
* limbo due to one of:
- * - Initializing per thread counters associated with this ctx.
- * - Preparing to destroy this ctx.
- * - Dumping a heap profile that includes this ctx.
+ * - Initializing this gctx.
+ * - Initializing per thread counters associated with this gctx.
+ * - Preparing to destroy this gctx.
+ * - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
- * ctx.
+ * gctx.
*/
unsigned nlimbo;
- /* Temporary storage for summation during dump. */
- prof_cnt_t cnt_summed;
-
- /* When threads exit, they merge their stats into cnt_merged. */
- prof_cnt_t cnt_merged;
-
/*
- * List of profile counters, one for each thread that has allocated in
+ * Tree of profile counters, one for each thread that has allocated in
* this context.
*/
- ql_head(prof_thr_cnt_t) cnts_ql;
+ prof_tctx_tree_t tctxs;
+
+ /* Linkage for tree of contexts to be dumped. */
+ rb_node(prof_gctx_t) dump_link;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
- /* Linkage for list of contexts to be dumped. */
- ql_elm(prof_ctx_t) dump_link;
+ /* Associated backtrace. */
+ prof_bt_t bt;
+
+ /* Backtrace vector, variable size, referred to by bt. */
+ void *vec[1];
};
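
Because vec[1] is a C89-style flexible array, a gctx has to be allocated with
trailing space for the whole backtrace. A sketch under two assumptions: that
prof_bt_t carries a frame vector and length, and that imalloc() stands in for
whichever internal allocator prof.c actually uses:

    /* Size the gctx so vec[] can hold all bt->len frames. */
    size_t size = offsetof(prof_gctx_t, vec) + bt->len * sizeof(void *);
    prof_gctx_t *gctx = (prof_gctx_t *)imalloc(size); /* stand-in call */
    if (gctx != NULL) {
        memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
        gctx->bt.vec = gctx->vec; /* bt refers into the trailing vector. */
        gctx->bt.len = bt->len;
    }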
-typedef ql_head(prof_ctx_t) prof_ctx_list_t;
+typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
struct prof_tdata_s {
+ malloc_mutex_t *lock;
+
+ /* Monotonically increasing unique thread identifier. */
+ uint64_t thr_uid;
+
/*
- * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a
- * cache of backtraces, with associated thread-specific prof_thr_cnt_t
- * objects. Other threads may read the prof_thr_cnt_t contents, but no
- * others will ever write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t
- * counter data into the associated prof_ctx_t objects, and unlink/free
- * the prof_thr_cnt_t objects.
+ * Monotonically increasing discriminator among tdata structures
+ * associated with the same thr_uid.
*/
- ckh_t bt2cnt;
+ uint64_t thr_discrim;
- /* LRU for contents of bt2cnt. */
- ql_head(prof_thr_cnt_t) lru_ql;
+ /* Included in heap profile dumps if non-NULL. */
+ char *thread_name;
- /* Backtrace vector, used for calls to prof_backtrace(). */
- void **vec;
+ bool attached;
+ bool expired;
+
+ rb_node(prof_tdata_t) tdata_link;
+
+ /*
+ * Counter used to initialize prof_tctx_t's tctx_uid. No locking is
+ * necessary when incrementing this field, because only one thread ever
+ * does so.
+ */
+ uint64_t tctx_uid_next;
+
+ /*
+ * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
+ * backtraces for which it has non-zero allocation/deallocation counters
+ * associated with thread-specific prof_tctx_t objects. Other threads
+ * may write to prof_tctx_t contents when freeing associated objects.
+ */
+ ckh_t bt2tctx;
/* Sampling state. */
uint64_t prng_state;
@@ -182,20 +215,36 @@ struct prof_tdata_s {
bool enq;
bool enq_idump;
bool enq_gdump;
+
+ /*
+ * Set to true during an early dump phase for tdata's which are
+ * currently being dumped. New threads' tdata's have this initialized
+ * to false so that they aren't accidentally included in later dump
+ * phases.
+ */
+ bool dumping;
+
+ /*
+ * True if profiling is active for this tdata's thread
+ * (thread.prof.active mallctl).
+ */
+ bool active;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
+ /* Backtrace vector, used for calls to prof_backtrace(). */
+ void *vec[PROF_BT_MAX];
};
+typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
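
Since all tctx's sharing a thr_uid are created by a single producer thread,
tctx_uid_next can be advanced without atomics; a one-line sketch of the
intended use:

    /* Only the owning thread executes this, so no synchronization. */
    tctx->tctx_uid = tdata->tctx_uid_next++;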
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_prof;
-/*
- * Even if opt_prof is true, sampling can be temporarily disabled by setting
- * opt_prof_active to false. No locking is used when updating opt_prof_active,
- * so there are no guarantees regarding how long it will take for all threads
- * to notice state changes.
- */
extern bool opt_prof_active;
+extern bool opt_prof_thread_active_init;
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
@@ -209,6 +258,12 @@ extern char opt_prof_prefix[
#endif
1];
+/* Accessed via prof_active_[gs]et{_unlocked,}(). */
+extern bool prof_active;
+
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool prof_gdump_val;
+
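These globals back runtime profiling toggles; the "prof.active" mallctl is the
usual way to flip prof_active from application code. A hedged usage sketch
(assumes jemalloc was built with profiling and opt_prof is enabled):

    #include <jemalloc/jemalloc.h>

    bool active = true;
    /* Turn sampling on at runtime; prof.gdump is toggled the same way. */
    mallctl("prof.active", NULL, NULL, &active, sizeof(active));
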
/*
* Profile dump interval, measured in bytes allocated. Each arena triggers a
* profile dump when it reaches this threshold. The effect is that the
@@ -218,306 +273,249 @@ extern char opt_prof_prefix[
*/
extern uint64_t prof_interval;
+/*
+ * Initialized as opt_lg_prof_sample, and potentially modified during profiling
+ * resets.
+ */
+extern size_t lg_prof_sample;
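
For scale: with the historical default of lg_prof_sample = 19, the mean
sampling interval works out to 2^19 = 524288 bytes, i.e. roughly one sampled
allocation per 512 KiB allocated:

    /* Mean bytes between samples implied by a given lg_prof_sample. */
    uint64_t mean_interval = (uint64_t)1 << lg_prof_sample; /* 19 -> 512 KiB */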
+
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt);
-prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
+prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
#ifdef JEMALLOC_JET
+size_t prof_tdata_count(void);
size_t prof_bt_count(void);
+const prof_cnt_t *prof_cnt_all(void);
typedef int (prof_dump_open_t)(bool, const char *);
extern prof_dump_open_t *prof_dump_open;
+typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *);
+extern prof_dump_header_t *prof_dump_header;
#endif
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
-prof_tdata_t *prof_tdata_init(void);
-void prof_tdata_cleanup(void *arg);
+prof_tdata_t *prof_tdata_init(tsd_t *tsd);
+prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
+void prof_reset(tsd_t *tsd, size_t lg_sample);
+void prof_tdata_cleanup(tsd_t *tsd);
+const char *prof_thread_name_get(void);
+bool prof_active_get(void);
+bool prof_active_set(bool active);
+int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
+bool prof_thread_active_get(void);
+bool prof_thread_active_set(bool active);
+bool prof_thread_active_init_get(void);
+bool prof_thread_active_init_set(bool active_init);
+bool prof_gdump_get(void);
+bool prof_gdump_set(bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
void prof_prefork(void);
void prof_postfork_parent(void);
void prof_postfork_child(void);
-void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
+void prof_sample_threshold_update(prof_tdata_t *tdata);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
-#define PROF_ALLOC_PREP(size, ret) do { \
- prof_tdata_t *prof_tdata; \
- prof_bt_t bt; \
- \
- assert(size == s2u(size)); \
- \
- if (!opt_prof_active || \
- prof_sample_accum_update(size, false, &prof_tdata)) { \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } else { \
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt); \
- ret = prof_lookup(&bt); \
- } \
-} while (0)
-
#ifndef JEMALLOC_ENABLE_INLINE
-malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
-
-prof_tdata_t *prof_tdata_get(bool create);
-bool prof_sample_accum_update(size_t size, bool commit,
- prof_tdata_t **prof_tdata_out);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-void prof_malloc_record_object(const void *ptr, size_t usize,
- prof_thr_cnt_t *cnt);
-void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr, size_t size);
+bool prof_active_get_unlocked(void);
+bool prof_gdump_get_unlocked(void);
+prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
+bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit,
+ prof_tdata_t **tdata_out);
+prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update);
+prof_tctx_t *prof_tctx_get(const void *ptr);
+void prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize,
+ prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx);
+void prof_free(tsd_t *tsd, const void *ptr, size_t usize);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-malloc_tsd_externs(prof_tdata, prof_tdata_t *)
-malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
- prof_tdata_cleanup)
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void)
+{
+
+ /*
+ * Even if opt_prof is true, sampling can be temporarily disabled by
+ * setting prof_active to false. No locking is used when reading
+ * prof_active in the fast path, so there are no guarantees regarding
+ * how long it will take for all threads to notice state changes.
+ */
+ return (prof_active);
+}
-JEMALLOC_INLINE prof_tdata_t *
-prof_tdata_get(bool create)
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void)
{
- prof_tdata_t *prof_tdata;
+
+ /*
+ * No locking is used when reading prof_gdump_val in the fast path, so
+ * there are no guarantees regarding how long it will take for all
+ * threads to notice state changes.
+ */
+ return (prof_gdump_val);
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create)
+{
+ prof_tdata_t *tdata;
cassert(config_prof);
- prof_tdata = *prof_tdata_tsd_get();
- if (create && prof_tdata == NULL)
- prof_tdata = prof_tdata_init();
+ tdata = tsd_prof_tdata_get(tsd);
+ if (create) {
+ if (unlikely(tdata == NULL)) {
+ if (tsd_nominal(tsd)) {
+ tdata = prof_tdata_init(tsd);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ } else if (unlikely(tdata->expired)) {
+ tdata = prof_tdata_reinit(tsd, tdata);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ assert(tdata == NULL || tdata->attached);
+ }
- return (prof_tdata);
+ return (tdata);
}
-JEMALLOC_INLINE prof_ctx_t *
-prof_ctx_get(const void *ptr)
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_tctx_get(const void *ptr)
{
- prof_ctx_t *ret;
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- ret = arena_prof_ctx_get(ptr);
- } else
- ret = huge_prof_ctx_get(ptr);
-
- return (ret);
+ return (arena_prof_tctx_get(ptr));
}
-JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{
- arena_chunk_t *chunk;
cassert(config_prof);
assert(ptr != NULL);
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- if (chunk != ptr) {
- /* Region. */
- arena_prof_ctx_set(ptr, ctx);
- } else
- huge_prof_ctx_set(ptr, ctx);
+ arena_prof_tctx_set(ptr, tctx);
}
-JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t size, bool commit,
- prof_tdata_t **prof_tdata_out)
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+ prof_tdata_t **tdata_out)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
cassert(config_prof);
- prof_tdata = prof_tdata_get(true);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
- prof_tdata = NULL;
+ tdata = prof_tdata_get(tsd, true);
+ if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = NULL;
- if (prof_tdata_out != NULL)
- *prof_tdata_out = prof_tdata;
+ if (tdata_out != NULL)
+ *tdata_out = tdata;
- if (prof_tdata == NULL)
+ if (tdata == NULL)
return (true);
- if (prof_tdata->bytes_until_sample >= size) {
- if (commit)
- prof_tdata->bytes_until_sample -= size;
+ if (tdata->bytes_until_sample >= usize) {
+ if (update)
+ tdata->bytes_until_sample -= usize;
return (true);
} else {
/* Compute new sample threshold. */
- if (commit)
- prof_sample_threshold_update(prof_tdata);
- return (false);
+ if (update)
+ prof_sample_threshold_update(tdata);
+ return (!tdata->active);
}
}
-JEMALLOC_INLINE void
-prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) {
- prof_ctx_set(ptr, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool update)
+{
+ prof_tctx_t *ret;
+ prof_tdata_t *tdata;
+ prof_bt_t bt;
+
+ assert(usize == s2u(usize));
+
+ if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd,
+ usize, update, &tdata)))
+ ret = (prof_tctx_t *)(uintptr_t)1U;
+ else {
+ bt_init(&bt, tdata->vec);
+ prof_backtrace(&bt);
+ ret = prof_lookup(tsd, &bt);
}
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
+
+ return (ret);
}
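
A hedged sketch of the calling convention these prototypes imply on the
allocation path; do_allocation() is a placeholder, not a real jemalloc
function:

    prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, true);
    void *p = do_allocation(usize);          /* placeholder step */
    if (p == NULL) {
        prof_alloc_rollback(tsd, tctx, true);
        return (NULL);
    }
    prof_malloc(p, usize, tctx); /* samples iff (uintptr_t)tctx > 1U */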
-JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
cassert(config_prof);
assert(ptr != NULL);
assert(usize == isalloc(ptr, true));
- if (prof_sample_accum_update(usize, true, NULL)) {
- /*
- * Don't sample. For malloc()-like allocation, it is
- * always possible to tell in advance how large an
- * object's usable size will be, so there should never
- * be a difference between the usize passed to
- * PROF_ALLOC_PREP() and prof_malloc().
- */
- assert((uintptr_t)cnt == (uintptr_t)1U);
- }
-
- if ((uintptr_t)cnt > (uintptr_t)1U)
- prof_malloc_record_object(ptr, usize, cnt);
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_malloc_sample_object(ptr, usize, tctx);
else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+ prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}
-JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx)
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
+ bool updated, size_t old_usize, prof_tctx_t *old_tctx)
{
- prof_thr_cnt_t *told_cnt;
cassert(config_prof);
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+ assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
- if (ptr != NULL) {
+ if (!updated && ptr != NULL) {
assert(usize == isalloc(ptr, true));
- if (prof_sample_accum_update(usize, true, NULL)) {
+ if (prof_sample_accum_update(tsd, usize, true, NULL)) {
/*
- * Don't sample. The usize passed to
- * PROF_ALLOC_PREP() was larger than what
- * actually got allocated, so a backtrace was
- * captured for this allocation, even though
- * its actual usize was insufficient to cross
- * the sample threshold.
+ * Don't sample. The usize passed to PROF_ALLOC_PREP()
+ * was larger than what actually got allocated, so a
+ * backtrace was captured for this allocation, even
+ * though its actual usize was insufficient to cross the
+ * sample threshold.
*/
- cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ tctx = (prof_tctx_t *)(uintptr_t)1U;
}
}
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(old_ctx->bt);
- if (told_cnt == NULL) {
- /*
- * It's too late to propagate OOM for this realloc(),
- * so operate directly on old_cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_usize;
- malloc_mutex_unlock(old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
- cnt->epoch++;
- } else if (ptr != NULL)
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_usize;
- }
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
+ if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U))
+ prof_free_sampled_object(tsd, old_usize, old_tctx);
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_malloc_sample_object(ptr, usize, tctx);
+ else
+ prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}
-JEMALLOC_INLINE void
-prof_free(const void *ptr, size_t size)
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const void *ptr, size_t usize)
{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
+ prof_tctx_t *tctx = prof_tctx_get(ptr);
cassert(config_prof);
+ assert(usize == isalloc(ptr, true));
- if ((uintptr_t)ctx > (uintptr_t)1) {
- prof_thr_cnt_t *tcnt;
- assert(size == isalloc(ptr, true));
- tcnt = prof_lookup(ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else {
- /*
- * OOM during free() cannot be propagated, so operate
- * directly on cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(ctx->lock);
- }
- }
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U))
+ prof_free_sampled_object(tsd, usize, tctx);
}
#endif
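
A closing note on the sentinel convention the inlines above all share,
summarized as a comment sketch:

    /*
     * tctx encoding assumed throughout (hedged reading of this header):
     *   tctx == NULL          - lookup failed (e.g. OOM in prof_lookup())
     *   (uintptr_t)tctx == 1U - allocation deliberately not sampled
     *   (uintptr_t)tctx >  1U - real pointer; the allocation was sampled
     * Hence the recurring "(uintptr_t)tctx > (uintptr_t)1U" tests.
     */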