path: root/include/jemalloc/internal/prof.h
author    Jason Evans <jasone@canonware.com>    2014-08-18 16:22:13 -0700
committer Jason Evans <jasone@canonware.com>    2014-08-19 21:31:16 -0700
commit    602c8e0971160e4b85b08b16cf8a2375aa24bc04 (patch)
tree      86ba47333cd7a09e2038501044d8f88762cd5159 /include/jemalloc/internal/prof.h
parent    1628e8615ed6c82ded14d6013ac775274eb426e6 (diff)
download  jemalloc-602c8e0971160e4b85b08b16cf8a2375aa24bc04.tar.gz
Implement per thread heap profiling.
Rename data structures (prof_thr_cnt_t-->prof_tctx_t, prof_ctx_t-->prof_gctx_t), and convert to storing a prof_tctx_t for sampled objects.

Convert PROF_ALLOC_PREP() to prof_alloc_prep(), since precise backtrace depth within jemalloc functions is no longer an issue (pprof prunes irrelevant frames).

Implement mallctls:
- prof.reset implements full sample data reset, and optional change of sample interval.
- prof.lg_sample reads the current sample interval (opt.lg_prof_sample was the permanent source of truth prior to prof.reset).
- thread.prof.name provides naming capability for threads within heap profile dumps.
- thread.prof.active makes it possible to activate/deactivate heap profiling for individual threads.

Modify the heap dump files to contain per thread heap profile data. This change is incompatible with the existing pprof, which will require enhancements to read and process the enriched data.
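For illustration only, a minimal sketch of how an application might drive the new controls through the standard mallctl() interface; it assumes jemalloc was built with profiling support and launched with MALLOC_CONF="prof:true", and omits error handling:

#include <stdbool.h>
#include <stddef.h>
#include <jemalloc/jemalloc.h>

static void
configure_thread_profiling(void)
{
	const char *name = "worker-0";	/* Hypothetical thread name. */
	bool active = true;
	size_t lg_sample, sz = sizeof(lg_sample);

	/* Label this thread in subsequent heap profile dumps. */
	mallctl("thread.prof.name", NULL, NULL, &name, sizeof(name));

	/* Activate heap profiling for this thread only. */
	mallctl("thread.prof.active", NULL, NULL, &active, sizeof(active));

	/* Read the current sample interval (log base 2 of bytes). */
	mallctl("prof.lg_sample", &lg_sample, &sz, NULL, 0);

	/* Discard accumulated sample data; keep the current interval. */
	mallctl("prof.reset", NULL, NULL, NULL, 0);
}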
Diffstat (limited to 'include/jemalloc/internal/prof.h')
-rw-r--r--  include/jemalloc/internal/prof.h | 440
1 file changed, 195 insertions(+), 245 deletions(-)
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 9398ad9..104bfad 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -1,11 +1,10 @@
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
-typedef uint64_t prof_thr_uid_t;
typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t;
-typedef struct prof_thr_cnt_s prof_thr_cnt_t;
-typedef struct prof_ctx_s prof_ctx_t;
+typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
@@ -34,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_PRINTF_BUFSIZE 128
/*
- * Number of mutexes shared among all ctx's. No space is allocated for these
+ * Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NCTX_LOCKS 1024
/*
+ * Number of mutexes shared among all tdata's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NTDATA_LOCKS 256
+
+/*
* prof_tdata pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown.
*/
@@ -66,87 +71,70 @@ typedef struct {
#endif
struct prof_cnt_s {
- /*
- * Profiling counters. An allocation/deallocation pair can operate on
- * different prof_thr_cnt_t objects that are linked into the same
- * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
- * negative. In principle it is possible for the *bytes counters to
- * overflow/underflow, but a general solution would require something
- * like 128-bit counters; this implementation doesn't bother to solve
- * that problem.
- */
- int64_t curobjs;
- int64_t curbytes;
+ /* Profiling counters. */
+ uint64_t curobjs;
+ uint64_t curbytes;
uint64_t accumobjs;
uint64_t accumbytes;
};
-struct prof_thr_cnt_s {
- prof_thr_uid_t thr_uid;
+typedef enum {
+ prof_tctx_state_nominal,
+ prof_tctx_state_dumping,
+ prof_tctx_state_purgatory /* Dumper must finish destroying. */
+} prof_tctx_state_t;
- /* Linkage into prof_ctx_t's thr_cnts. */
- rb_node(prof_thr_cnt_t) thr_cnt_link;
+struct prof_tctx_s {
+ /* Thread data for thread that performed the allocation. */
+ prof_tdata_t *tdata;
- /*
- * Associated context. If a thread frees an object that it did not
- * allocate, it is possible that the context is not present in the
- * thread's hash table, in which case it must be able to look up the
- * context, insert a new prof_thr_cnt_t into the thread's hash table,
- * and link it into the prof_ctx_t's thr_cnts.
- */
- prof_ctx_t *ctx;
+ /* Profiling counters, protected by tdata->lock. */
+ prof_cnt_t cnts;
+
+ /* Associated global context. */
+ prof_gctx_t *gctx;
+
+ /* Linkage into gctx's tctxs. */
+ rb_node(prof_tctx_t) tctx_link;
+
+ /* Current dump-related state, protected by gctx->lock. */
+ prof_tctx_state_t state;
/*
- * Threads use memory barriers to update the counters. Since there is
- * only ever one writer, the only challenge is for the reader to get a
- * consistent read of the counters.
- *
- * The writer uses this series of operations:
- *
- * 1) Increment epoch to an odd number.
- * 2) Update counters.
- * 3) Increment epoch to an even number.
- *
- * The reader must assure 1) that the epoch is even while it reads the
- * counters, and 2) that the epoch doesn't change between the time it
- * starts and finishes reading the counters.
+ * Copy of cnts snapshotted during early dump phase, protected by
+ * dump_mtx.
*/
- unsigned epoch;
-
- /* Profiling counters. */
- prof_cnt_t cnts;
+ prof_cnt_t dump_cnts;
};
-typedef rb_tree(prof_thr_cnt_t) prof_thr_cnt_tree_t;
+typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
-struct prof_ctx_s {
- /* Protects nlimbo, cnt_merged, and thr_cnts. */
+struct prof_gctx_s {
+ /* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock;
/*
- * Number of threads that currently cause this ctx to be in a state of
+ * Number of threads that currently cause this gctx to be in a state of
* limbo due to one of:
- * - Initializing per thread counters associated with this ctx.
- * - Preparing to destroy this ctx.
- * - Dumping a heap profile that includes this ctx.
+ * - Initializing this gctx.
+ * - Initializing per thread counters associated with this gctx.
+ * - Preparing to destroy this gctx.
+ * - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the
- * ctx.
+ * gctx.
*/
unsigned nlimbo;
- /* Temporary storage for summation during dump. */
- prof_cnt_t cnt_summed;
-
- /* When threads exit, they merge their stats into cnt_merged. */
- prof_cnt_t cnt_merged;
-
/*
* Tree of profile counters, one for each thread that has allocated in
* this context.
*/
- prof_thr_cnt_tree_t thr_cnts;
+ prof_tctx_tree_t tctxs;
/* Linkage for tree of contexts to be dumped. */
- rb_node(prof_ctx_t) dump_link;
+ rb_node(prof_gctx_t) dump_link;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
/* Associated backtrace. */
prof_bt_t bt;
@@ -154,21 +142,34 @@ struct prof_ctx_s {
/* Backtrace vector, variable size, referred to by bt. */
void *vec[1];
};
-typedef rb_tree(prof_ctx_t) prof_ctx_tree_t;
+typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
+
+typedef enum {
+ prof_tdata_state_attached, /* Active thread attached, data valid. */
+ prof_tdata_state_detached, /* Defunct thread, data remain valid. */
+ prof_tdata_state_expired /* Predates reset, omit data from dump. */
+} prof_tdata_state_t;
struct prof_tdata_s {
+ malloc_mutex_t *lock;
+
+ /* Monotonically increasing unique thread identifier. */
+ uint64_t thr_uid;
+
+ /* Included in heap profile dumps if non-NULL. */
+ char *thread_name;
+
+ prof_tdata_state_t state;
+
+ rb_node(prof_tdata_t) tdata_link;
+
/*
- * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread tracks
+ * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
* backtraces for which it has non-zero allocation/deallocation counters
- * associated with thread-specific prof_thr_cnt_t objects. Other
- * threads may read the prof_thr_cnt_t contents, but no others will ever
- * write them.
- *
- * Upon thread exit, the thread must merge all the prof_thr_cnt_t
- * counter data into the associated prof_ctx_t objects, and unlink/free
- * the prof_thr_cnt_t objects.
+ * associated with thread-specific prof_tctx_t objects. Other threads
+ * may write to prof_tctx_t contents when freeing associated objects.
*/
- ckh_t bt2cnt;
+ ckh_t bt2tctx;
/* Sampling state. */
uint64_t prng_state;
@@ -179,9 +180,27 @@ struct prof_tdata_s {
bool enq_idump;
bool enq_gdump;
+ /*
+ * Set to true during an early dump phase for tdata's which are
+ * currently being dumped. New threads' tdata's have this initialized
+ * to false so that they aren't accidentally included in later dump
+ * phases.
+ */
+ bool dumping;
+
+ /*
+ * True if profiling is active for this tdata's thread
+ * (thread.prof.active mallctl).
+ */
+ bool active;
+
+ /* Temporary storage for summation during dump. */
+ prof_cnt_t cnt_summed;
+
/* Backtrace vector, used for calls to prof_backtrace(). */
void *vec[PROF_BT_MAX];
};
+typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
@@ -217,9 +236,18 @@ extern char opt_prof_prefix[
*/
extern uint64_t prof_interval;
+/*
+ * Initialized as opt_lg_prof_sample, and potentially modified during profiling
+ * resets.
+ */
+extern size_t lg_prof_sample;
+
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt);
-prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
+prof_tctx_t *prof_lookup(prof_bt_t *bt);
#ifdef JEMALLOC_JET
size_t prof_bt_count(void);
typedef int (prof_dump_open_t)(bool, const char *);
@@ -229,53 +257,44 @@ void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
prof_tdata_t *prof_tdata_init(void);
+prof_tdata_t *prof_tdata_reinit(prof_tdata_t *tdata);
+void prof_reset(size_t lg_sample);
void prof_tdata_cleanup(void *arg);
+const char *prof_thread_name_get(void);
+bool prof_thread_name_set(const char *thread_name);
+bool prof_thread_active_get(void);
+bool prof_thread_active_set(bool active);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
void prof_prefork(void);
void prof_postfork_parent(void);
void prof_postfork_child(void);
-void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
+void prof_sample_threshold_update(prof_tdata_t *tdata);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
-#define PROF_ALLOC_PREP(size, ret) do { \
- prof_tdata_t *prof_tdata; \
- prof_bt_t bt; \
- \
- assert(size == s2u(size)); \
- \
- if (!opt_prof_active || \
- prof_sample_accum_update(size, false, &prof_tdata)) { \
- ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
- } else { \
- bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt); \
- ret = prof_lookup(&bt); \
- } \
-} while (0)
-
#ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(bool create);
-bool prof_sample_accum_update(size_t size, bool commit,
- prof_tdata_t **prof_tdata_out);
-prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-void prof_malloc_record_object(const void *ptr, size_t usize,
- prof_thr_cnt_t *cnt);
-void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx);
-void prof_free(const void *ptr, size_t size);
+bool prof_sample_accum_update(size_t usize, bool commit,
+ prof_tdata_t **tdata_out);
+prof_tctx_t *prof_alloc_prep(size_t usize);
+prof_tctx_t *prof_tctx_get(const void *ptr);
+void prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
+void prof_malloc_sample_object(const void *ptr, size_t usize,
+ prof_tctx_t *tctx);
+void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
+void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx,
+ size_t old_usize, prof_tctx_t *old_tctx);
+void prof_free(const void *ptr, size_t usize);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
+/* Thread-specific backtrace cache, used to reduce bt2gctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
prof_tdata_cleanup)
@@ -283,21 +302,27 @@ malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
cassert(config_prof);
- prof_tdata = *prof_tdata_tsd_get();
- if (create && prof_tdata == NULL)
- prof_tdata = prof_tdata_init();
+ tdata = *prof_tdata_tsd_get();
+ if (create) {
+ if (tdata == NULL)
+ tdata = prof_tdata_init();
+ else if (tdata->state == prof_tdata_state_expired)
+ tdata = prof_tdata_reinit(tdata);
+ assert(tdata == NULL || tdata->state ==
+ prof_tdata_state_attached);
+ }
- return (prof_tdata);
+ return (tdata);
}
-JEMALLOC_INLINE prof_ctx_t *
-prof_ctx_get(const void *ptr)
+JEMALLOC_INLINE prof_tctx_t *
+prof_tctx_get(const void *ptr)
{
- prof_ctx_t *ret;
+ prof_tctx_t *ret;
arena_chunk_t *chunk;
cassert(config_prof);
@@ -306,15 +331,15 @@ prof_ctx_get(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- ret = arena_prof_ctx_get(ptr);
+ ret = arena_prof_tctx_get(ptr);
} else
- ret = huge_prof_ctx_get(ptr);
+ ret = huge_prof_tctx_get(ptr);
return (ret);
}
JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{
arena_chunk_t *chunk;
@@ -324,66 +349,62 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- arena_prof_ctx_set(ptr, ctx);
+ arena_prof_tctx_set(ptr, tctx);
} else
- huge_prof_ctx_set(ptr, ctx);
+ huge_prof_tctx_set(ptr, tctx);
}
JEMALLOC_INLINE bool
-prof_sample_accum_update(size_t size, bool commit,
- prof_tdata_t **prof_tdata_out)
+prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out)
{
- prof_tdata_t *prof_tdata;
+ prof_tdata_t *tdata;
cassert(config_prof);
- prof_tdata = prof_tdata_get(true);
- if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
- prof_tdata = NULL;
+ tdata = prof_tdata_get(true);
+ if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ tdata = NULL;
- if (prof_tdata_out != NULL)
- *prof_tdata_out = prof_tdata;
+ if (tdata_out != NULL)
+ *tdata_out = tdata;
- if (prof_tdata == NULL)
+ if (tdata == NULL)
return (true);
- if (prof_tdata->bytes_until_sample >= size) {
+ if (tdata->bytes_until_sample >= usize) {
if (commit)
- prof_tdata->bytes_until_sample -= size;
+ tdata->bytes_until_sample -= usize;
return (true);
} else {
/* Compute new sample threshold. */
if (commit)
- prof_sample_threshold_update(prof_tdata);
- return (false);
+ prof_sample_threshold_update(tdata);
+ return (tdata->active == false);
}
}
-JEMALLOC_INLINE void
-prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) {
- prof_ctx_set(ptr, cnt->ctx);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
+JEMALLOC_INLINE prof_tctx_t *
+prof_alloc_prep(size_t usize)
+{
+ prof_tctx_t *ret;
+ prof_tdata_t *tdata;
+ prof_bt_t bt;
+
+ assert(usize == s2u(usize));
+
+ if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata))
+ ret = (prof_tctx_t *)(uintptr_t)1U;
+ else {
+ bt_init(&bt, tdata->vec);
+ prof_backtrace(&bt);
+ ret = prof_lookup(&bt);
}
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
+
+ return (ret);
}
JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
+prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{
cassert(config_prof);
@@ -392,131 +413,60 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
if (prof_sample_accum_update(usize, true, NULL)) {
/*
- * Don't sample. For malloc()-like allocation, it is
- * always possible to tell in advance how large an
- * object's usable size will be, so there should never
- * be a difference between the usize passed to
- * PROF_ALLOC_PREP() and prof_malloc().
+ * Don't sample. For malloc()-like allocation, it is always
+ * possible to tell in advance how large an object's usable size
+ * will be, so there should never be a difference between the
+ * usize passed to PROF_ALLOC_PREP() and prof_malloc().
*/
- assert((uintptr_t)cnt == (uintptr_t)1U);
+ assert((uintptr_t)tctx == (uintptr_t)1U);
}
- if ((uintptr_t)cnt > (uintptr_t)1U)
- prof_malloc_record_object(ptr, usize, cnt);
+ if ((uintptr_t)tctx > (uintptr_t)1U)
+ prof_malloc_sample_object(ptr, usize, tctx);
else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+ prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}
JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
- size_t old_usize, prof_ctx_t *old_ctx)
+prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize,
+ prof_tctx_t *old_tctx)
{
- prof_thr_cnt_t *told_cnt;
cassert(config_prof);
- assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+ assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
if (ptr != NULL) {
assert(usize == isalloc(ptr, true));
if (prof_sample_accum_update(usize, true, NULL)) {
/*
- * Don't sample. The usize passed to
- * PROF_ALLOC_PREP() was larger than what
- * actually got allocated, so a backtrace was
- * captured for this allocation, even though
- * its actual usize was insufficient to cross
- * the sample threshold.
+ * Don't sample. The usize passed to PROF_ALLOC_PREP()
+ * was larger than what actually got allocated, so a
+ * backtrace was captured for this allocation, even
+ * though its actual usize was insufficient to cross the
+ * sample threshold.
*/
- cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ tctx = (prof_tctx_t *)(uintptr_t)1U;
}
}
- if ((uintptr_t)old_ctx > (uintptr_t)1U) {
- told_cnt = prof_lookup(&old_ctx->bt);
- if (told_cnt == NULL) {
- /*
- * It's too late to propagate OOM for this realloc(),
- * so operate directly on old_cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(old_ctx->lock);
- old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_usize;
- malloc_mutex_unlock(old_ctx->lock);
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
- }
- } else
- told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
-
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
- cnt->epoch++;
- } else if (ptr != NULL)
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U) {
- told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_usize;
- }
- if ((uintptr_t)cnt > (uintptr_t)1U) {
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += usize;
- if (opt_prof_accum) {
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += usize;
- }
- }
- /*********/
- mb_write();
- /*********/
- if ((uintptr_t)told_cnt > (uintptr_t)1U)
- told_cnt->epoch++;
- if ((uintptr_t)cnt > (uintptr_t)1U)
- cnt->epoch++;
- /*********/
- mb_write(); /* Not strictly necessary. */
+ if ((uintptr_t)old_tctx > (uintptr_t)1U)
+ prof_free_sampled_object(old_usize, old_tctx);
+ if ((uintptr_t)tctx > (uintptr_t)1U)
+ prof_malloc_sample_object(ptr, usize, tctx);
+ else
+ prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
}
JEMALLOC_INLINE void
-prof_free(const void *ptr, size_t size)
+prof_free(const void *ptr, size_t usize)
{
- prof_ctx_t *ctx = prof_ctx_get(ptr);
+ prof_tctx_t *tctx = prof_tctx_get(ptr);
cassert(config_prof);
+ assert(usize == isalloc(ptr, true));
- if ((uintptr_t)ctx > (uintptr_t)1) {
- prof_thr_cnt_t *tcnt;
- assert(size == isalloc(ptr, true));
- tcnt = prof_lookup(&ctx->bt);
-
- if (tcnt != NULL) {
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- tcnt->cnts.curobjs--;
- tcnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- tcnt->epoch++;
- /*********/
- mb_write();
- /*********/
- } else {
- /*
- * OOM during free() cannot be propagated, so operate
- * directly on cnt->ctx->cnt_merged.
- */
- malloc_mutex_lock(ctx->lock);
- ctx->cnt_merged.curobjs--;
- ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(ctx->lock);
- }
- }
+ if ((uintptr_t)tctx > (uintptr_t)1U)
+ prof_free_sampled_object(usize, tctx);
}
#endif
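For context (not part of the commit), a hypothetical sketch of how an allocation path could tie the renamed inline API together, assuming the usual jemalloc internal helpers s2u(), isalloc(), imalloc(), and idalloc() are available; the real fast paths in jemalloc differ in detail:

static void *
sampled_malloc(size_t size)
{
	size_t usize = s2u(size);	/* Usable size for this request. */
	prof_tctx_t *tctx = prof_alloc_prep(usize);	/* (uintptr_t)1U means "unsampled". */
	void *p = imalloc(size);

	if (p == NULL)
		return (NULL);
	prof_malloc(p, usize, tctx);	/* Records the sample or tags the region as unsampled. */
	return (p);
}

static void
sampled_free(void *p)
{
	size_t usize = isalloc(p, true);

	prof_free(p, usize);	/* Decrements the owning tctx's counters if sampled. */
	idalloc(p);
}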