author     Christopher Ferris <cferris@google.com>  2015-04-22 06:59:28 +0000
committer  Christopher Ferris <cferris@google.com>  2015-04-22 00:05:52 -0700
commit     83e5767ee9a8c68150cca06ae0d27a13ba4fcaf8 (patch)
tree       9c72852322473822033addd4f1d902e4b33338d4 /include/jemalloc/internal/prof.h
parent     cfef1c84fffb9f781fccb0df4598e0dcdc022182 (diff)
download   jemalloc-83e5767ee9a8c68150cca06ae0d27a13ba4fcaf8.tar.gz
Revert "Revert "Merge remote-tracking branch 'aosp/upstream-dev' into merge""
This reverts commit 75929a97332565c3b987986f35652b6d5d275d3c.
The failure that prompted the original revert appears to have been caused by a bug elsewhere.
Change-Id: Ib29ba03b1b967f940dc19eceac2aa1d2923be1eb
Diffstat (limited to 'include/jemalloc/internal/prof.h')
-rw-r--r--  include/jemalloc/internal/prof.h  588
1 file changed, 293 insertions, 295 deletions
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index d82fbc4..2e22711 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -3,8 +3,8 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ @@ -23,9 +23,6 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_BT_MAX 128 -/* Maximum number of backtraces to store in each per thread LRU cache. */ -#define PROF_TCMAX 1024 - /* Initial hash table size. */ #define PROF_CKH_MINITEMS 64 @@ -36,12 +33,18 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_PRINTF_BUFSIZE 128 /* - * Number of mutexes shared among all ctx's. No space is allocated for these + * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ #define PROF_NCTX_LOCKS 1024 /* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* * prof_tdata pointers close to NULL are used to encode state information that * is used for cleaning up during thread shutdown. */ @@ -68,111 +71,141 @@ typedef struct { #endif struct prof_cnt_s { - /* - * Profiling counters. An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go - * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require something - * like 128-bit counters; this implementation doesn't bother to solve - * that problem. - */ - int64_t curobjs; - int64_t curbytes; + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; uint64_t accumobjs; uint64_t accumbytes; }; -struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's cnts_ql. */ - ql_elm(prof_thr_cnt_t) cnts_link; +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; - /* Linkage into thread's LRU. */ - ql_elm(prof_thr_cnt_t) lru_link; +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; /* - * Associated context. If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's cnts_ql. + * Copy of tdata->thr_uid, necessary because tdata may be defunct during + * teardown. */ - prof_ctx_t *ctx; + uint64_t thr_uid; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. - * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. 
- * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). */ - unsigned epoch; + uint64_t tctx_uid; - /* Profiling counters. */ - prof_cnt_t cnts; -}; + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; -struct prof_ctx_s { - /* Associated backtrace. */ - prof_bt_t *bt; + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. */ + prof_tctx_state_t state; - /* Protects nlimbo, cnt_merged, and cnts_ql. */ + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ malloc_mutex_t *lock; /* - * Number of threads that currently cause this ctx to be in a state of + * Number of threads that currently cause this gctx to be in a state of * limbo due to one of: - * - Initializing per thread counters associated with this ctx. - * - Preparing to destroy this ctx. - * - Dumping a heap profile that includes this ctx. + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. * nlimbo must be 1 (single destroyer) in order to safely destroy the - * ctx. + * gctx. */ unsigned nlimbo; - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - /* - * List of profile counters, one for each thread that has allocated in + * Tree of profile counters, one for each thread that has allocated in * this context. */ - ql_head(prof_thr_cnt_t) cnts_ql; + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; - /* Linkage for list of contexts to be dumped. */ - ql_elm(prof_ctx_t) dump_link; + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; }; -typedef ql_head(prof_ctx_t) prof_ctx_list_t; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a - * cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no - * others will ever write them. 
- * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t - * counter data into the associated prof_ctx_t objects, and unlink/free - * the prof_thr_cnt_t objects. + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. */ - ckh_t bt2cnt; + uint64_t thr_discrim; - /* LRU for contents of bt2cnt. */ - ql_head(prof_thr_cnt_t) lru_ql; + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; /* Sampling state. */ uint64_t prng_state; @@ -182,20 +215,36 @@ struct prof_tdata_s { bool enq; bool enq_idump; bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; }; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. - */ extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ @@ -209,6 +258,12 @@ extern char opt_prof_prefix[ #endif 1]; +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + /* * Profile dump interval, measured in bytes allocated. Each arena triggers a * profile dump when it reaches this threshold. The effect is that the @@ -218,306 +273,249 @@ extern char opt_prof_prefix[ */ extern uint64_t prof_interval; +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. 
+ */ +extern size_t lg_prof_sample; + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); -prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET +size_t prof_tdata_count(void); size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; #endif void prof_idump(void); bool prof_mdump(const char *filename); void prof_gdump(void); -prof_tdata_t *prof_tdata_init(void); -void prof_tdata_cleanup(void *arg); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +const char *prof_thread_name_get(void); +bool prof_active_get(void); +bool prof_active_set(bool active); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(void); +bool prof_thread_active_set(bool active); +bool prof_thread_active_init_get(void); +bool prof_thread_active_init_set(bool active_init); +bool prof_gdump_get(void); +bool prof_gdump_set(bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); void prof_prefork(void); void prof_postfork_parent(void); void prof_postfork_child(void); -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#define PROF_ALLOC_PREP(size, ret) do { \ - prof_tdata_t *prof_tdata; \ - prof_bt_t bt; \ - \ - assert(size == s2u(size)); \ - \ - if (!opt_prof_active || \ - prof_sample_accum_update(size, false, &prof_tdata)) { \ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; \ - } else { \ - bt_init(&bt, prof_tdata->vec); \ - prof_backtrace(&bt); \ - ret = prof_lookup(&bt); \ - } \ -} while (0) - #ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) - -prof_tdata_t *prof_tdata_get(bool create); -bool prof_sample_accum_update(size_t size, bool commit, - prof_tdata_t **prof_tdata_out); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -void prof_malloc_record_object(const void *ptr, size_t usize, - prof_thr_cnt_t *cnt); -void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx); -void prof_free(const void *ptr, size_t size); +bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update); +prof_tctx_t *prof_tctx_get(const void *ptr); +void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void prof_malloc_sample_object(const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); +void 
prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ -malloc_tsd_externs(prof_tdata, prof_tdata_t *) -malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, - prof_tdata_cleanup) +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return (prof_active); +} -JEMALLOC_INLINE prof_tdata_t * -prof_tdata_get(bool create) +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) { - prof_tdata_t *prof_tdata; + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return (prof_gdump_val); +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) +{ + prof_tdata_t *tdata; cassert(config_prof); - prof_tdata = *prof_tdata_tsd_get(); - if (create && prof_tdata == NULL) - prof_tdata = prof_tdata_init(); + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } - return (prof_tdata); + return (tdata); } -JEMALLOC_INLINE prof_ctx_t * -prof_ctx_get(const void *ptr) +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(const void *ptr) { - prof_ctx_t *ret; - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - ret = arena_prof_ctx_get(ptr); - } else - ret = huge_prof_ctx_get(ptr); - - return (ret); + return (arena_prof_tctx_get(ptr)); } -JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(const void *ptr, prof_tctx_t *tctx) { - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. 
*/ - arena_prof_ctx_set(ptr, ctx); - } else - huge_prof_ctx_set(ptr, ctx); + arena_prof_tctx_set(ptr, tctx); } -JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size, bool commit, - prof_tdata_t **prof_tdata_out) +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { - prof_tdata_t *prof_tdata; + prof_tdata_t *tdata; cassert(config_prof); - prof_tdata = prof_tdata_get(true); - if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) - prof_tdata = NULL; + tdata = prof_tdata_get(tsd, true); + if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + tdata = NULL; - if (prof_tdata_out != NULL) - *prof_tdata_out = prof_tdata; + if (tdata_out != NULL) + *tdata_out = tdata; - if (prof_tdata == NULL) + if (tdata == NULL) return (true); - if (prof_tdata->bytes_until_sample >= size) { - if (commit) - prof_tdata->bytes_until_sample -= size; + if (tdata->bytes_until_sample >= usize) { + if (update) + tdata->bytes_until_sample -= usize; return (true); } else { /* Compute new sample threshold. */ - if (commit) - prof_sample_threshold_update(prof_tdata); - return (false); + if (update) + prof_sample_threshold_update(tdata); + return (!tdata->active); } } -JEMALLOC_INLINE void -prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { - prof_ctx_set(ptr, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == s2u(usize)); + + if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd, + usize, update, &tdata))) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ + + return (ret); } -JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) +JEMALLOC_ALWAYS_INLINE void +prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(ptr, true)); - if (prof_sample_accum_update(usize, true, NULL)) { - /* - * Don't sample. For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the usize passed to - * PROF_ALLOC_PREP() and prof_malloc(). 
- */ - assert((uintptr_t)cnt == (uintptr_t)1U); - } - - if ((uintptr_t)cnt > (uintptr_t)1U) - prof_malloc_record_object(ptr, usize, cnt); + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(ptr, usize, tctx); else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } -JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, - size_t old_usize, prof_ctx_t *old_ctx) +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool updated, size_t old_usize, prof_tctx_t *old_tctx) { - prof_thr_cnt_t *told_cnt; cassert(config_prof); - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (ptr != NULL) { + if (!updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); - if (prof_sample_accum_update(usize, true, NULL)) { + if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* - * Don't sample. The usize passed to - * PROF_ALLOC_PREP() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual usize was insufficient to cross - * the sample threshold. + * Don't sample. The usize passed to PROF_ALLOC_PREP() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + tctx = (prof_tctx_t *)(uintptr_t)1U; } } - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); - if (told_cnt == NULL) { - /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_usize; - malloc_mutex_unlock(old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - cnt->epoch++; - } else if (ptr != NULL) - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_usize; - } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += usize; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += usize; - } - } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. 
*/ + if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, old_usize, old_tctx); + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(ptr, usize, tctx); + else + prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); } -JEMALLOC_INLINE void -prof_free(const void *ptr, size_t size) +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_ctx_t *ctx = prof_ctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(ptr); cassert(config_prof); + assert(usize == isalloc(ptr, true)); - if ((uintptr_t)ctx > (uintptr_t)1) { - prof_thr_cnt_t *tcnt; - assert(size == isalloc(ptr, true)); - tcnt = prof_lookup(ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(ctx->lock); - } - } + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, usize, tctx); } #endif |
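For context on how the reworked interface above fits together, here is a minimal caller-side sketch (not part of this change) of the new tsd/tctx-based sampling flow that replaces the old PROF_ALLOC_PREP() macro and prof_ctx_t plumbing. Only prof_alloc_prep(), prof_alloc_rollback(), prof_malloc(), and prof_free() are taken from the header being diffed; the allocation entry points imalloc()/idalloc(), the tsd_fetch() helper, and the umbrella include are assumed from elsewhere in jemalloc.

/*
 * Hypothetical sketch, assuming the rest of the jemalloc internal API.
 */
#include "jemalloc/internal/jemalloc_internal.h"

static void *
sampled_malloc_sketch(size_t size)
{
	tsd_t *tsd = tsd_fetch();	/* assumed TSD helper */
	size_t usize = s2u(size);	/* prof_alloc_prep() requires usize == s2u(usize) */
	prof_tctx_t *tctx;
	void *ret;

	/* Replaces PROF_ALLOC_PREP(); yields (prof_tctx_t *)1U when not sampling. */
	tctx = prof_alloc_prep(tsd, usize, true);
	ret = imalloc(tsd, usize);	/* assumed allocation entry point */
	if (ret == NULL) {
		/* Undo the bytes_until_sample consumption made during prep. */
		prof_alloc_rollback(tsd, tctx, true);
		return (NULL);
	}
	/* Attach the tctx to the allocation, or mark it as unsampled. */
	prof_malloc(ret, usize, tctx);
	return (ret);
}

static void
sampled_free_sketch(tsd_t *tsd, void *ptr)
{
	size_t usize = isalloc(ptr, true);

	/* Decrements the owning tctx's counters if the object was sampled. */
	prof_free(tsd, ptr, usize);
	idalloc(tsd, ptr);	/* assumed deallocation entry point */
}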