summaryrefslogtreecommitdiff
path: root/base/tracked_objects.h
diff options
context:
space:
mode:
Diffstat (limited to 'base/tracked_objects.h')
-rw-r--r--base/tracked_objects.h286
1 files changed, 182 insertions, 104 deletions
diff --git a/base/tracked_objects.h b/base/tracked_objects.h
index 7ef0317c39..36caec3c6e 100644
--- a/base/tracked_objects.h
+++ b/base/tracked_objects.h
@@ -14,9 +14,12 @@
#include <utility>
#include <vector>
+#include "base/allocator/features.h"
#include "base/atomicops.h"
#include "base/base_export.h"
#include "base/containers/hash_tables.h"
+#include "base/debug/debugging_flags.h"
+#include "base/debug/thread_heap_usage_tracker.h"
#include "base/gtest_prod_util.h"
#include "base/lazy_instance.h"
#include "base/location.h"
@@ -59,71 +62,76 @@ struct TrackingInfo;
// with great efficiency (i.e., copying of strings is never needed, and
// comparisons for equality can be based on pointer comparisons).
//
-// Next, a Births instance is created for use ONLY on the thread where this
-// instance was created. That Births instance records (in a base class
-// BirthOnThread) references to the static data provided in a Location instance,
-// as well as a pointer specifying the thread on which the birth takes place.
-// Hence there is at most one Births instance for each Location on each thread.
-// The derived Births class contains slots for recording statistics about all
-// instances born at the same location. Statistics currently include only the
-// count of instances constructed.
+// Next, a Births instance is constructed or found. A Births instance records
+// (in a base class BirthOnThread) references to the static data provided in a
+// Location instance, as well as a pointer to the ThreadData bound to the thread
+// on which the birth takes place (see discussion on ThreadData below). There is
+// at most one Births instance for each Location / ThreadData pair. The derived
+// Births class contains slots for recording statistics about all instances born
+// at the same location. Statistics currently include only the count of
+// instances constructed.
//
// Since the base class BirthOnThread contains only constant data, it can be
-// freely accessed by any thread at any time (i.e., only the statistic needs to
-// be handled carefully, and stats are updated exclusively on the birth thread).
+// freely accessed by any thread at any time. The statistics must be handled
+// more carefully; they are updated exclusively by the single thread to which
+// the ThreadData is bound at a given time.
//
// For Tasks, having now either constructed or found the Births instance
// described above, a pointer to the Births instance is then recorded into the
-// PendingTask structure in MessageLoop. This fact alone is very useful in
-// debugging, when there is a question of where an instance came from. In
-// addition, the birth time is also recorded and used to later evaluate the
-// lifetime duration of the whole Task. As a result of the above embedding, we
-// can find out a Task's location of birth, and thread of birth, without using
-// any locks, as all that data is constant across the life of the process.
+// PendingTask structure. This fact alone is very useful in debugging, when
+// there is a question of where an instance came from. In addition, the birth
+// time is also recorded and used to later evaluate the lifetime duration of the
+// whole Task. As a result of the above embedding, we can find out a Task's
+// location of birth, and name of birth thread, without using any locks, as all
+// that data is constant across the life of the process.
//
// The above work *could* also be done for any other object as well by calling
// TallyABirthIfActive() and TallyRunOnNamedThreadIfTracking() as appropriate.
//
-// The amount of memory used in the above data structures depends on how many
-// threads there are, and how many Locations of construction there are.
-// Fortunately, we don't use memory that is the product of those two counts, but
-// rather we only need one Births instance for each thread that constructs an
-// instance at a Location. In many cases, instances are only created on one
-// thread, so the memory utilization is actually fairly restrained.
+// The upper bound for the amount of memory used in the above data structures is
+// the product of the number of ThreadData instances and the number of
+// Locations. Fortunately, Locations are often created on a single thread and
+// the memory utilization is actually fairly restrained.
//
// Lastly, when an instance is deleted, the final tallies of statistics are
// carefully accumulated. That tallying writes into slots (members) in a
-// collection of DeathData instances. For each birth place Location that is
-// destroyed on a thread, there is a DeathData instance to record the additional
-// death count, as well as accumulate the run-time and queue-time durations for
-// the instance as it is destroyed (dies). By maintaining a single place to
-// aggregate this running sum *only* for the given thread, we avoid the need to
-// lock such DeathData instances. (i.e., these accumulated stats in a DeathData
-// instance are exclusively updated by the singular owning thread).
+// collection of DeathData instances. For each Births / death ThreadData pair,
+// there is a DeathData instance to record the additional death count, as well
+// as to accumulate the run-time and queue-time durations for the instance as it
+// is destroyed (dies). Since a ThreadData is bound to at most one thread at a
+// time, there is no need to lock such DeathData instances. (i.e., these
+// accumulated stats in a DeathData instance are exclusively updated by the
+// singular owning thread).
//
-// With the above life cycle description complete, the major remaining detail
-// is explaining how each thread maintains a list of DeathData instances, and
-// of Births instances, and is able to avoid additional (redundant/unnecessary)
-// allocations.
+// With the above life cycle description complete, the major remaining detail is
+// explaining how existing Births and DeathData instances are found to avoid
+// redundant allocations.
//
-// Each thread maintains a list of data items specific to that thread in a
-// ThreadData instance (for that specific thread only). The two critical items
-// are lists of DeathData and Births instances. These lists are maintained in
-// STL maps, which are indexed by Location. As noted earlier, we can compare
-// locations very efficiently as we consider the underlying data (file,
-// function, line) to be atoms, and hence pointer comparison is used rather than
-// (slow) string comparisons.
+// A ThreadData instance maintains maps of Births and DeathData instances. The
+// Births map is indexed by Location and the DeathData map is indexed by
+// Births*. As noted earlier, we can compare Locations very efficiently as we
+// consider the underlying data (file, function, line) to be atoms, and hence
+// pointer comparison is used rather than (slow) string comparisons.
//
-// To provide a mechanism for iterating over all "known threads," which means
-// threads that have recorded a birth or a death, we create a singly linked list
-// of ThreadData instances. Each such instance maintains a pointer to the next
-// one. A static member of ThreadData provides a pointer to the first item on
-// this global list, and access via that all_thread_data_list_head_ item
-// requires the use of the list_lock_.
-// When new ThreadData instances is added to the global list, it is pre-pended,
-// which ensures that any prior acquisition of the list is valid (i.e., the
-// holder can iterate over it without fear of it changing, or the necessity of
-// using an additional lock. Iterations are actually pretty rare (used
+// The first time that a thread calls ThreadData::InitializeThreadContext() or
+// ThreadData::Get(), a ThreadData instance is bound to it and stored in TLS. If
+// a ThreadData bound to a terminated thread with the same sanitized name (i.e.
+// name without trailing digits) as the current thread is available, it is
+// reused. Otherwise, a new ThreadData instance is instantiated. Since a
+// ThreadData is bound to at most one thread at a time, there is no need to
+// acquire a lock to access its maps. Over time, a ThreadData may be bound to
+// different threads that share the same sanitized name.
+//
+// We maintain a list of all ThreadData instances for the current process. Each
+// ThreadData instance has a pointer to the next one. A static member of
+// ThreadData provides a pointer to the first item on this global list, and
+// access via that all_thread_data_list_head_ item requires the use of the
+// list_lock_.
+//
+// When new ThreadData instances are added to the global list, they are pre-
+// pended, which ensures that any prior acquisition of the list is valid (i.e.,
+// the holder can iterate over it without fear of it changing, or the necessity
+// of using an additional lock.) Iterations are actually pretty rare (used
// primarily for cleanup, or snapshotting data for display), so this lock has
// very little global performance impact.
//
@@ -170,12 +178,13 @@ struct TrackingInfo;
// memory reference).
//
// TODO(jar): We can implement a Snapshot system that *tries* to grab the
-// snapshots on the source threads *when* they have MessageLoops available
-// (worker threads don't have message loops generally, and hence gathering from
-// them will continue to be asynchronous). We had an implementation of this in
-// the past, but the difficulty is dealing with message loops being terminated.
-// We can *try* to spam the available threads via some task runner to
-// achieve this feat, and it *might* be valuable when we are collecting data
+// snapshots on the source threads *when* they have SingleThreadTaskRunners
+// available (worker threads don't have SingleThreadTaskRunners, and hence
+// gathering from them will continue to be asynchronous). We had an
+// implementation of this in the past, but the difficulty is dealing with
+// threads being terminated. We can *try* to post a task to threads that have a
+// SingleThreadTaskRunner and check if that succeeds (will fail if the thread
+// has been terminated). This *might* be valuable when we are collecting data
// for upload via UMA (where correctness of data may be more significant than
// for a single screen of about:profiler).
//
@@ -226,7 +235,7 @@ struct BASE_EXPORT BirthOnThreadSnapshot {
~BirthOnThreadSnapshot();
LocationSnapshot location;
- std::string thread_name;
+ std::string sanitized_thread_name;
};
//------------------------------------------------------------------------------
@@ -248,6 +257,8 @@ class BASE_EXPORT Births: public BirthOnThread {
DISALLOW_COPY_AND_ASSIGN(Births);
};
+class DeathData;
+
//------------------------------------------------------------------------------
// A "snapshotted" representation of the DeathData class.
@@ -265,7 +276,15 @@ struct BASE_EXPORT DeathDataSnapshot {
int32_t run_duration_sample,
int32_t queue_duration_sum,
int32_t queue_duration_max,
- int32_t queue_duration_sample);
+ int32_t queue_duration_sample,
+ int32_t alloc_ops,
+ int32_t free_ops,
+ int32_t allocated_bytes,
+ int32_t freed_bytes,
+ int32_t alloc_overhead_bytes,
+ int32_t max_allocated_bytes);
+ DeathDataSnapshot(const DeathData& death_data);
+ DeathDataSnapshot(const DeathDataSnapshot& other);
~DeathDataSnapshot();
// Calculates and returns the delta between this snapshot and an earlier
@@ -279,6 +298,13 @@ struct BASE_EXPORT DeathDataSnapshot {
int32_t queue_duration_sum;
int32_t queue_duration_max;
int32_t queue_duration_sample;
+
+ int32_t alloc_ops;
+ int32_t free_ops;
+ int32_t allocated_bytes;
+ int32_t freed_bytes;
+ int32_t alloc_overhead_bytes;
+ int32_t max_allocated_bytes;
};
//------------------------------------------------------------------------------
@@ -287,13 +313,7 @@ struct BASE_EXPORT DeathDataSnapshot {
struct DeathDataPhaseSnapshot {
DeathDataPhaseSnapshot(int profiling_phase,
- int count,
- int32_t run_duration_sum,
- int32_t run_duration_max,
- int32_t run_duration_sample,
- int32_t queue_duration_sum,
- int32_t queue_duration_max,
- int32_t queue_duration_sample,
+ const DeathData& death_data,
const DeathDataPhaseSnapshot* prev);
// Profiling phase at which completion this snapshot was taken.
@@ -326,9 +346,26 @@ class BASE_EXPORT DeathData {
// Update stats for a task destruction (death) that had a Run() time of
// |duration|, and has had a queueing delay of |queue_duration|.
- void RecordDeath(const int32_t queue_duration,
- const int32_t run_duration,
- const uint32_t random_number);
+ void RecordDurations(const int32_t queue_duration,
+ const int32_t run_duration,
+ const uint32_t random_number);
+
+ // Update stats for a task destruction that performed |alloc_ops|
+ // allocations, |free_ops| frees, allocated |allocated_bytes| bytes, freed
+ // |freed_bytes|, where an estimated |alloc_overhead_bytes| went to heap
+ // overhead, and where at most |max_allocated_bytes| were outstanding at any
+ // one time.
+ // Note that |alloc_overhead_bytes|/|alloc_ops| yields the average estimated
+ // heap overhead of allocations in the task, and |allocated_bytes|/|alloc_ops|
+ // yields the average size of allocation.
+ // Note also that |allocated_bytes|-|freed_bytes| yields the net heap memory
+ // usage of the task, which can be negative.
+ void RecordAllocations(const uint32_t alloc_ops,
+ const uint32_t free_ops,
+ const uint32_t allocated_bytes,
+ const uint32_t freed_bytes,
+ const uint32_t alloc_overhead_bytes,
+ const uint32_t max_allocated_bytes);
// Metrics and past snapshots accessors, used only for serialization and in
// tests.
@@ -351,6 +388,22 @@ class BASE_EXPORT DeathData {
int32_t queue_duration_sample() const {
return base::subtle::NoBarrier_Load(&queue_duration_sample_);
}
+ int32_t alloc_ops() const {
+ return base::subtle::NoBarrier_Load(&alloc_ops_);
+ }
+ int32_t free_ops() const { return base::subtle::NoBarrier_Load(&free_ops_); }
+ int32_t allocated_bytes() const {
+ return base::subtle::NoBarrier_Load(&allocated_bytes_);
+ }
+ int32_t freed_bytes() const {
+ return base::subtle::NoBarrier_Load(&freed_bytes_);
+ }
+ int32_t alloc_overhead_bytes() const {
+ return base::subtle::NoBarrier_Load(&alloc_overhead_bytes_);
+ }
+ int32_t max_allocated_bytes() const {
+ return base::subtle::NoBarrier_Load(&max_allocated_bytes_);
+ }
const DeathDataPhaseSnapshot* last_phase_snapshot() const {
return last_phase_snapshot_;
}
@@ -361,6 +414,12 @@ class BASE_EXPORT DeathData {
void OnProfilingPhaseCompleted(int profiling_phase);
private:
+ // A saturating addition operation for member variables. This elides the
+ // use of atomic-primitive reads for members that are only written on the
+ // owning thread.
+ static void SaturatingMemberAdd(const uint32_t addend,
+ base::subtle::Atomic32* sum);
+
// Members are ordered from most regularly read and updated, to least
// frequently used. This might help a bit with cache lines.
// Number of runs seen (divisor for calculating averages).
@@ -383,6 +442,24 @@ class BASE_EXPORT DeathData {
// snapshot thread.
base::subtle::Atomic32 run_duration_max_;
base::subtle::Atomic32 queue_duration_max_;
+
+ // The cumulative number of allocation and free operations.
+ base::subtle::Atomic32 alloc_ops_;
+ base::subtle::Atomic32 free_ops_;
+
+ // The number of bytes allocated by the task.
+ base::subtle::Atomic32 allocated_bytes_;
+
+ // The number of bytes freed by the task.
+ base::subtle::Atomic32 freed_bytes_;
+
+ // The cumulative number of overhead bytes. Where available this yields an
+ // estimate of the heap overhead for allocations.
+ base::subtle::Atomic32 alloc_overhead_bytes_;
+
+ // The high-watermark for the number of outstanding heap allocated bytes.
+ base::subtle::Atomic32 max_allocated_bytes_;
+
// Samples, used by crowd sourcing gatherers. These are almost never read,
// and rarely updated. They can be modified only on the death thread.
base::subtle::Atomic32 run_duration_sample_;
@@ -407,14 +484,14 @@ struct BASE_EXPORT TaskSnapshot {
TaskSnapshot();
TaskSnapshot(const BirthOnThreadSnapshot& birth,
const DeathDataSnapshot& death_data,
- const std::string& death_thread_name);
+ const std::string& death_sanitized_thread_name);
~TaskSnapshot();
BirthOnThreadSnapshot birth;
// Delta between death data for a thread for a certain profiling phase and the
// snapshot for the previous phase, if any. Otherwise, just a snapshot.
DeathDataSnapshot death_data;
- std::string death_thread_name;
+ std::string death_sanitized_thread_name;
};
//------------------------------------------------------------------------------
@@ -450,9 +527,8 @@ class BASE_EXPORT ThreadData {
// Initialize the current thread context with a new instance of ThreadData.
// This is used by all threads that have names, and should be explicitly
- // set *before* any births on the threads have taken place. It is generally
- // only used by the message loop, which has a well defined thread name.
- static void InitializeThreadContext(const std::string& suggested_name);
+ // set *before* any births on the threads have taken place.
+ static void InitializeThreadContext(const std::string& thread_name);
// Using Thread Local Store, find the current instance for collecting data.
// If an instance does not exist, construct one (and remember it for use on
@@ -510,7 +586,9 @@ class BASE_EXPORT ThreadData {
static void TallyRunInAScopedRegionIfTracking(const Births* births,
const TaskStopwatch& stopwatch);
- const std::string& thread_name() const { return thread_name_; }
+ const std::string& sanitized_thread_name() const {
+ return sanitized_thread_name_;
+ }
// Initializes all statics if needed (this initialization call should be made
// while we are single threaded).
@@ -559,12 +637,7 @@ class BASE_EXPORT ThreadData {
typedef std::vector<std::pair<const Births*, DeathDataPhaseSnapshot>>
DeathsSnapshot;
- // Worker thread construction creates a name since there is none.
- explicit ThreadData(int thread_number);
-
- // Message loop based construction should provide a name.
- explicit ThreadData(const std::string& suggested_name);
-
+ explicit ThreadData(const std::string& sanitized_thread_name);
~ThreadData();
// Push this instance to the head of all_thread_data_list_head_, linking it to
@@ -628,6 +701,12 @@ class BASE_EXPORT ThreadData {
// ThreadData instances.
static void ShutdownSingleThreadedCleanup(bool leak);
+ // Returns a ThreadData instance for a thread whose sanitized name is
+ // |sanitized_thread_name|. The returned instance may have been extracted from
+ // the list of retired ThreadData instances or newly allocated.
+ static ThreadData* GetRetiredOrCreateThreadData(
+ const std::string& sanitized_thread_name);
+
// When non-null, this specifies an external function that supplies monotone
// increasing time function.
static NowFunction* now_function_for_testing_;
@@ -635,22 +714,16 @@ class BASE_EXPORT ThreadData {
// We use thread local store to identify which ThreadData to interact with.
static base::ThreadLocalStorage::StaticSlot tls_index_;
- // List of ThreadData instances for use with worker threads. When a worker
- // thread is done (terminated), we push it onto this list. When a new worker
- // thread is created, we first try to re-use a ThreadData instance from the
- // list, and if none are available, construct a new one.
- // This is only accessed while list_lock_ is held.
- static ThreadData* first_retired_worker_;
+ // Linked list of ThreadData instances that were associated with threads that
+ // have been terminated and that have not been associated with a new thread
+ // since then. This is only accessed while |list_lock_| is held.
+ static ThreadData* first_retired_thread_data_;
// Link to the most recently created instance (starts a null terminated list).
// The list is traversed by about:profiler when it needs to snapshot data.
// This is only accessed while list_lock_ is held.
static ThreadData* all_thread_data_list_head_;
- // The next available worker thread number. This should only be accessed when
- // the list_lock_ is held.
- static int worker_thread_data_creation_count_;
-
// The number of times TLS has called us back to cleanup a ThreadData
// instance. This is only accessed while list_lock_ is held.
static int cleanup_count_;
@@ -671,23 +744,16 @@ class BASE_EXPORT ThreadData {
// Link to next instance (null terminated list). Used to globally track all
// registered instances (corresponds to all registered threads where we keep
- // data).
+ // data). Only modified in the constructor.
ThreadData* next_;
- // Pointer to another ThreadData instance for a Worker-Thread that has been
- // retired (its thread was terminated). This value is non-NULL only for a
- // retired ThreadData associated with a Worker-Thread.
- ThreadData* next_retired_worker_;
-
- // The name of the thread that is being recorded. If this thread has no
- // message_loop, then this is a worker thread, with a sequence number postfix.
- std::string thread_name_;
+ // Pointer to another retired ThreadData instance. This value is nullptr if
+ // this is associated with an active thread.
+ ThreadData* next_retired_thread_data_;
- // Indicate if this is a worker thread, and the ThreadData contexts should be
- // stored in the unregistered_thread_data_pool_ when not in use.
- // Value is zero when it is not a worker thread. Value is a positive integer
- // corresponding to the created thread name if it is a worker thread.
- int worker_thread_number_;
+ // The name of the thread that is being recorded, with all trailing digits
+ // replaced with a single "*" character.
+ const std::string sanitized_thread_name_;
// A map used on each thread to keep track of Births on this thread.
// This map should only be accessed on the thread it was constructed on.
@@ -755,6 +821,13 @@ class BASE_EXPORT TaskStopwatch {
// this thread during that period.
int32_t RunDurationMs() const;
+#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
+ const base::debug::ThreadHeapUsageTracker& heap_usage() const {
+ return heap_usage_;
+ }
+ bool heap_tracking_enabled() const { return heap_tracking_enabled_; }
+#endif
+
// Returns tracking info for the current thread.
ThreadData* GetThreadData() const;
@@ -762,6 +835,11 @@ class BASE_EXPORT TaskStopwatch {
// Time when the stopwatch was started.
TrackedTime start_time_;
+#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
+ base::debug::ThreadHeapUsageTracker heap_usage_;
+ bool heap_tracking_enabled_;
+#endif
+
// Wallclock duration of the task.
int32_t wallclock_duration_ms_;