aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristopher Ferris <cferris@google.com>2014-11-14 12:09:46 -0800
committerChristopher Ferris <cferris@google.com>2014-12-02 12:03:17 -0800
commitfb795867f0b3aa28bbdf177e1026f3e3408e0338 (patch)
treee8d6d43379152f2f33b00f6ee3634414f84f136d
parent0f804259d71591e76988474a8c9eabdf1590c6e1 (diff)
downloadjemalloc-android-cts-5.1_r8.tar.gz
The tcache in jemalloc can take up quite a bit of extra PSS. Disabling the tcache can save a lot of PSS, but it radically reduces performance. Tune the number of small and large values to store in the tcache. Immediately force any dirty pages to be purged, rather than keeping some number of dirty pages around. Restore the chunk size back to 4MB. Using this chunk size together with forced dirty-page purging results in a higher cf-bench native mallocs score but about the same amount of PSS use. Limit the number of arenas to 2. The default is 2 * number of cpus, but that increases the amount of PSS used. My benchmarking indicates that more than 2 arenas really doesn't help much, even on a device with 4 cpus. Nearly all speed-ups come from the tcache. Bug: 17498287 Change-Id: I23b23dd88288c90e002a0a04684fb06dbf4ee742
-rw-r--r--Android.mk24
-rw-r--r--include/jemalloc/internal/chunk.h4
-rw-r--r--include/jemalloc/internal/tcache.h12
-rw-r--r--src/arena.c4
-rw-r--r--src/jemalloc.c8
5 files changed, 48 insertions, 4 deletions
diff --git a/Android.mk b/Android.mk
index f9497cb..5acd474 100644
--- a/Android.mk
+++ b/Android.mk
@@ -22,6 +22,30 @@ common_cflags := \
-fvisibility=hidden \
-Wno-unused-parameter \
+# These parameters change the way jemalloc works.
+# ANDROID_ALWAYS_PURGE
+# If defined, always purge immediately when a page is purgeable.
+# ANDROID_MAX_ARENAS=XX
+# The total number of arenas will be less than or equal to this number.
+# The number of arenas will be calculated as 2 * the number of cpus
+# but no larger than XX.
+# ANDROID_TCACHE_NSLOTS_SMALL_MAX=XX
+# The number of small slots held in the tcache. The higher this number
+# is, the more PSS is consumed. If this number is set too low
+# then small allocations will take longer to complete.
+# ANDROID_TCACHE_NSLOTS_LARGE=XX
+# The number of large slots held in the tcache. The higher this number
+# is, the more PSS is consumed. If this number is set too low
+# then large allocations will take longer to complete.
+# ANDROID_LG_TCACHE_MAXCLASS_DEFAULT=XX
+# 1 << XX is the maximum sized allocation that will be in the tcache.
+common_cflags += \
+ -DANDROID_ALWAYS_PURGE \
+ -DANDROID_MAX_ARENAS=2 \
+ -DANDROID_TCACHE_NSLOTS_SMALL_MAX=8 \
+ -DANDROID_TCACHE_NSLOTS_LARGE=16 \
+ -DANDROID_LG_TCACHE_MAXCLASS_DEFAULT=16 \
+
common_c_includes := \
$(LOCAL_PATH)/src \
$(LOCAL_PATH)/include \
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
index 47bbccd..f3bfbe0 100644
--- a/include/jemalloc/internal/chunk.h
+++ b/include/jemalloc/internal/chunk.h
@@ -5,11 +5,7 @@
* Size and alignment of memory chunks that are allocated by the OS's virtual
* memory system.
*/
-#if defined(__ANDROID__)
-#define LG_CHUNK_DEFAULT 20
-#else
#define LG_CHUNK_DEFAULT 22
-#endif
/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a) \
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index c0d48b9..6712341 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -22,13 +22,25 @@ typedef struct tcache_s tcache_t;
*
* This constant must be an even number.
*/
+#if defined(ANDROID_TCACHE_NSLOTS_SMALL_MAX)
+#define TCACHE_NSLOTS_SMALL_MAX ANDROID_TCACHE_NSLOTS_SMALL_MAX
+#else
#define TCACHE_NSLOTS_SMALL_MAX 200
+#endif
/* Number of cache slots for large size classes. */
+#if defined(ANDROID_TCACHE_NSLOTS_LARGE)
+#define TCACHE_NSLOTS_LARGE ANDROID_TCACHE_NSLOTS_LARGE
+#else
#define TCACHE_NSLOTS_LARGE 20
+#endif
/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
+#if defined(ANDROID_LG_TCACHE_MAXCLASS_DEFAULT)
+#define LG_TCACHE_MAXCLASS_DEFAULT ANDROID_LG_TCACHE_MAXCLASS_DEFAULT
+#else
#define LG_TCACHE_MAXCLASS_DEFAULT 15
+#endif
/*
* TCACHE_GC_SWEEP is the approximate number of allocation events between
diff --git a/src/arena.c b/src/arena.c
index d3fe0fb..026c74a 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -868,14 +868,17 @@ arena_run_alloc_small(arena_t *arena, size_t size, size_t binind)
static inline void
arena_maybe_purge(arena_t *arena)
{
+#if !defined(ANDROID_ALWAYS_PURGE)
size_t npurgeable, threshold;
/* Don't purge if the option is disabled. */
if (opt_lg_dirty_mult < 0)
return;
+#endif
/* Don't purge if all dirty pages are already being purged. */
if (arena->ndirty <= arena->npurgatory)
return;
+#if !defined(ANDROID_ALWAYS_PURGE)
npurgeable = arena->ndirty - arena->npurgatory;
threshold = (arena->nactive >> opt_lg_dirty_mult);
/*
@@ -884,6 +887,7 @@ arena_maybe_purge(arena_t *arena)
*/
if (npurgeable <= threshold)
return;
+#endif
arena_purge(arena, false);
}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index baff69d..7e7aaf4 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -826,6 +826,14 @@ malloc_init_hard(void)
else
opt_narenas = 1;
}
+#if defined(ANDROID_MAX_ARENAS)
+ /* Never create more than ANDROID_MAX_ARENAS arenas regardless of num_cpus.
+ * Extra arenas use more PSS and are not very useful unless
+ * lots of threads are allocating/freeing at the same time.
+ */
+ if (opt_narenas > ANDROID_MAX_ARENAS)
+ opt_narenas = ANDROID_MAX_ARENAS;
+#endif
narenas_auto = opt_narenas;
/*
* Make sure that the arenas array can be allocated. In practice, this