Add framework for parsing logs obtained from heap measurements in ChromeOS machines.

BUG=None TEST=Manual testing on chrome logs. Change-Id: I48f0062180fd6ee723ccbff047b5fea625e4e3e1 Reviewed-on: https://chrome-internal-review.googlesource.com/168652 Reviewed-by: Simon Que <sque@google.com> Tested-by: Soumya Basu <sbasu@google.com> Commit-Queue: Soumya Basu <sbasu@google.com>
author: Soumya Basu <sbasu@google.com> 2014-07-09 16:30:11 -0700
committer: chrome-internal-fetch <chrome-internal-fetch@google.com> 2014-07-18 22:41:47 +0000
commit: 50ef334ec9d72f3bfa06be51a493a1ff8f4c3162 (patch)
tree: 75a43e9ddaf028a9251325700812fa3468659c64 /mem_tests
parent: 74b3d3234dc857ebee3af81faa3f8f420ac6ecce (diff)
download: toolchain-utils-50ef334ec9d72f3bfa06be51a493a1ff8f4c3162.tar.gz
6 files changed, 225 insertions, 0 deletions
diff --git a/mem_tests/README b/mem_tests/README
new file mode 100644
index 00000000..4e35f684
--- /dev/null
+++ b/mem_tests/README
@@ -0,0 +1,46 @@
+Usage
+-----
+
+These scripts parse TCMalloc output in order to extract certain information
+from it.
+
+In particular, these scripts rely on the error logging system for ChromeOS in
+order to extract information. To use a script (e.g. total_mem_sampled.py), run
+the command:
+
+./total_mem_sampled.py FILENAME
+
+where FILENAME is the name of the log file to be parsed.
+
+Codebase Changes
+----------------
+
+There are two ideas that motivate these changes:
+
+1- Turn on TCMalloc sampling.
+2- Use perf to collect the sample information.
+
+The following files have to be changed:
+
+In chrome/browser/metrics/perf_provider_chromeos.cc:
+
+add:
+    #include "third_party/tcmalloc/chromium/src/gperftools/malloc_extension.h"
+
+Change the perf profiling interval to something small (60*1000 milliseconds).
+
+inside DoPeriodicCollection, insert the following code:
+
+    std::string output;
+    char* chr_arr = new char[9999];
+    MallocExtension::instance() ->GetHeapSample(&output);
+    MallocExtension::instance() ->GetStats(chr_arr, 9999);
+    LOG(ERROR) << "Output Heap Data: ";
+    LOG(ERROR) << output;
+    LOG(ERROR) << "Output Heap Stats: ";
+    output = "";
+    for (unsigned int i = 0; i < strlen(chr_arr); i++) {
+        output += chr_arr[i];
+    }
+    LOG(ERROR) << output;
+    delete[] chr_arr;
diff --git a/mem_tests/clean_data.py b/mem_tests/clean_data.py
new file mode 100755
index 00000000..dc8a7b71
--- /dev/null
+++ b/mem_tests/clean_data.py
@@ -0,0 +1,30 @@
+#! /usr/bin/python
+
+"""Cleans output from other scripts to eliminate duplicates.
+
+When frequently sampling data, we see that records occasionally will contain
+the same timestamp (due to perf recording twice in the same second).
+
+This keeps only the last record seen for each timestamp and appends the result
+to clean2.csv. Order with respect to timestamps is not preserved. The input is
+assumed to be a csv whose first value in each row is the time in seconds from
+a standard time.
+
+"""
+
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("filename")
+args = parser.parse_args()
+
+my_file = open(args.filename)
+output_file = open("clean2.csv", "a")
+dictionary = dict()
+
+for line in my_file:
+    new_time = int(line.split(",")[0])
+    dictionary[new_time] = line
+
+for key in dictionary.keys():
+    output_file.write(dictionary[key])
diff --git a/mem_tests/mem_groups.py b/mem_tests/mem_groups.py
new file mode 100755
index 00000000..75591182
--- /dev/null
+++ b/mem_tests/mem_groups.py
@@ -0,0 +1,56 @@
+#! /usr/bin/python
+
+"""Groups memory by allocation sizes.
+
+Takes a log entry and sorts everything into groups based on what size
+chunks the memory has been allocated in. groups is an array that contains the
+divisions (in bytes).
+
+The output format is:
+
+timestamp, fraction of memory in chunks <= groups[0], fraction between
+groups[0] and groups[1], etc.
+
+"""
+
+import argparse
+from utils import compute_total_diff
+from datetime import datetime
+
+pretty_print = True
+
+parser = argparse.ArgumentParser()
+parser.add_argument("filename")
+args = parser.parse_args()
+
+my_file = open(args.filename)
+output_file = open("groups.csv", "a")
+
+# The cutoffs for each group in the output (in bytes)
+groups = [1024, 8192, 65536, 524288, 4194304]
+
+base_time = datetime(2014, 6, 11, 0, 0)
+prev_line = ""
+half_entry = (None, None)
+
+for line in my_file:
+    if "heap profile:" in line:
+        if half_entry[0] is not None:
+            group_totals = half_entry[1]
+            total = sum(group_totals) * 1.0
+            to_join = [half_entry[0]] + [value / total for value in group_totals]
+            to_output = ",".join([str(elem) for elem in to_join])
+            output_file.write(to_output)
+        total_diff = compute_total_diff(line, base_time)
+        half_entry = (total_diff, [0]*(len(groups) + 1))
+    if "] @ " in line and "heap profile:" not in line:
+        mem_samples = line.strip().split("[")[0]
+        num_samples, total_mem = map(int, mem_samples.strip().split(":"))
+        mem_per_sample = total_mem // num_samples
+        group_totals = half_entry[1]
+        for cutoff_index in range(len(groups)):
+            if mem_per_sample <= groups[cutoff_index]:
+                group_totals[cutoff_index] += total_mem
+                break
+        if mem_per_sample > groups[-1]:
+            group_totals[-1] += total_mem
diff --git a/mem_tests/total_mem_actual.py b/mem_tests/total_mem_actual.py
new file mode 100755
index 00000000..c9c51b16
--- /dev/null
+++ b/mem_tests/total_mem_actual.py
@@ -0,0 +1,38 @@
+#! /usr/bin/python
+
+"""Parses the actual memory usage from TCMalloc.
+
+This goes through logs that contain the actual (not sampled) allocated memory.
+The output is of the form:
+
+time (in seconds from some base time), amount of memory allocated by the
+application
+
+"""
+
+import argparse
+from utils import compute_total_diff
+from datetime import datetime
+
+pretty_print = True
+
+parser = argparse.ArgumentParser()
+parser.add_argument("filename")
+args = parser.parse_args()
+
+my_file = open(args.filename)
+output_file = open("raw_memory_data.csv", "a")
+
+base_time = datetime(2014, 6, 11, 0, 0)
+prev_line = ""
+half_entry = (None, None)
+
+for line in my_file:
+    if "Output Heap Stats:" in line:
+        total_diff = compute_total_diff(line, base_time)
+        half_entry = (total_diff, None)
+    if "Bytes in use by application" in line:
+        total_diff = half_entry[0]
+        memory_used = int(line.strip().split()[1])
+        half_entry = (None, None)
+        output_file.write("{0},{1}\n".format(total_diff, memory_used))
diff --git a/mem_tests/total_mem_sampled.py b/mem_tests/total_mem_sampled.py
new file mode 100755
index 00000000..f8ed8013
--- /dev/null
+++ b/mem_tests/total_mem_sampled.py
@@ -0,0 +1,32 @@
+#! /usr/bin/python
+
+"""Parses the total amount of sampled memory from log files.
+
+This file outputs the total amount of memory that has been sampled by tcmalloc.
+The output is of the format:
+
+time in seconds from a base time, amount of memory that has been sampled
+
+"""
+
+import argparse
+from utils import compute_total_diff
+from datetime import datetime
+
+parser = argparse.ArgumentParser()
+parser.add_argument("filename")
+args = parser.parse_args()
+
+my_file = open(args.filename)
+output_file = open("memory_data.csv", "a")
+
+base_time = datetime(2014, 6, 11, 0, 0)
+prev_line = ""
+half_entry = (None, None)
+
+for line in my_file:
+    if "heap profile: " not in line:
+        continue
+    memory_used = line.strip().split(":")[-1].strip().split("]")[0].strip()
+    total_diff = compute_total_diff(line, base_time)
+    output_file.write("{0},{1}\n".format(int(total_diff), memory_used))
diff --git a/mem_tests/utils.py b/mem_tests/utils.py
new file mode 100644
index 00000000..54dbcc2d
--- /dev/null
+++ b/mem_tests/utils.py
@@ -0,0 +1,23 @@
+#! /usr/bin/python
+
+"""Utility functions for the memory tests.
+"""
+
+from datetime import datetime
+
+def compute_total_diff(line, base_time):
+    """
+    Computes the time in seconds from the base time to when the line was logged.
+
+    An example of a line is:
+    [4688:4688:0701/010151:ERROR:perf_provider_chromeos.cc(228)]...
+
+    Here the month is 07, the day is 01 and the time is 01:01:51 (year 2014).
+
+    line- the line that contains the time the record was taken
+    base_time- the base time to measure our timestamp from
+    """
+    date = line.strip().split(":")[2].split("/")
+    timestamp = datetime(2014, int(date[0][0:2]), int(date[0][2:4]),
+                         int(date[1][0:2]), int(date[1][2:4]), int(date[1][4:6]))
+    return (timestamp - base_time).total_seconds()
author	Soumya Basu <sbasu@google.com>	2014-07-09 16:30:11 -0700
committer	chrome-internal-fetch <chrome-internal-fetch@google.com>	2014-07-18 22:41:47 +0000
commit	50ef334ec9d72f3bfa06be51a493a1ff8f4c3162 (patch)
tree	75a43e9ddaf028a9251325700812fa3468659c64 /mem_tests
parent	74b3d3234dc857ebee3af81faa3f8f420ac6ecce (diff)
download	toolchain-utils-50ef334ec9d72f3bfa06be51a493a1ff8f4c3162.tar.gz