diff options
author | Soumya Basu <sbasu@google.com> | 2014-07-09 16:30:11 -0700 |
---|---|---|
committer | chrome-internal-fetch <chrome-internal-fetch@google.com> | 2014-07-18 22:41:47 +0000 |
commit | 50ef334ec9d72f3bfa06be51a493a1ff8f4c3162 (patch) | |
tree | 75a43e9ddaf028a9251325700812fa3468659c64 /mem_tests | |
parent | 74b3d3234dc857ebee3af81faa3f8f420ac6ecce (diff) | |
download | toolchain-utils-50ef334ec9d72f3bfa06be51a493a1ff8f4c3162.tar.gz |
Add framework for parsing logs obtained from heap measurements in ChromeOS machines.
BUG=None
TEST=Manual testing on chrome logs.
Change-Id: I48f0062180fd6ee723ccbff047b5fea625e4e3e1
Reviewed-on: https://chrome-internal-review.googlesource.com/168652
Reviewed-by: Simon Que <sque@google.com>
Tested-by: Soumya Basu <sbasu@google.com>
Commit-Queue: Soumya Basu <sbasu@google.com>
Diffstat (limited to 'mem_tests')
-rw-r--r-- | mem_tests/README | 46 | ||||
-rwxr-xr-x | mem_tests/clean_data.py | 30 | ||||
-rwxr-xr-x | mem_tests/mem_groups.py | 56 | ||||
-rwxr-xr-x | mem_tests/total_mem_actual.py | 38 | ||||
-rwxr-xr-x | mem_tests/total_mem_sampled.py | 32 | ||||
-rw-r--r-- | mem_tests/utils.py | 23 |
6 files changed, 225 insertions, 0 deletions
diff --git a/mem_tests/README b/mem_tests/README new file mode 100644 index 00000000..4e35f684 --- /dev/null +++ b/mem_tests/README @@ -0,0 +1,46 @@ +Usage +----- + +These scripts are made to parse TCMalloc output in order to extract certain +info from them. + +In particular, these scripts rely on the error logging system for ChromeOS in +order to extract information. In order to use a script (e.g. total_mem_actual.py), you +just run the command: + +./total_mem_actual.py FILENAME + +where FILENAME is the name of the log file to be parsed. + +Codebase Changes +---------------- + +There are two ideas that motivate these changes: + +1- Turn on TCMalloc sampling. +2- Use perf to collect the sample information. + +The following files have to be changed: + +in chrome/browser/metrics/perf_provider_chrome_os: + +add: + #include "third_party/tcmalloc/chromium/src/gperftools/malloc_extension.h" + +Change the perf profiling interval to something small (60*1000 milliseconds). + +inside DoPeriodicCollection, insert the following code: + + std::string output; + char* chr_arr = new char[9999]; + MallocExtension::instance() ->GetHeapSample(&output); + MallocExtension::instance() ->GetStats(chr_arr, 9999); + LOG(ERROR) << "Output Heap Data: "; + LOG(ERROR) << output; + LOG(ERROR) << "Output Heap Stats: "; + output = ""; + for (unsigned int i = 0; i < strlen(chr_arr); i++) { + output += chr_arr[i]; + } + LOG(ERROR) << output; + delete[] chr_arr; diff --git a/mem_tests/clean_data.py b/mem_tests/clean_data.py new file mode 100755 index 00000000..dc8a7b71 --- /dev/null +++ b/mem_tests/clean_data.py @@ -0,0 +1,30 @@ +#! /usr/bin/python + +"""Cleans output from other scripts to eliminate duplicates. + +When frequently sampling data, we see that records occasionally will contain +the same timestamp (due to perf recording twice in the same second). + +This removes all of the duplicate timestamps for every record. Order with +respect to timestamps is not preserved. 
Also, the assumption is that the log +file is a csv with the first value in each row being the time in seconds from a +standard time. + +""" + +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("filename") +args = parser.parse_args() + +my_file = open(args.filename) +output_file = open("clean2.csv", "a") +dictionary = dict() + +for line in my_file: + new_time = int(line.split(",")[0]) + dictionary[new_time] = line + +for key in dictionary.keys(): + output_file.write(dictionary[key]) diff --git a/mem_tests/mem_groups.py b/mem_tests/mem_groups.py new file mode 100755 index 00000000..75591182 --- /dev/null +++ b/mem_tests/mem_groups.py @@ -0,0 +1,56 @@ +#! /usr/bin/python + +"""Groups memory by allocation sizes. + +Takes a log entry and sorts everything into groups based on what size +chunks the memory has been allocated in. groups is an array that contains the +divisions (in bytes). + +The output format is: + +timestamp, percent of memory in chunks < groups[0], percent between groups[0] +and groups[1], etc. 
+ +""" + +import argparse +from utils import compute_total_diff +from datetime import datetime + +pretty_print = True + +parser = argparse.ArgumentParser() +parser.add_argument("filename") +args = parser.parse_args() + +my_file = open(args.filename) +output_file = open("groups.csv", "a") + +# The cutoffs for each group in the output (in bytes) +groups = [1024, 8192, 65536, 524288, 4194304] + +base_time = datetime(2014, 6, 11, 0, 0) +prev_line = "" +half_entry = (None, None) + +for line in my_file: + if "heap profile:" in line: + if half_entry[0] is not None: + group_totals = half_entry[1] + total = sum(group_totals) * 1.0 + to_join = [half_entry[0]] + [value / total for value in group_totals] + to_output = ",".join([str(elem) for elem in to_join]) + output_file.write(to_output) + total_diff = compute_total_diff(line, base_time) + half_entry = (total_diff, [0]*(len(groups) + 1)) + if "] @ " in line and "heap profile:" not in line: + mem_samples = line.strip().split("[")[0] + num_samples, total_mem = map(int, mem_samples.strip().split(":")) + mem_per_sample = total_mem // num_samples + group_totals = half_entry[1] + for cutoff_index in range(len(groups)): + if mem_per_sample <= groups[cutoff_index]: + group_totals[cutoff_index] += total_mem + break + if mem_per_sample > groups[-1]: + group_totals[-1] += total_mem diff --git a/mem_tests/total_mem_actual.py b/mem_tests/total_mem_actual.py new file mode 100755 index 00000000..c9c51b16 --- /dev/null +++ b/mem_tests/total_mem_actual.py @@ -0,0 +1,38 @@ +#! /usr/bin/python + +"""Parses the actual memory usage from TCMalloc. + +This goes through logs that have the actual allocated memory (not sampled) in +the logs. 
The output is of the form of: + +time (in seconds from some base time), amount of memory allocated by the +application + +""" + +import argparse +from utils import compute_total_diff +from datetime import datetime + +pretty_print = True + +parser = argparse.ArgumentParser() +parser.add_argument("filename") +args = parser.parse_args() + +my_file = open(args.filename) +output_file = open("raw_memory_data.csv", "a") + +base_time = datetime(2014, 6, 11, 0, 0) +prev_line = "" +half_entry = (None, None) + +for line in my_file: + if "Output Heap Stats:" in line: + total_diff = compute_total_diff(line, base_time) + half_entry = (total_diff, None) + if "Bytes in use by application" in line: + total_diff = half_entry[0] + memory_used = int(line.strip().split()[1]) + half_entry = (None, None) + output_file.write("{0},{1}\n".format(total_diff, memory_used)) diff --git a/mem_tests/total_mem_sampled.py b/mem_tests/total_mem_sampled.py new file mode 100755 index 00000000..f8ed8013 --- /dev/null +++ b/mem_tests/total_mem_sampled.py @@ -0,0 +1,32 @@ +#! /usr/bin/python + +"""Parses the total amount of sampled memory from log files. + +This file outputs the total amount of memory that has been sampled by tcmalloc. 
+The output is of the format: + +time in seconds from a base time, amount of memory that has been sampled + +""" + +import argparse +from utils import compute_total_diff +from datetime import datetime + +parser = argparse.ArgumentParser() +parser.add_argument("filename") +args = parser.parse_args() + +my_file = open(args.filename) +output_file = open("memory_data.csv", "a") + +base_time = datetime(2014, 6, 11, 0, 0) +prev_line = "" +half_entry = (None, None) + +for line in my_file: + if "heap profile: " not in line: + continue + memory_used = line.strip().split(":")[-1].strip().split("]")[0].strip() + total_diff = compute_total_diff(line, base_time) + output_file.write("{0},{1}\n".format(int(total_diff), memory_used)) diff --git a/mem_tests/utils.py b/mem_tests/utils.py new file mode 100644 index 00000000..54dbcc2d --- /dev/null +++ b/mem_tests/utils.py @@ -0,0 +1,23 @@ +#! /usr/bin/python + +"""Utility functions for the memory tests. +""" + +from datetime import datetime + +def compute_total_diff(line, base_time): + """ + Computes the difference in time the line was recorded from the base time. + + An example of a line is: + [4688:4688:0701/010151:ERROR:perf_provider_chromeos.cc(228)]... + + Here, the month is 07, the day is 01 and the time is 01:01:51. + + line- the line that contains the time the record was taken + base_time- the base time to measure our timestamp from + """ + date = line.strip().split(":")[2].split("/") + timestamp = datetime(2014, int(date[0][0:2]), int(date[0][2:4]), + int(date[1][0:2]), int(date[1][2:4]), int(date[1][4:6])) + return (timestamp - base_time).total_seconds() |