diff options
author | android-build-team Robot <android-build-team-robot@google.com> | 2017-07-02 07:40:06 +0000 |
---|---|---|
committer | android-build-team Robot <android-build-team-robot@google.com> | 2017-07-02 07:40:06 +0000 |
commit | 2e1c5f191dec691230658dcba12048c658b7ce03 (patch) | |
tree | fe30c6c94affc5620f752b3cf12c766484a8c5b2 | |
parent | 696d715cfad7971a71bc565c3199b5e8c7f4f719 (diff) | |
parent | a951463b87e0a17229d5fcd426b0b05a4ffb0764 (diff) | |
download | trappy-2e1c5f191dec691230658dcba12048c658b7ce03.tar.gz |
release-request-d9dc98f7-19b2-484c-b4d1-f35dc43e9c05-for-git_oc-mr1-release-4152006 snap-temp-L91700000079405440
Change-Id: I1beb78c74f7bc96c5031133d1ff22fe621e0e6ae
26 files changed, 392 insertions, 32 deletions
diff --git a/tests/test_base.py b/tests/test_base.py index a0a4920..8bebfba 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -85,7 +85,7 @@ class TestBase(utils_tests.SetupDirectory): in_data = """ kworker/4:1-397 [004] 720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61 kworker/4:1-397 [004] 720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14""" - expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq", + expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "cpus", "freq", "raw_cpu_power", "power"]) with open("trace.txt", "w") as fout: @@ -131,7 +131,7 @@ class TestBase(utils_tests.SetupDirectory): timestamp ) - expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"]) + expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "tag"]) with open("trace.txt", "w") as fout: fout.write(in_data) @@ -157,7 +157,7 @@ class TestBase(utils_tests.SetupDirectory): in_data = """ rcu_preempt-7 [000] 73.604532: my_sched_stat_runtime: comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]""" - expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"]) + expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"]) with open("trace.txt", "w") as fout: fout.write(in_data) @@ -234,7 +234,7 @@ class TestBase(utils_tests.SetupDirectory): df = trace.equals_event.data_frame self.assertSetEqual(set(df.columns), - set(["__comm", "__pid", "__cpu", "__line", "my_field"])) + set(["__comm", "__pid", "__tgid", "__cpu", "__line", "my_field"])) self.assertListEqual(df["my_field"].tolist(), ["foo", "foo=bar", "foo=bar=baz", 1, "1=2", "1=foo", "1foo=2"]) diff --git a/tests/test_caching.py b/tests/test_caching.py new file mode 100644 index 0000000..d0893b7 --- /dev/null +++ b/tests/test_caching.py @@ -0,0 +1,193 @@ +# Copyright 2015-2017 ARM Limited, Google and contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import shutil +import sys +import unittest +import utils_tests +import trappy +from trappy.ftrace import GenericFTrace + +class TestCaching(utils_tests.SetupDirectory): + def __init__(self, *args, **kwargs): + super(TestCaching, self).__init__( + [("trace_sched.txt", "trace.txt"), + ("trace_sched.txt", "trace.raw.txt")], + *args, + **kwargs) + + def test_cache_created(self): + """Test cache creation when enabled""" + GenericFTrace.disable_cache = False + trace = trappy.FTrace() + + trace_path = os.path.abspath(trace.trace_path) + trace_dir = os.path.dirname(trace_path) + trace_file = os.path.basename(trace_path) + cache_dir = '.' + trace_file + '.cache' + + self.assertTrue(cache_dir in os.listdir(trace_dir)) + + def test_cache_not_created(self): + """Test that cache should not be created when disabled """ + GenericFTrace.disable_cache = True + trace = trappy.FTrace() + + trace_path = os.path.abspath(trace.trace_path) + trace_dir = os.path.dirname(trace_path) + trace_file = os.path.basename(trace_path) + cache_dir = '.' + trace_file + '.cache' + + self.assertFalse(cache_dir in os.listdir(trace_dir)) + + def test_compare_cached_vs_uncached(self): + """ Test that the cached and uncached traces are same """ + # Build the cache, but the actual trace will be parsed + # fresh since this is a first time parse + GenericFTrace.disable_cache = False + uncached_trace = trappy.FTrace() + uncached_dfr = uncached_trace.sched_wakeup.data_frame + + # Now read from previously parsed cache by reusing the path + cached_trace = trappy.FTrace(uncached_trace.trace_path) + cached_dfr = cached_trace.sched_wakeup.data_frame + + # Test whether timestamps are the same: + # The cached/uncached versions of the timestamps are slightly + # different due to floating point precision errors due to converting + # back and forth CSV and DataFrame. For all purposes this is not relevant + # since such rounding doesn't effect the end result. + # Here's an example of the error, the actual normalized time when + # calculated by hand is 0.081489, however following is what's stored + # in the CSV for sched_wakeup events in this trace. + # When converting the index to strings (and also what's in the CSV) + # cached: ['0.0814890000001', '1.981491'] + # uncached: ['0.0814890000001', '1.981491'] + # + # Keeping index as numpy.float64 + # cached: [0.081489000000100009, 1.9814909999999999] + # uncached: [0.081489000000146916, 1.9814909999995507] + # + # To make it possible to test, lets just convert the timestamps to strings + # and compare them below. + + cached_times = [str(r[0]) for r in cached_dfr.iterrows()] + uncached_times = [str(r[0]) for r in uncached_dfr.iterrows()] + + self.assertTrue(cached_times == uncached_times) + + # compare other columns as well + self.assertTrue([r[1].pid for r in cached_dfr.iterrows()] == + [r[1].pid for r in uncached_dfr.iterrows()]) + + self.assertTrue([r[1].comm for r in cached_dfr.iterrows()] == + [r[1].comm for r in uncached_dfr.iterrows()]) + + self.assertTrue([r[1].prio for r in cached_dfr.iterrows()] == + [r[1].prio for r in uncached_dfr.iterrows()]) + + def test_invalid_cache_overwritten(self): + """Test a cache with a bad checksum is overwritten""" + # This is a directory so we can't use the files_to_copy arg of + # SetUpDirectory, just do it ourselves. + cache_path = ".trace.txt.cache" + src = os.path.join(utils_tests.TESTS_DIRECTORY, "trace_sched.txt.cache") + shutil.copytree(src, cache_path) + + md5_path = os.path.join(cache_path, "md5sum") + def read_md5sum(): + with open(md5_path) as f: + return f.read() + + # Change 1 character of the stored checksum + md5sum = read_md5sum() + # Sorry, I guess modifying strings in Python is kind of awkward? + md5sum_inc = "".join(list(md5sum[:-1]) + [chr(ord(md5sum[-1]) + 1)]) + with open(md5_path, "w") as f: + f.write(md5sum_inc) + + # Parse a trace, this should delete and overwrite the invalidated cache + GenericFTrace.disable_cache = False + trace = trappy.FTrace() + + # Check that the modified md5sum was overwritten + self.assertNotEqual(read_md5sum(), md5sum_inc, + "The invalid ftrace cache wasn't overwritten") + + def test_cache_dynamic_events(self): + """Test that caching works if new event parsers have been registered""" + + # Parse the trace to create a cache + GenericFTrace.disable_cache = False + trace1 = trappy.FTrace() + + # Check we're actually testing what we think we are + if hasattr(trace1, 'dynamic_event'): + raise RuntimeError('Test bug: found unexpected event in trace') + + # Now register a new event type, call the constructor again, and check + # that the newly added event (which is not present in the cache) is + # parsed. + + parse_class = trappy.register_dynamic_ftrace("DynamicEvent", "dynamic_test_key") + + trace2 = trappy.FTrace() + self.assertTrue(len(trace2.dynamic_event.data_frame) == 1) + + trappy.unregister_dynamic_ftrace(parse_class) + + def test_cache_normalize_time(self): + """Test that caching doesn't break normalize_time""" + GenericFTrace.disable_cache = False + + # Times in trace_sched.txt + start_time = 6550.018511 + first_freq_event_time = 6550.056870 + + # Parse without normalizing time + trace1 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'], + normalize_time=False) + + self.assertEqual(trace1.cpu_frequency.data_frame.index[0], + first_freq_event_time) + + # Parse with normalized time + trace2 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'], + normalize_time=True) + + self.assertEqual(trace2.cpu_frequency.data_frame.index[0], + first_freq_event_time - start_time) + + def test_cache_window(self): + """Test that caching doesn't break the 'window' parameter""" + GenericFTrace.disable_cache = False + + trace1 = trappy.FTrace( + events=['sched_wakeup'], + window=(0, 1)) + + # Check that we're testing what we think we're testing The trace + # contains 2 sched_wakeup events; this window should get rid of one of + # them. + if len(trace1.sched_wakeup.data_frame) != 1: + raise RuntimeError('Test bug: bad sched_wakeup event count') + + # Parse again without the window + trace1 = trappy.FTrace( + events=['sched_wakeup'], + window=(0, None)) + + self.assertEqual(len(trace1.sched_wakeup.data_frame), 2) diff --git a/tests/test_ftrace.py b/tests/test_ftrace.py index 9f09e8e..e6f6319 100644 --- a/tests/test_ftrace.py +++ b/tests/test_ftrace.py @@ -230,8 +230,8 @@ class TestFTrace(BaseTestThermal): # Make sure there are no NaNs in the middle of the array self.assertTrue(allfreqs[0][1]["A57_freq_in"].notnull().all()) - def test_run_event_callbacks(self): - """Test run_event_callbacks()""" + def test_apply_callbacks(self): + """Test apply_callbacks()""" counts = { "cpu_in_power": 0, @@ -250,7 +250,7 @@ class TestFTrace(BaseTestThermal): } trace = trappy.FTrace() - trace.run_event_callbacks(fn_map) + trace.apply_callbacks(fn_map) self.assertEqual(counts["cpu_in_power"], 134) self.assertEqual(counts["cpu_out_power"], 134) diff --git a/tests/trace_sched.txt.cache/CpuIdle.csv b/tests/trace_sched.txt.cache/CpuIdle.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/CpuIdle.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/CpuInPower.csv b/tests/trace_sched.txt.cache/CpuInPower.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/CpuInPower.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/CpuOutPower.csv b/tests/trace_sched.txt.cache/CpuOutPower.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/CpuOutPower.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/DevfreqInPower.csv b/tests/trace_sched.txt.cache/DevfreqInPower.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/DevfreqInPower.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/DevfreqOutPower.csv b/tests/trace_sched.txt.cache/DevfreqOutPower.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/DevfreqOutPower.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/PIDController.csv b/tests/trace_sched.txt.cache/PIDController.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/PIDController.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv new file mode 100644 index 0000000..a1764fe --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv @@ -0,0 +1,2 @@ +Time,__comm,__cpu,__pid,cpu,cpu_scale_factor,freq_scale_factor +0.000167999999576,<idle>,0,0,0,1024,426 diff --git a/tests/trace_sched.txt.cache/SchedCpuCapacity.csv b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv new file mode 100644 index 0000000..4b75c6a --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv @@ -0,0 +1,2 @@ +Time,__comm,__cpu,__pid,capacity,cpu,rt_capacity +0.000293999999485,trace-cmd,3,3519,430,3,1024 diff --git a/tests/trace_sched.txt.cache/SchedCpuFrequency.csv b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv new file mode 100644 index 0000000..dbb941d --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv @@ -0,0 +1,2 @@ +Time,__comm,__cpu,__pid,cpu,frequency +0.0383590000001,kworker/0:0,0,3410,0,600000 diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv new file mode 100644 index 0000000..54a9596 --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv @@ -0,0 +1,2 @@ +Time,__comm,__cpu,__pid,cpu,load,utilization +1.99999976758e-06,sshd,0,2962,0,13,18 diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv new file mode 100644 index 0000000..fc57841 --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv @@ -0,0 +1,2 @@ +Time,__comm,__cpu,__pid,cpus,load,utilization +0.0,rcuos/2,1,22,00000002,0,0 diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv new file mode 100644 index 0000000..8b3ccfe --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv @@ -0,0 +1,2 @@ +Time,__comm,__cpu,__pid,avg_period,comm,load,pid,runnable_avg_sum,running_avg_sum,utilization +9.99999429041e-07,trace-cmd,4,2971,48595,sshd,0,2962,0,0,0 diff --git a/tests/trace_sched.txt.cache/SchedMigrateTask.csv b/tests/trace_sched.txt.cache/SchedMigrateTask.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedMigrateTask.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/SchedSwitch.csv b/tests/trace_sched.txt.cache/SchedSwitch.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedSwitch.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/SchedWakeup.csv b/tests/trace_sched.txt.cache/SchedWakeup.csv new file mode 100644 index 0000000..6210734 --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedWakeup.csv @@ -0,0 +1,3 @@ +Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu +0.0814890000001,<idle>,1,0,rcu_preempt,7,120,1,1 +1.981491,<idle>,1,0,rcu_preempt,7,120,1,1 diff --git a/tests/trace_sched.txt.cache/SchedWakeupNew.csv b/tests/trace_sched.txt.cache/SchedWakeupNew.csv new file mode 100644 index 0000000..4ea006b --- /dev/null +++ b/tests/trace_sched.txt.cache/SchedWakeupNew.csv @@ -0,0 +1,3 @@ +Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu +0.000152999999955,<...>,0,19427,shutils,19428,120,1,2 +1.975373,<...>,0,19427,shutils,19428,120,1,2 diff --git a/tests/trace_sched.txt.cache/Thermal.csv b/tests/trace_sched.txt.cache/Thermal.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/Thermal.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/ThermalGovernor.csv b/tests/trace_sched.txt.cache/ThermalGovernor.csv new file mode 100644 index 0000000..e16c76d --- /dev/null +++ b/tests/trace_sched.txt.cache/ThermalGovernor.csv @@ -0,0 +1 @@ +"" diff --git a/tests/trace_sched.txt.cache/md5sum b/tests/trace_sched.txt.cache/md5sum new file mode 100644 index 0000000..9b481a3 --- /dev/null +++ b/tests/trace_sched.txt.cache/md5sum @@ -0,0 +1 @@ +47be9ccdd36fa0c3646b0d9b0f649da4
\ No newline at end of file diff --git a/tests/utils_tests.py b/tests/utils_tests.py index 617cfa3..e13b868 100644 --- a/tests/utils_tests.py +++ b/tests/utils_tests.py @@ -19,6 +19,8 @@ import os import shutil import subprocess import tempfile +import trappy +from trappy.ftrace import GenericFTrace TESTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) @@ -36,6 +38,7 @@ class SetupDirectory(unittest.TestCase): def __init__(self, files_to_copy, *args, **kwargs): self.files_to_copy = files_to_copy super(SetupDirectory, self).__init__(*args, **kwargs) + GenericFTrace.disable_cache = True def setUp(self): self.previous_dir = os.getcwd() diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py index e4fec48..a953a60 100644 --- a/trappy/bare_trace.py +++ b/trappy/bare_trace.py @@ -134,6 +134,9 @@ class BareTrace(object): def finalize_objects(self): for trace_class in self.trace_classes: + # If cached, don't need to do any other DF operation + if trace_class.cached: + continue trace_class.tracer = self trace_class.create_dataframe() trace_class.finalize_object() diff --git a/trappy/base.py b/trappy/base.py index 3c4c7bd..8a7fb38 100644 --- a/trappy/base.py +++ b/trappy/base.py @@ -111,8 +111,10 @@ class Base(object): self.time_array = [] self.comm_array = [] self.pid_array = [] + self.tgid_array = [] self.cpu_array = [] self.parse_raw = parse_raw + self.cached = False def finalize_object(self): pass @@ -151,7 +153,7 @@ class Base(object): return ret - def append_data(self, time, comm, pid, cpu, line, data): + def append_data(self, time, comm, pid, tgid, cpu, line, data): """Append data parsed from a line to the corresponding arrays The :mod:`DataFrame` will be created from this when the whole trace @@ -175,15 +177,31 @@ class Base(object): self.time_array.append(time) self.comm_array.append(comm) self.pid_array.append(pid) + self.tgid_array.append(tgid) self.cpu_array.append(cpu) self.line_array.append(line) self.data_array.append(data) - def conv_to_int(self, value): + def string_cast(self, string, type): + """ Attempt to convert string to another type + + Here we attempt to cast string to a type. Currently only + integer conversion is supported with future expansion + left open to other types. + + :param string: The value to convert. + :type string: str + + :param type: The type to convert to. + :type type: type + """ + # Currently this function only supports int conversion + if type != int: + return # Handle false-positives for negative numbers - if value.lstrip("-").isdigit(): - value = int(value) - return value + if not string.lstrip("-").isdigit(): + return string + return int(string) def generate_data_dict(self, data_str): data_dict = {} @@ -196,7 +214,7 @@ class Base(object): data_dict[prev_key] += ' ' + field continue (key, value) = field.split('=', 1) - value = self.conv_to_int(value) + value = self.string_cast(value, int) data_dict[key] = value prev_key = key return data_dict @@ -210,10 +228,10 @@ class Base(object): check_memory_usage = True check_memory_count = 1 - for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array, - self.cpu_array, self.line_array, - self.data_array): - data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line} + for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array, + self.tgid_array, self.cpu_array, + self.line_array, self.data_array): + data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line} data_dict.update(self.generate_data_dict(data_str)) # When running out of memory, Pandas has been observed to segfault @@ -262,6 +280,14 @@ class Base(object): """ self.data_frame.to_csv(fname) + def read_csv(self, fname): + """Read the csv data into a DataFrame + + :param fname: The name of the CSV file + :type fname: str + """ + self.data_frame = pd.read_csv(fname, index_col = 0) + def normalize_time(self, basetime): """Substract basetime from the Time of the data frame diff --git a/trappy/ftrace.py b/trappy/ftrace.py index e308398..eabafc8 100644 --- a/trappy/ftrace.py +++ b/trappy/ftrace.py @@ -18,9 +18,13 @@ # pylint: disable=no-member import itertools +import json import os import re import pandas as pd +import hashlib +import shutil +import warnings from trappy.bare_trace import BareTrace from trappy.utils import listify @@ -47,8 +51,8 @@ def _plot_freq_hists(allfreqs, what, axis, title): "Frequency", xlim, "default") SPECIAL_FIELDS_RE = re.compile( - r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\ - r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\ + r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\ + r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\ r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)" ) @@ -62,6 +66,67 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" dynamic_classes = {} + disable_cache = False + + def _trace_cache_path(self): + trace_file = self.trace_path + cache_dir = '.' + os.path.basename(trace_file) + '.cache' + tracefile_dir = os.path.dirname(os.path.abspath(trace_file)) + cache_path = os.path.join(tracefile_dir, cache_dir) + return cache_path + + def _check_trace_cache(self, params): + cache_path = self._trace_cache_path() + md5file = os.path.join(cache_path, 'md5sum') + basetime_path = os.path.join(cache_path, 'basetime') + params_path = os.path.join(cache_path, 'params.json') + + for path in [cache_path, md5file, params_path]: + if not os.path.exists(path): + return False + + with open(md5file) as f: + cache_md5sum = f.read() + with open(basetime_path) as f: + self.basetime = float(f.read()) + with open(self.trace_path, 'rb') as f: + trace_md5sum = hashlib.md5(f.read()).hexdigest() + with open(params_path) as f: + cache_params = json.dumps(json.load(f)) + + # Convert to a json string for comparison + params = json.dumps(params) + + # check if cache is valid + if cache_md5sum != trace_md5sum or cache_params != params: + shutil.rmtree(cache_path) + return False + return True + + def _create_trace_cache(self, params): + cache_path = self._trace_cache_path() + md5file = os.path.join(cache_path, 'md5sum') + basetime_path = os.path.join(cache_path, 'basetime') + params_path = os.path.join(cache_path, 'params.json') + + if os.path.exists(cache_path): + shutil.rmtree(cache_path) + os.mkdir(cache_path) + + md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest() + with open(md5file, 'w') as f: + f.write(md5sum) + + with open(basetime_path, 'w') as f: + f.write(str(self.basetime)) + + with open(params_path, 'w') as f: + json.dump(params, f) + + def _get_csv_path(self, trace_class): + path = self._trace_cache_path() + return os.path.join(path, trace_class.__class__.__name__ + '.csv') + def __init__(self, name="", normalize_time=True, scope="all", events=[], window=(0, None), abs_window=(0, None)): super(GenericFTrace, self).__init__(name) @@ -127,9 +192,43 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" del scope_classes[name] def _do_parse(self): + params = {'window': self.window, 'abs_window': self.abs_window} + if not self.__class__.disable_cache and self._check_trace_cache(params): + # Read csv into frames + for trace_class in self.trace_classes: + try: + csv_file = self._get_csv_path(trace_class) + trace_class.read_csv(csv_file) + trace_class.cached = True + except: + warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class) + warnings.warn(warnstr) + + if all([c.cached for c in self.trace_classes]): + if self.normalize_time: + self._normalize_time() + return + self.__parse_trace_file(self.trace_path) + self.finalize_objects() + if not self.__class__.disable_cache: + try: + # Recreate basic cache directories only if nothing cached + if not all([c.cached for c in self.trace_classes]): + self._create_trace_cache(params) + + # Write out only events that weren't cached before + for trace_class in self.trace_classes: + if trace_class.cached: + continue + csv_file = self._get_csv_path(trace_class) + trace_class.write_csv(csv_file) + except OSError as err: + warnings.warn( + "TRAPpy: Cache not created due to OS error: {0}".format(err)) + if self.normalize_time: self._normalize_time() @@ -165,6 +264,15 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" trace_class = DynamicTypeFactory(event_name, (Base,), kwords) self.class_definitions[event_name] = trace_class + def __get_trace_class(self, line, cls_word): + trace_class = None + for unique_word, cls in cls_word.iteritems(): + if unique_word in line: + trace_class = cls + if not cls.fallback: + return trace_class + return trace_class + def __populate_data(self, fin, cls_for_unique_word): """Append to trace data from a txt trace""" @@ -173,16 +281,10 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" actual_trace) for line in actual_trace: - trace_class = None - for unique_word, cls in cls_for_unique_word.iteritems(): - if unique_word in line: - trace_class = cls - if not cls.fallback: - break - else: - if not trace_class: - self.lines += 1 - continue + trace_class = self.__get_trace_class(line, cls_for_unique_word) + if not trace_class: + self.lines += 1 + continue line = line[:-1] @@ -192,6 +294,8 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" comm = fields_match.group('comm') pid = int(fields_match.group('pid')) cpu = int(fields_match.group('cpu')) + tgid = fields_match.group('tgid') + tgid = -1 if (not tgid or '-' in tgid) else int(tgid) # The timestamp, depending on the trace_clock configuration, can be # reported either in [s].[us] or [ns] format. Let's ensure that we @@ -218,7 +322,7 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" if "={}" in data_str: data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str) - trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str) + trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str) self.lines += 1 def trace_hasnt_started(self): @@ -259,6 +363,8 @@ is part of the trace. cls_for_unique_word = {} for trace_name in self.class_definitions.iterkeys(): trace_class = getattr(self, trace_name) + if trace_class.cached: + continue unique_word = trace_class.unique_word cls_for_unique_word[unique_word] = trace_class @@ -309,7 +415,7 @@ is part of the trace. return ret - def run_event_callbacks(self, fn_map): + def apply_callbacks(self, fn_map): """ Apply callback functions to trace events in chronological order. @@ -321,7 +427,7 @@ is part of the trace. For example, to iterate over trace t, applying your functions callback_fn1 and callback_fn2 to each sched_switch and sched_wakeup event respectively: - t.run_event_callbacks({ + t.apply_callbacks({ "sched_switch": callback_fn1, "sched_wakeup": callback_fn2 }) |