release-request-d9dc98f7-19b2-484c-b4d1-f35dc43e9c05-for-git_oc-mr1-release-4152006 snap-temp-L91700000079405440

Change-Id: I1beb78c74f7bc96c5031133d1ff22fe621e0e6ae
author: android-build-team Robot <android-build-team-robot@google.com> 2017-07-02 07:40:06 +0000
committer: android-build-team Robot <android-build-team-robot@google.com> 2017-07-02 07:40:06 +0000
commit: 2e1c5f191dec691230658dcba12048c658b7ce03 (patch)
tree: fe30c6c94affc5620f752b3cf12c766484a8c5b2
parent: 696d715cfad7971a71bc565c3199b5e8c7f4f719 (diff)
parent: a951463b87e0a17229d5fcd426b0b05a4ffb0764 (diff)
download: trappy-2e1c5f191dec691230658dcba12048c658b7ce03.tar.gz
26 files changed, 392 insertions, 32 deletions
diff --git a/tests/test_base.py b/tests/test_base.py
index a0a4920..8bebfba 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -85,7 +85,7 @@ class TestBase(utils_tests.SetupDirectory):
         in_data = """     kworker/4:1-397   [004]   720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61
      kworker/4:1-397   [004]   720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq",
+        expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "cpus", "freq",
                                 "raw_cpu_power", "power"])
 
         with open("trace.txt", "w") as fout:
@@ -131,7 +131,7 @@ class TestBase(utils_tests.SetupDirectory):
                         timestamp
                         )
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"])
+        expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "tag"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -157,7 +157,7 @@ class TestBase(utils_tests.SetupDirectory):
 
         in_data = """     rcu_preempt-7     [000]    73.604532: my_sched_stat_runtime:   comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
+        expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -234,7 +234,7 @@ class TestBase(utils_tests.SetupDirectory):
 
         df = trace.equals_event.data_frame
         self.assertSetEqual(set(df.columns),
-                            set(["__comm", "__pid", "__cpu", "__line", "my_field"]))
+                            set(["__comm", "__pid", "__tgid", "__cpu", "__line", "my_field"]))
         self.assertListEqual(df["my_field"].tolist(),
                              ["foo", "foo=bar", "foo=bar=baz", 1,
                               "1=2", "1=foo", "1foo=2"])
diff --git a/tests/test_caching.py b/tests/test_caching.py
new file mode 100644
index 0000000..d0893b7
--- /dev/null
+++ b/tests/test_caching.py
@@ -0,0 +1,193 @@
+#    Copyright 2015-2017 ARM Limited, Google and contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import shutil
+import sys
+import unittest
+import utils_tests
+import trappy
+from trappy.ftrace import GenericFTrace
+
+class TestCaching(utils_tests.SetupDirectory):
+    def __init__(self, *args, **kwargs):
+        super(TestCaching, self).__init__(
+            [("trace_sched.txt", "trace.txt"),
+             ("trace_sched.txt", "trace.raw.txt")],
+            *args,
+            **kwargs)
+
+    def test_cache_created(self):
+        """Test cache creation when enabled"""
+        GenericFTrace.disable_cache = False
+        trace = trappy.FTrace()
+
+        trace_path = os.path.abspath(trace.trace_path)
+        trace_dir = os.path.dirname(trace_path)
+        trace_file = os.path.basename(trace_path)
+        cache_dir = '.' + trace_file + '.cache'
+
+        self.assertTrue(cache_dir in os.listdir(trace_dir))
+
+    def test_cache_not_created(self):
+        """Test that cache should not be created when disabled """
+        GenericFTrace.disable_cache = True
+        trace = trappy.FTrace()
+
+        trace_path = os.path.abspath(trace.trace_path)
+        trace_dir = os.path.dirname(trace_path)
+        trace_file = os.path.basename(trace_path)
+        cache_dir = '.' + trace_file + '.cache'
+
+        self.assertFalse(cache_dir in os.listdir(trace_dir))
+
+    def test_compare_cached_vs_uncached(self):
+        """ Test that the cached and uncached traces are same """
+        # Build the cache, but the actual trace will be parsed
+        # fresh since this is a first time parse
+        GenericFTrace.disable_cache = False
+        uncached_trace = trappy.FTrace()
+        uncached_dfr = uncached_trace.sched_wakeup.data_frame
+
+        # Now read from previously parsed cache by reusing the path
+        cached_trace = trappy.FTrace(uncached_trace.trace_path)
+        cached_dfr = cached_trace.sched_wakeup.data_frame
+
+        # Test whether timestamps are the same:
+        # The cached/uncached versions of the timestamps are slightly
+        # different due to floating point precision errors due to converting
+        # back and forth CSV and DataFrame. For all purposes this is not relevant
+        # since such rounding doesn't effect the end result.
+        # Here's an example of the error, the actual normalized time when
+        # calculated by hand is 0.081489, however following is what's stored
+        # in the CSV for sched_wakeup events in this trace.
+        # When converting the index to strings (and also what's in the CSV)
+        # cached: ['0.0814890000001', '1.981491']
+        # uncached: ['0.0814890000001', '1.981491']
+        #
+        # Keeping index as numpy.float64
+        # cached: [0.081489000000100009, 1.9814909999999999]
+        # uncached: [0.081489000000146916, 1.9814909999995507]
+        #
+        # To make it possible to test, lets just convert the timestamps to strings
+        # and compare them below.
+
+        cached_times = [str(r[0]) for r in cached_dfr.iterrows()]
+        uncached_times = [str(r[0]) for r in uncached_dfr.iterrows()]
+
+        self.assertTrue(cached_times == uncached_times)
+
+        # compare other columns as well
+        self.assertTrue([r[1].pid for r in cached_dfr.iterrows()] ==
+                        [r[1].pid for r in uncached_dfr.iterrows()])
+
+        self.assertTrue([r[1].comm for r in cached_dfr.iterrows()] ==
+                        [r[1].comm for r in uncached_dfr.iterrows()])
+
+        self.assertTrue([r[1].prio for r in cached_dfr.iterrows()] ==
+                        [r[1].prio for r in uncached_dfr.iterrows()])
+
+    def test_invalid_cache_overwritten(self):
+        """Test a cache with a bad checksum is overwritten"""
+        # This is a directory so we can't use the files_to_copy arg of
+        # SetUpDirectory, just do it ourselves.
+        cache_path = ".trace.txt.cache"
+        src = os.path.join(utils_tests.TESTS_DIRECTORY, "trace_sched.txt.cache")
+        shutil.copytree(src, cache_path)
+
+        md5_path = os.path.join(cache_path, "md5sum")
+        def read_md5sum():
+            with open(md5_path) as f:
+                return f.read()
+
+        # Change 1 character of the stored checksum
+        md5sum = read_md5sum()
+        # Sorry, I guess modifying strings in Python is kind of awkward?
+        md5sum_inc = "".join(list(md5sum[:-1]) + [chr(ord(md5sum[-1]) + 1)])
+        with open(md5_path, "w") as f:
+            f.write(md5sum_inc)
+
+        # Parse a trace, this should delete and overwrite the invalidated cache
+        GenericFTrace.disable_cache = False
+        trace = trappy.FTrace()
+
+        # Check that the modified md5sum was overwritten
+        self.assertNotEqual(read_md5sum(), md5sum_inc,
+                            "The invalid ftrace cache wasn't overwritten")
+
+    def test_cache_dynamic_events(self):
+        """Test that caching works if new event parsers have been registered"""
+
+        # Parse the trace to create a cache
+        GenericFTrace.disable_cache = False
+        trace1 = trappy.FTrace()
+
+        # Check we're actually testing what we think we are
+        if hasattr(trace1, 'dynamic_event'):
+            raise RuntimeError('Test bug: found unexpected event in trace')
+
+        # Now register a new event type, call the constructor again, and check
+        # that the newly added event (which is not present in the cache) is
+        # parsed.
+
+        parse_class = trappy.register_dynamic_ftrace("DynamicEvent", "dynamic_test_key")
+
+        trace2 = trappy.FTrace()
+        self.assertTrue(len(trace2.dynamic_event.data_frame) == 1)
+
+        trappy.unregister_dynamic_ftrace(parse_class)
+
+    def test_cache_normalize_time(self):
+        """Test that caching doesn't break normalize_time"""
+        GenericFTrace.disable_cache = False
+
+        # Times in trace_sched.txt
+        start_time = 6550.018511
+        first_freq_event_time = 6550.056870
+
+        # Parse without normalizing time
+        trace1 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'],
+                               normalize_time=False)
+
+        self.assertEqual(trace1.cpu_frequency.data_frame.index[0],
+                         first_freq_event_time)
+
+        # Parse with normalized time
+        trace2 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'],
+                               normalize_time=True)
+
+        self.assertEqual(trace2.cpu_frequency.data_frame.index[0],
+                         first_freq_event_time - start_time)
+
+    def test_cache_window(self):
+        """Test that caching doesn't break the 'window' parameter"""
+        GenericFTrace.disable_cache = False
+
+        trace1 = trappy.FTrace(
+            events=['sched_wakeup'],
+            window=(0, 1))
+
+        # Check that we're testing what we think we're testing The trace
+        # contains 2 sched_wakeup events; this window should get rid of one of
+        # them.
+        if len(trace1.sched_wakeup.data_frame) != 1:
+            raise RuntimeError('Test bug: bad sched_wakeup event count')
+
+        # Parse again without the window
+        trace1 = trappy.FTrace(
+            events=['sched_wakeup'],
+            window=(0, None))
+
+        self.assertEqual(len(trace1.sched_wakeup.data_frame), 2)
diff --git a/tests/test_ftrace.py b/tests/test_ftrace.py
index 9f09e8e..e6f6319 100644
--- a/tests/test_ftrace.py
+++ b/tests/test_ftrace.py
@@ -230,8 +230,8 @@ class TestFTrace(BaseTestThermal):
         # Make sure there are no NaNs in the middle of the array
         self.assertTrue(allfreqs[0][1]["A57_freq_in"].notnull().all())
 
-    def test_run_event_callbacks(self):
-        """Test run_event_callbacks()"""
+    def test_apply_callbacks(self):
+        """Test apply_callbacks()"""
 
         counts = {
             "cpu_in_power": 0,
@@ -250,7 +250,7 @@ class TestFTrace(BaseTestThermal):
         }
         trace = trappy.FTrace()
 
-        trace.run_event_callbacks(fn_map)
+        trace.apply_callbacks(fn_map)
 
         self.assertEqual(counts["cpu_in_power"], 134)
         self.assertEqual(counts["cpu_out_power"], 134)
diff --git a/tests/trace_sched.txt.cache/CpuIdle.csv b/tests/trace_sched.txt.cache/CpuIdle.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuIdle.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/CpuInPower.csv b/tests/trace_sched.txt.cache/CpuInPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuInPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/CpuOutPower.csv b/tests/trace_sched.txt.cache/CpuOutPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuOutPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/DevfreqInPower.csv b/tests/trace_sched.txt.cache/DevfreqInPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/DevfreqInPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/DevfreqOutPower.csv b/tests/trace_sched.txt.cache/DevfreqOutPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/DevfreqOutPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/PIDController.csv b/tests/trace_sched.txt.cache/PIDController.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/PIDController.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv
new file mode 100644
index 0000000..a1764fe
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,cpu_scale_factor,freq_scale_factor
+0.000167999999576,<idle>,0,0,0,1024,426
diff --git a/tests/trace_sched.txt.cache/SchedCpuCapacity.csv b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv
new file mode 100644
index 0000000..4b75c6a
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,capacity,cpu,rt_capacity
+0.000293999999485,trace-cmd,3,3519,430,3,1024
diff --git a/tests/trace_sched.txt.cache/SchedCpuFrequency.csv b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv
new file mode 100644
index 0000000..dbb941d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,frequency
+0.0383590000001,kworker/0:0,0,3410,0,600000
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv
new file mode 100644
index 0000000..54a9596
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,load,utilization
+1.99999976758e-06,sshd,0,2962,0,13,18
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv
new file mode 100644
index 0000000..fc57841
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpus,load,utilization
+0.0,rcuos/2,1,22,00000002,0,0
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv
new file mode 100644
index 0000000..8b3ccfe
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,avg_period,comm,load,pid,runnable_avg_sum,running_avg_sum,utilization
+9.99999429041e-07,trace-cmd,4,2971,48595,sshd,0,2962,0,0,0
diff --git a/tests/trace_sched.txt.cache/SchedMigrateTask.csv b/tests/trace_sched.txt.cache/SchedMigrateTask.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedMigrateTask.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedSwitch.csv b/tests/trace_sched.txt.cache/SchedSwitch.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedSwitch.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedWakeup.csv b/tests/trace_sched.txt.cache/SchedWakeup.csv
new file mode 100644
index 0000000..6210734
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedWakeup.csv
@@ -0,0 +1,3 @@
+Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu
+0.0814890000001,<idle>,1,0,rcu_preempt,7,120,1,1
+1.981491,<idle>,1,0,rcu_preempt,7,120,1,1
diff --git a/tests/trace_sched.txt.cache/SchedWakeupNew.csv b/tests/trace_sched.txt.cache/SchedWakeupNew.csv
new file mode 100644
index 0000000..4ea006b
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedWakeupNew.csv
@@ -0,0 +1,3 @@
+Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu
+0.000152999999955,<...>,0,19427,shutils,19428,120,1,2
+1.975373,<...>,0,19427,shutils,19428,120,1,2
diff --git a/tests/trace_sched.txt.cache/Thermal.csv b/tests/trace_sched.txt.cache/Thermal.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/Thermal.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/ThermalGovernor.csv b/tests/trace_sched.txt.cache/ThermalGovernor.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/ThermalGovernor.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/md5sum b/tests/trace_sched.txt.cache/md5sum
new file mode 100644
index 0000000..9b481a3
--- /dev/null
+++ b/tests/trace_sched.txt.cache/md5sum
@@ -0,0 +1 @@
+47be9ccdd36fa0c3646b0d9b0f649da4
+\ No newline at end of file
diff --git a/tests/utils_tests.py b/tests/utils_tests.py
index 617cfa3..e13b868 100644
--- a/tests/utils_tests.py
+++ b/tests/utils_tests.py
@@ -19,6 +19,8 @@ import os
 import shutil
 import subprocess
 import tempfile
+import trappy
+from trappy.ftrace import GenericFTrace
 
 TESTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
 
@@ -36,6 +38,7 @@ class SetupDirectory(unittest.TestCase):
     def __init__(self, files_to_copy, *args, **kwargs):
         self.files_to_copy = files_to_copy
         super(SetupDirectory, self).__init__(*args, **kwargs)
+        GenericFTrace.disable_cache = True
 
     def setUp(self):
         self.previous_dir = os.getcwd()
diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py
index e4fec48..a953a60 100644
--- a/trappy/bare_trace.py
+++ b/trappy/bare_trace.py
@@ -134,6 +134,9 @@ class BareTrace(object):
 
     def finalize_objects(self):
         for trace_class in self.trace_classes:
+            # If cached, don't need to do any other DF operation
+            if trace_class.cached:
+                continue
             trace_class.tracer = self
             trace_class.create_dataframe()
             trace_class.finalize_object()
diff --git a/trappy/base.py b/trappy/base.py
index 3c4c7bd..8a7fb38 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -111,8 +111,10 @@ class Base(object):
         self.time_array = []
         self.comm_array = []
         self.pid_array = []
+        self.tgid_array = []
         self.cpu_array = []
         self.parse_raw = parse_raw
+        self.cached = False
 
     def finalize_object(self):
         pass
@@ -151,7 +153,7 @@ class Base(object):
 
         return ret
 
-    def append_data(self, time, comm, pid, cpu, line, data):
+    def append_data(self, time, comm, pid, tgid, cpu, line, data):
         """Append data parsed from a line to the corresponding arrays
 
         The :mod:`DataFrame` will be created from this when the whole trace
@@ -175,15 +177,31 @@ class Base(object):
         self.time_array.append(time)
         self.comm_array.append(comm)
         self.pid_array.append(pid)
+        self.tgid_array.append(tgid)
         self.cpu_array.append(cpu)
         self.line_array.append(line)
         self.data_array.append(data)
 
-    def conv_to_int(self, value):
+    def string_cast(self, string, type):
+        """ Attempt to convert string to another type
+
+        Here we attempt to cast string to a type. Currently only
+        integer conversion is supported with future expansion
+        left open to other types.
+
+        :param string: The value to convert.
+        :type string: str
+
+        :param type: The type to convert to.
+        :type type: type
+        """
+        # Currently this function only supports int conversion
+        if type != int:
+            return
         # Handle false-positives for negative numbers
-        if value.lstrip("-").isdigit():
-            value = int(value)
-        return value
+        if not string.lstrip("-").isdigit():
+            return string
+        return int(string)
 
     def generate_data_dict(self, data_str):
         data_dict = {}
@@ -196,7 +214,7 @@ class Base(object):
                 data_dict[prev_key] += ' ' + field
                 continue
             (key, value) = field.split('=', 1)
-            value = self.conv_to_int(value)
+            value = self.string_cast(value, int)
             data_dict[key] = value
             prev_key = key
         return data_dict
@@ -210,10 +228,10 @@ class Base(object):
         check_memory_usage = True
         check_memory_count = 1
 
-        for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
-                                              self.cpu_array, self.line_array,
-                                              self.data_array):
-            data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line}
+        for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
+                                              self.tgid_array, self.cpu_array,
+                                              self.line_array, self.data_array):
+            data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
             data_dict.update(self.generate_data_dict(data_str))
 
             # When running out of memory, Pandas has been observed to segfault
@@ -262,6 +280,14 @@ class Base(object):
         """
         self.data_frame.to_csv(fname)
 
+    def read_csv(self, fname):
+        """Read the csv data into a DataFrame
+
+        :param fname: The name of the CSV file
+        :type fname: str
+        """
+        self.data_frame = pd.read_csv(fname, index_col = 0)
+
     def normalize_time(self, basetime):
         """Substract basetime from the Time of the data frame
 
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index e308398..eabafc8 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -18,9 +18,13 @@
 # pylint: disable=no-member
 
 import itertools
+import json
 import os
 import re
 import pandas as pd
+import hashlib
+import shutil
+import warnings
 
 from trappy.bare_trace import BareTrace
 from trappy.utils import listify
@@ -47,8 +51,8 @@ def _plot_freq_hists(allfreqs, what, axis, title):
                              "Frequency", xlim, "default")
 
 SPECIAL_FIELDS_RE = re.compile(
-                        r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
-                        r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
+                        r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
+                        r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
                         r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)"
 )
 
@@ -62,6 +66,67 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
 
     dynamic_classes = {}
 
+    disable_cache = False
+
+    def _trace_cache_path(self):
+        trace_file = self.trace_path
+        cache_dir  = '.' +  os.path.basename(trace_file) + '.cache'
+        tracefile_dir = os.path.dirname(os.path.abspath(trace_file))
+        cache_path = os.path.join(tracefile_dir, cache_dir)
+        return cache_path
+
+    def _check_trace_cache(self, params):
+        cache_path = self._trace_cache_path()
+        md5file = os.path.join(cache_path, 'md5sum')
+        basetime_path = os.path.join(cache_path, 'basetime')
+        params_path = os.path.join(cache_path, 'params.json')
+
+        for path in [cache_path, md5file, params_path]:
+            if not os.path.exists(path):
+                return False
+
+        with open(md5file) as f:
+            cache_md5sum = f.read()
+        with open(basetime_path) as f:
+            self.basetime = float(f.read())
+        with open(self.trace_path, 'rb') as f:
+            trace_md5sum = hashlib.md5(f.read()).hexdigest()
+        with open(params_path) as f:
+            cache_params = json.dumps(json.load(f))
+
+        # Convert to a json string for comparison
+        params = json.dumps(params)
+
+        # check if cache is valid
+        if cache_md5sum != trace_md5sum or cache_params != params:
+            shutil.rmtree(cache_path)
+            return False
+        return True
+
+    def _create_trace_cache(self, params):
+        cache_path = self._trace_cache_path()
+        md5file = os.path.join(cache_path, 'md5sum')
+        basetime_path = os.path.join(cache_path, 'basetime')
+        params_path = os.path.join(cache_path, 'params.json')
+
+        if os.path.exists(cache_path):
+            shutil.rmtree(cache_path)
+        os.mkdir(cache_path)
+
+        md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest()
+        with open(md5file, 'w') as f:
+            f.write(md5sum)
+
+        with open(basetime_path, 'w') as f:
+            f.write(str(self.basetime))
+
+        with open(params_path, 'w') as f:
+            json.dump(params, f)
+
+    def _get_csv_path(self, trace_class):
+        path = self._trace_cache_path()
+        return os.path.join(path, trace_class.__class__.__name__ + '.csv')
+
     def __init__(self, name="", normalize_time=True, scope="all",
                  events=[], window=(0, None), abs_window=(0, None)):
         super(GenericFTrace, self).__init__(name)
@@ -127,9 +192,43 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
                 del scope_classes[name]
 
     def _do_parse(self):
+        params = {'window': self.window, 'abs_window': self.abs_window}
+        if not self.__class__.disable_cache and self._check_trace_cache(params):
+            # Read csv into frames
+            for trace_class in self.trace_classes:
+                try:
+                    csv_file = self._get_csv_path(trace_class)
+                    trace_class.read_csv(csv_file)
+                    trace_class.cached = True
+                except:
+                    warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class)
+                    warnings.warn(warnstr)
+
+        if all([c.cached for c in self.trace_classes]):
+            if self.normalize_time:
+                self._normalize_time()
+            return
+
         self.__parse_trace_file(self.trace_path)
+
         self.finalize_objects()
 
+        if not self.__class__.disable_cache:
+            try:
+                # Recreate basic cache directories only if nothing cached
+                if not all([c.cached for c in self.trace_classes]):
+                    self._create_trace_cache(params)
+
+                # Write out only events that weren't cached before
+                for trace_class in self.trace_classes:
+                    if trace_class.cached:
+                        continue
+                    csv_file = self._get_csv_path(trace_class)
+                    trace_class.write_csv(csv_file)
+            except OSError as err:
+                warnings.warn(
+                    "TRAPpy: Cache not created due to OS error: {0}".format(err))
+
         if self.normalize_time:
             self._normalize_time()
 
@@ -165,6 +264,15 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
                 trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
                 self.class_definitions[event_name] = trace_class
 
+    def __get_trace_class(self, line, cls_word):
+        trace_class = None
+        for unique_word, cls in cls_word.iteritems():
+            if unique_word in line:
+                trace_class = cls
+                if not cls.fallback:
+                    return trace_class
+        return trace_class
+
     def __populate_data(self, fin, cls_for_unique_word):
         """Append to trace data from a txt trace"""
 
@@ -173,16 +281,10 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
                                            actual_trace)
 
         for line in actual_trace:
-            trace_class = None
-            for unique_word, cls in cls_for_unique_word.iteritems():
-                if unique_word in line:
-                    trace_class = cls
-                    if not cls.fallback:
-                        break
-            else:
-                if not trace_class:
-                    self.lines += 1
-                    continue
+            trace_class = self.__get_trace_class(line, cls_for_unique_word)
+            if not trace_class:
+                self.lines += 1
+                continue
 
             line = line[:-1]
 
@@ -192,6 +294,8 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
             comm = fields_match.group('comm')
             pid = int(fields_match.group('pid'))
             cpu = int(fields_match.group('cpu'))
+            tgid = fields_match.group('tgid')
+            tgid = -1 if (not tgid or '-' in tgid) else int(tgid)
 
             # The timestamp, depending on the trace_clock configuration, can be
             # reported either in [s].[us] or [ns] format. Let's ensure that we
@@ -218,7 +322,7 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
             if "={}" in data_str:
                 data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
 
-            trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
+            trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str)
             self.lines += 1
 
     def trace_hasnt_started(self):
@@ -259,6 +363,8 @@ is part of the trace.
         cls_for_unique_word = {}
         for trace_name in self.class_definitions.iterkeys():
             trace_class = getattr(self, trace_name)
+            if trace_class.cached:
+                continue
 
             unique_word = trace_class.unique_word
             cls_for_unique_word[unique_word] = trace_class
@@ -309,7 +415,7 @@ is part of the trace.
 
         return ret
 
-    def run_event_callbacks(self, fn_map):
+    def apply_callbacks(self, fn_map):
         """
         Apply callback functions to trace events in chronological order.
 
@@ -321,7 +427,7 @@ is part of the trace.
         For example, to iterate over trace t, applying your functions callback_fn1
         and callback_fn2 to each sched_switch and sched_wakeup event respectively:
 
-        t.run_event_callbacks({
+        t.apply_callbacks({
             "sched_switch": callback_fn1,
             "sched_wakeup": callback_fn2
         })
author	android-build-team Robot <android-build-team-robot@google.com>	2017-07-02 07:40:06 +0000
committer	android-build-team Robot <android-build-team-robot@google.com>	2017-07-02 07:40:06 +0000
commit	2e1c5f191dec691230658dcba12048c658b7ce03 (patch)
tree	fe30c6c94affc5620f752b3cf12c766484a8c5b2
parent	696d715cfad7971a71bc565c3199b5e8c7f4f719 (diff)
parent	a951463b87e0a17229d5fcd426b0b05a4ffb0764 (diff)
download	trappy-2e1c5f191dec691230658dcba12048c658b7ce03.tar.gz