aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author android-build-team Robot <android-build-team-robot@google.com> 2017-07-02 07:40:06 +0000
committer android-build-team Robot <android-build-team-robot@google.com> 2017-07-02 07:40:06 +0000
commit 2e1c5f191dec691230658dcba12048c658b7ce03 (patch)
tree fe30c6c94affc5620f752b3cf12c766484a8c5b2
parent 696d715cfad7971a71bc565c3199b5e8c7f4f719 (diff)
parent a951463b87e0a17229d5fcd426b0b05a4ffb0764 (diff)
download trappy-2e1c5f191dec691230658dcba12048c658b7ce03.tar.gz
release-request-d9dc98f7-19b2-484c-b4d1-f35dc43e9c05-for-git_oc-mr1-release-4152006 snap-temp-L91700000079405440
Change-Id: I1beb78c74f7bc96c5031133d1ff22fe621e0e6ae
-rw-r--r-- tests/test_base.py 8
-rw-r--r-- tests/test_caching.py 193
-rw-r--r-- tests/test_ftrace.py 6
-rw-r--r-- tests/trace_sched.txt.cache/CpuIdle.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/CpuInPower.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/CpuOutPower.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/DevfreqInPower.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/DevfreqOutPower.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/PIDController.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/SchedContribScaleFactor.csv 2
-rw-r--r-- tests/trace_sched.txt.cache/SchedCpuCapacity.csv 2
-rw-r--r-- tests/trace_sched.txt.cache/SchedCpuFrequency.csv 2
-rw-r--r-- tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv 2
-rw-r--r-- tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv 2
-rw-r--r-- tests/trace_sched.txt.cache/SchedLoadAvgTask.csv 2
-rw-r--r-- tests/trace_sched.txt.cache/SchedMigrateTask.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/SchedSwitch.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/SchedWakeup.csv 3
-rw-r--r-- tests/trace_sched.txt.cache/SchedWakeupNew.csv 3
-rw-r--r-- tests/trace_sched.txt.cache/Thermal.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/ThermalGovernor.csv 1
-rw-r--r-- tests/trace_sched.txt.cache/md5sum 1
-rw-r--r-- tests/utils_tests.py 3
-rw-r--r-- trappy/bare_trace.py 3
-rw-r--r-- trappy/base.py 46
-rw-r--r-- trappy/ftrace.py 136
26 files changed, 392 insertions, 32 deletions
diff --git a/tests/test_base.py b/tests/test_base.py
index a0a4920..8bebfba 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -85,7 +85,7 @@ class TestBase(utils_tests.SetupDirectory):
in_data = """ kworker/4:1-397 [004] 720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61
kworker/4:1-397 [004] 720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14"""
- expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq",
+ expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "cpus", "freq",
"raw_cpu_power", "power"])
with open("trace.txt", "w") as fout:
@@ -131,7 +131,7 @@ class TestBase(utils_tests.SetupDirectory):
timestamp
)
- expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"])
+ expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "tag"])
with open("trace.txt", "w") as fout:
fout.write(in_data)
@@ -157,7 +157,7 @@ class TestBase(utils_tests.SetupDirectory):
in_data = """ rcu_preempt-7 [000] 73.604532: my_sched_stat_runtime: comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]"""
- expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
+ expected_columns = set(["__comm", "__pid", "__tgid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
with open("trace.txt", "w") as fout:
fout.write(in_data)
@@ -234,7 +234,7 @@ class TestBase(utils_tests.SetupDirectory):
df = trace.equals_event.data_frame
self.assertSetEqual(set(df.columns),
- set(["__comm", "__pid", "__cpu", "__line", "my_field"]))
+ set(["__comm", "__pid", "__tgid", "__cpu", "__line", "my_field"]))
self.assertListEqual(df["my_field"].tolist(),
["foo", "foo=bar", "foo=bar=baz", 1,
"1=2", "1=foo", "1foo=2"])
diff --git a/tests/test_caching.py b/tests/test_caching.py
new file mode 100644
index 0000000..d0893b7
--- /dev/null
+++ b/tests/test_caching.py
@@ -0,0 +1,193 @@
+# Copyright 2015-2017 ARM Limited, Google and contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import shutil
+import sys
+import unittest
+import utils_tests
+import trappy
+from trappy.ftrace import GenericFTrace
+
+class TestCaching(utils_tests.SetupDirectory):
+ def __init__(self, *args, **kwargs):
+ super(TestCaching, self).__init__(
+ [("trace_sched.txt", "trace.txt"),
+ ("trace_sched.txt", "trace.raw.txt")],
+ *args,
+ **kwargs)
+
+ def test_cache_created(self):
+ """Test cache creation when enabled"""
+ GenericFTrace.disable_cache = False
+ trace = trappy.FTrace()
+
+ trace_path = os.path.abspath(trace.trace_path)
+ trace_dir = os.path.dirname(trace_path)
+ trace_file = os.path.basename(trace_path)
+ cache_dir = '.' + trace_file + '.cache'
+
+ self.assertTrue(cache_dir in os.listdir(trace_dir))
+
+ def test_cache_not_created(self):
+ """Test that cache should not be created when disabled """
+ GenericFTrace.disable_cache = True
+ trace = trappy.FTrace()
+
+ trace_path = os.path.abspath(trace.trace_path)
+ trace_dir = os.path.dirname(trace_path)
+ trace_file = os.path.basename(trace_path)
+ cache_dir = '.' + trace_file + '.cache'
+
+ self.assertFalse(cache_dir in os.listdir(trace_dir))
+
+ def test_compare_cached_vs_uncached(self):
+ """ Test that the cached and uncached traces are same """
+ # Build the cache, but the actual trace will be parsed
+ # fresh since this is a first time parse
+ GenericFTrace.disable_cache = False
+ uncached_trace = trappy.FTrace()
+ uncached_dfr = uncached_trace.sched_wakeup.data_frame
+
+ # Now read from previously parsed cache by reusing the path
+ cached_trace = trappy.FTrace(uncached_trace.trace_path)
+ cached_dfr = cached_trace.sched_wakeup.data_frame
+
+ # Test whether timestamps are the same:
+ # The cached/uncached versions of the timestamps are slightly
+ # different because of floating point precision errors introduced by converting
+ # back and forth between CSV and DataFrame. For all purposes this is not relevant
+ # since such rounding doesn't affect the end result.
+ # Here's an example of the error, the actual normalized time when
+ # calculated by hand is 0.081489, however following is what's stored
+ # in the CSV for sched_wakeup events in this trace.
+ # When converting the index to strings (and also what's in the CSV)
+ # cached: ['0.0814890000001', '1.981491']
+ # uncached: ['0.0814890000001', '1.981491']
+ #
+ # Keeping index as numpy.float64
+ # cached: [0.081489000000100009, 1.9814909999999999]
+ # uncached: [0.081489000000146916, 1.9814909999995507]
+ #
+ # To make it possible to test, let's just convert the timestamps to strings
+ # and compare them below.
+
+ cached_times = [str(r[0]) for r in cached_dfr.iterrows()]
+ uncached_times = [str(r[0]) for r in uncached_dfr.iterrows()]
+
+ self.assertTrue(cached_times == uncached_times)
+
+ # compare other columns as well
+ self.assertTrue([r[1].pid for r in cached_dfr.iterrows()] ==
+ [r[1].pid for r in uncached_dfr.iterrows()])
+
+ self.assertTrue([r[1].comm for r in cached_dfr.iterrows()] ==
+ [r[1].comm for r in uncached_dfr.iterrows()])
+
+ self.assertTrue([r[1].prio for r in cached_dfr.iterrows()] ==
+ [r[1].prio for r in uncached_dfr.iterrows()])
+
+ def test_invalid_cache_overwritten(self):
+ """Test a cache with a bad checksum is overwritten"""
+ # This is a directory so we can't use the files_to_copy arg of
+ # SetupDirectory, just do it ourselves.
+ cache_path = ".trace.txt.cache"
+ src = os.path.join(utils_tests.TESTS_DIRECTORY, "trace_sched.txt.cache")
+ shutil.copytree(src, cache_path)
+
+ md5_path = os.path.join(cache_path, "md5sum")
+ def read_md5sum():
+ with open(md5_path) as f:
+ return f.read()
+
+ # Change 1 character of the stored checksum
+ md5sum = read_md5sum()
+ # Sorry, I guess modifying strings in Python is kind of awkward?
+ md5sum_inc = "".join(list(md5sum[:-1]) + [chr(ord(md5sum[-1]) + 1)])
+ with open(md5_path, "w") as f:
+ f.write(md5sum_inc)
+
+ # Parse a trace, this should delete and overwrite the invalidated cache
+ GenericFTrace.disable_cache = False
+ trace = trappy.FTrace()
+
+ # Check that the modified md5sum was overwritten
+ self.assertNotEqual(read_md5sum(), md5sum_inc,
+ "The invalid ftrace cache wasn't overwritten")
+
+ def test_cache_dynamic_events(self):
+ """Test that caching works if new event parsers have been registered"""
+
+ # Parse the trace to create a cache
+ GenericFTrace.disable_cache = False
+ trace1 = trappy.FTrace()
+
+ # Check we're actually testing what we think we are
+ if hasattr(trace1, 'dynamic_event'):
+ raise RuntimeError('Test bug: found unexpected event in trace')
+
+ # Now register a new event type, call the constructor again, and check
+ # that the newly added event (which is not present in the cache) is
+ # parsed.
+
+ parse_class = trappy.register_dynamic_ftrace("DynamicEvent", "dynamic_test_key")
+
+ trace2 = trappy.FTrace()
+ self.assertTrue(len(trace2.dynamic_event.data_frame) == 1)
+
+ trappy.unregister_dynamic_ftrace(parse_class)
+
+ def test_cache_normalize_time(self):
+ """Test that caching doesn't break normalize_time"""
+ GenericFTrace.disable_cache = False
+
+ # Times in trace_sched.txt
+ start_time = 6550.018511
+ first_freq_event_time = 6550.056870
+
+ # Parse without normalizing time
+ trace1 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'],
+ normalize_time=False)
+
+ self.assertEqual(trace1.cpu_frequency.data_frame.index[0],
+ first_freq_event_time)
+
+ # Parse with normalized time
+ trace2 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'],
+ normalize_time=True)
+
+ self.assertEqual(trace2.cpu_frequency.data_frame.index[0],
+ first_freq_event_time - start_time)
+
+ def test_cache_window(self):
+ """Test that caching doesn't break the 'window' parameter"""
+ GenericFTrace.disable_cache = False
+
+ trace1 = trappy.FTrace(
+ events=['sched_wakeup'],
+ window=(0, 1))
+
+ # Check that we're testing what we think we're testing. The trace
+ # contains 2 sched_wakeup events; this window should get rid of one of
+ # them.
+ if len(trace1.sched_wakeup.data_frame) != 1:
+ raise RuntimeError('Test bug: bad sched_wakeup event count')
+
+ # Parse again without the window
+ trace1 = trappy.FTrace(
+ events=['sched_wakeup'],
+ window=(0, None))
+
+ self.assertEqual(len(trace1.sched_wakeup.data_frame), 2)
diff --git a/tests/test_ftrace.py b/tests/test_ftrace.py
index 9f09e8e..e6f6319 100644
--- a/tests/test_ftrace.py
+++ b/tests/test_ftrace.py
@@ -230,8 +230,8 @@ class TestFTrace(BaseTestThermal):
# Make sure there are no NaNs in the middle of the array
self.assertTrue(allfreqs[0][1]["A57_freq_in"].notnull().all())
- def test_run_event_callbacks(self):
- """Test run_event_callbacks()"""
+ def test_apply_callbacks(self):
+ """Test apply_callbacks()"""
counts = {
"cpu_in_power": 0,
@@ -250,7 +250,7 @@ class TestFTrace(BaseTestThermal):
}
trace = trappy.FTrace()
- trace.run_event_callbacks(fn_map)
+ trace.apply_callbacks(fn_map)
self.assertEqual(counts["cpu_in_power"], 134)
self.assertEqual(counts["cpu_out_power"], 134)
diff --git a/tests/trace_sched.txt.cache/CpuIdle.csv b/tests/trace_sched.txt.cache/CpuIdle.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuIdle.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/CpuInPower.csv b/tests/trace_sched.txt.cache/CpuInPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuInPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/CpuOutPower.csv b/tests/trace_sched.txt.cache/CpuOutPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuOutPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/DevfreqInPower.csv b/tests/trace_sched.txt.cache/DevfreqInPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/DevfreqInPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/DevfreqOutPower.csv b/tests/trace_sched.txt.cache/DevfreqOutPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/DevfreqOutPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/PIDController.csv b/tests/trace_sched.txt.cache/PIDController.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/PIDController.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv
new file mode 100644
index 0000000..a1764fe
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,cpu_scale_factor,freq_scale_factor
+0.000167999999576,<idle>,0,0,0,1024,426
diff --git a/tests/trace_sched.txt.cache/SchedCpuCapacity.csv b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv
new file mode 100644
index 0000000..4b75c6a
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,capacity,cpu,rt_capacity
+0.000293999999485,trace-cmd,3,3519,430,3,1024
diff --git a/tests/trace_sched.txt.cache/SchedCpuFrequency.csv b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv
new file mode 100644
index 0000000..dbb941d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,frequency
+0.0383590000001,kworker/0:0,0,3410,0,600000
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv
new file mode 100644
index 0000000..54a9596
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,load,utilization
+1.99999976758e-06,sshd,0,2962,0,13,18
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv
new file mode 100644
index 0000000..fc57841
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpus,load,utilization
+0.0,rcuos/2,1,22,00000002,0,0
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv
new file mode 100644
index 0000000..8b3ccfe
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,avg_period,comm,load,pid,runnable_avg_sum,running_avg_sum,utilization
+9.99999429041e-07,trace-cmd,4,2971,48595,sshd,0,2962,0,0,0
diff --git a/tests/trace_sched.txt.cache/SchedMigrateTask.csv b/tests/trace_sched.txt.cache/SchedMigrateTask.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedMigrateTask.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedSwitch.csv b/tests/trace_sched.txt.cache/SchedSwitch.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedSwitch.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedWakeup.csv b/tests/trace_sched.txt.cache/SchedWakeup.csv
new file mode 100644
index 0000000..6210734
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedWakeup.csv
@@ -0,0 +1,3 @@
+Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu
+0.0814890000001,<idle>,1,0,rcu_preempt,7,120,1,1
+1.981491,<idle>,1,0,rcu_preempt,7,120,1,1
diff --git a/tests/trace_sched.txt.cache/SchedWakeupNew.csv b/tests/trace_sched.txt.cache/SchedWakeupNew.csv
new file mode 100644
index 0000000..4ea006b
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedWakeupNew.csv
@@ -0,0 +1,3 @@
+Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu
+0.000152999999955,<...>,0,19427,shutils,19428,120,1,2
+1.975373,<...>,0,19427,shutils,19428,120,1,2
diff --git a/tests/trace_sched.txt.cache/Thermal.csv b/tests/trace_sched.txt.cache/Thermal.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/Thermal.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/ThermalGovernor.csv b/tests/trace_sched.txt.cache/ThermalGovernor.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/ThermalGovernor.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/md5sum b/tests/trace_sched.txt.cache/md5sum
new file mode 100644
index 0000000..9b481a3
--- /dev/null
+++ b/tests/trace_sched.txt.cache/md5sum
@@ -0,0 +1 @@
+47be9ccdd36fa0c3646b0d9b0f649da4
\ No newline at end of file
diff --git a/tests/utils_tests.py b/tests/utils_tests.py
index 617cfa3..e13b868 100644
--- a/tests/utils_tests.py
+++ b/tests/utils_tests.py
@@ -19,6 +19,8 @@ import os
import shutil
import subprocess
import tempfile
+import trappy
+from trappy.ftrace import GenericFTrace
TESTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
@@ -36,6 +38,7 @@ class SetupDirectory(unittest.TestCase):
def __init__(self, files_to_copy, *args, **kwargs):
self.files_to_copy = files_to_copy
super(SetupDirectory, self).__init__(*args, **kwargs)
+ GenericFTrace.disable_cache = True
def setUp(self):
self.previous_dir = os.getcwd()
diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py
index e4fec48..a953a60 100644
--- a/trappy/bare_trace.py
+++ b/trappy/bare_trace.py
@@ -134,6 +134,9 @@ class BareTrace(object):
def finalize_objects(self):
for trace_class in self.trace_classes:
+ # If cached, don't need to do any other DF operation
+ if trace_class.cached:
+ continue
trace_class.tracer = self
trace_class.create_dataframe()
trace_class.finalize_object()
diff --git a/trappy/base.py b/trappy/base.py
index 3c4c7bd..8a7fb38 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -111,8 +111,10 @@ class Base(object):
self.time_array = []
self.comm_array = []
self.pid_array = []
+ self.tgid_array = []
self.cpu_array = []
self.parse_raw = parse_raw
+ self.cached = False
def finalize_object(self):
pass
@@ -151,7 +153,7 @@ class Base(object):
return ret
- def append_data(self, time, comm, pid, cpu, line, data):
+ def append_data(self, time, comm, pid, tgid, cpu, line, data):
"""Append data parsed from a line to the corresponding arrays
The :mod:`DataFrame` will be created from this when the whole trace
@@ -175,15 +177,31 @@ class Base(object):
self.time_array.append(time)
self.comm_array.append(comm)
self.pid_array.append(pid)
+ self.tgid_array.append(tgid)
self.cpu_array.append(cpu)
self.line_array.append(line)
self.data_array.append(data)
- def conv_to_int(self, value):
+ def string_cast(self, string, type):
+ """ Attempt to convert string to another type
+
+ Here we attempt to cast string to a type. Currently only
+ integer conversion is supported with future expansion
+ left open to other types.
+
+ :param string: The value to convert.
+ :type string: str
+
+ :param type: The type to convert to.
+ :type type: type
+ """
+ # Currently this function only supports int conversion
+ if type != int:
+ return
# Handle false-positives for negative numbers
- if value.lstrip("-").isdigit():
- value = int(value)
- return value
+ if not string.lstrip("-").isdigit():
+ return string
+ return int(string)
def generate_data_dict(self, data_str):
data_dict = {}
@@ -196,7 +214,7 @@ class Base(object):
data_dict[prev_key] += ' ' + field
continue
(key, value) = field.split('=', 1)
- value = self.conv_to_int(value)
+ value = self.string_cast(value, int)
data_dict[key] = value
prev_key = key
return data_dict
@@ -210,10 +228,10 @@ class Base(object):
check_memory_usage = True
check_memory_count = 1
- for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
- self.cpu_array, self.line_array,
- self.data_array):
- data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line}
+ for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
+ self.tgid_array, self.cpu_array,
+ self.line_array, self.data_array):
+ data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
data_dict.update(self.generate_data_dict(data_str))
# When running out of memory, Pandas has been observed to segfault
@@ -262,6 +280,14 @@ class Base(object):
"""
self.data_frame.to_csv(fname)
+ def read_csv(self, fname):
+ """Read the csv data into a DataFrame
+
+ :param fname: The name of the CSV file
+ :type fname: str
+ """
+ self.data_frame = pd.read_csv(fname, index_col = 0)
+
def normalize_time(self, basetime):
"""Substract basetime from the Time of the data frame
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index e308398..eabafc8 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -18,9 +18,13 @@
# pylint: disable=no-member
import itertools
+import json
import os
import re
import pandas as pd
+import hashlib
+import shutil
+import warnings
from trappy.bare_trace import BareTrace
from trappy.utils import listify
@@ -47,8 +51,8 @@ def _plot_freq_hists(allfreqs, what, axis, title):
"Frequency", xlim, "default")
SPECIAL_FIELDS_RE = re.compile(
- r"^\s*(?P<comm>.*)-(?P<pid>\d+)(?:\s+\(.*\))"\
- r"?\s+\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
+ r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
+ r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)"
)
@@ -62,6 +66,67 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
dynamic_classes = {}
+ disable_cache = False
+
+ def _trace_cache_path(self):
+ trace_file = self.trace_path
+ cache_dir = '.' + os.path.basename(trace_file) + '.cache'
+ tracefile_dir = os.path.dirname(os.path.abspath(trace_file))
+ cache_path = os.path.join(tracefile_dir, cache_dir)
+ return cache_path
+
+ def _check_trace_cache(self, params):
+ cache_path = self._trace_cache_path()
+ md5file = os.path.join(cache_path, 'md5sum')
+ basetime_path = os.path.join(cache_path, 'basetime')
+ params_path = os.path.join(cache_path, 'params.json')
+
+ for path in [cache_path, md5file, params_path]:
+ if not os.path.exists(path):
+ return False
+
+ with open(md5file) as f:
+ cache_md5sum = f.read()
+ with open(basetime_path) as f:
+ self.basetime = float(f.read())
+ with open(self.trace_path, 'rb') as f:
+ trace_md5sum = hashlib.md5(f.read()).hexdigest()
+ with open(params_path) as f:
+ cache_params = json.dumps(json.load(f))
+
+ # Convert to a json string for comparison
+ params = json.dumps(params)
+
+ # check if cache is valid
+ if cache_md5sum != trace_md5sum or cache_params != params:
+ shutil.rmtree(cache_path)
+ return False
+ return True
+
+ def _create_trace_cache(self, params):
+ cache_path = self._trace_cache_path()
+ md5file = os.path.join(cache_path, 'md5sum')
+ basetime_path = os.path.join(cache_path, 'basetime')
+ params_path = os.path.join(cache_path, 'params.json')
+
+ if os.path.exists(cache_path):
+ shutil.rmtree(cache_path)
+ os.mkdir(cache_path)
+
+ md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest()
+ with open(md5file, 'w') as f:
+ f.write(md5sum)
+
+ with open(basetime_path, 'w') as f:
+ f.write(str(self.basetime))
+
+ with open(params_path, 'w') as f:
+ json.dump(params, f)
+
+ def _get_csv_path(self, trace_class):
+ path = self._trace_cache_path()
+ return os.path.join(path, trace_class.__class__.__name__ + '.csv')
+
def __init__(self, name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
super(GenericFTrace, self).__init__(name)
@@ -127,9 +192,43 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
del scope_classes[name]
def _do_parse(self):
+ params = {'window': self.window, 'abs_window': self.abs_window}
+ if not self.__class__.disable_cache and self._check_trace_cache(params):
+ # Read csv into frames
+ for trace_class in self.trace_classes:
+ try:
+ csv_file = self._get_csv_path(trace_class)
+ trace_class.read_csv(csv_file)
+ trace_class.cached = True
+ except:
+ warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class)
+ warnings.warn(warnstr)
+
+ if all([c.cached for c in self.trace_classes]):
+ if self.normalize_time:
+ self._normalize_time()
+ return
+
self.__parse_trace_file(self.trace_path)
+
self.finalize_objects()
+ if not self.__class__.disable_cache:
+ try:
+ # Recreate basic cache directories only if nothing cached
+ if not all([c.cached for c in self.trace_classes]):
+ self._create_trace_cache(params)
+
+ # Write out only events that weren't cached before
+ for trace_class in self.trace_classes:
+ if trace_class.cached:
+ continue
+ csv_file = self._get_csv_path(trace_class)
+ trace_class.write_csv(csv_file)
+ except OSError as err:
+ warnings.warn(
+ "TRAPpy: Cache not created due to OS error: {0}".format(err))
+
if self.normalize_time:
self._normalize_time()
@@ -165,6 +264,15 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
self.class_definitions[event_name] = trace_class
+ def __get_trace_class(self, line, cls_word):
+ trace_class = None
+ for unique_word, cls in cls_word.iteritems():
+ if unique_word in line:
+ trace_class = cls
+ if not cls.fallback:
+ return trace_class
+ return trace_class
+
def __populate_data(self, fin, cls_for_unique_word):
"""Append to trace data from a txt trace"""
@@ -173,16 +281,10 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
actual_trace)
for line in actual_trace:
- trace_class = None
- for unique_word, cls in cls_for_unique_word.iteritems():
- if unique_word in line:
- trace_class = cls
- if not cls.fallback:
- break
- else:
- if not trace_class:
- self.lines += 1
- continue
+ trace_class = self.__get_trace_class(line, cls_for_unique_word)
+ if not trace_class:
+ self.lines += 1
+ continue
line = line[:-1]
@@ -192,6 +294,8 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
comm = fields_match.group('comm')
pid = int(fields_match.group('pid'))
cpu = int(fields_match.group('cpu'))
+ tgid = fields_match.group('tgid')
+ tgid = -1 if (not tgid or '-' in tgid) else int(tgid)
# The timestamp, depending on the trace_clock configuration, can be
# reported either in [s].[us] or [ns] format. Let's ensure that we
@@ -218,7 +322,7 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
if "={}" in data_str:
data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
- trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
+ trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str)
self.lines += 1
def trace_hasnt_started(self):
@@ -259,6 +363,8 @@ is part of the trace.
cls_for_unique_word = {}
for trace_name in self.class_definitions.iterkeys():
trace_class = getattr(self, trace_name)
+ if trace_class.cached:
+ continue
unique_word = trace_class.unique_word
cls_for_unique_word[unique_word] = trace_class
@@ -309,7 +415,7 @@ is part of the trace.
return ret
- def run_event_callbacks(self, fn_map):
+ def apply_callbacks(self, fn_map):
"""
Apply callback functions to trace events in chronological order.
@@ -321,7 +427,7 @@ is part of the trace.
For example, to iterate over trace t, applying your functions callback_fn1
and callback_fn2 to each sched_switch and sched_wakeup event respectively:
- t.run_event_callbacks({
+ t.apply_callbacks({
"sched_switch": callback_fn1,
"sched_wakeup": callback_fn2
})