author     KP Singh <kpsingh@google.com>  2017-06-29 22:53:34 +0200
committer  KP Singh <kpsingh@google.com>  2017-06-29 22:53:34 +0200
commit     d84e541b1badf77e639022a5608852c14889f2d3 (patch)
tree       29ec7dd48d9be7f17a96ecf3e87dcc7b423ce347
parent     50610cbe8742f4a1e3da60b406cb36b4c962bd72 (diff)
parent     878b9acd722329594362d333a6d7a1ee291746a8 (diff)
download   trappy-d84e541b1badf77e639022a5608852c14889f2d3.tar.gz
Merge branch 'joelagnel-for-trappy-cache'
-rw-r--r--  tests/test_caching.py | 193
-rw-r--r--  tests/trace_sched.txt.cache/CpuIdle.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/CpuInPower.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/CpuOutPower.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/DevfreqInPower.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/DevfreqOutPower.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/PIDController.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/SchedContribScaleFactor.csv | 2
-rw-r--r--  tests/trace_sched.txt.cache/SchedCpuCapacity.csv | 2
-rw-r--r--  tests/trace_sched.txt.cache/SchedCpuFrequency.csv | 2
-rw-r--r--  tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv | 2
-rw-r--r--  tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv | 2
-rw-r--r--  tests/trace_sched.txt.cache/SchedLoadAvgTask.csv | 2
-rw-r--r--  tests/trace_sched.txt.cache/SchedMigrateTask.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/SchedSwitch.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/SchedWakeup.csv | 3
-rw-r--r--  tests/trace_sched.txt.cache/SchedWakeupNew.csv | 3
-rw-r--r--  tests/trace_sched.txt.cache/Thermal.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/ThermalGovernor.csv | 1
-rw-r--r--  tests/trace_sched.txt.cache/md5sum | 1
-rw-r--r--  tests/utils_tests.py | 3
-rw-r--r--  trappy/base.py | 9
-rw-r--r--  trappy/ftrace.py | 86
23 files changed, 320 insertions, 0 deletions
diff --git a/tests/test_caching.py b/tests/test_caching.py
new file mode 100644
index 0000000..d0893b7
--- /dev/null
+++ b/tests/test_caching.py
@@ -0,0 +1,193 @@
+# Copyright 2015-2017 ARM Limited, Google and contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import shutil
+import sys
+import unittest
+import utils_tests
+import trappy
+from trappy.ftrace import GenericFTrace
+
+class TestCaching(utils_tests.SetupDirectory):
+ def __init__(self, *args, **kwargs):
+ super(TestCaching, self).__init__(
+ [("trace_sched.txt", "trace.txt"),
+ ("trace_sched.txt", "trace.raw.txt")],
+ *args,
+ **kwargs)
+
+ def test_cache_created(self):
+ """Test cache creation when enabled"""
+ GenericFTrace.disable_cache = False
+ trace = trappy.FTrace()
+
+ trace_path = os.path.abspath(trace.trace_path)
+ trace_dir = os.path.dirname(trace_path)
+ trace_file = os.path.basename(trace_path)
+ cache_dir = '.' + trace_file + '.cache'
+
+ self.assertTrue(cache_dir in os.listdir(trace_dir))
+
+ def test_cache_not_created(self):
+ """Test that cache should not be created when disabled """
+ GenericFTrace.disable_cache = True
+ trace = trappy.FTrace()
+
+ trace_path = os.path.abspath(trace.trace_path)
+ trace_dir = os.path.dirname(trace_path)
+ trace_file = os.path.basename(trace_path)
+ cache_dir = '.' + trace_file + '.cache'
+
+ self.assertFalse(cache_dir in os.listdir(trace_dir))
+
+ def test_compare_cached_vs_uncached(self):
+ """ Test that the cached and uncached traces are same """
+ # Build the cache, but the actual trace will be parsed
+ # fresh since this is a first time parse
+ GenericFTrace.disable_cache = False
+ uncached_trace = trappy.FTrace()
+ uncached_dfr = uncached_trace.sched_wakeup.data_frame
+
+ # Now read from previously parsed cache by reusing the path
+ cached_trace = trappy.FTrace(uncached_trace.trace_path)
+ cached_dfr = cached_trace.sched_wakeup.data_frame
+
+ # Test whether timestamps are the same:
+ # The cached/uncached versions of the timestamps are slightly
+ # different due to floating point precision errors introduced by converting
+ # back and forth between CSV and DataFrame. For all practical purposes this
+ # is not relevant since such rounding doesn't affect the end result.
+ # Here's an example of the error: the actual normalized time, when
+ # calculated by hand, is 0.081489; however, the following is what's stored
+ # in the CSV for sched_wakeup events in this trace.
+ # When converting the index to strings (and also what's in the CSV)
+ # cached: ['0.0814890000001', '1.981491']
+ # uncached: ['0.0814890000001', '1.981491']
+ #
+ # Keeping index as numpy.float64
+ # cached: [0.081489000000100009, 1.9814909999999999]
+ # uncached: [0.081489000000146916, 1.9814909999995507]
+ #
+ # To make it possible to test, let's just convert the timestamps to strings
+ # and compare them below.
+
+ cached_times = [str(r[0]) for r in cached_dfr.iterrows()]
+ uncached_times = [str(r[0]) for r in uncached_dfr.iterrows()]
+
+ self.assertTrue(cached_times == uncached_times)
+
+ # compare other columns as well
+ self.assertTrue([r[1].pid for r in cached_dfr.iterrows()] ==
+ [r[1].pid for r in uncached_dfr.iterrows()])
+
+ self.assertTrue([r[1].comm for r in cached_dfr.iterrows()] ==
+ [r[1].comm for r in uncached_dfr.iterrows()])
+
+ self.assertTrue([r[1].prio for r in cached_dfr.iterrows()] ==
+ [r[1].prio for r in uncached_dfr.iterrows()])
+
+ def test_invalid_cache_overwritten(self):
+ """Test a cache with a bad checksum is overwritten"""
+ # This is a directory, so we can't use the files_to_copy arg of
+ # SetupDirectory; just do it ourselves.
+ cache_path = ".trace.txt.cache"
+ src = os.path.join(utils_tests.TESTS_DIRECTORY, "trace_sched.txt.cache")
+ shutil.copytree(src, cache_path)
+
+ md5_path = os.path.join(cache_path, "md5sum")
+ def read_md5sum():
+ with open(md5_path) as f:
+ return f.read()
+
+ # Change 1 character of the stored checksum
+ md5sum = read_md5sum()
+ # Strings are immutable in Python, so build a new one with the last character changed
+ md5sum_inc = "".join(list(md5sum[:-1]) + [chr(ord(md5sum[-1]) + 1)])
+ with open(md5_path, "w") as f:
+ f.write(md5sum_inc)
+
+ # Parse a trace, this should delete and overwrite the invalidated cache
+ GenericFTrace.disable_cache = False
+ trace = trappy.FTrace()
+
+ # Check that the modified md5sum was overwritten
+ self.assertNotEqual(read_md5sum(), md5sum_inc,
+ "The invalid ftrace cache wasn't overwritten")
+
+ def test_cache_dynamic_events(self):
+ """Test that caching works if new event parsers have been registered"""
+
+ # Parse the trace to create a cache
+ GenericFTrace.disable_cache = False
+ trace1 = trappy.FTrace()
+
+ # Check we're actually testing what we think we are
+ if hasattr(trace1, 'dynamic_event'):
+ raise RuntimeError('Test bug: found unexpected event in trace')
+
+ # Now register a new event type, call the constructor again, and check
+ # that the newly added event (which is not present in the cache) is
+ # parsed.
+
+ parse_class = trappy.register_dynamic_ftrace("DynamicEvent", "dynamic_test_key")
+
+ trace2 = trappy.FTrace()
+ self.assertTrue(len(trace2.dynamic_event.data_frame) == 1)
+
+ trappy.unregister_dynamic_ftrace(parse_class)
+
+ def test_cache_normalize_time(self):
+ """Test that caching doesn't break normalize_time"""
+ GenericFTrace.disable_cache = False
+
+ # Times in trace_sched.txt
+ start_time = 6550.018511
+ first_freq_event_time = 6550.056870
+
+ # Parse without normalizing time
+ trace1 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'],
+ normalize_time=False)
+
+ self.assertEqual(trace1.cpu_frequency.data_frame.index[0],
+ first_freq_event_time)
+
+ # Parse with normalized time
+ trace2 = trappy.FTrace(events=['cpu_frequency', 'sched_wakeup'],
+ normalize_time=True)
+
+ self.assertEqual(trace2.cpu_frequency.data_frame.index[0],
+ first_freq_event_time - start_time)
+
+ def test_cache_window(self):
+ """Test that caching doesn't break the 'window' parameter"""
+ GenericFTrace.disable_cache = False
+
+ trace1 = trappy.FTrace(
+ events=['sched_wakeup'],
+ window=(0, 1))
+
+ # Check that we're testing what we think we're testing. The trace
+ # contains 2 sched_wakeup events; this window should get rid of one of
+ # them.
+ if len(trace1.sched_wakeup.data_frame) != 1:
+ raise RuntimeError('Test bug: bad sched_wakeup event count')
+
+ # Parse again without the window
+ trace1 = trappy.FTrace(
+ events=['sched_wakeup'],
+ window=(0, None))
+
+ self.assertEqual(len(trace1.sched_wakeup.data_frame), 2)
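(For orientation, the pattern these tests rely on is toggling the class-level GenericFTrace.disable_cache flag before constructing an FTrace object: the first parse writes the cache, later parses of the same path read from it. A minimal usage sketch, with a hypothetical trace path:

    import trappy
    from trappy.ftrace import GenericFTrace

    # Enable caching; the first parse creates .trace.txt.cache next to the trace
    GenericFTrace.disable_cache = False
    trace = trappy.FTrace("trace.txt")

    # A second parse of the same path reads the per-event CSVs from the cache
    trace_again = trappy.FTrace(trace.trace_path)

    # The test harness (SetupDirectory) turns caching back off by default
    GenericFTrace.disable_cache = True
)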
diff --git a/tests/trace_sched.txt.cache/CpuIdle.csv b/tests/trace_sched.txt.cache/CpuIdle.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuIdle.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/CpuInPower.csv b/tests/trace_sched.txt.cache/CpuInPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuInPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/CpuOutPower.csv b/tests/trace_sched.txt.cache/CpuOutPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/CpuOutPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/DevfreqInPower.csv b/tests/trace_sched.txt.cache/DevfreqInPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/DevfreqInPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/DevfreqOutPower.csv b/tests/trace_sched.txt.cache/DevfreqOutPower.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/DevfreqOutPower.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/PIDController.csv b/tests/trace_sched.txt.cache/PIDController.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/PIDController.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv
new file mode 100644
index 0000000..a1764fe
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedContribScaleFactor.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,cpu_scale_factor,freq_scale_factor
+0.000167999999576,<idle>,0,0,0,1024,426
diff --git a/tests/trace_sched.txt.cache/SchedCpuCapacity.csv b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv
new file mode 100644
index 0000000..4b75c6a
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedCpuCapacity.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,capacity,cpu,rt_capacity
+0.000293999999485,trace-cmd,3,3519,430,3,1024
diff --git a/tests/trace_sched.txt.cache/SchedCpuFrequency.csv b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv
new file mode 100644
index 0000000..dbb941d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedCpuFrequency.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,frequency
+0.0383590000001,kworker/0:0,0,3410,0,600000
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv
new file mode 100644
index 0000000..54a9596
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgCpu.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpu,load,utilization
+1.99999976758e-06,sshd,0,2962,0,13,18
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv
new file mode 100644
index 0000000..fc57841
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgSchedGroup.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,cpus,load,utilization
+0.0,rcuos/2,1,22,00000002,0,0
diff --git a/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv
new file mode 100644
index 0000000..8b3ccfe
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedLoadAvgTask.csv
@@ -0,0 +1,2 @@
+Time,__comm,__cpu,__pid,avg_period,comm,load,pid,runnable_avg_sum,running_avg_sum,utilization
+9.99999429041e-07,trace-cmd,4,2971,48595,sshd,0,2962,0,0,0
diff --git a/tests/trace_sched.txt.cache/SchedMigrateTask.csv b/tests/trace_sched.txt.cache/SchedMigrateTask.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedMigrateTask.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedSwitch.csv b/tests/trace_sched.txt.cache/SchedSwitch.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedSwitch.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/SchedWakeup.csv b/tests/trace_sched.txt.cache/SchedWakeup.csv
new file mode 100644
index 0000000..6210734
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedWakeup.csv
@@ -0,0 +1,3 @@
+Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu
+0.0814890000001,<idle>,1,0,rcu_preempt,7,120,1,1
+1.981491,<idle>,1,0,rcu_preempt,7,120,1,1
diff --git a/tests/trace_sched.txt.cache/SchedWakeupNew.csv b/tests/trace_sched.txt.cache/SchedWakeupNew.csv
new file mode 100644
index 0000000..4ea006b
--- /dev/null
+++ b/tests/trace_sched.txt.cache/SchedWakeupNew.csv
@@ -0,0 +1,3 @@
+Time,__comm,__cpu,__pid,comm,pid,prio,success,target_cpu
+0.000152999999955,<...>,0,19427,shutils,19428,120,1,2
+1.975373,<...>,0,19427,shutils,19428,120,1,2
diff --git a/tests/trace_sched.txt.cache/Thermal.csv b/tests/trace_sched.txt.cache/Thermal.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/Thermal.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/ThermalGovernor.csv b/tests/trace_sched.txt.cache/ThermalGovernor.csv
new file mode 100644
index 0000000..e16c76d
--- /dev/null
+++ b/tests/trace_sched.txt.cache/ThermalGovernor.csv
@@ -0,0 +1 @@
+""
diff --git a/tests/trace_sched.txt.cache/md5sum b/tests/trace_sched.txt.cache/md5sum
new file mode 100644
index 0000000..9b481a3
--- /dev/null
+++ b/tests/trace_sched.txt.cache/md5sum
@@ -0,0 +1 @@
+47be9ccdd36fa0c3646b0d9b0f649da4
\ No newline at end of file
diff --git a/tests/utils_tests.py b/tests/utils_tests.py
index 617cfa3..e13b868 100644
--- a/tests/utils_tests.py
+++ b/tests/utils_tests.py
@@ -19,6 +19,8 @@ import os
import shutil
import subprocess
import tempfile
+import trappy
+from trappy.ftrace import GenericFTrace
TESTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
@@ -36,6 +38,7 @@ class SetupDirectory(unittest.TestCase):
def __init__(self, files_to_copy, *args, **kwargs):
self.files_to_copy = files_to_copy
super(SetupDirectory, self).__init__(*args, **kwargs)
+ GenericFTrace.disable_cache = True
def setUp(self):
self.previous_dir = os.getcwd()
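(Since SetupDirectory now disables caching globally, individual tests that exercise the cache have to opt back in. A small sketch of that opt-in/restore pattern, assuming plain unittest; the class name is illustrative:

    import unittest
    from trappy.ftrace import GenericFTrace

    class MyCachingTest(unittest.TestCase):
        def setUp(self):
            # SetupDirectory defaults caching off; opt back in for this test
            GenericFTrace.disable_cache = False

        def tearDown(self):
            # Restore the default so unrelated tests don't pick up stale CSVs
            GenericFTrace.disable_cache = True
)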
diff --git a/trappy/base.py b/trappy/base.py
index 4f06af2..4f2fb21 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -113,6 +113,7 @@ class Base(object):
self.pid_array = []
self.cpu_array = []
self.parse_raw = parse_raw
+ self.cached = False
def finalize_object(self):
pass
@@ -259,6 +260,14 @@ class Base(object):
"""
self.data_frame.to_csv(fname)
+ def read_csv(self, fname):
+ """Read the csv data into a DataFrame
+
+ :param fname: The name of the CSV file
+ :type fname: str
+ """
+ self.data_frame = pd.read_csv(fname, index_col = 0)
+
def normalize_time(self, basetime):
"""Substract basetime from the Time of the data frame
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index ea435f5..dae1e43 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -18,9 +18,13 @@
# pylint: disable=no-member
import itertools
+import json
import os
import re
import pandas as pd
+import hashlib
+import shutil
+import warnings
from trappy.bare_trace import BareTrace
from trappy.utils import listify
@@ -62,6 +66,57 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
dynamic_classes = {}
+ disable_cache = False
+
+ def _trace_cache_path(self):
+ trace_file = self.trace_path
+ cache_dir = '.' + os.path.basename(trace_file) + '.cache'
+ tracefile_dir = os.path.dirname(os.path.abspath(trace_file))
+ cache_path = os.path.join(tracefile_dir, cache_dir)
+ return cache_path
+
+ def _check_trace_cache(self, params):
+ cache_path = self._trace_cache_path()
+ md5file = os.path.join(cache_path, 'md5sum')
+ params_path = os.path.join(cache_path, 'params.json')
+
+ for path in [cache_path, md5file, params_path]:
+ if not os.path.exists(path):
+ return False
+
+ with open(md5file) as f:
+ cache_md5sum = f.read()
+ with open(self.trace_path, 'rb') as f:
+ trace_md5sum = hashlib.md5(f.read()).hexdigest()
+ with open(params_path) as f:
+ cache_params = json.load(f)
+
+ # check if cache is valid
+ if cache_md5sum != trace_md5sum or cache_params != params:
+ shutil.rmtree(cache_path)
+ return False
+ return True
+
+ def _create_trace_cache(self, params):
+ cache_path = self._trace_cache_path()
+ md5file = os.path.join(cache_path, 'md5sum')
+ params_path = os.path.join(cache_path, 'params.json')
+
+ if os.path.exists(cache_path):
+ shutil.rmtree(cache_path)
+ os.mkdir(cache_path)
+
+ md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest()
+ with open(md5file, 'w') as f:
+ f.write(md5sum)
+
+ with open(params_path, 'w') as f:
+ json.dump(params, f)
+
+ def _get_csv_path(self, trace_class):
+ path = self._trace_cache_path()
+ return os.path.join(path, trace_class.__class__.__name__ + '.csv')
+
def __init__(self, name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
super(GenericFTrace, self).__init__(name)
@@ -127,7 +182,36 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
del scope_classes[name]
def _do_parse(self):
+ params = {'window': self.window, 'abs_window': self.abs_window}
+ if not self.__class__.disable_cache and self._check_trace_cache(params):
+ # Read csv into frames
+ for trace_class in self.trace_classes:
+ try:
+ csv_file = self._get_csv_path(trace_class)
+ trace_class.read_csv(csv_file)
+ trace_class.cached = True
+ except:
+ warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class)
+ warnings.warn(warnstr)
+
self.__parse_trace_file(self.trace_path)
+
+ if not self.__class__.disable_cache:
+ try:
+ # Recreate basic cache directories only if nothing cached
+ if not all([c.cached for c in self.trace_classes]):
+ self._create_trace_cache(params)
+
+ # Write out only events that weren't cached before
+ for trace_class in self.trace_classes:
+ if trace_class.cached:
+ continue
+ csv_file = self._get_csv_path(trace_class)
+ trace_class.write_csv(csv_file)
+ except OSError as err:
+ warnings.warn(
+ "TRAPpy: Cache not created due to OS error: {0}".format(err))
+
self.finalize_objects()
if self.normalize_time:
@@ -265,6 +349,8 @@ is part of the trace.
cls_for_unique_word = {}
for trace_name in self.class_definitions.iterkeys():
trace_class = getattr(self, trace_name)
+ if trace_class.cached:
+ continue
unique_word = trace_class.unique_word
cls_for_unique_word[unique_word] = trace_class
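(Taken together, the ftrace.py changes store the cache in a hidden directory next to the trace: one CSV per event class, plus an md5sum of the trace contents and a params.json recording the parse window. The whole directory is discarded if either no longer matches. A simplified, standalone sketch of that validation logic, not the exact TRAPpy code:

    import hashlib
    import json
    import os
    import shutil

    def cache_is_valid(trace_path, params):
        """Return True if the cache beside trace_path matches its md5sum and params."""
        cache_path = os.path.join(
            os.path.dirname(os.path.abspath(trace_path)),
            '.' + os.path.basename(trace_path) + '.cache')
        md5_file = os.path.join(cache_path, 'md5sum')
        params_file = os.path.join(cache_path, 'params.json')

        if not all(os.path.exists(p) for p in (cache_path, md5_file, params_file)):
            return False

        with open(trace_path, 'rb') as f:
            trace_md5 = hashlib.md5(f.read()).hexdigest()
        with open(md5_file) as f:
            cached_md5 = f.read()
        with open(params_file) as f:
            cached_params = json.load(f)

        # A stale checksum or a different parse window invalidates the whole cache
        if cached_md5 != trace_md5 or cached_params != params:
            shutil.rmtree(cache_path)
            return False
        return True
)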