Diffstat (limited to 'trappy')
-rw-r--r--  trappy/bare_trace.py           15
-rw-r--r--  trappy/base.py                 66
-rw-r--r--  trappy/dynamic.py               2
-rw-r--r--  trappy/ftrace.py              366
-rw-r--r--  trappy/sched.py                12
-rw-r--r--  trappy/systrace.py             47
-rw-r--r--  trappy/tracing_mark_write.py   43
-rw-r--r--  trappy/utils.py                59
8 files changed, 465 insertions, 145 deletions
diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py
index 4900e8a..a953a60 100644
--- a/trappy/bare_trace.py
+++ b/trappy/bare_trace.py
@@ -27,13 +27,12 @@ class BareTrace(object):
"""
- def __init__(self, name="", build_df=True):
+ def __init__(self, name=""):
self.name = name
self.normalized_time = False
self.class_definitions = {}
self.trace_classes = []
self.basetime = 0
- self.build_df = build_df
def get_duration(self):
"""Returns the largest time value of all classes,
@@ -68,14 +67,13 @@ class BareTrace(object):
return filters
- def normalize_time(self, basetime=None):
+ def _normalize_time(self, basetime=None):
"""Normalize the time of all the trace classes
:param basetime: The offset which needs to be subtracted from
the time index
:type basetime: float
"""
- return # HACK: Since we're not normalizing anymore after the fact
if basetime is not None:
self.basetime = basetime
@@ -135,8 +133,13 @@ class BareTrace(object):
setattr(self, name, event)
def finalize_objects(self):
- if not self.build_df:
- return
for trace_class in self.trace_classes:
+ # If cached, don't need to do any other DF operation
+ if trace_class.cached:
+ continue
+ trace_class.tracer = self
trace_class.create_dataframe()
trace_class.finalize_object()
+
+ def generate_data_dict(self):
+ return None
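For reference, the `_normalize_time` path above ends up calling each trace class's `Base.normalize_time`, which subtracts `basetime` from the Time index so traces start at zero. A minimal standalone sketch of that operation (plain pandas, not the TRAPpy API):

    import pandas as pd

    def normalize_time(df, basetime):
        # Shift the Time index so the first event lands at 0.0
        if basetime and not df.empty:
            df = df.copy()
            df.index = df.index - basetime
        return df

    df = pd.DataFrame({"load": [10, 20]},
                      index=pd.Index([100.5, 101.0], name="Time"))
    print(normalize_time(df, 100.5).index.tolist())   # [0.0, 0.5]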
diff --git a/trappy/base.py b/trappy/base.py
index c0238cf..8a7fb38 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -92,22 +92,29 @@ class Base(object):
e.g. "sched_switch:") but it can be anything else for trace points
generated using trace_printk().
- :param parse_raw: If :code:`True`, raw trace data (-R option) to
+ :param parse_raw: If :code:`True`, raw trace data (-r option) to
trace-cmd will be used
+ :param fallback: If :code:`True`, the parsing class will be used
+ only if no other candidate class's unique_word matched. Subclasses
+ should override this (e.g. TracingMarkWrite uses it)
+
This class acts as a base class for all TRAPpy events
"""
- def __init__(self, parse_raw=False):
+ def __init__(self, parse_raw=False, fallback=False):
+ self.fallback = fallback
+ self.tracer = None
self.data_frame = pd.DataFrame()
+ self.line_array = []
self.data_array = []
self.time_array = []
self.comm_array = []
self.pid_array = []
self.tgid_array = []
self.cpu_array = []
- self.callback = None
self.parse_raw = parse_raw
+ self.cached = False
def finalize_object(self):
pass
@@ -146,7 +153,7 @@ class Base(object):
return ret
- def append_data(self, time, comm, pid, tgid, cpu, data):
+ def append_data(self, time, comm, pid, tgid, cpu, line, data):
"""Append data parsed from a line to the corresponding arrays
The :mod:`DataFrame` will be created from this when the whole trace
@@ -172,15 +179,32 @@ class Base(object):
self.pid_array.append(pid)
self.tgid_array.append(tgid)
self.cpu_array.append(cpu)
+ self.line_array.append(line)
self.data_array.append(data)
- if not self.callback:
+ def string_cast(self, string, type):
+ """ Attempt to convert string to another type
+
+ Here we attempt to cast string to a type. Currently only
+ integer conversion is supported with future expansion
+ left open to other types.
+
+ :param string: The value to convert.
+ :type string: str
+
+ :param type: The type to convert to.
+ :type type: type
+ """
+ # Currently this function only supports int conversion
+ if type != int:
return
- data_dict = self.generate_data_dict(comm, pid, cpu, data)
- self.callback(time, data_dict)
+ # Handle false positives for negative numbers
+ if not string.lstrip("-").isdigit():
+ return string
+ return int(string)
- def generate_data_dict(self, comm, pid, tgid, cpu, data_str):
- data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu}
+ def generate_data_dict(self, data_str):
+ data_dict = {}
prev_key = None
for field in data_str.split():
if "=" not in field:
@@ -190,10 +214,7 @@ class Base(object):
data_dict[prev_key] += ' ' + field
continue
(key, value) = field.split('=', 1)
- try:
- value = int(value)
- except ValueError:
- pass
+ value = self.string_cast(value, int)
data_dict[key] = value
prev_key = key
return data_dict
@@ -207,10 +228,11 @@ class Base(object):
check_memory_usage = True
check_memory_count = 1
- for (comm, pid, tgid, cpu, data_str) in zip(self.comm_array, self.pid_array,
+ for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
self.tgid_array, self.cpu_array,
- self.data_array):
- data_dict = self.generate_data_dict(comm, pid, tgid, cpu, data_str)
+ self.line_array, self.data_array):
+ data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
+ data_dict.update(self.generate_data_dict(data_str))
# When running out of memory, Pandas has been observed to segfault
# rather than throwing a proper Python error.
@@ -244,6 +266,7 @@ class Base(object):
self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)
self.time_array = []
+ self.line_array = []
self.comm_array = []
self.pid_array = []
self.cpu_array = []
@@ -257,6 +280,14 @@ class Base(object):
"""
self.data_frame.to_csv(fname)
+ def read_csv(self, fname):
+ """Read the csv data into a DataFrame
+
+ :param fname: The name of the CSV file
+ :type fname: str
+ """
+ self.data_frame = pd.read_csv(fname, index_col = 0)
+
def normalize_time(self, basetime):
"""Substract basetime from the Time of the data frame
@@ -264,9 +295,6 @@ class Base(object):
the time index
:type basetime: float
"""
- # HACK: We don't normalize anymore after the fact
- return
-
if basetime and not self.data_frame.empty:
self.data_frame.reset_index(inplace=True)
self.data_frame["Time"] = self.data_frame["Time"] - basetime
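The generate_data_dict/string_cast pair above implements the key=value field parsing: values that look like (possibly negative) integers are cast to int, everything else stays a string, and a field with no '=' is folded into the previous value (task names can contain spaces). A standalone sketch of that logic (illustrative; it mirrors the diff rather than importing it):

    def string_cast(string, to_type):
        # Only int conversion is supported; lstrip("-") keeps negative
        # numbers from failing the isdigit() test.
        if to_type != int:
            return string
        if not string.lstrip("-").isdigit():
            return string
        return int(string)

    def parse_fields(data_str):
        data_dict = {}
        prev_key = None
        for field in data_str.split():
            if "=" not in field:
                # Continuation of the previous value, e.g. a comm with spaces
                data_dict[prev_key] += ' ' + field
                continue
            key, value = field.split('=', 1)
            data_dict[key] = string_cast(value, int)
            prev_key = key
        return data_dict

    print(parse_fields("comm=foo bar pid=42 prio=-1"))
    # {'comm': 'foo bar', 'pid': 42, 'prio': -1}  (key order may vary)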
diff --git a/trappy/dynamic.py b/trappy/dynamic.py
index f764232..5b094ce 100644
--- a/trappy/dynamic.py
+++ b/trappy/dynamic.py
@@ -81,7 +81,7 @@ def register_dynamic_ftrace(class_name, unique_word, scope="all",
the parsing of events and group them together)
:type scope: str
- :param parse_raw: If, true, raw trace output (-R flag)
+ :param parse_raw: If true, raw trace output (-r flag)
will be used
:type parse_raw: bool
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index 07cb94b..3948cb6 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -18,9 +18,13 @@
# pylint: disable=no-member
import itertools
+import json
import os
import re
import pandas as pd
+import hashlib
+import shutil
+import warnings
from trappy.bare_trace import BareTrace
from trappy.utils import listify
@@ -46,6 +50,12 @@ def _plot_freq_hists(allfreqs, what, axis, title):
trappy.plot_utils.plot_hist(allfreqs[actor], ax, this_title, "KHz", 20,
"Frequency", xlim, "default")
+SPECIAL_FIELDS_RE = re.compile(
+ r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
+ r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
+ r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)"
+)
+
class GenericFTrace(BareTrace):
"""Generic class to parse output of FTrace. This class is meant to be
subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
@@ -56,15 +66,70 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
dynamic_classes = {}
- def __init__(self, name="", normalize_time=True, scope="all",
- events=[], event_callbacks={}, window=(0, None),
- abs_window=(0, None), build_df=True):
- super(GenericFTrace, self).__init__(name, build_df)
+ disable_cache = False
+
+ def _trace_cache_path(self):
+ trace_file = self.trace_path
+ cache_dir = '.' + os.path.basename(trace_file) + '.cache'
+ tracefile_dir = os.path.dirname(os.path.abspath(trace_file))
+ cache_path = os.path.join(tracefile_dir, cache_dir)
+ return cache_path
+
+ def _check_trace_cache(self, params):
+ cache_path = self._trace_cache_path()
+ md5file = os.path.join(cache_path, 'md5sum')
+ basetime_path = os.path.join(cache_path, 'basetime')
+ params_path = os.path.join(cache_path, 'params.json')
+
+ for path in [cache_path, md5file, params_path]:
+ if not os.path.exists(path):
+ return False
+
+ with open(md5file) as f:
+ cache_md5sum = f.read()
+ with open(basetime_path) as f:
+ self.basetime = float(f.read())
+ with open(self.trace_path, 'rb') as f:
+ trace_md5sum = hashlib.md5(f.read()).hexdigest()
+ with open(params_path) as f:
+ cache_params = json.dumps(json.load(f))
+
+ # Convert to a json string for comparison
+ params = json.dumps(params)
+
+ # check if cache is valid
+ if cache_md5sum != trace_md5sum or cache_params != params:
+ shutil.rmtree(cache_path)
+ return False
+ return True
+
+ def _create_trace_cache(self, params):
+ cache_path = self._trace_cache_path()
+ md5file = os.path.join(cache_path, 'md5sum')
+ basetime_path = os.path.join(cache_path, 'basetime')
+ params_path = os.path.join(cache_path, 'params.json')
+
+ if os.path.exists(cache_path):
+ shutil.rmtree(cache_path)
+ os.mkdir(cache_path)
+
+ md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest()
+ with open(md5file, 'w') as f:
+ f.write(md5sum)
- self.normalized_time = normalize_time
+ with open(basetime_path, 'w') as f:
+ f.write(str(self.basetime))
- if not hasattr(self, "needs_raw_parsing"):
- self.needs_raw_parsing = False
+ with open(params_path, 'w') as f:
+ json.dump(params, f)
+
+ def _get_csv_path(self, trace_class):
+ path = self._trace_cache_path()
+ return os.path.join(path, trace_class.__class__.__name__ + '.csv')
+
+ def __init__(self, name="", normalize_time=True, scope="all",
+ events=[], window=(0, None), abs_window=(0, None)):
+ super(GenericFTrace, self).__init__(name)
self.class_definitions.update(self.dynamic_classes.items())
self.__add_events(listify(events))
@@ -79,16 +144,13 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
for attr, class_def in self.class_definitions.iteritems():
trace_class = class_def()
- if event_callbacks.has_key(attr):
- trace_class.callback = event_callbacks[attr]
setattr(self, attr, trace_class)
self.trace_classes.append(trace_class)
- self.__parse_trace_file(self.trace_path, window, abs_window)
- if self.needs_raw_parsing and (self.trace_path_raw is not None):
- self.__parse_trace_file(self.trace_path_raw, window, abs_window,
- raw=True)
- self.finalize_objects()
+ # save parameters to complete init later
+ self.normalize_time = normalize_time
+ self.window = window
+ self.abs_window = abs_window
@classmethod
def register_parser(cls, cobject, scope):
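The cache layout introduced above is a hidden per-trace directory holding one CSV per event class plus md5sum, basetime and params.json files; a cache is only trusted if both the trace file's md5sum and the parse parameters match. A rough standalone sketch of the validity check (same file names as the diff; the helper shape is illustrative):

    import hashlib, json, os, shutil

    def cache_is_valid(cache_path, trace_path, params):
        md5file = os.path.join(cache_path, 'md5sum')
        params_path = os.path.join(cache_path, 'params.json')
        if not (os.path.exists(md5file) and os.path.exists(params_path)):
            return False
        with open(trace_path, 'rb') as f:
            trace_md5 = hashlib.md5(f.read()).hexdigest()
        with open(md5file) as f:
            cached_md5 = f.read()
        with open(params_path) as f:
            cached_params = json.dumps(json.load(f))
        if cached_md5 != trace_md5 or cached_params != json.dumps(params):
            shutil.rmtree(cache_path)   # stale cache: drop it and reparse
            return False
        return True

Comparing json.dumps of both sides rather than the raw objects is deliberate: params contains the window tuples, and JSON round-trips tuples as lists, so both values are normalized to the same serialized form before the comparison.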
@@ -129,6 +191,47 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
if cobject == obj:
del scope_classes[name]
+ def _do_parse(self):
+ params = {'window': self.window, 'abs_window': self.abs_window}
+ if not self.__class__.disable_cache and self._check_trace_cache(params):
+ # Read csv into frames
+ for trace_class in self.trace_classes:
+ try:
+ csv_file = self._get_csv_path(trace_class)
+ trace_class.read_csv(csv_file)
+ trace_class.cached = True
+ except:
+ warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class)
+ warnings.warn(warnstr)
+
+ if all([c.cached for c in self.trace_classes]):
+ if self.normalize_time:
+ self._normalize_time()
+ return
+
+ self.__parse_trace_file(self.trace_path)
+
+ self.finalize_objects()
+
+ if not self.__class__.disable_cache:
+ try:
+ # Recreate basic cache directories only if nothing cached
+ if not any([c.cached for c in self.trace_classes]):
+ self._create_trace_cache(params)
+
+ # Write out only events that weren't cached before
+ for trace_class in self.trace_classes:
+ if trace_class.cached:
+ continue
+ csv_file = self._get_csv_path(trace_class)
+ trace_class.write_csv(csv_file)
+ except OSError as err:
+ warnings.warn(
+ "TRAPpy: Cache not created due to OS error: {0}".format(err))
+
+ if self.normalize_time:
+ self._normalize_time()
+
def __add_events(self, events):
"""Add events to the class_definitions
@@ -161,73 +264,66 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
self.class_definitions[event_name] = trace_class
- def __populate_data(self, fin, cls_for_unique_word, window, abs_window):
- """Append to trace data from a txt trace"""
-
- def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
- for unique_word in unique_words:
- if unique_word in line:
- return True
- return False
+ def __get_trace_class(self, line, cls_word):
+ trace_class = None
+ for unique_word, cls in cls_word.iteritems():
+ if unique_word in line:
+ trace_class = cls
+ if not cls.fallback:
+ return trace_class
+ return trace_class
- special_fields_regexp = r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
- r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+(?P<timestamp>[0-9]+\.[0-9]+):"
- special_fields_regexp = re.compile(special_fields_regexp)
- start_match = re.compile(r"[A-Za-z0-9_]+=")
+ def __populate_data(self, fin, cls_for_unique_word):
+ """Append to trace data from a txt trace"""
actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin)
actual_trace = itertools.takewhile(self.trace_hasnt_finished(),
actual_trace)
- for line in itertools.ifilter(contains_unique_word, actual_trace):
- for unique_word, cls in cls_for_unique_word.iteritems():
- if unique_word in line:
- trace_class = cls
- break
- else:
- raise FTraceParseError("No unique word in '{}'".format(line))
+ for line in actual_trace:
+ trace_class = self.__get_trace_class(line, cls_for_unique_word)
+ if not trace_class:
+ self.lines += 1
+ continue
line = line[:-1]
- special_fields_match = special_fields_regexp.match(line)
- if not special_fields_match:
- raise FTraceParseError("Couldn't match special fields in '{}'".format(line))
- comm = special_fields_match.group('comm')
- pid = int(special_fields_match.group('pid'))
- cpu = int(special_fields_match.group('cpu'))
- tgid = special_fields_match.group('tgid')
- if not tgid or tgid[0] == '-':
- tgid = -1
- else:
- tgid = int(tgid)
-
- timestamp = float(special_fields_match.group('timestamp'))
+ fields_match = SPECIAL_FIELDS_RE.match(line)
+ if not fields_match:
+ raise FTraceParseError("Couldn't match fields in '{}'".format(line))
+ comm = fields_match.group('comm')
+ pid = int(fields_match.group('pid'))
+ cpu = int(fields_match.group('cpu'))
+ tgid = fields_match.group('tgid')
+ tgid = -1 if (not tgid or '-' in tgid) else int(tgid)
+
+ # The timestamp, depending on the trace_clock configuration, can be
+ # reported either in [s].[us] or [ns] format. Ensure that we always
+ # generate DataFrames whose index is expressed in:
+ # [s].[decimals]
+ timestamp = float(fields_match.group('timestamp'))
+ if not fields_match.group('us'):
+ timestamp /= 1e9
+ data_str = fields_match.group('data')
if not self.basetime:
self.basetime = timestamp
- if (timestamp < window[0] + self.basetime) or \
- (timestamp < abs_window[0]):
+ if (timestamp < self.window[0] + self.basetime) or \
+ (timestamp < self.abs_window[0]):
+ self.lines += 1
continue
- if (window[1] and timestamp > window[1] + self.basetime) or \
- (abs_window[1] and timestamp > abs_window[1]):
+ if (self.window[1] and timestamp > self.window[1] + self.basetime) or \
+ (self.abs_window[1] and timestamp > self.abs_window[1]):
return
- try:
- data_start_idx = start_match.search(line).start()
- except AttributeError:
- continue
-
- if self.normalized_time:
- timestamp = timestamp - self.basetime
-
- data_str = line[data_start_idx:]
-
# Remove empty arrays from the trace
- data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
+ if "={}" in data_str:
+ data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
- trace_class.append_data(timestamp, comm, pid, tgid, cpu, data_str)
+ trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str)
+ self.lines += 1
def trace_hasnt_started(self):
"""Return a function that accepts a line and returns true if this line
@@ -241,7 +337,7 @@ is not part of the trace.
started).
"""
- return lambda x: False
+ return lambda line: not SPECIAL_FIELDS_RE.match(line)
def trace_hasnt_finished(self):
"""Return a function that accepts a line and returns true if this line
@@ -260,15 +356,14 @@ is part of the trace.
"""
return lambda x: True
- def __parse_trace_file(self, trace_file, window, abs_window, raw=False):
+ def __parse_trace_file(self, trace_file):
"""parse the trace and create a pandas DataFrame"""
# Memoize the unique words to speed up parsing the trace file
cls_for_unique_word = {}
for trace_name in self.class_definitions.iterkeys():
trace_class = getattr(self, trace_name)
-
- if self.needs_raw_parsing and (trace_class.parse_raw != raw):
+ if trace_class.cached:
continue
unique_word = trace_class.unique_word
@@ -279,8 +374,9 @@ is part of the trace.
try:
with open(trace_file) as fin:
+ self.lines = 0
self.__populate_data(
- fin, cls_for_unique_word, window, abs_window)
+ fin, cls_for_unique_word)
except FTraceParseError as e:
raise ValueError('Failed to parse ftrace file {}:\n{}'.format(
trace_file, str(e)))
@@ -319,6 +415,60 @@ is part of the trace.
return ret
+ def apply_callbacks(self, fn_map, *kwarg):
+ """
+ Apply callback functions to trace events in chronological order.
+
+ This method iterates over a user-specified subset of the available trace
+ event dataframes, calling different user-specified functions for each
+ event type. These functions are passed a dictionary mapping 'Index' and
+ the column names to their values for that row.
+
+ For example, to iterate over trace t, applying your functions callback_fn1
+ and callback_fn2 to each sched_switch and sched_wakeup event respectively:
+
+ t.apply_callbacks({
+ "sched_switch": callback_fn1,
+ "sched_wakeup": callback_fn2
+ })
+ """
+ dfs = {event: getattr(self, event).data_frame for event in fn_map.keys()}
+ events = [event for event in fn_map.keys() if not dfs[event].empty]
+ iters = {event: dfs[event].itertuples() for event in events}
+ next_rows = {event: iterator.next() for event,iterator in iters.iteritems()}
+
+ # Column names beginning with underscore will not be preserved in tuples
+ # due to constraints on namedtuple field names, so store mappings from
+ # column name to column number for each trace event.
+ col_idxs = {event: {
+ name: idx for idx, name in enumerate(
+ ['Index'] + dfs[event].columns.tolist()
+ )
+ } for event in events}
+
+ def getLine(event):
+ line_col_idx = col_idxs[event]['__line']
+ return next_rows[event][line_col_idx]
+
+ while events:
+ event_name = min(events, key=getLine)
+ event_tuple = next_rows[event_name]
+
+ event_dict = {
+ col: event_tuple[idx] for col, idx in col_idxs[event_name].iteritems()
+ }
+
+ if kwarg:
+ fn_map[event_name](event_dict, kwarg)
+ else:
+ fn_map[event_name](event_dict)
+
+ event_row = next(iters[event_name], None)
+ if event_row:
+ next_rows[event_name] = event_row
+ else:
+ events.remove(event_name)
+
def plot_freq_hists(self, map_label, ax):
"""Plot histograms for each actor input and output frequency
@@ -500,16 +650,26 @@ class FTrace(GenericFTrace):
"""
def __init__(self, path=".", name="", normalize_time=True, scope="all",
- events=[], event_callbacks={}, window=(0, None),
- abs_window=(0, None), build_df=True):
- self.trace_path, self.trace_path_raw = self.__process_path(path)
- self.needs_raw_parsing = True
-
+ events=[], window=(0, None), abs_window=(0, None)):
+ super(FTrace, self).__init__(name, normalize_time, scope, events,
+ window, abs_window)
+ self.raw_events = []
+ self.trace_path = self.__process_path(path)
self.__populate_metadata()
+ self._do_parse()
- super(FTrace, self).__init__(name, normalize_time, scope, events,
- event_callbacks, window, abs_window,
- build_df)
+ def __warn_about_txt_trace_files(self, trace_dat, raw_txt, formatted_txt):
+ self.__get_raw_event_list()
+ warn_text = ( "You appear to be parsing both raw and formatted "
+ "trace files. TRAPpy now uses a unified format. "
+ "If you have the {} file, remove the .txt files "
+ "and try again. If not, you can manually move "
+ "lines with the following events from {} to {} :"
+ ).format(trace_dat, raw_txt, formatted_txt)
+ for raw_event in self.raw_events:
+ warn_text = warn_text+" \"{}\"".format(raw_event)
+
+ raise RuntimeError(warn_text)
def __process_path(self, basepath):
"""Process the path and return the path to the trace text file"""
@@ -520,32 +680,42 @@ class FTrace(GenericFTrace):
trace_name = os.path.join(basepath, "trace")
trace_txt = trace_name + ".txt"
- trace_raw = trace_name + ".raw.txt"
+ trace_raw_txt = trace_name + ".raw.txt"
trace_dat = trace_name + ".dat"
if os.path.isfile(trace_dat):
- # Both TXT and RAW traces must always be generated
- if not os.path.isfile(trace_txt) or \
- not os.path.isfile(trace_raw):
+ # Warn users if raw.txt files are present
+ if os.path.isfile(trace_raw_txt):
+ self.__warn_about_txt_trace_files(trace_dat, trace_raw_txt, trace_txt)
+ # TXT traces must always be generated
+ if not os.path.isfile(trace_txt):
self.__run_trace_cmd_report(trace_dat)
- # TXT (and RAW) traces must match the most recent binary trace
+ # TXT traces must match the most recent binary trace
elif os.path.getmtime(trace_txt) < os.path.getmtime(trace_dat):
self.__run_trace_cmd_report(trace_dat)
- if not os.path.isfile(trace_raw):
- trace_raw = None
+ return trace_txt
- return trace_txt, trace_raw
+ def __get_raw_event_list(self):
+ self.raw_events = []
+ # Generate list of events which need to be parsed in raw format
+ for event_class in (self.thermal_classes, self.sched_classes, self.dynamic_classes):
+ for trace_class in event_class.itervalues():
+ raw = getattr(trace_class, 'parse_raw', None)
+ if raw:
+ name = getattr(trace_class, 'name', None)
+ if name:
+ self.raw_events.append(name)
def __run_trace_cmd_report(self, fname):
- """Run "trace-cmd report fname > fname.txt"
- and "trace-cmd report -R fname > fname.raw.txt"
+ """Run "trace-cmd report [ -r raw_event ]* fname > fname.txt"
- The resulting traces are stored in files with extension ".txt"
- and ".raw.txt" respectively. If fname is "my_trace.dat", the
- trace is stored in "my_trace.txt" and "my_trace.raw.txt". The
- contents of the destination files are overwritten if they
- exist.
+ The resulting trace is stored in files with extension ".txt". If
+ fname is "my_trace.dat", the trace is stored in "my_trace.txt". The
+ contents of the destination file is overwritten if it exists.
+ Trace events which require unformatted output (raw_event == True)
+ are added to the command line with one '-r <event>' per event, and
+ trace-cmd then prints those events without formatting.
"""
from subprocess import check_output
@@ -555,8 +725,12 @@ class FTrace(GenericFTrace):
if not os.path.isfile(fname):
raise IOError("No such file or directory: {}".format(fname))
- raw_trace_output = os.path.splitext(fname)[0] + ".raw.txt"
trace_output = os.path.splitext(fname)[0] + ".txt"
+ # Ask for the raw event list and request them unformatted
+ self.__get_raw_event_list()
+ for raw_event in self.raw_events:
+ cmd.extend([ '-r', raw_event ])
+
cmd.append(fname)
with open(os.devnull) as devnull:
@@ -567,17 +741,9 @@ class FTrace(GenericFTrace):
raise OSError(2, "trace-cmd not found in PATH, is it installed?")
else:
raise
-
- # Add the -R flag to the trace-cmd
- # for raw parsing
- cmd.insert(-1, "-R")
- raw_out = check_output(cmd, stderr=devnull)
-
with open(trace_output, "w") as fout:
fout.write(out)
- with open(raw_trace_output, "w") as fout:
- fout.write(raw_out)
def __populate_metadata(self):
"""Populates trace metadata"""
@@ -600,6 +766,6 @@ class FTrace(GenericFTrace):
setattr(self, "_" + match.group(1), match.group(2))
metadata_keys.remove(match.group(1))
- if re.search(r"^\s+[^\[]+-\d+\s+\[\d+\]\s+\d+\.\d+:", line):
+ if SPECIAL_FIELDS_RE.match(line):
# Reached a valid trace line, abort metadata population
return
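To see what the unified SPECIAL_FIELDS_RE pulls out of a formatted trace line, here it is applied to a made-up sample (the 'us' group doubles as the seconds-vs-nanoseconds clock detector):

    import re

    SPECIAL_FIELDS_RE = re.compile(
        r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"
        r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"
        r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)"
    )

    line = "   <idle>-0     (-----) [002] d..2  100.123456: " \
           "sched_switch: prev_comm=swapper"
    m = SPECIAL_FIELDS_RE.match(line)
    print("%s %s %s %s" % (m.group('comm'), m.group('pid'),
                           m.group('tgid'), m.group('cpu')))
    # <idle> 0 ----- 002

    timestamp = float(m.group('timestamp'))
    if not m.group('us'):       # bare-nanosecond trace_clock: scale to seconds
        timestamp /= 1e9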
diff --git a/trappy/sched.py b/trappy/sched.py
index ebcb79d..84e30be 100644
--- a/trappy/sched.py
+++ b/trappy/sched.py
@@ -104,14 +104,16 @@ class SchedSwitch(Base):
"""Parse sched_switch"""
unique_word = "sched_switch:"
+ parse_raw = True
def __init__(self):
- super(SchedSwitch, self).__init__(parse_raw=True)
+ super(SchedSwitch, self).__init__(parse_raw=self.parse_raw)
- def append_data(self, time, comm, pid, tgid, cpu, data):
- data_rep = data.replace(" ==> ", " ")
- super(SchedSwitch, self).append_data(time, comm, pid, tgid, cpu,
- data_rep)
+ def create_dataframe(self):
+ self.data_array = [line.replace(" ==> ", " ", 1)
+ for line in self.data_array]
+
+ super(SchedSwitch, self).create_dataframe()
register_ftrace_parser(SchedSwitch, "sched")
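sched_switch payloads carry a " ==> " separator between the prev_* and next_* fields, which would otherwise derail key=value splitting; it is now stripped once per line at DataFrame-creation time instead of on every append_data call. For example (payload made up):

    data = "prev_comm=foo prev_pid=1 prev_state=S ==> next_comm=bar next_pid=2"
    print(data.replace(" ==> ", " ", 1))
    # prev_comm=foo prev_pid=1 prev_state=S next_comm=bar next_pid=2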
diff --git a/trappy/systrace.py b/trappy/systrace.py
index c11601d..c7e65e4 100644
--- a/trappy/systrace.py
+++ b/trappy/systrace.py
@@ -14,6 +14,10 @@
#
from trappy.ftrace import GenericFTrace
+import re
+
+SYSTRACE_EVENT = re.compile(
+ r'^(?P<event>[A-Z])(\|(?P<pid>\d+)\|(?P<func>[^|]*)(\|(?P<data>.*))?)?')
class drop_before_trace(object):
"""Object that, when called, returns True if the line is not part of
@@ -23,24 +27,26 @@ the trace
the headers that start with #
"""
- def __init__(self):
+ def __init__(self, tracer):
self.before_begin_trace = True
- self.before_script_trace_data = True
self.before_actual_trace = True
+ self.tracer = tracer
def __call__(self, line):
if self.before_begin_trace:
if line.startswith("<!-- BEGIN TRACE -->") or \
line.startswith("<title>Android System Trace</title>"):
self.before_begin_trace = False
- elif self.before_script_trace_data:
+ elif self.before_actual_trace:
if line.startswith(' <script class="trace-data"') or \
line.startswith(" var linuxPerfData"):
- self.before_script_trace_data = False
- elif not line.startswith("#"):
- self.before_actual_trace = False
+ self.before_actual_trace = False
- return self.before_actual_trace
+ if not self.before_actual_trace:
+ base_call = super(SysTrace, self.tracer).trace_hasnt_started()
+ return base_call(line)
+ else:
+ return True
class SysTrace(GenericFTrace):
"""A wrapper that parses all events of a SysTrace run
@@ -50,23 +56,21 @@ class SysTrace(GenericFTrace):
"""
def __init__(self, path=".", name="", normalize_time=True, scope="all",
- events=[], event_callbacks={}, window=(0, None),
- abs_window=(0, None), build_df=True):
+ events=[], window=(0, None), abs_window=(0, None)):
self.trace_path = path
super(SysTrace, self).__init__(name, normalize_time, scope, events,
- event_callbacks, window, abs_window,
- build_df)
- if not build_df:
- return
+ window, abs_window)
+
+ self._do_parse()
try:
self._cpus = 1 + self.sched_switch.data_frame["__cpu"].max()
except AttributeError:
pass
def trace_hasnt_started(self):
- return drop_before_trace()
+ return drop_before_trace(self)
def trace_hasnt_finished(self):
"""Return a function that returns True while the current line is still part of the trace
@@ -78,3 +82,18 @@ class SysTrace(GenericFTrace):
"""
return lambda x: not x.endswith("</script>\n")
+
+ def generate_data_dict(self, data_str):
+ """ Custom parsing for systrace's userspace events """
+ data_dict = None
+
+ match = SYSTRACE_EVENT.match(data_str)
+ if match:
+ data_dict = {
+ 'event': match.group('event'),
+ 'pid' : int(match.group('pid')) if match.group('pid') else None,
+ 'func' : match.group('func' ),
+ 'data' : match.group('data' )
+ }
+
+ return data_dict
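Systrace userspace events arrive as tracing_mark_write payloads of the form B|pid|func|data (plus bare E end markers and C counters), which SYSTRACE_EVENT splits apart. A quick check of the pattern against made-up payloads:

    import re

    SYSTRACE_EVENT = re.compile(
        r'^(?P<event>[A-Z])(\|(?P<pid>\d+)\|(?P<func>[^|]*)(\|(?P<data>.*))?)?')

    m = SYSTRACE_EVENT.match('B|1234|doFrame|someData')
    print("%s %s %s %s" % (m.group('event'), m.group('pid'),
                           m.group('func'), m.group('data')))
    # B 1234 doFrame someData

    m = SYSTRACE_EVENT.match('E')       # end events carry no payload
    print("%s %s" % (m.group('event'), m.group('pid')))
    # E None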
diff --git a/trappy/tracing_mark_write.py b/trappy/tracing_mark_write.py
new file mode 100644
index 0000000..49a23b0
--- /dev/null
+++ b/trappy/tracing_mark_write.py
@@ -0,0 +1,43 @@
+# Copyright 2017 ARM Limited, Google and contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""This module contains the class for representing a tracing_mark_write
+trace_event used for ftrace events injected from userspace.
+"""
+
+from trappy.base import Base
+from trappy.dynamic import register_ftrace_parser
+
+class TracingMarkWrite(Base):
+ """Parse tracing_mark_write events that couldn't be matched with more specific unique words
+ This class is always used as a fallback if nothing more specific could match the particular
+ tracing_mark_write event.
+ """
+
+ unique_word = "tracing_mark_write"
+
+ def generate_data_dict(self, data_str):
+ if self.tracer:
+ data_dict = self.tracer.generate_data_dict(data_str)
+ if data_dict:
+ return data_dict
+
+ data_dict = { 'string': data_str }
+ return data_dict
+
+ def __init__(self):
+ super(TracingMarkWrite, self).__init__(fallback=True)
+
+register_ftrace_parser(TracingMarkWrite)
diff --git a/trappy/utils.py b/trappy/utils.py
index eb73752..47ef4ab 100644
--- a/trappy/utils.py
+++ b/trappy/utils.py
@@ -13,6 +13,9 @@
# limitations under the License.
#
+import pandas as pd
+import numpy as np
+
"""Generic functions that can be used in multiple places in trappy
"""
@@ -102,3 +105,59 @@ def handle_duplicate_index(data,
dup_index_left += 1
return data.reindex(new_index)
+
+# Iterate fast over all rows in a data frame and apply fn
+def apply_callback(df, fn, *kwargs):
+ iters = df.itertuples()
+ event_tuple = iters.next()
+
+ # Column names beginning with underscore will not be preserved in tuples
+ # due to constraints on namedtuple field names, so store mappings from
+ # column name to column number for each trace event.
+ col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) }
+
+ while True:
+ if not event_tuple:
+ break
+ event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() }
+
+ if kwargs:
+ fn(event_dict, kwargs)
+ else:
+ fn(event_dict)
+
+ event_tuple = next(iters, None)
+
+
+def merge_dfs(pr_df, sec_df, pivot):
+ # Keep track of last secondary event
+ pivot_map = {}
+
+ # An array accumulating dicts with merged data
+ merged_data = []
+ def df_fn(data):
+ # Store the latest secondary info
+ if data['Time'][0] == 'secondary':
+ pivot_map[data[pivot]] = data
+ # Get rid of primary/secondary labels
+ data['Time'] = data['Time'][1]
+ return
+
+ # Propagate latest secondary info
+ for key, value in data.iteritems():
+ if key == pivot:
+ continue
+ # Fast check for whether value is NaN (faster than np.isnan + try/except)
+ if value != value and pivot_map.has_key(data[pivot]):
+ data[key] = pivot_map[data[pivot]][key]
+
+ # Get rid of primary/secondary labels
+ data['Time'] = data['Time'][1]
+ merged_data.append(data)
+
+ df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort(columns='__line')
+ apply_callback(df, df_fn)
+ merged_df = pd.DataFrame.from_dict(merged_data)
+ merged_df.set_index('Time', inplace=True)
+
+ return merged_df
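A rough usage sketch for merge_dfs: the two frames are interleaved in trace order via the __line column, secondary rows update a per-pivot snapshot, and NaN cells in later primary rows are filled from that snapshot. Column names below are illustrative, and this assumes the Python 2 / older-pandas environment the module targets (it relies on DataFrame.sort):

    import pandas as pd
    from trappy.utils import merge_dfs

    pr_df = pd.DataFrame(
        {'cpu': [0, 0], 'load': [10, 30],
         'freq': [float('nan')] * 2, '__line': [1, 3]},
        index=pd.Index([0.1, 0.3], name='Time'))
    sec_df = pd.DataFrame(
        {'cpu': [0], 'load': [float('nan')],
         'freq': [500], '__line': [2]},
        index=pd.Index([0.2], name='Time'))

    merged = merge_dfs(pr_df, sec_df, pivot='cpu')
    # The primary row at Time 0.3 now has freq=500, propagated from the
    # secondary event at Time 0.2; secondary rows themselves are dropped.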