diff options
Diffstat (limited to 'trappy')
-rw-r--r-- | trappy/bare_trace.py | 15 | ||||
-rw-r--r-- | trappy/base.py | 66 | ||||
-rw-r--r-- | trappy/dynamic.py | 2 | ||||
-rw-r--r-- | trappy/ftrace.py | 366 | ||||
-rw-r--r-- | trappy/sched.py | 12 | ||||
-rw-r--r-- | trappy/systrace.py | 47 | ||||
-rw-r--r-- | trappy/tracing_mark_write.py | 43 | ||||
-rw-r--r-- | trappy/utils.py | 59 |
8 files changed, 465 insertions, 145 deletions
diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py index 4900e8a..a953a60 100644 --- a/trappy/bare_trace.py +++ b/trappy/bare_trace.py @@ -27,13 +27,12 @@ class BareTrace(object): """ - def __init__(self, name="", build_df=True): + def __init__(self, name=""): self.name = name self.normalized_time = False self.class_definitions = {} self.trace_classes = [] self.basetime = 0 - self.build_df = build_df def get_duration(self): """Returns the largest time value of all classes, @@ -68,14 +67,13 @@ class BareTrace(object): return filters - def normalize_time(self, basetime=None): + def _normalize_time(self, basetime=None): """Normalize the time of all the trace classes :param basetime: The offset which needs to be subtracted from the time index :type basetime: float """ - return # HACK: Since we're not normalizing anymore after the fact if basetime is not None: self.basetime = basetime @@ -135,8 +133,13 @@ class BareTrace(object): setattr(self, name, event) def finalize_objects(self): - if not self.build_df: - return for trace_class in self.trace_classes: + # If cached, don't need to do any other DF operation + if trace_class.cached: + continue + trace_class.tracer = self trace_class.create_dataframe() trace_class.finalize_object() + + def generate_data_dict(self): + return None diff --git a/trappy/base.py b/trappy/base.py index c0238cf..8a7fb38 100644 --- a/trappy/base.py +++ b/trappy/base.py @@ -92,22 +92,29 @@ class Base(object): e.g. "sched_switch:") but it can be anything else for trace points generated using trace_printk(). - :param parse_raw: If :code:`True`, raw trace data (-R option) to + :param parse_raw: If :code:`True`, raw trace data (-r option) to trace-cmd will be used + :param fallback: If :code:`True`, the parsing class will be used + only if no other candidate class's unique_word matched. subclasses + should override this (for ex. TracingMarkWrite uses it) + This class acts as a base class for all TRAPpy events """ - def __init__(self, parse_raw=False): + def __init__(self, parse_raw=False, fallback=False): + self.fallback = fallback + self.tracer = None self.data_frame = pd.DataFrame() + self.line_array = [] self.data_array = [] self.time_array = [] self.comm_array = [] self.pid_array = [] self.tgid_array = [] self.cpu_array = [] - self.callback = None self.parse_raw = parse_raw + self.cached = False def finalize_object(self): pass @@ -146,7 +153,7 @@ class Base(object): return ret - def append_data(self, time, comm, pid, tgid, cpu, data): + def append_data(self, time, comm, pid, tgid, cpu, line, data): """Append data parsed from a line to the corresponding arrays The :mod:`DataFrame` will be created from this when the whole trace @@ -172,15 +179,32 @@ class Base(object): self.pid_array.append(pid) self.tgid_array.append(tgid) self.cpu_array.append(cpu) + self.line_array.append(line) self.data_array.append(data) - if not self.callback: + def string_cast(self, string, type): + """ Attempt to convert string to another type + + Here we attempt to cast string to a type. Currently only + integer conversion is supported with future expansion + left open to other types. + + :param string: The value to convert. + :type string: str + + :param type: The type to convert to. + :type type: type + """ + # Currently this function only supports int conversion + if type != int: return - data_dict = self.generate_data_dict(comm, pid, cpu, data) - self.callback(time, data_dict) + # Handle false-positives for negative numbers + if not string.lstrip("-").isdigit(): + return string + return int(string) - def generate_data_dict(self, comm, pid, tgid, cpu, data_str): - data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu} + def generate_data_dict(self, data_str): + data_dict = {} prev_key = None for field in data_str.split(): if "=" not in field: @@ -190,10 +214,7 @@ class Base(object): data_dict[prev_key] += ' ' + field continue (key, value) = field.split('=', 1) - try: - value = int(value) - except ValueError: - pass + value = self.string_cast(value, int) data_dict[key] = value prev_key = key return data_dict @@ -207,10 +228,11 @@ class Base(object): check_memory_usage = True check_memory_count = 1 - for (comm, pid, tgid, cpu, data_str) in zip(self.comm_array, self.pid_array, + for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array, self.tgid_array, self.cpu_array, - self.data_array): - data_dict = self.generate_data_dict(comm, pid, tgid, cpu, data_str) + self.line_array, self.data_array): + data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line} + data_dict.update(self.generate_data_dict(data_str)) # When running out of memory, Pandas has been observed to segfault # rather than throwing a proper Python error. @@ -244,6 +266,7 @@ class Base(object): self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx) self.time_array = [] + self.line_array = [] self.comm_array = [] self.pid_array = [] self.cpu_array = [] @@ -257,6 +280,14 @@ class Base(object): """ self.data_frame.to_csv(fname) + def read_csv(self, fname): + """Read the csv data into a DataFrame + + :param fname: The name of the CSV file + :type fname: str + """ + self.data_frame = pd.read_csv(fname, index_col = 0) + def normalize_time(self, basetime): """Substract basetime from the Time of the data frame @@ -264,9 +295,6 @@ class Base(object): the time index :type basetime: float """ - # HACK: We don't normalize anymore after the fact - return - if basetime and not self.data_frame.empty: self.data_frame.reset_index(inplace=True) self.data_frame["Time"] = self.data_frame["Time"] - basetime diff --git a/trappy/dynamic.py b/trappy/dynamic.py index f764232..5b094ce 100644 --- a/trappy/dynamic.py +++ b/trappy/dynamic.py @@ -81,7 +81,7 @@ def register_dynamic_ftrace(class_name, unique_word, scope="all", the parsing of events and group them together) :type scope: str - :param parse_raw: If, true, raw trace output (-R flag) + :param parse_raw: If, true, raw trace output (-r flag) will be used :type parse_raw: bool diff --git a/trappy/ftrace.py b/trappy/ftrace.py index 07cb94b..3948cb6 100644 --- a/trappy/ftrace.py +++ b/trappy/ftrace.py @@ -18,9 +18,13 @@ # pylint: disable=no-member import itertools +import json import os import re import pandas as pd +import hashlib +import shutil +import warnings from trappy.bare_trace import BareTrace from trappy.utils import listify @@ -46,6 +50,12 @@ def _plot_freq_hists(allfreqs, what, axis, title): trappy.plot_utils.plot_hist(allfreqs[actor], ax, this_title, "KHz", 20, "Frequency", xlim, "default") +SPECIAL_FIELDS_RE = re.compile( + r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\ + r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\ + r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)" +) + class GenericFTrace(BareTrace): """Generic class to parse output of FTrace. This class is meant to be subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" @@ -56,15 +66,70 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" dynamic_classes = {} - def __init__(self, name="", normalize_time=True, scope="all", - events=[], event_callbacks={}, window=(0, None), - abs_window=(0, None), build_df=True): - super(GenericFTrace, self).__init__(name, build_df) + disable_cache = False + + def _trace_cache_path(self): + trace_file = self.trace_path + cache_dir = '.' + os.path.basename(trace_file) + '.cache' + tracefile_dir = os.path.dirname(os.path.abspath(trace_file)) + cache_path = os.path.join(tracefile_dir, cache_dir) + return cache_path + + def _check_trace_cache(self, params): + cache_path = self._trace_cache_path() + md5file = os.path.join(cache_path, 'md5sum') + basetime_path = os.path.join(cache_path, 'basetime') + params_path = os.path.join(cache_path, 'params.json') + + for path in [cache_path, md5file, params_path]: + if not os.path.exists(path): + return False + + with open(md5file) as f: + cache_md5sum = f.read() + with open(basetime_path) as f: + self.basetime = float(f.read()) + with open(self.trace_path, 'rb') as f: + trace_md5sum = hashlib.md5(f.read()).hexdigest() + with open(params_path) as f: + cache_params = json.dumps(json.load(f)) + + # Convert to a json string for comparison + params = json.dumps(params) + + # check if cache is valid + if cache_md5sum != trace_md5sum or cache_params != params: + shutil.rmtree(cache_path) + return False + return True + + def _create_trace_cache(self, params): + cache_path = self._trace_cache_path() + md5file = os.path.join(cache_path, 'md5sum') + basetime_path = os.path.join(cache_path, 'basetime') + params_path = os.path.join(cache_path, 'params.json') + + if os.path.exists(cache_path): + shutil.rmtree(cache_path) + os.mkdir(cache_path) + + md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest() + with open(md5file, 'w') as f: + f.write(md5sum) - self.normalized_time = normalize_time + with open(basetime_path, 'w') as f: + f.write(str(self.basetime)) - if not hasattr(self, "needs_raw_parsing"): - self.needs_raw_parsing = False + with open(params_path, 'w') as f: + json.dump(params, f) + + def _get_csv_path(self, trace_class): + path = self._trace_cache_path() + return os.path.join(path, trace_class.__class__.__name__ + '.csv') + + def __init__(self, name="", normalize_time=True, scope="all", + events=[], window=(0, None), abs_window=(0, None)): + super(GenericFTrace, self).__init__(name) self.class_definitions.update(self.dynamic_classes.items()) self.__add_events(listify(events)) @@ -79,16 +144,13 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" for attr, class_def in self.class_definitions.iteritems(): trace_class = class_def() - if event_callbacks.has_key(attr): - trace_class.callback = event_callbacks[attr] setattr(self, attr, trace_class) self.trace_classes.append(trace_class) - self.__parse_trace_file(self.trace_path, window, abs_window) - if self.needs_raw_parsing and (self.trace_path_raw is not None): - self.__parse_trace_file(self.trace_path_raw, window, abs_window, - raw=True) - self.finalize_objects() + # save parameters to complete init later + self.normalize_time = normalize_time + self.window = window + self.abs_window = abs_window @classmethod def register_parser(cls, cobject, scope): @@ -129,6 +191,47 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" if cobject == obj: del scope_classes[name] + def _do_parse(self): + params = {'window': self.window, 'abs_window': self.abs_window} + if not self.__class__.disable_cache and self._check_trace_cache(params): + # Read csv into frames + for trace_class in self.trace_classes: + try: + csv_file = self._get_csv_path(trace_class) + trace_class.read_csv(csv_file) + trace_class.cached = True + except: + warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class) + warnings.warn(warnstr) + + if all([c.cached for c in self.trace_classes]): + if self.normalize_time: + self._normalize_time() + return + + self.__parse_trace_file(self.trace_path) + + self.finalize_objects() + + if not self.__class__.disable_cache: + try: + # Recreate basic cache directories only if nothing cached + if not any([c.cached for c in self.trace_classes]): + self._create_trace_cache(params) + + # Write out only events that weren't cached before + for trace_class in self.trace_classes: + if trace_class.cached: + continue + csv_file = self._get_csv_path(trace_class) + trace_class.write_csv(csv_file) + except OSError as err: + warnings.warn( + "TRAPpy: Cache not created due to OS error: {0}".format(err)) + + if self.normalize_time: + self._normalize_time() + def __add_events(self, events): """Add events to the class_definitions @@ -161,73 +264,66 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" trace_class = DynamicTypeFactory(event_name, (Base,), kwords) self.class_definitions[event_name] = trace_class - def __populate_data(self, fin, cls_for_unique_word, window, abs_window): - """Append to trace data from a txt trace""" - - def contains_unique_word(line, unique_words=cls_for_unique_word.keys()): - for unique_word in unique_words: - if unique_word in line: - return True - return False + def __get_trace_class(self, line, cls_word): + trace_class = None + for unique_word, cls in cls_word.iteritems(): + if unique_word in line: + trace_class = cls + if not cls.fallback: + return trace_class + return trace_class - special_fields_regexp = r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\ - r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+(?P<timestamp>[0-9]+\.[0-9]+):" - special_fields_regexp = re.compile(special_fields_regexp) - start_match = re.compile(r"[A-Za-z0-9_]+=") + def __populate_data(self, fin, cls_for_unique_word): + """Append to trace data from a txt trace""" actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin) actual_trace = itertools.takewhile(self.trace_hasnt_finished(), actual_trace) - for line in itertools.ifilter(contains_unique_word, actual_trace): - for unique_word, cls in cls_for_unique_word.iteritems(): - if unique_word in line: - trace_class = cls - break - else: - raise FTraceParseError("No unique word in '{}'".format(line)) + for line in actual_trace: + trace_class = self.__get_trace_class(line, cls_for_unique_word) + if not trace_class: + self.lines += 1 + continue line = line[:-1] - special_fields_match = special_fields_regexp.match(line) - if not special_fields_match: - raise FTraceParseError("Couldn't match special fields in '{}'".format(line)) - comm = special_fields_match.group('comm') - pid = int(special_fields_match.group('pid')) - cpu = int(special_fields_match.group('cpu')) - tgid = special_fields_match.group('tgid') - if not tgid or tgid[0] == '-': - tgid = -1 - else: - tgid = int(tgid) - - timestamp = float(special_fields_match.group('timestamp')) + fields_match = SPECIAL_FIELDS_RE.match(line) + if not fields_match: + raise FTraceParseError("Couldn't match fields in '{}'".format(line)) + comm = fields_match.group('comm') + pid = int(fields_match.group('pid')) + cpu = int(fields_match.group('cpu')) + tgid = fields_match.group('tgid') + tgid = -1 if (not tgid or '-' in tgid) else int(tgid) + + # The timestamp, depending on the trace_clock configuration, can be + # reported either in [s].[us] or [ns] format. Let's ensure that we + # always generate DF which have the index expressed in: + # [s].[decimals] + timestamp = float(fields_match.group('timestamp')) + if not fields_match.group('us'): + timestamp /= 1e9 + data_str = fields_match.group('data') if not self.basetime: self.basetime = timestamp - if (timestamp < window[0] + self.basetime) or \ - (timestamp < abs_window[0]): + if (timestamp < self.window[0] + self.basetime) or \ + (timestamp < self.abs_window[0]): + self.lines += 1 continue - if (window[1] and timestamp > window[1] + self.basetime) or \ - (abs_window[1] and timestamp > abs_window[1]): + if (self.window[1] and timestamp > self.window[1] + self.basetime) or \ + (self.abs_window[1] and timestamp > self.abs_window[1]): return - try: - data_start_idx = start_match.search(line).start() - except AttributeError: - continue - - if self.normalized_time: - timestamp = timestamp - self.basetime - - data_str = line[data_start_idx:] - # Remove empty arrays from the trace - data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str) + if "={}" in data_str: + data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str) - trace_class.append_data(timestamp, comm, pid, tgid, cpu, data_str) + trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str) + self.lines += 1 def trace_hasnt_started(self): """Return a function that accepts a line and returns true if this line @@ -241,7 +337,7 @@ is not part of the trace. started). """ - return lambda x: False + return lambda line: not SPECIAL_FIELDS_RE.match(line) def trace_hasnt_finished(self): """Return a function that accepts a line and returns true if this line @@ -260,15 +356,14 @@ is part of the trace. """ return lambda x: True - def __parse_trace_file(self, trace_file, window, abs_window, raw=False): + def __parse_trace_file(self, trace_file): """parse the trace and create a pandas DataFrame""" # Memoize the unique words to speed up parsing the trace file cls_for_unique_word = {} for trace_name in self.class_definitions.iterkeys(): trace_class = getattr(self, trace_name) - - if self.needs_raw_parsing and (trace_class.parse_raw != raw): + if trace_class.cached: continue unique_word = trace_class.unique_word @@ -279,8 +374,9 @@ is part of the trace. try: with open(trace_file) as fin: + self.lines = 0 self.__populate_data( - fin, cls_for_unique_word, window, abs_window) + fin, cls_for_unique_word) except FTraceParseError as e: raise ValueError('Failed to parse ftrace file {}:\n{}'.format( trace_file, str(e))) @@ -319,6 +415,60 @@ is part of the trace. return ret + def apply_callbacks(self, fn_map, *kwarg): + """ + Apply callback functions to trace events in chronological order. + + This method iterates over a user-specified subset of the available trace + event dataframes, calling different user-specified functions for each + event type. These functions are passed a dictionary mapping 'Index' and + the column names to their values for that row. + + For example, to iterate over trace t, applying your functions callback_fn1 + and callback_fn2 to each sched_switch and sched_wakeup event respectively: + + t.apply_callbacks({ + "sched_switch": callback_fn1, + "sched_wakeup": callback_fn2 + }) + """ + dfs = {event: getattr(self, event).data_frame for event in fn_map.keys()} + events = [event for event in fn_map.keys() if not dfs[event].empty] + iters = {event: dfs[event].itertuples() for event in events} + next_rows = {event: iterator.next() for event,iterator in iters.iteritems()} + + # Column names beginning with underscore will not be preserved in tuples + # due to constraints on namedtuple field names, so store mappings from + # column name to column number for each trace event. + col_idxs = {event: { + name: idx for idx, name in enumerate( + ['Index'] + dfs[event].columns.tolist() + ) + } for event in events} + + def getLine(event): + line_col_idx = col_idxs[event]['__line'] + return next_rows[event][line_col_idx] + + while events: + event_name = min(events, key=getLine) + event_tuple = next_rows[event_name] + + event_dict = { + col: event_tuple[idx] for col, idx in col_idxs[event_name].iteritems() + } + + if kwarg: + fn_map[event_name](event_dict, kwarg) + else: + fn_map[event_name](event_dict) + + event_row = next(iters[event_name], None) + if event_row: + next_rows[event_name] = event_row + else: + events.remove(event_name) + def plot_freq_hists(self, map_label, ax): """Plot histograms for each actor input and output frequency @@ -500,16 +650,26 @@ class FTrace(GenericFTrace): """ def __init__(self, path=".", name="", normalize_time=True, scope="all", - events=[], event_callbacks={}, window=(0, None), - abs_window=(0, None), build_df=True): - self.trace_path, self.trace_path_raw = self.__process_path(path) - self.needs_raw_parsing = True - + events=[], window=(0, None), abs_window=(0, None)): + super(FTrace, self).__init__(name, normalize_time, scope, events, + window, abs_window) + self.raw_events = [] + self.trace_path = self.__process_path(path) self.__populate_metadata() + self._do_parse() - super(FTrace, self).__init__(name, normalize_time, scope, events, - event_callbacks, window, abs_window, - build_df) + def __warn_about_txt_trace_files(self, trace_dat, raw_txt, formatted_txt): + self.__get_raw_event_list() + warn_text = ( "You appear to be parsing both raw and formatted " + "trace files. TRAPpy now uses a unified format. " + "If you have the {} file, remove the .txt files " + "and try again. If not, you can manually move " + "lines with the following events from {} to {} :" + ).format(trace_dat, raw_txt, formatted_txt) + for raw_event in self.raw_events: + warn_text = warn_text+" \"{}\"".format(raw_event) + + raise RuntimeError(warn_text) def __process_path(self, basepath): """Process the path and return the path to the trace text file""" @@ -520,32 +680,42 @@ class FTrace(GenericFTrace): trace_name = os.path.join(basepath, "trace") trace_txt = trace_name + ".txt" - trace_raw = trace_name + ".raw.txt" + trace_raw_txt = trace_name + ".raw.txt" trace_dat = trace_name + ".dat" if os.path.isfile(trace_dat): - # Both TXT and RAW traces must always be generated - if not os.path.isfile(trace_txt) or \ - not os.path.isfile(trace_raw): + # Warn users if raw.txt files are present + if os.path.isfile(trace_raw_txt): + self.__warn_about_txt_trace_files(trace_dat, trace_raw_txt, trace_txt) + # TXT traces must always be generated + if not os.path.isfile(trace_txt): self.__run_trace_cmd_report(trace_dat) - # TXT (and RAW) traces must match the most recent binary trace + # TXT traces must match the most recent binary trace elif os.path.getmtime(trace_txt) < os.path.getmtime(trace_dat): self.__run_trace_cmd_report(trace_dat) - if not os.path.isfile(trace_raw): - trace_raw = None + return trace_txt - return trace_txt, trace_raw + def __get_raw_event_list(self): + self.raw_events = [] + # Generate list of events which need to be parsed in raw format + for event_class in (self.thermal_classes, self.sched_classes, self.dynamic_classes): + for trace_class in event_class.itervalues(): + raw = getattr(trace_class, 'parse_raw', None) + if raw: + name = getattr(trace_class, 'name', None) + if name: + self.raw_events.append(name) def __run_trace_cmd_report(self, fname): - """Run "trace-cmd report fname > fname.txt" - and "trace-cmd report -R fname > fname.raw.txt" + """Run "trace-cmd report [ -r raw_event ]* fname > fname.txt" - The resulting traces are stored in files with extension ".txt" - and ".raw.txt" respectively. If fname is "my_trace.dat", the - trace is stored in "my_trace.txt" and "my_trace.raw.txt". The - contents of the destination files are overwritten if they - exist. + The resulting trace is stored in files with extension ".txt". If + fname is "my_trace.dat", the trace is stored in "my_trace.txt". The + contents of the destination file is overwritten if it exists. + Trace events which require unformatted output (raw_event == True) + are added to the command line with one '-r <event>' each event and + trace-cmd then prints those events without formatting. """ from subprocess import check_output @@ -555,8 +725,12 @@ class FTrace(GenericFTrace): if not os.path.isfile(fname): raise IOError("No such file or directory: {}".format(fname)) - raw_trace_output = os.path.splitext(fname)[0] + ".raw.txt" trace_output = os.path.splitext(fname)[0] + ".txt" + # Ask for the raw event list and request them unformatted + self.__get_raw_event_list() + for raw_event in self.raw_events: + cmd.extend([ '-r', raw_event ]) + cmd.append(fname) with open(os.devnull) as devnull: @@ -567,17 +741,9 @@ class FTrace(GenericFTrace): raise OSError(2, "trace-cmd not found in PATH, is it installed?") else: raise - - # Add the -R flag to the trace-cmd - # for raw parsing - cmd.insert(-1, "-R") - raw_out = check_output(cmd, stderr=devnull) - with open(trace_output, "w") as fout: fout.write(out) - with open(raw_trace_output, "w") as fout: - fout.write(raw_out) def __populate_metadata(self): """Populates trace metadata""" @@ -600,6 +766,6 @@ class FTrace(GenericFTrace): setattr(self, "_" + match.group(1), match.group(2)) metadata_keys.remove(match.group(1)) - if re.search(r"^\s+[^\[]+-\d+\s+\[\d+\]\s+\d+\.\d+:", line): + if SPECIAL_FIELDS_RE.match(line): # Reached a valid trace line, abort metadata population return diff --git a/trappy/sched.py b/trappy/sched.py index ebcb79d..84e30be 100644 --- a/trappy/sched.py +++ b/trappy/sched.py @@ -104,14 +104,16 @@ class SchedSwitch(Base): """Parse sched_switch""" unique_word = "sched_switch:" + parse_raw = True def __init__(self): - super(SchedSwitch, self).__init__(parse_raw=True) + super(SchedSwitch, self).__init__(parse_raw=self.parse_raw) - def append_data(self, time, comm, pid, tgid, cpu, data): - data_rep = data.replace(" ==> ", " ") - super(SchedSwitch, self).append_data(time, comm, pid, tgid, cpu, - data_rep) + def create_dataframe(self): + self.data_array = [line.replace(" ==> ", " ", 1) + for line in self.data_array] + + super(SchedSwitch, self).create_dataframe() register_ftrace_parser(SchedSwitch, "sched") diff --git a/trappy/systrace.py b/trappy/systrace.py index c11601d..c7e65e4 100644 --- a/trappy/systrace.py +++ b/trappy/systrace.py @@ -14,6 +14,10 @@ # from trappy.ftrace import GenericFTrace +import re + +SYSTRACE_EVENT = re.compile( + r'^(?P<event>[A-Z])(\|(?P<pid>\d+)\|(?P<func>[^|]*)(\|(?P<data>.*))?)?') class drop_before_trace(object): """Object that, when called, returns True if the line is not part of @@ -23,24 +27,26 @@ the trace the headers that start with # """ - def __init__(self): + def __init__(self, tracer): self.before_begin_trace = True - self.before_script_trace_data = True self.before_actual_trace = True + self.tracer = tracer def __call__(self, line): if self.before_begin_trace: if line.startswith("<!-- BEGIN TRACE -->") or \ line.startswith("<title>Android System Trace</title>"): self.before_begin_trace = False - elif self.before_script_trace_data: + elif self.before_actual_trace: if line.startswith(' <script class="trace-data"') or \ line.startswith(" var linuxPerfData"): - self.before_script_trace_data = False - elif not line.startswith("#"): - self.before_actual_trace = False + self.before_actual_trace = False - return self.before_actual_trace + if not self.before_actual_trace: + base_call = super(SysTrace, self.tracer).trace_hasnt_started() + return base_call(line) + else: + return True class SysTrace(GenericFTrace): """A wrapper that parses all events of a SysTrace run @@ -50,23 +56,21 @@ class SysTrace(GenericFTrace): """ def __init__(self, path=".", name="", normalize_time=True, scope="all", - events=[], event_callbacks={}, window=(0, None), - abs_window=(0, None), build_df=True): + events=[], window=(0, None), abs_window=(0, None)): self.trace_path = path super(SysTrace, self).__init__(name, normalize_time, scope, events, - event_callbacks, window, abs_window, - build_df) - if not build_df: - return + window, abs_window) + + self._do_parse() try: self._cpus = 1 + self.sched_switch.data_frame["__cpu"].max() except AttributeError: pass def trace_hasnt_started(self): - return drop_before_trace() + return drop_before_trace(self) def trace_hasnt_finished(self): """Return a function that returns True while the current line is still part of the trace @@ -78,3 +82,18 @@ class SysTrace(GenericFTrace): """ return lambda x: not x.endswith("</script>\n") + + def generate_data_dict(self, data_str): + """ Custom parsing for systrace's userspace events """ + data_dict = None + + match = SYSTRACE_EVENT.match(data_str) + if match: + data_dict = { + 'event': match.group('event'), + 'pid' : int(match.group('pid')) if match.group('pid') else None, + 'func' : match.group('func' ), + 'data' : match.group('data' ) + } + + return data_dict diff --git a/trappy/tracing_mark_write.py b/trappy/tracing_mark_write.py new file mode 100644 index 0000000..49a23b0 --- /dev/null +++ b/trappy/tracing_mark_write.py @@ -0,0 +1,43 @@ +# Copyright 2017 ARM Limited, Google and contributors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""This module contains the class for representing a tracing_mark_write +trace_event used for ftrace events injected from userspace. +""" + +from trappy.base import Base +from trappy.dynamic import register_ftrace_parser + +class TracingMarkWrite(Base): + """Parse tracing_mark_write events that couldn't be matched with more specific unique words + This class is always used as a fallback if nothing more specific could match the particular + tracing_mark_write event. + """ + + unique_word = "tracing_mark_write" + + def generate_data_dict(self, data_str): + if self.tracer: + data_dict = self.tracer.generate_data_dict(data_str) + if data_dict: + return data_dict + + data_dict = { 'string': data_str } + return data_dict + + def __init__(self): + super(TracingMarkWrite, self).__init__(fallback=True) + +register_ftrace_parser(TracingMarkWrite) diff --git a/trappy/utils.py b/trappy/utils.py index eb73752..47ef4ab 100644 --- a/trappy/utils.py +++ b/trappy/utils.py @@ -13,6 +13,9 @@ # limitations under the License. # +import pandas as pd +import numpy as np + """Generic functions that can be used in multiple places in trappy """ @@ -102,3 +105,59 @@ def handle_duplicate_index(data, dup_index_left += 1 return data.reindex(new_index) + +# Iterate fast over all rows in a data frame and apply fn +def apply_callback(df, fn, *kwargs): + iters = df.itertuples() + event_tuple = iters.next() + + # Column names beginning with underscore will not be preserved in tuples + # due to constraints on namedtuple field names, so store mappings from + # column name to column number for each trace event. + col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) } + + while True: + if not event_tuple: + break + event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() } + + if kwargs: + fn(event_dict, kwargs) + else: + fn(event_dict) + + event_tuple = next(iters, None) + + +def merge_dfs(pr_df, sec_df, pivot): + # Keep track of last secondary event + pivot_map = {} + + # An array accumating dicts with merged data + merged_data = [] + def df_fn(data): + # Store the latest secondary info + if data['Time'][0] == 'secondary': + pivot_map[data[pivot]] = data + # Get rid of primary/secondary labels + data['Time'] = data['Time'][1] + return + + # Propogate latest secondary info + for key, value in data.iteritems(): + if key == pivot: + continue + # Fast check for if value is nan (faster than np.isnan + try/except) + if value != value and pivot_map.has_key(data[pivot]): + data[key] = pivot_map[data[pivot]][key] + + # Get rid of primary/secondary labels + data['Time'] = data['Time'][1] + merged_data.append(data) + + df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort(columns='__line') + apply_callback(df, df_fn) + merged_df = pd.DataFrame.from_dict(merged_data) + merged_df.set_index('Time', inplace=True) + + return merged_df |