8 files changed, 465 insertions, 145 deletions
diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py
index 4900e8a..a953a60 100644
--- a/trappy/bare_trace.py
+++ b/trappy/bare_trace.py
@@ -27,13 +27,12 @@ class BareTrace(object):
 
     """
 
-    def __init__(self, name="", build_df=True):
+    def __init__(self, name=""):
         self.name = name
         self.normalized_time = False
         self.class_definitions = {}
         self.trace_classes = []
         self.basetime = 0
-        self.build_df = build_df
 
     def get_duration(self):
         """Returns the largest time value of all classes,
@@ -68,14 +67,13 @@ class BareTrace(object):
 
         return filters
 
-    def normalize_time(self, basetime=None):
+    def _normalize_time(self, basetime=None):
         """Normalize the time of all the trace classes
 
         :param basetime: The offset which needs to be subtracted from
             the time index
         :type basetime: float
         """
-        return # HACK: Since we're not normalizing anymore after the fact
 
         if basetime is not None:
             self.basetime = basetime
@@ -135,8 +133,13 @@ class BareTrace(object):
         setattr(self, name, event)
 
     def finalize_objects(self):
-        if not self.build_df:
-            return
         for trace_class in self.trace_classes:
+            # If cached, don't need to do any other DF operation
+            if trace_class.cached:
+                continue
+            trace_class.tracer = self
             trace_class.create_dataframe()
             trace_class.finalize_object()
+
+    def generate_data_dict(self):
+        return None
diff --git a/trappy/base.py b/trappy/base.py
index c0238cf..8a7fb38 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -92,22 +92,29 @@ class Base(object):
     e.g. "sched_switch:") but it can be anything else for trace points
     generated using trace_printk().
 
-    :param parse_raw: If :code:`True`, raw trace data (-R option) to
+    :param parse_raw: If :code:`True`, raw trace data (-r option) to
         trace-cmd will be used
 
+    :param fallback: If :code:`True`, the parsing class will be used
+        only if no other candidate class's unique_word matched. subclasses
+        should override this (for ex. TracingMarkWrite uses it)
+
     This class acts as a base class for all TRAPpy events
 
     """
-    def __init__(self, parse_raw=False):
+    def __init__(self, parse_raw=False, fallback=False):
+        self.fallback = fallback
+        self.tracer = None
         self.data_frame = pd.DataFrame()
+        self.line_array = []
         self.data_array = []
         self.time_array = []
         self.comm_array = []
         self.pid_array = []
         self.tgid_array = []
         self.cpu_array = []
-        self.callback = None
         self.parse_raw = parse_raw
+        self.cached = False
 
     def finalize_object(self):
         pass
@@ -146,7 +153,7 @@ class Base(object):
 
         return ret
 
-    def append_data(self, time, comm, pid, tgid, cpu, data):
+    def append_data(self, time, comm, pid, tgid, cpu, line, data):
         """Append data parsed from a line to the corresponding arrays
 
         The :mod:`DataFrame` will be created from this when the whole trace
@@ -172,15 +179,32 @@ class Base(object):
         self.pid_array.append(pid)
         self.tgid_array.append(tgid)
         self.cpu_array.append(cpu)
+        self.line_array.append(line)
         self.data_array.append(data)
 
-	if not self.callback:
+    def string_cast(self, string, type):
+        """ Attempt to convert string to another type
+
+        Here we attempt to cast string to a type. Currently only
+        integer conversion is supported with future expansion
+        left open to other types.
+
+        :param string: The value to convert.
+        :type string: str
+
+        :param type: The type to convert to.
+        :type type: type
+        """
+        # Currently this function only supports int conversion
+        if type != int:
             return
-        data_dict = self.generate_data_dict(comm, pid, cpu, data)
-        self.callback(time, data_dict)
+        # Handle false-positives for negative numbers
+        if not string.lstrip("-").isdigit():
+            return string
+        return int(string)
 
-    def generate_data_dict(self, comm, pid, tgid, cpu, data_str):
-        data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu}
+    def generate_data_dict(self, data_str):
+        data_dict = {}
         prev_key = None
         for field in data_str.split():
             if "=" not in field:
@@ -190,10 +214,7 @@ class Base(object):
                 data_dict[prev_key] += ' ' + field
                 continue
             (key, value) = field.split('=', 1)
-            try:
-                value = int(value)
-            except ValueError:
-                pass
+            value = self.string_cast(value, int)
             data_dict[key] = value
             prev_key = key
         return data_dict
@@ -207,10 +228,11 @@ class Base(object):
         check_memory_usage = True
         check_memory_count = 1
 
-        for (comm, pid, tgid, cpu, data_str) in zip(self.comm_array, self.pid_array,
+        for (comm, pid, tgid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
                                               self.tgid_array, self.cpu_array,
-                                              self.data_array):
-            data_dict = self.generate_data_dict(comm, pid, tgid, cpu, data_str)
+                                              self.line_array, self.data_array):
+            data_dict = {"__comm": comm, "__pid": pid, "__tgid": tgid, "__cpu": cpu, "__line": line}
+            data_dict.update(self.generate_data_dict(data_str))
 
             # When running out of memory, Pandas has been observed to segfault
             # rather than throwing a proper Python error.
@@ -244,6 +266,7 @@ class Base(object):
         self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)
 
         self.time_array = []
+        self.line_array = []
         self.comm_array = []
         self.pid_array = []
         self.cpu_array = []
@@ -257,6 +280,14 @@ class Base(object):
         """
         self.data_frame.to_csv(fname)
 
+    def read_csv(self, fname):
+        """Read the csv data into a DataFrame
+
+        :param fname: The name of the CSV file
+        :type fname: str
+        """
+        self.data_frame = pd.read_csv(fname, index_col = 0)
+
     def normalize_time(self, basetime):
         """Substract basetime from the Time of the data frame
 
@@ -264,9 +295,6 @@ class Base(object):
             the time index
         :type basetime: float
         """
-        # HACK: We don't normalize anymore after the fact
-        return
-
         if basetime and not self.data_frame.empty:
             self.data_frame.reset_index(inplace=True)
             self.data_frame["Time"] = self.data_frame["Time"] - basetime
diff --git a/trappy/dynamic.py b/trappy/dynamic.py
index f764232..5b094ce 100644
--- a/trappy/dynamic.py
+++ b/trappy/dynamic.py
@@ -81,7 +81,7 @@ def register_dynamic_ftrace(class_name, unique_word, scope="all",
         the parsing of events and group them together)
     :type scope: str
 
-    :param parse_raw: If, true, raw trace output (-R flag)
+    :param parse_raw: If, true, raw trace output (-r flag)
         will be used
     :type parse_raw: bool
 
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index 07cb94b..3948cb6 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -18,9 +18,13 @@
 # pylint: disable=no-member
 
 import itertools
+import json
 import os
 import re
 import pandas as pd
+import hashlib
+import shutil
+import warnings
 
 from trappy.bare_trace import BareTrace
 from trappy.utils import listify
@@ -46,6 +50,12 @@ def _plot_freq_hists(allfreqs, what, axis, title):
         trappy.plot_utils.plot_hist(allfreqs[actor], ax, this_title, "KHz", 20,
                              "Frequency", xlim, "default")
 
+SPECIAL_FIELDS_RE = re.compile(
+                        r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
+                        r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+"\
+                        r"(?P<timestamp>[0-9]+(?P<us>\.[0-9]+)?): (\w+:\s+)+(?P<data>.+)"
+)
+
 class GenericFTrace(BareTrace):
     """Generic class to parse output of FTrace.  This class is meant to be
 subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
@@ -56,15 +66,70 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
 
     dynamic_classes = {}
 
-    def __init__(self, name="", normalize_time=True, scope="all",
-                 events=[], event_callbacks={}, window=(0, None),
-                 abs_window=(0, None), build_df=True):
-        super(GenericFTrace, self).__init__(name, build_df)
+    disable_cache = False
+
+    def _trace_cache_path(self):
+        trace_file = self.trace_path
+        cache_dir  = '.' +  os.path.basename(trace_file) + '.cache'
+        tracefile_dir = os.path.dirname(os.path.abspath(trace_file))
+        cache_path = os.path.join(tracefile_dir, cache_dir)
+        return cache_path
+
+    def _check_trace_cache(self, params):
+        cache_path = self._trace_cache_path()
+        md5file = os.path.join(cache_path, 'md5sum')
+        basetime_path = os.path.join(cache_path, 'basetime')
+        params_path = os.path.join(cache_path, 'params.json')
+
+        for path in [cache_path, md5file, params_path]:
+            if not os.path.exists(path):
+                return False
+
+        with open(md5file) as f:
+            cache_md5sum = f.read()
+        with open(basetime_path) as f:
+            self.basetime = float(f.read())
+        with open(self.trace_path, 'rb') as f:
+            trace_md5sum = hashlib.md5(f.read()).hexdigest()
+        with open(params_path) as f:
+            cache_params = json.dumps(json.load(f))
+
+        # Convert to a json string for comparison
+        params = json.dumps(params)
+
+        # check if cache is valid
+        if cache_md5sum != trace_md5sum or cache_params != params:
+            shutil.rmtree(cache_path)
+            return False
+        return True
+
+    def _create_trace_cache(self, params):
+        cache_path = self._trace_cache_path()
+        md5file = os.path.join(cache_path, 'md5sum')
+        basetime_path = os.path.join(cache_path, 'basetime')
+        params_path = os.path.join(cache_path, 'params.json')
+
+        if os.path.exists(cache_path):
+            shutil.rmtree(cache_path)
+        os.mkdir(cache_path)
+
+        md5sum = hashlib.md5(open(self.trace_path, 'rb').read()).hexdigest()
+        with open(md5file, 'w') as f:
+            f.write(md5sum)
 
-        self.normalized_time = normalize_time
+        with open(basetime_path, 'w') as f:
+            f.write(str(self.basetime))
 
-        if not hasattr(self, "needs_raw_parsing"):
-            self.needs_raw_parsing = False
+        with open(params_path, 'w') as f:
+            json.dump(params, f)
+
+    def _get_csv_path(self, trace_class):
+        path = self._trace_cache_path()
+        return os.path.join(path, trace_class.__class__.__name__ + '.csv')
+
+    def __init__(self, name="", normalize_time=True, scope="all",
+                 events=[], window=(0, None), abs_window=(0, None)):
+        super(GenericFTrace, self).__init__(name)
 
         self.class_definitions.update(self.dynamic_classes.items())
         self.__add_events(listify(events))
@@ -79,16 +144,13 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
 
         for attr, class_def in self.class_definitions.iteritems():
             trace_class = class_def()
-            if event_callbacks.has_key(attr):
-                trace_class.callback = event_callbacks[attr]
             setattr(self, attr, trace_class)
             self.trace_classes.append(trace_class)
 
-        self.__parse_trace_file(self.trace_path, window, abs_window)
-        if self.needs_raw_parsing and (self.trace_path_raw is not None):
-            self.__parse_trace_file(self.trace_path_raw, window, abs_window,
-                                    raw=True)
-        self.finalize_objects()
+        # save parameters to complete init later
+        self.normalize_time = normalize_time
+        self.window = window
+        self.abs_window = abs_window
 
     @classmethod
     def register_parser(cls, cobject, scope):
@@ -129,6 +191,47 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
             if cobject == obj:
                 del scope_classes[name]
 
+    def _do_parse(self):
+        params = {'window': self.window, 'abs_window': self.abs_window}
+        if not self.__class__.disable_cache and self._check_trace_cache(params):
+            # Read csv into frames
+            for trace_class in self.trace_classes:
+                try:
+                    csv_file = self._get_csv_path(trace_class)
+                    trace_class.read_csv(csv_file)
+                    trace_class.cached = True
+                except:
+                    warnstr = "TRAPpy: Couldn't read {} from cache, reading it from trace".format(trace_class)
+                    warnings.warn(warnstr)
+
+        if all([c.cached for c in self.trace_classes]):
+            if self.normalize_time:
+                self._normalize_time()
+            return
+
+        self.__parse_trace_file(self.trace_path)
+
+        self.finalize_objects()
+
+        if not self.__class__.disable_cache:
+            try:
+                # Recreate basic cache directories only if nothing cached
+                if not any([c.cached for c in self.trace_classes]):
+                    self._create_trace_cache(params)
+
+                # Write out only events that weren't cached before
+                for trace_class in self.trace_classes:
+                    if trace_class.cached:
+                        continue
+                    csv_file = self._get_csv_path(trace_class)
+                    trace_class.write_csv(csv_file)
+            except OSError as err:
+                warnings.warn(
+                    "TRAPpy: Cache not created due to OS error: {0}".format(err))
+
+        if self.normalize_time:
+            self._normalize_time()
+
     def __add_events(self, events):
         """Add events to the class_definitions
 
@@ -161,73 +264,66 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
                 trace_class = DynamicTypeFactory(event_name, (Base,), kwords)
                 self.class_definitions[event_name] = trace_class
 
-    def __populate_data(self, fin, cls_for_unique_word, window, abs_window):
-        """Append to trace data from a txt trace"""
-
-        def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
-            for unique_word in unique_words:
-                if unique_word in line:
-                    return True
-            return False
+    def __get_trace_class(self, line, cls_word):
+        trace_class = None
+        for unique_word, cls in cls_word.iteritems():
+            if unique_word in line:
+                trace_class = cls
+                if not cls.fallback:
+                    return trace_class
+        return trace_class
 
-        special_fields_regexp = r"^\s*(?P<comm>.*)-(?P<pid>\d+)\s+\(?(?P<tgid>.*?)?\)"\
-                                r"?\s*\[(?P<cpu>\d+)\](?:\s+....)?\s+(?P<timestamp>[0-9]+\.[0-9]+):"
-        special_fields_regexp = re.compile(special_fields_regexp)
-        start_match = re.compile(r"[A-Za-z0-9_]+=")
+    def __populate_data(self, fin, cls_for_unique_word):
+        """Append to trace data from a txt trace"""
 
         actual_trace = itertools.dropwhile(self.trace_hasnt_started(), fin)
         actual_trace = itertools.takewhile(self.trace_hasnt_finished(),
                                            actual_trace)
 
-        for line in itertools.ifilter(contains_unique_word, actual_trace):
-            for unique_word, cls in cls_for_unique_word.iteritems():
-                if unique_word in line:
-                    trace_class = cls
-                    break
-            else:
-                raise FTraceParseError("No unique word in '{}'".format(line))
+        for line in actual_trace:
+            trace_class = self.__get_trace_class(line, cls_for_unique_word)
+            if not trace_class:
+                self.lines += 1
+                continue
 
             line = line[:-1]
 
-            special_fields_match = special_fields_regexp.match(line)
-            if not special_fields_match:
-                raise FTraceParseError("Couldn't match special fields in '{}'".format(line))
-            comm = special_fields_match.group('comm')
-            pid = int(special_fields_match.group('pid'))
-            cpu = int(special_fields_match.group('cpu'))
-            tgid = special_fields_match.group('tgid')
-            if not tgid or tgid[0] == '-':
-                tgid = -1
-            else:
-                tgid = int(tgid)
-
-            timestamp = float(special_fields_match.group('timestamp'))
+            fields_match = SPECIAL_FIELDS_RE.match(line)
+            if not fields_match:
+                raise FTraceParseError("Couldn't match fields in '{}'".format(line))
+            comm = fields_match.group('comm')
+            pid = int(fields_match.group('pid'))
+            cpu = int(fields_match.group('cpu'))
+            tgid = fields_match.group('tgid')
+            tgid = -1 if (not tgid or '-' in tgid) else int(tgid)
+
+            # The timestamp, depending on the trace_clock configuration, can be
+            # reported either in [s].[us] or [ns] format. Let's ensure that we
+            # always generate DF which have the index expressed in:
+            #    [s].[decimals]
+            timestamp = float(fields_match.group('timestamp'))
+            if not fields_match.group('us'):
+                timestamp /= 1e9
+            data_str = fields_match.group('data')
 
             if not self.basetime:
                 self.basetime = timestamp
 
-            if (timestamp < window[0] + self.basetime) or \
-               (timestamp < abs_window[0]):
+            if (timestamp < self.window[0] + self.basetime) or \
+               (timestamp < self.abs_window[0]):
+                self.lines += 1
                 continue
 
-            if (window[1] and timestamp > window[1] + self.basetime) or \
-               (abs_window[1] and timestamp > abs_window[1]):
+            if (self.window[1] and timestamp > self.window[1] + self.basetime) or \
+               (self.abs_window[1] and timestamp > self.abs_window[1]):
                 return
 
-            try:
-                data_start_idx =  start_match.search(line).start()
-            except AttributeError:
-                continue
-
-            if self.normalized_time:
-                timestamp = timestamp - self.basetime
-
-            data_str = line[data_start_idx:]
-
             # Remove empty arrays from the trace
-            data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
+            if "={}" in data_str:
+                data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
 
-            trace_class.append_data(timestamp, comm, pid, tgid, cpu, data_str)
+            trace_class.append_data(timestamp, comm, pid, tgid, cpu, self.lines, data_str)
+            self.lines += 1
 
     def trace_hasnt_started(self):
         """Return a function that accepts a line and returns true if this line
@@ -241,7 +337,7 @@ is not part of the trace.
         started).
 
         """
-        return lambda x: False
+        return lambda line: not SPECIAL_FIELDS_RE.match(line)
 
     def trace_hasnt_finished(self):
         """Return a function that accepts a line and returns true if this line
@@ -260,15 +356,14 @@ is part of the trace.
         """
         return lambda x: True
 
-    def __parse_trace_file(self, trace_file, window, abs_window, raw=False):
+    def __parse_trace_file(self, trace_file):
         """parse the trace and create a pandas DataFrame"""
 
         # Memoize the unique words to speed up parsing the trace file
         cls_for_unique_word = {}
         for trace_name in self.class_definitions.iterkeys():
             trace_class = getattr(self, trace_name)
-
-            if self.needs_raw_parsing and (trace_class.parse_raw != raw):
+            if trace_class.cached:
                 continue
 
             unique_word = trace_class.unique_word
@@ -279,8 +374,9 @@ is part of the trace.
 
         try:
             with open(trace_file) as fin:
+                self.lines = 0
                 self.__populate_data(
-                    fin, cls_for_unique_word, window, abs_window)
+                    fin, cls_for_unique_word)
         except FTraceParseError as e:
             raise ValueError('Failed to parse ftrace file {}:\n{}'.format(
                 trace_file, str(e)))
@@ -319,6 +415,60 @@ is part of the trace.
 
         return ret
 
+    def apply_callbacks(self, fn_map, *kwarg):
+        """
+        Apply callback functions to trace events in chronological order.
+
+        This method iterates over a user-specified subset of the available trace
+        event dataframes, calling different user-specified functions for each
+        event type. These functions are passed a dictionary mapping 'Index' and
+        the column names to their values for that row.
+
+        For example, to iterate over trace t, applying your functions callback_fn1
+        and callback_fn2 to each sched_switch and sched_wakeup event respectively:
+
+        t.apply_callbacks({
+            "sched_switch": callback_fn1,
+            "sched_wakeup": callback_fn2
+        })
+        """
+        dfs = {event: getattr(self, event).data_frame for event in fn_map.keys()}
+        events = [event for event in fn_map.keys() if not dfs[event].empty]
+        iters = {event: dfs[event].itertuples() for event in events}
+        next_rows = {event: iterator.next() for event,iterator in iters.iteritems()}
+
+        # Column names beginning with underscore will not be preserved in tuples
+        # due to constraints on namedtuple field names, so store mappings from
+        # column name to column number for each trace event.
+        col_idxs = {event: {
+            name: idx for idx, name in enumerate(
+                ['Index'] + dfs[event].columns.tolist()
+            )
+        } for event in events}
+
+        def getLine(event):
+            line_col_idx = col_idxs[event]['__line']
+            return next_rows[event][line_col_idx]
+
+        while events:
+            event_name = min(events, key=getLine)
+            event_tuple = next_rows[event_name]
+
+            event_dict = {
+                col: event_tuple[idx] for col, idx in col_idxs[event_name].iteritems()
+            }
+
+            if kwarg:
+                fn_map[event_name](event_dict, kwarg)
+            else:
+                fn_map[event_name](event_dict)
+
+            event_row = next(iters[event_name], None)
+            if event_row:
+                next_rows[event_name] = event_row
+            else:
+                events.remove(event_name)
+
     def plot_freq_hists(self, map_label, ax):
         """Plot histograms for each actor input and output frequency
 
@@ -500,16 +650,26 @@ class FTrace(GenericFTrace):
     """
 
     def __init__(self, path=".", name="", normalize_time=True, scope="all",
-                 events=[], event_callbacks={}, window=(0, None),
-                 abs_window=(0, None), build_df=True):
-        self.trace_path, self.trace_path_raw = self.__process_path(path)
-        self.needs_raw_parsing = True
-
+                 events=[], window=(0, None), abs_window=(0, None)):
+        super(FTrace, self).__init__(name, normalize_time, scope, events,
+                                     window, abs_window)
+        self.raw_events = []
+        self.trace_path = self.__process_path(path)
         self.__populate_metadata()
+        self._do_parse()
 
-        super(FTrace, self).__init__(name, normalize_time, scope, events,
-                                     event_callbacks, window, abs_window,
-                                     build_df)
+    def __warn_about_txt_trace_files(self, trace_dat, raw_txt, formatted_txt):
+        self.__get_raw_event_list()
+        warn_text = ( "You appear to be parsing both raw and formatted "
+                      "trace files. TRAPpy now uses a unified format. "
+                      "If you have the {} file, remove the .txt files "
+                      "and try again. If not, you can manually move "
+                      "lines with the following events from {} to {} :"
+                      ).format(trace_dat, raw_txt, formatted_txt)
+        for raw_event in self.raw_events:
+            warn_text = warn_text+" \"{}\"".format(raw_event)
+
+        raise RuntimeError(warn_text)
 
     def __process_path(self, basepath):
         """Process the path and return the path to the trace text file"""
@@ -520,32 +680,42 @@ class FTrace(GenericFTrace):
             trace_name = os.path.join(basepath, "trace")
 
         trace_txt = trace_name + ".txt"
-        trace_raw = trace_name + ".raw.txt"
+        trace_raw_txt = trace_name + ".raw.txt"
         trace_dat = trace_name + ".dat"
 
         if os.path.isfile(trace_dat):
-            # Both TXT and RAW traces must always be generated
-            if not os.path.isfile(trace_txt) or \
-               not os.path.isfile(trace_raw):
+            # Warn users if raw.txt files are present
+            if os.path.isfile(trace_raw_txt):
+                self.__warn_about_txt_trace_files(trace_dat, trace_raw_txt, trace_txt)
+            # TXT traces must always be generated
+            if not os.path.isfile(trace_txt):
                 self.__run_trace_cmd_report(trace_dat)
-            # TXT (and RAW) traces must match the most recent binary trace
+            # TXT traces must match the most recent binary trace
             elif os.path.getmtime(trace_txt) < os.path.getmtime(trace_dat):
                 self.__run_trace_cmd_report(trace_dat)
 
-        if not os.path.isfile(trace_raw):
-            trace_raw = None
+        return trace_txt
 
-        return trace_txt, trace_raw
+    def __get_raw_event_list(self):
+        self.raw_events = []
+        # Generate list of events which need to be parsed in raw format
+        for event_class in (self.thermal_classes, self.sched_classes, self.dynamic_classes):
+            for trace_class in event_class.itervalues():
+                raw = getattr(trace_class, 'parse_raw', None)
+                if raw:
+                    name = getattr(trace_class, 'name', None)
+                    if name:
+                        self.raw_events.append(name)
 
     def __run_trace_cmd_report(self, fname):
-        """Run "trace-cmd report fname > fname.txt"
-           and "trace-cmd report -R fname > fname.raw.txt"
+        """Run "trace-cmd report [ -r raw_event ]* fname > fname.txt"
 
-        The resulting traces are stored in files with extension ".txt"
-        and ".raw.txt" respectively.  If fname is "my_trace.dat", the
-        trace is stored in "my_trace.txt" and "my_trace.raw.txt".  The
-        contents of the destination files are overwritten if they
-        exist.
+        The resulting trace is stored in files with extension ".txt". If
+        fname is "my_trace.dat", the trace is stored in "my_trace.txt". The
+        contents of the destination file is overwritten if it exists.
+        Trace events which require unformatted output (raw_event == True)
+        are added to the command line with one '-r <event>' each event and
+        trace-cmd then prints those events without formatting.
 
         """
         from subprocess import check_output
@@ -555,8 +725,12 @@ class FTrace(GenericFTrace):
         if not os.path.isfile(fname):
             raise IOError("No such file or directory: {}".format(fname))
 
-        raw_trace_output = os.path.splitext(fname)[0] + ".raw.txt"
         trace_output = os.path.splitext(fname)[0] + ".txt"
+        # Ask for the raw event list and request them unformatted
+        self.__get_raw_event_list()
+        for raw_event in self.raw_events:
+            cmd.extend([ '-r', raw_event ])
+
         cmd.append(fname)
 
         with open(os.devnull) as devnull:
@@ -567,17 +741,9 @@ class FTrace(GenericFTrace):
                     raise OSError(2, "trace-cmd not found in PATH, is it installed?")
                 else:
                     raise
-
-            # Add the -R flag to the trace-cmd
-            # for raw parsing
-            cmd.insert(-1, "-R")
-            raw_out = check_output(cmd, stderr=devnull)
-
         with open(trace_output, "w") as fout:
             fout.write(out)
 
-        with open(raw_trace_output, "w") as fout:
-            fout.write(raw_out)
 
     def __populate_metadata(self):
         """Populates trace metadata"""
@@ -600,6 +766,6 @@ class FTrace(GenericFTrace):
                     setattr(self, "_" + match.group(1), match.group(2))
                     metadata_keys.remove(match.group(1))
 
-                if re.search(r"^\s+[^\[]+-\d+\s+\[\d+\]\s+\d+\.\d+:", line):
+                if SPECIAL_FIELDS_RE.match(line):
                     # Reached a valid trace line, abort metadata population
                     return
diff --git a/trappy/sched.py b/trappy/sched.py
index ebcb79d..84e30be 100644
--- a/trappy/sched.py
+++ b/trappy/sched.py
@@ -104,14 +104,16 @@ class SchedSwitch(Base):
     """Parse sched_switch"""
 
     unique_word = "sched_switch:"
+    parse_raw = True
 
     def __init__(self):
-        super(SchedSwitch, self).__init__(parse_raw=True)
+        super(SchedSwitch, self).__init__(parse_raw=self.parse_raw)
 
-    def append_data(self, time, comm, pid, tgid, cpu, data):
-        data_rep = data.replace(" ==> ", " ")
-        super(SchedSwitch, self).append_data(time, comm, pid, tgid, cpu,
-                                             data_rep)
+    def create_dataframe(self):
+        self.data_array = [line.replace(" ==> ", " ", 1)
+                           for line in self.data_array]
+
+        super(SchedSwitch, self).create_dataframe()
 
 register_ftrace_parser(SchedSwitch, "sched")
 
diff --git a/trappy/systrace.py b/trappy/systrace.py
index c11601d..c7e65e4 100644
--- a/trappy/systrace.py
+++ b/trappy/systrace.py
@@ -14,6 +14,10 @@
 #
 
 from trappy.ftrace import GenericFTrace
+import re
+
+SYSTRACE_EVENT = re.compile(
+    r'^(?P<event>[A-Z])(\|(?P<pid>\d+)\|(?P<func>[^|]*)(\|(?P<data>.*))?)?')
 
 class drop_before_trace(object):
     """Object that, when called, returns True if the line is not part of
@@ -23,24 +27,26 @@ the trace
     the headers that start with #
 
     """
-    def __init__(self):
+    def __init__(self, tracer):
         self.before_begin_trace = True
-        self.before_script_trace_data = True
         self.before_actual_trace = True
+        self.tracer = tracer
 
     def __call__(self, line):
         if self.before_begin_trace:
             if line.startswith("<!-- BEGIN TRACE -->") or \
                line.startswith("<title>Android System Trace</title>"):
                 self.before_begin_trace = False
-        elif self.before_script_trace_data:
+        elif self.before_actual_trace:
             if line.startswith('  <script class="trace-data"') or \
                line.startswith("  var linuxPerfData"):
-                self.before_script_trace_data = False
-        elif not line.startswith("#"):
-            self.before_actual_trace = False
+                self.before_actual_trace = False
 
-        return self.before_actual_trace
+        if not self.before_actual_trace:
+            base_call = super(SysTrace, self.tracer).trace_hasnt_started()
+            return base_call(line)
+        else:
+            return True
 
 class SysTrace(GenericFTrace):
     """A wrapper that parses all events of a SysTrace run
@@ -50,23 +56,21 @@ class SysTrace(GenericFTrace):
     """
 
     def __init__(self, path=".", name="", normalize_time=True, scope="all",
-                 events=[], event_callbacks={}, window=(0, None),
-                 abs_window=(0, None), build_df=True):
+                 events=[], window=(0, None), abs_window=(0, None)):
 
         self.trace_path = path
 
         super(SysTrace, self).__init__(name, normalize_time, scope, events,
-                                       event_callbacks, window, abs_window,
-                                       build_df)
-        if not build_df:
-            return
+                                       window, abs_window)
+
+        self._do_parse()
         try:
             self._cpus = 1 + self.sched_switch.data_frame["__cpu"].max()
         except AttributeError:
             pass
 
     def trace_hasnt_started(self):
-        return drop_before_trace()
+        return drop_before_trace(self)
 
     def trace_hasnt_finished(self):
         """Return a function that returns True while the current line is still part of the trace
@@ -78,3 +82,18 @@ class SysTrace(GenericFTrace):
 
         """
         return lambda x: not x.endswith("</script>\n")
+
+    def generate_data_dict(self, data_str):
+        """ Custom parsing for systrace's userspace events """
+        data_dict = None
+
+        match = SYSTRACE_EVENT.match(data_str)
+        if match:
+            data_dict = {
+                          'event': match.group('event'),
+                          'pid'  : int(match.group('pid')) if match.group('pid') else None,
+                          'func' : match.group('func' ),
+                          'data' : match.group('data' )
+                        }
+
+        return data_dict
diff --git a/trappy/tracing_mark_write.py b/trappy/tracing_mark_write.py
new file mode 100644
index 0000000..49a23b0
--- /dev/null
+++ b/trappy/tracing_mark_write.py
@@ -0,0 +1,43 @@
+#    Copyright 2017 ARM Limited, Google and contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""This module contains the class for representing a tracing_mark_write
+trace_event used for ftrace events injected from userspace.
+"""
+
+from trappy.base import Base
+from trappy.dynamic import register_ftrace_parser
+
+class TracingMarkWrite(Base):
+    """Parse tracing_mark_write events that couldn't be matched with more specific unique words
+       This class is always used as a fallback if nothing more specific could match the particular
+       tracing_mark_write event.
+    """
+
+    unique_word = "tracing_mark_write"
+
+    def generate_data_dict(self, data_str):
+        if self.tracer:
+            data_dict = self.tracer.generate_data_dict(data_str)
+            if data_dict:
+                return data_dict
+
+        data_dict = { 'string': data_str }
+        return data_dict
+
+    def __init__(self):
+        super(TracingMarkWrite, self).__init__(fallback=True)
+
+register_ftrace_parser(TracingMarkWrite)
diff --git a/trappy/utils.py b/trappy/utils.py
index eb73752..47ef4ab 100644
--- a/trappy/utils.py
+++ b/trappy/utils.py
@@ -13,6 +13,9 @@
 # limitations under the License.
 #
 
+import pandas as pd
+import numpy as np
+
 """Generic functions that can be used in multiple places in trappy
 """
 
@@ -102,3 +105,59 @@ def handle_duplicate_index(data,
             dup_index_left += 1
 
     return data.reindex(new_index)
+
+# Iterate fast over all rows in a data frame and apply fn
+def apply_callback(df, fn, *kwargs):
+    iters = df.itertuples()
+    event_tuple = iters.next()
+
+    # Column names beginning with underscore will not be preserved in tuples
+    # due to constraints on namedtuple field names, so store mappings from
+    # column name to column number for each trace event.
+    col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) }
+
+    while True:
+        if not event_tuple:
+            break
+        event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() }
+
+        if kwargs:
+            fn(event_dict, kwargs)
+        else:
+            fn(event_dict)
+
+        event_tuple = next(iters, None)
+
+
+def merge_dfs(pr_df, sec_df, pivot):
+    # Keep track of last secondary event
+    pivot_map = {}
+
+    # An array accumating dicts with merged data
+    merged_data = []
+    def df_fn(data):
+        # Store the latest secondary info
+        if data['Time'][0] == 'secondary':
+            pivot_map[data[pivot]] = data
+            # Get rid of primary/secondary labels
+            data['Time'] = data['Time'][1]
+            return
+
+        # Propogate latest secondary info
+        for key, value in data.iteritems():
+            if key == pivot:
+                continue
+            # Fast check for if value is nan (faster than np.isnan + try/except)
+            if value != value and pivot_map.has_key(data[pivot]):
+                data[key] = pivot_map[data[pivot]][key]
+
+        # Get rid of primary/secondary labels
+        data['Time'] = data['Time'][1]
+        merged_data.append(data)
+
+    df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort(columns='__line')
+    apply_callback(df, df_fn)
+    merged_df = pd.DataFrame.from_dict(merged_data)
+    merged_df.set_index('Time', inplace=True)
+
+    return merged_df