1 files changed, 172 insertions, 0 deletions
diff --git a/cr2/base.py b/cr2/base.py
new file mode 100644
index 0000000..112fdea
--- /dev/null
+++ b/cr2/base.py
@@ -0,0 +1,172 @@
+#!/usr/bin/python
+"""Base class to parse trace.dat dumps"""
+
+import os
+import re
+import pandas as pd
+
+def trace_parser_explode_array(string, array_lengths):
+    """Explode the arrays in a trace line into individual elements
+
+    Basically, turn "load={1 1 2 2}" into "load0=1 load1=1 load2=2
+    load3=2".  array_lengths is a dictionary of array names and their
+    expected length.  An array that is shorter than its expected length
+    is padded with zero-valued elements: "load={1 2}" with array_lengths
+    being {"load": 4} returns "load0=1 load1=2 load2=0 load3=0".  Arrays
+    that are missing from array_lengths, or longer than expected, are
+    exploded as they are.  The name of an array is the run of characters
+    other than blank and "=" right before its "={".  Empty arrays
+    ("load={}") are not touched.
+
+    Returns the string with every non-empty array exploded.
+
+    """
+
+    def explode(match):
+        """Return the replacement text for one "name={...}" match"""
+        (col_basename, vals_str) = match.groups()
+
+        # split() with no argument drops the empty strings that repeated
+        # or surrounding blanks would otherwise turn into bogus elements
+        vals_array = vals_str.split()
+
+        # .get() instead of []: an array that a plain dict doesn't list
+        # is left unpadded rather than raising KeyError
+        expected_len = array_lengths.get(col_basename, 0)
+        vals_array += ["0"] * (expected_len - len(vals_array))
+
+        return " ".join("{}{}={}".format(col_basename, idx, val)
+                        for (idx, val) in enumerate(vals_array))
+
+    # One re.sub() pass replaces every array in the string, there is no
+    # need to search it again from the start after each substitution
+    pat_array = r"([^ =]+)=\{([^}]+)\}"
+
+    return re.sub(pat_array, explode, string)
+
+class Base(object):
+    """Base class to parse trace.dat dumps.
+
+    Don't use directly, create a subclass that defines the unique_word
+    you want to match in the output"""
+    def __init__(self, basepath, unique_word):
+        if basepath is None:
+            basepath = "."
+
+        self.basepath = basepath
+        self.data_frame = pd.DataFrame()
+        self.unique_word = unique_word
+
+        if not os.path.isfile(os.path.join(basepath, "trace.txt")):
+            self.__run_trace_cmd_report()
+
+        self.__parse_into_dataframe()
+
+    def __run_trace_cmd_report(self):
+        """Run "trace-cmd report" on trace.dat and save it in trace.txt.
+
+        Overwrites trace.txt.  Raises IOError if trace.dat is missing."""
+        from subprocess import check_output
+
+        trace_fname = os.path.join(self.basepath, "trace.dat")
+        if not os.path.isfile(trace_fname):
+            raise IOError("No such file or directory: {}".format(trace_fname))
+
+        # The child writes to stderr: devnull must be opened for writing
+        with open(os.devnull, "w") as devnull:
+            out = check_output(["trace-cmd", "report", trace_fname],
+                               stderr=devnull)
+
+        # check_output() returns bytes, write them out as they are
+        with open(os.path.join(self.basepath, "trace.txt"), "wb") as fout:
+            fout.write(out)
+
+    def get_trace_array_lengths(self, fname):
+        """Calculate the lengths of all arrays in the trace
+
+        Only the lines of fname that match unique_word are looked at.
+        Returns a defaultdict(int) with the name of each array found in
+        the trace as keys and its longest length as value
+
+        """
+        from collections import defaultdict
+
+        pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")
+
+        ret = defaultdict(int)
+
+        with open(fname) as fin:
+            for line in fin:
+                if not re.search(self.unique_word, line):
+                    continue
+
+                for (array_name, array_elements) in pat_array.findall(line):
+                    # split() with no argument: repeated blanks must not
+                    # be counted as elements
+                    array_len = len(array_elements.split())
+                    ret[array_name] = max(ret[array_name], array_len)
+
+        return ret
+
+    def __parse_into_dataframe(self):
+        """Parse trace.txt and store the result in data_frame
+
+        Every line that matches unique_word and has a timestamp and
+        key=value fields becomes a row indexed by that timestamp.
+        Integer values are converted to int, the rest stay strings."""
+        fin_fname = os.path.join(self.basepath, "trace.txt")
+        array_lengths = self.get_trace_array_lengths(fin_fname)
+
+        pat_timestamp = re.compile(r"([0-9]+\.[0-9]+):")
+        pat_data_start = re.compile("[A-Za-z0-9_]+=")
+        # An empty array is followed by a blank or by the end of the line
+        pat_empty_array = re.compile(r"[A-Za-z0-9_]+=\{\}(?: |$)")
+
+        parsed_data = []
+        time_array = []
+
+        with open(fin_fname) as fin:
+            for line in fin:
+                if not re.search(self.unique_word, line):
+                    continue
+
+                # Strip the end of line only: the last line may not have one
+                line = line.rstrip("\n")
+
+                timestamp_match = pat_timestamp.search(line)
+                data_start_match = pat_data_start.search(line)
+                if timestamp_match is None or data_start_match is None:
+                    # Nothing to put in the frame, skip it instead of failing
+                    continue
+
+                data_str = line[data_start_match.start():]
+                # Remove empty arrays from the trace
+                data_str = pat_empty_array.sub("", data_str)
+                data_str = trace_parser_explode_array(data_str, array_lengths)
+
+                line_data = {}
+                for field in data_str.split():
+                    # Split on the first "=" only, the value may have more
+                    (key, value) = field.split("=", 1)
+                    try:
+                        value = int(value)
+                    except ValueError:
+                        pass
+                    line_data[key] = value
+
+                time_array.append(float(timestamp_match.group(1)))
+                parsed_data.append(line_data)
+
+        time_idx = pd.Index(time_array, name="Time")
+        self.data_frame = pd.DataFrame(parsed_data, index=time_idx)
+
+    def write_csv(self, fname):
+        """Write the data frame to fname in csv format"""
+        self.data_frame.to_csv(fname)
+
+    def normalize_time(self, basetime):
+        """Subtract basetime from the Time of the data frame"""
+        if basetime:
+            self.data_frame.reset_index(inplace=True)
+            self.data_frame["Time"] = self.data_frame["Time"] - basetime
+            self.data_frame.set_index("Time", inplace=True)