Diffstat (limited to 'cr2/base.py')
 -rw-r--r--  cr2/base.py | 172
 1 file changed, 172 insertions(+), 0 deletions(-)
diff --git a/cr2/base.py b/cr2/base.py
new file mode 100644
index 0000000..112fdea
--- /dev/null
+++ b/cr2/base.py
@@ -0,0 +1,172 @@
+#!/usr/bin/python
+"""Base class to parse trace.dat dumps"""
+
+import os
+import re
+import pandas as pd
+
+def trace_parser_explode_array(string, array_lengths):
+ """Explode an array in the trace into individual elements for easy parsing
+
+ Basically, turn "load={1 1 2 2}" into "load0=1 load1=1 load2=2
+ load3=2". array_lengths is a dictionary of array names and their
+    expected length. If we get an array that's shorter than the expected
+ length, additional keys have to be introduced with value 0 to
+ compensate. For example, "load={1 2}" with array_lengths being
+ {"load": 4} returns "load0=1 load1=2 load2=0 load3=0"
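+
+    A doctest-style run of that same example:
+
+    >>> trace_parser_explode_array("load={1 2}", {"load": 4})
+    'load0=1 load1=2 load2=0 load3=0'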
+
+ """
+
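+    # Explode one "name={...}" occurrence per pass until none remain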
+ while True:
+ match = re.search(r"[^ ]+={[^}]+}", string)
+ if match is None:
+ break
+
+ to_explode = match.group()
+ col_basename = re.match(r"([^=]+)=", to_explode).groups()[0]
+ vals_str = re.search(r"{(.+)}", to_explode).groups()[0]
+ vals_array = vals_str.split(' ')
+
+ exploded_str = ""
+ for (idx, val) in enumerate(vals_array):
+ exploded_str += "{}{}={} ".format(col_basename, idx, val)
+
+ vals_added = len(vals_array)
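+        # Pad with zero-valued elements up to the expected array length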
+ if vals_added < array_lengths[col_basename]:
+ for idx in range(vals_added, array_lengths[col_basename]):
+ exploded_str += "{}{}=0 ".format(col_basename, idx)
+
+ exploded_str = exploded_str[:-1]
+ begin_idx = match.start()
+ end_idx = match.end()
+
+ string = string[:begin_idx] + exploded_str + string[end_idx:]
+
+ return string
+
+class Base(object):
+ """Base class to parse trace.dat dumps.
+
+    Don't use directly; create a subclass that defines the unique_word
+    you want to match in the trace output.
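+
+    A minimal sketch of such a subclass (the event name "sched_load"
+    is hypothetical, not taken from any real trace):
+
+        class Load(Base):
+            def __init__(self, path=None):
+                super(Load, self).__init__(basepath=path,
+                                           unique_word="sched_load:")
+    """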
+ def __init__(self, basepath, unique_word):
+ if basepath is None:
+ basepath = "."
+
+ self.basepath = basepath
+ self.data_frame = pd.DataFrame()
+ self.unique_word = unique_word
+
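+        # Generate trace.txt from trace.dat only if it isn't already there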
+ if not os.path.isfile(os.path.join(basepath, "trace.txt")):
+ self.__run_trace_cmd_report()
+
+ self.__parse_into_dataframe()
+
+ def __run_trace_cmd_report(self):
+ """Run "trace-cmd report > trace.txt".
+
+ Overwrites the contents of trace.txt if it exists."""
+ from subprocess import check_output
+
+ trace_fname = os.path.join(self.basepath, "trace.dat")
+ if not os.path.isfile(trace_fname):
+ raise IOError("No such file or directory: {}".format(trace_fname))
+
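+        # Silence trace-cmd's stderr; we only want the report on stdout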
+        with open(os.devnull, "w") as devnull:
+ out = check_output(["trace-cmd", "report", trace_fname],
+ stderr=devnull)
+
+ with open(os.path.join(self.basepath, "trace.txt"), "w") as fout:
+ fout.write(out)
+
+ def get_trace_array_lengths(self, fname):
+ """Calculate the lengths of all arrays in the trace
+
+        Returns a dict with the name of each array found in the trace
+        as keys and the maximum length seen for that array as the value
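+
+        For example (values illustrative), if the longest "load" array
+        in any matching line is "load={1 2 3 4}", the returned dict
+        maps "load" to 4.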
+
+ """
+ from collections import defaultdict
+
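+        # Matches a single "name={elem elem ...}" array in a trace line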
+ pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")
+
+ ret = defaultdict(int)
+
+ with open(fname) as fin:
+ for line in fin:
+ if not re.search(self.unique_word, line):
+ continue
+
+ while True:
+ match = re.search(pat_array, line)
+ if not match:
+ break
+
+ (array_name, array_elements) = match.groups()
+
+ array_len = len(array_elements.split(' '))
+
+ if array_len > ret[array_name]:
+ ret[array_name] = array_len
+
+ line = line[match.end():]
+
+ return ret
+
+ def __parse_into_dataframe(self):
+ """parse the trace and create a pandas DataFrame"""
+
+ fin_fname = os.path.join(self.basepath, "trace.txt")
+
+ array_lengths = self.get_trace_array_lengths(fin_fname)
+
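+        # The timestamp looks like "12345.6789:"; the data payload is
+        # the run of "key=value" fields that follows it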
+ pat_timestamp = re.compile(r"([0-9]+\.[0-9]+):")
+ pat_data_start = re.compile("[A-Za-z0-9_]+=")
+ pat_empty_array = re.compile(r"[A-Za-z0-9_]+=\{\} ")
+
+ parsed_data = []
+ time_array = []
+
+ with open(fin_fname) as fin:
+ for line in fin:
+ if not re.search(self.unique_word, line):
+ continue
+
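+                # Chop off the trailing newline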
+ line = line[:-1]
+
+ timestamp_match = re.search(pat_timestamp, line)
+ timestamp = float(timestamp_match.group(1))
+ time_array.append(timestamp)
+
+ data_start_idx = re.search(pat_data_start, line).start()
+ data_str = line[data_start_idx:]
+
+ # Remove empty arrays from the trace
+                data_str = re.sub(pat_empty_array, "", data_str)
+
+ data_str = trace_parser_explode_array(data_str, array_lengths)
+
+ line_data = {}
+ for field in data_str.split():
+ (key, value) = field.split('=')
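+                    # Store numeric values as ints, everything else as strings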
+ try:
+ value = int(value)
+ except ValueError:
+ pass
+ line_data[key] = value
+
+ parsed_data.append(line_data)
+
+ time_idx = pd.Index(time_array, name="Time")
+ self.data_frame = pd.DataFrame(parsed_data, index=time_idx)
+
+ def write_csv(self, fname):
+        """Write the data frame as a csv file to fname"""
+ self.data_frame.to_csv(fname)
+
+ def normalize_time(self, basetime):
+        """Subtract basetime from the Time index of the data frame
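+
+        A sketch of the effect (values illustrative): an index of
+        [100.0, 100.5] with basetime 100.0 becomes [0.0, 0.5].
+        """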
+ if basetime:
+ self.data_frame.reset_index(inplace=True)
+ self.data_frame["Time"] = self.data_frame["Time"] - basetime
+ self.data_frame.set_index("Time", inplace=True)