diff options
Diffstat (limited to 'cr2/base.py')
-rw-r--r-- | cr2/base.py | 172 |
1 files changed, 172 insertions, 0 deletions
#!/usr/bin/python
"""Base class to parse trace.dat dumps"""

import os
import re
import subprocess
from collections import defaultdict

import pandas as pd

# A non-empty array field such as "load={1 1 2 2}"; group 1 is the text
# between the braces.
_PAT_ARRAY_FIELD = re.compile(r"[^ ]+=\{([^}]+)\}")
# Same idea but with a restricted name, used when measuring array lengths.
_PAT_NAMED_ARRAY = re.compile(r"([A-Za-z0-9_]+)=\{([^}]+)\}")
# Timestamp of a trace line: "<seconds>.<fraction>:".
_PAT_TIMESTAMP = re.compile(r"([0-9]+\.[0-9]+):")
# First "key=" on the line marks where the event data starts.
_PAT_DATA_START = re.compile(r"[A-Za-z0-9_]+=")
# Empty arrays ("name={}") carry no data.  The trailing space is optional
# so that an empty array at the very end of a line is removed as well.
_PAT_EMPTY_ARRAY = re.compile(r"[A-Za-z0-9_]+=\{\} ?")


def trace_parser_explode_array(string, array_lengths):
    """Explode an array in the trace into individual elements for easy parsing

    Basically, turn "load={1 1 2 2}" into "load0=1 load1=1 load2=2
    load3=2".  array_lengths is a dictionary of array names and their
    expected length.  If we get an array that's shorter than the
    expected length, additional keys are introduced with value 0 to
    compensate.  For example, "load={1 2}" with array_lengths being
    {"load": 4} returns "load0=1 load1=2 load2=0 load3=0".

    Array names that are missing from array_lengths are exploded
    without any padding.  Elements may be separated by any amount of
    whitespace.

    """

    def _explode(match):
        """Build the "name0=v0 name1=v1 ..." replacement for one array"""
        name = match.group(0).partition("=")[0]
        values = match.group(1).split()
        # A negative count yields an empty list, so arrays that are
        # already long enough are left unpadded.
        padding = ["0"] * (array_lengths.get(name, 0) - len(values))

        return " ".join("{}{}={}".format(name, idx, val)
                        for idx, val in enumerate(values + padding))

    # The replacement text never contains braces, so a single
    # left-to-right pass handles every array in the string.
    return _PAT_ARRAY_FIELD.sub(_explode, string)


class Base(object):
    """Base class to parse trace.dat dumps.

    Don't use directly, create a subclass that defines the unique_word
    you want to match in the output.

    After construction, data_frame holds one row per trace line that
    matched unique_word, indexed by the line's timestamp ("Time").
    """

    def __init__(self, basepath, unique_word):
        """Parse <basepath>/trace.txt, generating it first if needed

        basepath is the directory containing trace.dat and/or trace.txt
        (the current directory if None).  unique_word is searched for
        as a regular expression in every line of trace.txt; only
        matching lines are parsed.

        Raises IOError if neither trace.txt nor trace.dat exist in
        basepath.
        """
        if basepath is None:
            basepath = "."

        self.basepath = basepath
        self.data_frame = pd.DataFrame()
        self.unique_word = unique_word

        if not os.path.isfile(os.path.join(basepath, "trace.txt")):
            self.__run_trace_cmd_report()

        self.__parse_into_dataframe()

    def __run_trace_cmd_report(self):
        """Run "trace-cmd report > trace.txt".

        Overwrites the contents of trace.txt if it exists.  Raises
        IOError if trace.dat is not found in basepath.
        """
        trace_fname = os.path.join(self.basepath, "trace.dat")
        if not os.path.isfile(trace_fname):
            raise IOError("No such file or directory: {}".format(trace_fname))

        # devnull has to be opened for writing to be usable as the
        # child's stderr.
        with open(os.devnull, "w") as devnull:
            out = subprocess.check_output(["trace-cmd", "report", trace_fname],
                                          stderr=devnull)

        # check_output() returns bytes, so write them out unmodified in
        # binary mode.
        with open(os.path.join(self.basepath, "trace.txt"), "wb") as fout:
            fout.write(out)

    def get_trace_array_lengths(self, fname):
        """Calculate the lengths of all arrays in the trace

        Only lines of fname that match unique_word are considered.

        Returns a dict (a defaultdict(int)) with the name of each array
        found in the trace as keys and the biggest length seen for that
        array as value.

        """
        lengths = defaultdict(int)

        with open(fname) as fin:
            for line in fin:
                if not re.search(self.unique_word, line):
                    continue

                for (name, elements) in _PAT_NAMED_ARRAY.findall(line):
                    lengths[name] = max(lengths[name], len(elements.split()))

        return lengths

    def __parse_into_dataframe(self):
        """Parse the trace and create a pandas DataFrame

        Every line of trace.txt that matches unique_word becomes a row
        indexed by its timestamp.  The columns are the "key=value"
        fields of the line, with arrays exploded into one column per
        element.  Values that look like integers are converted to int,
        the rest are kept as strings.  Matching lines without a
        timestamp or without any "key=value" data are skipped.
        """
        fin_fname = os.path.join(self.basepath, "trace.txt")

        array_lengths = self.get_trace_array_lengths(fin_fname)

        parsed_data = []
        time_array = []

        with open(fin_fname) as fin:
            for line in fin:
                if not re.search(self.unique_word, line):
                    continue

                # Only drop the line terminator: the last line of the
                # file may not have one.
                line = line.rstrip("\n")

                timestamp_match = _PAT_TIMESTAMP.search(line)
                data_start_match = _PAT_DATA_START.search(line)
                if timestamp_match is None or data_start_match is None:
                    # unique_word matched something that is not a trace
                    # event with data; there is nothing to record.
                    continue

                data_str = line[data_start_match.start():]

                # Remove empty arrays from the trace
                data_str = _PAT_EMPTY_ARRAY.sub("", data_str)

                data_str = trace_parser_explode_array(data_str, array_lengths)

                line_data = {}
                for field in data_str.split():
                    # Split on the first "=" only: the value itself may
                    # contain "=".
                    (key, value) = field.split("=", 1)
                    try:
                        value = int(value)
                    except ValueError:
                        pass
                    line_data[key] = value

                time_array.append(float(timestamp_match.group(1)))
                parsed_data.append(line_data)

        time_idx = pd.Index(time_array, name="Time")
        self.data_frame = pd.DataFrame(parsed_data, index=time_idx)

    def write_csv(self, fname):
        """Write the data frame as csv to fname"""
        self.data_frame.to_csv(fname)

    def normalize_time(self, basetime):
        """Subtract basetime from the Time of the data frame

        Does nothing if basetime is None or 0.
        """
        if basetime:
            self.data_frame.reset_index(inplace=True)
            self.data_frame["Time"] = self.data_frame["Time"] - basetime
            self.data_frame.set_index("Time", inplace=True)