cr2/base.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

#!/usr/bin/python
"""Base class to parse trace.dat dumps"""

import os
import re
import pandas as pd

def trace_parser_explode_array(string, array_lengths):
    """Explode an array in the trace into individual elements for easy parsing

    Basically, turn "load={1 1 2 2}" into "load0=1 load1=1 load2=2
    load3=2".  array_lengths is a dictionary of array names and their
    expected length.  If we get array that's shorter than the expected
    length, additional keys have to be introduced with value 0 to
    compensate.  For example, "load={1 2}" with array_lengths being
    {"load": 4} returns "load0=1 load1=2 load2=0 load3=0"

    Arrays whose name is not a key of array_lengths are exploded
    without any padding.

    Args:
        string: the "key=value ..." part of a trace line.
        array_lengths: mapping of array name to the number of elements
            each occurrence of that array should be expanded to.

    Returns:
        string with every "name={v0 v1 ...}" replaced by
        "name0=v0 name1=v1 ...".  Text outside the arrays is left
        untouched.

    """

    pat_array = re.compile(r"[^ ]+={[^}]+}")

    # Keep searching from the start of the (rewritten) string until no
    # array is left; each pass replaces the leftmost remaining array.
    while True:
        match = pat_array.search(string)
        if match is None:
            break

        to_explode = match.group()
        col_basename = re.match(r"([^=]+)=", to_explode).group(1)
        vals_array = re.search(r"{(.+)}", to_explode).group(1).split(' ')

        fields = ["{}{}={}".format(col_basename, idx, val)
                  for (idx, val) in enumerate(vals_array)]

        # Pad short arrays with zeros up to the expected length.  An
        # array missing from array_lengths has no expected length, so it
        # gets no padding instead of raising KeyError.
        expected_len = array_lengths.get(col_basename, 0)
        fields.extend("{}{}=0".format(col_basename, idx)
                      for idx in range(len(vals_array), expected_len))

        string = (string[:match.start()] + " ".join(fields) +
                  string[match.end():])

    return string

class Base(object):
    """Base class to parse trace.dat dumps.

    Don't use directly, create a subclass that defines the unique_word
    you want to match in the output.

    On construction, trace.txt is looked up in basepath (and generated
    from trace.dat with "trace-cmd report" if it is missing).  Every
    line of it that matches unique_word is then parsed into
    self.data_frame, a pandas DataFrame indexed by the line's timestamp
    ("Time") with one column per "key=value" field.
    """

    def __init__(self, basepath, unique_word):
        """Parse the trace found in basepath.

        Args:
            basepath: directory containing trace.txt or trace.dat.
                None means the current directory.
            unique_word: pattern passed to re.search() to select the
                lines of trace.txt that are parsed.

        Raises:
            IOError: if neither trace.txt nor trace.dat is in basepath.
        """
        if basepath is None:
            basepath = "."

        # Directory holding trace.dat / trace.txt
        self.basepath = basepath
        # Parsed trace; filled in by __parse_into_dataframe()
        self.data_frame = pd.DataFrame()
        # Pattern that selects the trace lines of interest
        self.unique_word = unique_word

        if not os.path.isfile(os.path.join(basepath, "trace.txt")):
            self.__run_trace_cmd_report()

        self.__parse_into_dataframe()

    def __run_trace_cmd_report(self):
        """Run "trace-cmd report > trace.txt".

        Overwrites the contents of trace.txt if it exists.

        Raises:
            IOError: if trace.dat does not exist in basepath.
        """
        from subprocess import check_output

        trace_fname = os.path.join(self.basepath, "trace.dat")
        if not os.path.isfile(trace_fname):
            raise IOError("No such file or directory: {}".format(trace_fname))

        # The child writes to its stderr, so devnull has to be opened
        # for writing.
        with open(os.devnull, "w") as devnull:
            out = check_output(["trace-cmd", "report", trace_fname],
                               stderr=devnull)

        # check_output() returns the raw bytes of the report: write them
        # in binary mode so that this works on both python 2 and 3.
        with open(os.path.join(self.basepath, "trace.txt"), "wb") as fout:
            fout.write(out)

    def get_trace_array_lengths(self, fname):
        """Calculate the lengths of all arrays in the trace

        Only the lines of fname that match unique_word are considered.

        Returns a dict with the name of each array found in the trace
        as keys and their corresponding length as value.  The length
        of an array is the biggest number of elements seen for it.

        """
        from collections import defaultdict

        pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")

        ret = defaultdict(int)

        with open(fname) as fin:
            for line in fin:
                if not re.search(self.unique_word, line):
                    continue

                for match in pat_array.finditer(line):
                    (array_name, array_elements) = match.groups()
                    array_len = len(array_elements.split(' '))

                    if array_len > ret[array_name]:
                        ret[array_name] = array_len

        return ret

    def __parse_into_dataframe(self):
        """parse the trace and create a pandas DataFrame

        Reads trace.txt from basepath and stores the result in
        self.data_frame: one row per line matching unique_word, indexed
        by the line's timestamp, one column per "key=value" field.
        Values that look like integers are converted to int, the rest
        are kept as strings.
        """

        fin_fname = os.path.join(self.basepath, "trace.txt")

        array_lengths = self.get_trace_array_lengths(fin_fname)

        pat_timestamp = re.compile(r"([0-9]+\.[0-9]+):")
        pat_data_start = re.compile("[A-Za-z0-9_]+=")
        # An empty array is followed by a space, unless it is the last
        # field of the line.
        pat_empty_array = re.compile(r"[A-Za-z0-9_]+=\{\}(?: |$)")

        parsed_data = []
        time_array = []

        with open(fin_fname) as fin:
            for line in fin:
                if not re.search(self.unique_word, line):
                    continue

                # Only drop the line terminator: the last line of the
                # file may not have one.
                line = line.rstrip("\n")

                timestamp_match = pat_timestamp.search(line)
                time_array.append(float(timestamp_match.group(1)))

                # The data is everything from the first "key=" onwards
                data_start_idx = pat_data_start.search(line).start()
                data_str = line[data_start_idx:]

                # Remove empty arrays from the trace
                data_str = pat_empty_array.sub("", data_str)

                # Lines without any "name={...}" have nothing to explode
                if "={" in data_str:
                    data_str = trace_parser_explode_array(data_str,
                                                          array_lengths)

                line_data = {}
                for field in data_str.split():
                    # Split on the first "=" only, the value may
                    # contain more of them
                    (key, value) = field.split('=', 1)
                    try:
                        value = int(value)
                    except ValueError:
                        # Not a number, keep it as a string
                        pass
                    line_data[key] = value

                parsed_data.append(line_data)

        time_idx = pd.Index(time_array, name="Time")
        self.data_frame = pd.DataFrame(parsed_data, index=time_idx)

    def write_csv(self, fname):
        """Write the data frame in csv format to fname"""
        self.data_frame.to_csv(fname)

    def normalize_time(self, basetime):
        """Subtract basetime from the Time of the data frame

        Does nothing if basetime is None or 0.
        """
        if basetime:
            # "Time" is the index: turn it into a regular column to
            # operate on it and then restore it as the index.
            self.data_frame.reset_index(inplace=True)
            self.data_frame["Time"] = self.data_frame["Time"] - basetime
            self.data_frame.set_index("Time", inplace=True)