#!/usr/bin/python
"""Base class to parse trace.dat dumps"""

import os
import re
import pandas as pd

def trace_parser_explode_array(string, array_lengths):
    """Explode an array in the trace into individual elements for easy parsing

    Basically, turn "load={1 1 2 2}" into "load0=1 load1=1 load2=2
    load3=2".  array_lengths is a dictionary of array names and their
    expected length.  If we get array that's shorter than the expected
    length, additional keys have to be introduced with value 0 to
    compensate.  For example, "load={1 2}" with array_lengths being
    {"load": 4} returns "load0=1 load1=2 load2=0 load3=0"

    Arrays whose name is not a key of array_lengths are exploded
    as-is, without any padding.  Text outside of "name={...}" groups
    is returned untouched.

    """

    while True:
        # Always re-scan from the start: the previous replacement
        # changed the length of the string.
        match = re.search(r"[^ ]+={[^}]+}", string)
        if match is None:
            break

        to_explode = match.group()
        col_basename = re.match(r"([^=]+)=", to_explode).group(1)
        vals_array = re.search(r"{(.+)}", to_explode).group(1).split(' ')

        # .get() rather than [] so that a plain dict that does not
        # describe this array does not raise KeyError.  Multiplying a
        # list by a negative count yields [], so arrays that are
        # already long enough are left alone.
        expected_len = array_lengths.get(col_basename, 0)
        vals_array.extend(["0"] * (expected_len - len(vals_array)))

        exploded_str = " ".join(
            "{}{}={}".format(col_basename, idx, val)
            for (idx, val) in enumerate(vals_array))

        string = string[:match.start()] + exploded_str + string[match.end():]

    return string

class Base(object):
    """Base class to parse trace.dat dumps.

    Don't use directly, create a subclass that defines the unique_word
    you want to match in the output.

    On construction, trace.txt is looked up in basepath (and generated
    from trace.dat with trace-cmd if it is missing).  Every line of it
    that matches unique_word is parsed into self.data_frame, a pandas
    DataFrame indexed by the timestamp of the line ("Time").
    """
    def __init__(self, basepath, unique_word):
        """Parse the trace found in basepath

        basepath is the directory that holds trace.txt or trace.dat;
        None means the current directory.  unique_word is a regular
        expression: only the lines it matches are parsed.

        Raises IOError if neither trace.txt nor trace.dat exist.
        """
        if basepath is None:
            basepath = "."

        self.basepath = basepath
        self.data_frame = pd.DataFrame()
        self.unique_word = unique_word

        if not os.path.isfile(os.path.join(basepath, "trace.txt")):
            self.__run_trace_cmd_report()

        self.__parse_into_dataframe()

    def __run_trace_cmd_report(self):
        """Run "trace-cmd report > trace.txt".

        Overwrites the contents of trace.txt if it exists.

        Raises IOError if trace.dat is not in basepath."""
        from subprocess import check_output

        trace_fname = os.path.join(self.basepath, "trace.dat")
        if not os.path.isfile(trace_fname):
            raise IOError("No such file or directory: {}".format(trace_fname))

        # devnull has to be opened for writing: it becomes the stderr
        # of trace-cmd.
        with open(os.devnull, "w") as devnull:
            out = check_output(["trace-cmd", "report", trace_fname],
                               stderr=devnull)

        # check_output() returns bytes in python3, so write the file
        # in binary mode.  This is also correct for python2's str.
        with open(os.path.join(self.basepath, "trace.txt"), "wb") as fout:
            fout.write(out)

    def get_trace_array_lengths(self, fname):
        """Calculate the lengths of all arrays in the trace

        Only the lines of fname that match unique_word are considered.

        Returns a dict with the name of each array found in the trace
        as keys and their corresponding length as value.  The length
        is the longest one seen for that name.  The dict is a
        defaultdict: names that were not found have length 0.

        """
        from collections import defaultdict

        pat_array = re.compile(r"([A-Za-z0-9_]+)={([^}]+)}")

        ret = defaultdict(int)

        with open(fname) as fin:
            for line in fin:
                if not re.search(self.unique_word, line):
                    continue

                for match in pat_array.finditer(line):
                    (array_name, array_elements) = match.groups()
                    array_len = len(array_elements.split(' '))
                    ret[array_name] = max(ret[array_name], array_len)

        return ret

    def __parse_into_dataframe(self):
        """parse the trace and create a pandas DataFrame

        Every line of trace.txt that matches unique_word becomes a row
        of self.data_frame.  The row is indexed by the timestamp of
        the line and has one column per "key=value" field.  Arrays are
        exploded into one column per element.  Values that look like
        integers are stored as int, the rest are kept as strings.

        Matching lines that have no timestamp or no "key=" data (for
        example, lines of the header of the report) are skipped.
        """

        fin_fname = os.path.join(self.basepath, "trace.txt")

        array_lengths = self.get_trace_array_lengths(fin_fname)

        pat_unique_word = re.compile(self.unique_word)
        pat_timestamp = re.compile(r"([0-9]+\.[0-9]+):")
        pat_data_start = re.compile("[A-Za-z0-9_]+=")
        # An empty array can be followed by a space or be the last
        # thing in the line
        pat_empty_array = re.compile(r"[A-Za-z0-9_]+=\{\}( |$)")

        parsed_data = []
        time_array = []

        with open(fin_fname) as fin:
            for line in fin:
                if not pat_unique_word.search(line):
                    continue

                # Only strip the line terminator: the last line of the
                # file may not have one.
                line = line.rstrip("\r\n")

                timestamp_match = pat_timestamp.search(line)
                data_start_match = pat_data_start.search(line)
                if timestamp_match is None or data_start_match is None:
                    # Not a trace event, nothing to parse
                    continue

                data_str = line[data_start_match.start():]

                # Remove empty arrays from the trace
                data_str = pat_empty_array.sub("", data_str)

                data_str = trace_parser_explode_array(data_str, array_lengths)

                line_data = {}
                for field in data_str.split():
                    # Split at the first "=" only, the value may
                    # contain more of them.
                    (key, sep, value) = field.partition("=")
                    if not sep:
                        # Stray token that is not a "key=value" pair
                        continue

                    try:
                        value = int(value)
                    except ValueError:
                        pass
                    line_data[key] = value

                # Append both together so that the index and the rows
                # always have the same length.
                time_array.append(float(timestamp_match.group(1)))
                parsed_data.append(line_data)

        time_idx = pd.Index(time_array, name="Time")
        self.data_frame = pd.DataFrame(parsed_data, index=time_idx)

    def write_csv(self, fname):
        """Write the data frame to fname in csv format"""
        self.data_frame.to_csv(fname)

    def normalize_time(self, basetime):
        """Subtract basetime from the Time of the data frame

        Nothing is done if basetime is None or 0."""
        if basetime:
            self.data_frame.reset_index(inplace=True)
            self.data_frame["Time"] = self.data_frame["Time"] - basetime
            self.data_frame.set_index("Time", inplace=True)