#    Copyright 2015-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# pylint can't see any of the dynamically allocated classes of Run
# pylint: disable=no-member

import os
import re
import pandas as pd

import trappy.plot_utils

def _plot_freq_hists(allfreqs, what, axis, title):
    """Draw one frequency histogram per actor found in allfreqs

    allfreqs is the output of a Cpu*Power().get_all_freqs() (for
    example, CpuInPower.get_all_freqs()).  what is a string, "in" or
    "out", that ends up in the title of every histogram.  axis is a
    sequence of axes that is paired up, in order, with the actors of
    allfreqs.  title is handed to normalize_title() together with the
    per-actor title.

    """
    plot_utils = trappy.plot_utils

    for actor_axis, actor in zip(axis, allfreqs):
        actor_freqs = allfreqs[actor]
        hist_title = plot_utils.normalize_title(
            "freq {} {}".format(what, actor), title)

        # The x axis always goes from 0 up to the highest frequency seen
        freq_range = (0, actor_freqs.max())

        plot_utils.plot_hist(actor_freqs, actor_axis, hist_title, "KHz", 20,
                             "Frequency", freq_range, "default")

class Run(object):
    """A wrapper class that initializes all the classes of a given run

The run class can receive the following optional parameters.

path contains the path to the trace file.  If no path is given, it
uses the current directory by default.  If path is a file, and ends in
.dat, it's run through "trace-cmd report".  If it doesn't end in
".dat", then it must be the output of a trace-cmd report run.  If path
is a directory that contains a trace.txt, that is assumed to be the
output of "trace-cmd report".  If path is a directory that doesn't
have a trace.txt but has a trace.dat, it runs trace-cmd report on the
trace.dat, saves it in trace.txt and then uses that.

name is a string describing the trace.

normalize_time is used to make all traces start from time 0 (the
default).  If normalize_time is False, the trace times are the same as
in the trace file.

scope can be used to limit the parsing done on the trace.  The default
scope parses all the traces known to trappy.  If scope is thermal, only
the thermal classes are parsed.  If scope is sched, only the sched
classes are parsed.

    """

    # Registries filled in by register_class().  They map the attribute
    # name under which a parser is exposed in a Run instance to the name
    # of the parser class.  They are shared by all the instances.
    thermal_classes = {}

    sched_classes = {}

    dynamic_classes = {}

    def __init__(self, path=".", name="", normalize_time=True, scope="all"):
        self.name = name
        self.trace_path, self.trace_path_raw = self.__process_path(path)

        # Classes registered with the "all" scope are always parsed
        self.class_definitions = self.dynamic_classes.copy()

        if scope == "thermal":
            self.class_definitions.update(self.thermal_classes)
        elif scope == "sched":
            self.class_definitions.update(self.sched_classes)
        else:
            # Any other scope means "everything".  sched goes last so
            # that it wins if a name is registered in both.
            self.class_definitions.update(self.thermal_classes)
            self.class_definitions.update(self.sched_classes)

        # Instantiate every parser and expose it as an attribute of the run
        self.trace_classes = []
        for attr, class_name in self.class_definitions.items():
            trace_class = globals()[class_name]()
            setattr(self, attr, trace_class)
            self.trace_classes.append(trace_class)

        self.__parse_trace_file()
        self.__parse_trace_file(raw=True)
        self.__finalize_objects()

        if normalize_time:
            basetime = self.get_basetime()
            self.normalize_time(basetime)

    def __process_path(self, basepath):
        """Process the path and return the paths to the trace text files

        Returns a tuple (trace_txt, trace_raw) with the path to the
        output of "trace-cmd report" and the path to the output of
        "trace-cmd report -R".  trace_raw is None if there is no raw
        trace and it can't be generated.  trace-cmd is run when the
        text traces have to be created from a .dat file, which raises
        IOError if the .dat file does not exist.

        """

        if os.path.isfile(basepath):
            trace_name = os.path.splitext(basepath)[0]
            trace_raw = trace_name + ".raw.txt"
            trace_txt = trace_name + ".txt"

            if basepath.endswith(".dat"):
                self.__run_trace_cmd_report(basepath)
            else:
                # Any file that is not a .dat is taken to be the output
                # of "trace-cmd report", whatever its extension is
                trace_txt = basepath
                if not os.path.isfile(trace_raw):
                    trace_raw = None
        else:
            trace_txt = os.path.join(basepath, "trace.txt")
            trace_raw = os.path.join(basepath, "trace.raw.txt")
            trace_dat = os.path.join(basepath, "trace.dat")

            if not os.path.isfile(trace_txt):
                self.__run_trace_cmd_report(trace_dat)

            # The condition below handles the following case:
            # trace.dat and trace.txt are both present but
            # trace.raw.txt is not.  We can still generate the
            # trace.raw.txt from the trace.dat
            if not os.path.isfile(trace_raw):
                if os.path.isfile(trace_dat):
                    self.__run_trace_cmd_report(trace_dat)
                else:
                    trace_raw = None

        return trace_txt, trace_raw

    def __run_trace_cmd_report(self, fname):
        """Run "trace-cmd report fname > fname.txt"
           and "trace-cmd report -R fname > fname.raw.txt"

        The resulting traces are stored in files with extension ".txt"
        and ".raw.txt" respectively.  If fname is "my_trace.dat", the
        trace is stored in "my_trace.txt" and "my_trace.raw.txt".  The
        contents of the destination files are overwritten if they
        exist.

        Raises IOError if fname is not an existing file.

        """
        from subprocess import check_output

        if not os.path.isfile(fname):
            raise IOError("No such file or directory: {}".format(fname))

        trace_basename = os.path.splitext(fname)[0]
        trace_output = trace_basename + ".txt"
        raw_trace_output = trace_basename + ".raw.txt"

        # devnull is opened for writing because it is used as the
        # stderr of trace-cmd
        with open(os.devnull, "w") as devnull:
            out = check_output(["trace-cmd", "report", fname],
                               stderr=devnull)

            # Add the -R flag to the trace-cmd
            # for raw parsing
            raw_out = check_output(["trace-cmd", "report", "-R", fname],
                                   stderr=devnull)

        # check_output() returns bytes in python3, so the files are
        # written in binary mode
        with open(trace_output, "wb") as fout:
            fout.write(out)

        with open(raw_trace_output, "wb") as fout:
            fout.write(raw_out)

    def get_basetime(self):
        """Returns the smallest time value of all classes,
        returns 0 if the data frames of all classes are empty"""
        basetimes = []

        for trace_class in self.trace_classes:
            try:
                basetimes.append(trace_class.data_frame.index[0])
            except IndexError:
                # Empty data frame, it has no say in the basetime
                pass

        if not basetimes:
            return 0

        return min(basetimes)

    def get_duration(self):
        """Returns the difference between the largest time value of
        all classes and the basetime, returns 0 if the data frames of
        all classes are empty"""
        durations = []

        for trace_class in self.trace_classes:
            try:
                durations.append(trace_class.data_frame.index[-1])
            except IndexError:
                # Empty data frame, it has no say in the duration
                pass

        if not durations:
            return 0

        return max(durations) - self.get_basetime()

    @classmethod
    def register_class(cls, cobject, scope="all"):
        """Register a class so that Run instances create and fill it

        cobject is the class to register.  An object of that class is
        stored in every Run created afterwards, in the attribute named
        after cobject.name.  scope is "all" to always parse the class
        or the prefix of one of the *_classes dictionaries of Run
        ("thermal" or "sched") to parse it only for that scope.

        """
        # Add the class to the classes dictionary
        if scope == "all":
            cls.dynamic_classes[cobject.name] = cobject.__name__
        else:
            getattr(cls, scope + "_classes")[cobject.name] = cobject.__name__

        # __init__() looks up the class by name in this module's globals
        globals()[cobject.__name__] = cobject

    def get_filters(self, key=""):
        """Returns an array with the available filters.
        If 'key' is specified, returns a subset of the available filters
        that contain 'key' in their name (e.g., key="sched" returns
        only the "sched" related filters).  'key' is used as a regular
        expression that is searched for in the name of each filter."""
        return [cls for cls in self.class_definitions if re.search(key, cls)]

    def normalize_time(self, basetime):
        """Normalize the time of all the trace classes"""
        for trace_class in self.trace_classes:
            trace_class.normalize_time(basetime)

    def __contains_unique_word(self, line, unique_words):
        """Return the trace name of the first unique word found in line

        unique_words is a list of tuples (unique_word, trace_name).
        Returns None if the line contains none of the unique words.

        """

        for unique_word, trace_name in unique_words:
            if unique_word in line:
                return trace_name
        return None


    def __populate_metadata(self, trace_fh, unique_words):
        """Populates trace metadata

        Reads lines from trace_fh looking for "version = N" and
        "cpus = N" and stores the values, as strings, in self._version
        and self._cpus.  They are left as None if they are not found.
        Reading stops as soon as all the metadata has been found, a
        valid trace line has been parsed or the file is exhausted.

        """

        # Meta Data as expected to be found in the parsed trace header
        metadata_keys = ["version", "cpus"]

        for key in metadata_keys:
            setattr(self, "_" + key, None)

        while metadata_keys:
            line = trace_fh.readline()

            #The trace has been exhausted
            if not line:
                return

            # Only look for the keys that have not been found yet
            metadata_pattern = r"^\b(" + "|".join(metadata_keys) + \
                               r")\b\s*=\s*([0-9]+)"
            match = re.search(metadata_pattern, line)
            if match:
                setattr(self, "_" + match.group(1), match.group(2))
                metadata_keys.remove(match.group(1))

            # Reached a valid trace line, abort metadata population
            elif self.__populate_data_from_line(line, unique_words):
                return

    def __populate_data_from_line(self, line, unique_words):
        """Append to trace data from a txt trace line

        Returns True if the line was handed to one of the trace
        classes.  Returns False if the line has none of the unique
        words, it doesn't start with "comm-pid [cpu] timestamp:" or it
        has no "key=value" data.

        """

        attr = self.__contains_unique_word(line, unique_words)
        if not attr:
            return False

        # Only drop the end of line: the last line of the file may not
        # have one
        line = line.rstrip("\n")

        special_fields_match = re.search(r"^\s+([^\[]+)-(\d+)\s+\[(\d+)\]\s+([0-9]+\.[0-9]+):",
                                         line)
        if special_fields_match is None:
            # The unique word is there but this is not a trace event
            return False

        comm = special_fields_match.group(1)
        pid = int(special_fields_match.group(2))
        cpu = int(special_fields_match.group(3))
        timestamp = float(special_fields_match.group(4))

        data_match = re.search(r"[A-Za-z0-9_]+=", line)
        if data_match is None:
            return False

        data_str = line[data_match.start():]

        # Remove empty arrays from the trace
        data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)

        getattr(self, attr).append_data(timestamp, comm, pid, cpu,
                                        data_str)
        return True

    def __parse_trace_file(self, raw=False):
        """Parse the trace and feed its lines to the trace classes

        If raw is False, the text trace is parsed for the classes
        whose parse_raw is False.  If raw is True, the raw trace is
        parsed for the classes whose parse_raw is True.  Nothing is
        done if no class is interested or if there is no raw trace.

        """

        # Memoize the unique words to speed up parsing the trace file
        unique_words = []
        for trace_name in self.class_definitions:
            trace_class = getattr(self, trace_name)

            if trace_class.parse_raw != raw:
                continue

            unique_words.append((trace_class.unique_word, trace_name))

        if not unique_words:
            return

        if raw:
            if self.trace_path_raw is None:
                return
            trace_file = self.trace_path_raw
        else:
            trace_file = self.trace_path

        with open(trace_file) as fin:
            # The metadata is at the beginning of the trace; the
            # remaining lines are read from where it stopped
            self.__populate_metadata(fin, unique_words)

            for line in fin:
                self.__populate_data_from_line(line, unique_words)

    def __finalize_objects(self):
        """Create the data frame of every trace class and finalize it"""
        for trace_class in self.trace_classes:
            trace_class.create_dataframe()
            trace_class.finalize_object()

    def get_all_freqs_data(self, map_label):
        """get an array of tuple of names and DataFrames suitable for the
        allfreqs plot

        There is one tuple for each of the values of map_label.  Its
        DataFrame has the columns "<label>_freq_in" and
        "<label>_freq_out".  A ("GPU", DataFrame) tuple with the
        columns "gpu_freq_in" and "gpu_freq_out" is appended unless
        getting the devfreq frequencies raises KeyError.  Missing
        values are filled with the previous valid one.

        """

        cpu_in_freqs = self.cpu_in_power.get_all_freqs(map_label)
        cpu_out_freqs = self.cpu_out_power.get_all_freqs(map_label)

        ret = []
        for label in map_label.values():
            in_label = label + "_freq_in"
            out_label = label + "_freq_out"

            cpu_inout_freq_dict = {in_label: cpu_in_freqs[label],
                                   out_label: cpu_out_freqs[label]}
            dfr = pd.DataFrame(cpu_inout_freq_dict).ffill()
            ret.append((label, dfr))

        try:
            gpu_freq_in_data = self.devfreq_in_power.get_all_freqs()
            gpu_freq_out_data = self.devfreq_out_power.get_all_freqs()
        except KeyError:
            gpu_freq_in_data = gpu_freq_out_data = None

        if gpu_freq_in_data is not None:
            inout_freq_dict = {"gpu_freq_in": gpu_freq_in_data["freq"],
                               "gpu_freq_out": gpu_freq_out_data["freq"]
                              }
            dfr = pd.DataFrame(inout_freq_dict).ffill()
            ret.append(("GPU", dfr))

        return ret

    def plot_freq_hists(self, map_label, ax):
        """Plot histograms for each actor input and output frequency

        ax is an array of axis.  Its first half is used for the
        histograms of the output frequencies and the rest for the
        histograms of the input frequencies.

        """

        # Integer division: this is used as a slice index
        in_base_idx = len(ax) // 2

        try:
            devfreq_out_all_freqs = self.devfreq_out_power.get_all_freqs()
            devfreq_in_all_freqs = self.devfreq_in_power.get_all_freqs()
        except KeyError:
            devfreq_out_all_freqs = None
            devfreq_in_all_freqs = None

        out_allfreqs = ("out", self.cpu_out_power.get_all_freqs(map_label),
                        devfreq_out_all_freqs, ax[0:in_base_idx])
        in_allfreqs = ("in", self.cpu_in_power.get_all_freqs(map_label),
                       devfreq_in_all_freqs, ax[in_base_idx:])

        for what, cpu_allfreqs, devfreq_freqs, axis in (out_allfreqs,
                                                        in_allfreqs):
            if devfreq_freqs is not None:
                devfreq_freqs.name = "GPU"
                allfreqs = pd.concat([cpu_allfreqs, devfreq_freqs], axis=1)
            else:
                allfreqs = cpu_allfreqs

            allfreqs.ffill(inplace=True)
            _plot_freq_hists(allfreqs, what, axis, self.name)

    def plot_load(self, mapping_label, title="", width=None, height=None,
                  ax=None):
        """plot the load of all the clusters, similar to how compare runs did it

        the mapping_label has to be a dict whose keys are the cluster
        numbers as found in the trace and values are the names that
        will appear in the legend.

        The load of the GPU is added to the plot unless the devfreq
        data has no "load".  If ax is not given, a new plot is set up
        using width and height.

        """

        load_data = self.cpu_in_power.get_load_data(mapping_label)
        try:
            gpu_data = pd.DataFrame({"GPU":
                                     self.devfreq_in_power.data_frame["load"]})
            load_data = pd.concat([load_data, gpu_data], axis=1)
        except KeyError:
            # No GPU load in the trace, plot only the clusters
            pass

        load_data = load_data.ffill()
        title = trappy.plot_utils.normalize_title("Utilization", title)

        if not ax:
            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)

        load_data.plot(ax=ax)

        trappy.plot_utils.post_plot_setup(ax, title=title)

    def plot_normalized_load(self, mapping_label, title="", width=None,
                             height=None, ax=None):
        """plot the normalized load of all the clusters, similar to how compare runs did it

        the mapping_label has to be a dict whose keys are the cluster
        numbers as found in the trace and values are the names that
        will appear in the legend.

        If the devfreq data has a "load", the GPU is added to the plot
        with its load scaled by its frequency over its maximum
        frequency.  If ax is not given, a new plot is set up using
        width and height.

        """

        load_data = self.cpu_in_power.get_normalized_load_data(mapping_label)
        if "load" in self.devfreq_in_power.data_frame:
            gpu_dfr = self.devfreq_in_power.data_frame
            gpu_max_freq = max(gpu_dfr["freq"])
            gpu_load = gpu_dfr["load"] * gpu_dfr["freq"] / gpu_max_freq

            gpu_data = pd.DataFrame({"GPU": gpu_load})
            load_data = pd.concat([load_data, gpu_data], axis=1)

        load_data = load_data.ffill()
        title = trappy.plot_utils.normalize_title("Normalized Utilization", title)

        if not ax:
            ax = trappy.plot_utils.pre_plot_setup(width=width, height=height)

        load_data.plot(ax=ax)

        trappy.plot_utils.post_plot_setup(ax, title=title)

    def plot_allfreqs(self, map_label, width=None, height=None, ax=None):
        """Do allfreqs plots similar to those of CompareRuns

        if ax is not none, it must be an array of the same size as
        map_label.  Each plot will be done in each of the axis in
        ax.  If ax is None, a new plot is set up for each of them
        using width and height.

        """
        all_freqs = self.get_all_freqs_data(map_label)

        setup_plot = False
        if ax is None:
            ax = [None] * len(all_freqs)
            setup_plot = True

        for this_ax, (label, dfr) in zip(ax, all_freqs):
            this_title = trappy.plot_utils.normalize_title("allfreqs " + label,
                                                           self.name)

            if setup_plot:
                this_ax = trappy.plot_utils.pre_plot_setup(width=width,
                                                           height=height)

            dfr.plot(ax=this_ax)
            trappy.plot_utils.post_plot_setup(this_ax, title=this_title)