diff options
author | Joel Fernandes <joelaf@google.com> | 2017-07-08 13:38:55 -0700 |
---|---|---|
committer | Joel Fernandes <joelaf@google.com> | 2017-07-08 19:41:00 -0700 |
commit | 89ce9a07de9a9a7720b72a8204fdfc0b331ef5e8 (patch) | |
tree | d2f4556523a6b9805b30ed0c2d51d22498b924b2 | |
parent | ff807880438eb0c480bda11c961764e155b49eb2 (diff) | |
download | trappy-89ce9a07de9a9a7720b72a8204fdfc0b331ef5e8.tar.gz |
trappy/ftrace: merge primary and secondary DFs based on pivot
Forward propagate secondary DF into primary DF and return the merged DF.
Implements: https://github.com/ARM-software/trappy/issues/250
Change-Id: I312d77302bbca8bb13bfa598785ebc0cc879fe34
Signed-off-by: Joel Fernandes <joelaf@google.com>
-rw-r--r-- | trappy/utils.py | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/trappy/utils.py b/trappy/utils.py index eb73752..cee6f6c 100644 --- a/trappy/utils.py +++ b/trappy/utils.py @@ -13,6 +13,9 @@ # limitations under the License. # +import pandas as pd +import numpy as np + """Generic functions that can be used in multiple places in trappy """ @@ -102,3 +105,55 @@ def handle_duplicate_index(data, dup_index_left += 1 return data.reindex(new_index) + +def merge_dfs(pr_df, sec_df, pivot): + # Keep track of last secondary event + pivot_map = {} + + # An array accumating dicts with merged data + merged_data = [] + def df_fn(data): + # Store the latest secondary info + if data['Time'][0] == 'secondary': + pivot_map[data[pivot]] = data + # Get rid of primary/secondary labels + data['Time'] = data['Time'][1] + return + + # Propogate latest secondary info + for key, value in data.iteritems(): + if key == pivot: + continue + try: + if np.isnan(value): + data[key] = pivot_map[data[pivot]][key] + except: + pass + + # Get rid of primary/secondary labels + data['Time'] = data['Time'][1] + merged_data.append(data) + + # Iterate fast over all rows in a data frame and apply fn + def apply_callbacks(df, fn): + iters = df.itertuples() + event_tuple = iters.next() + + # Column names beginning with underscore will not be preserved in tuples + # due to constraints on namedtuple field names, so store mappings from + # column name to column number for each trace event. + col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) } + + while True: + if not event_tuple: + break + event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() } + fn(event_dict) + event_tuple = next(iters, None) + + df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort(columns='__line') + apply_callbacks(df, df_fn) + merged_df = pd.DataFrame.from_dict(merged_data) + merged_df.set_index('Time', inplace=True) + + return merged_df |