author    Joel Fernandes <joelaf@google.com>    2017-07-08 13:38:55 -0700
committer Joel Fernandes <joelaf@google.com>    2017-07-08 19:41:00 -0700
commit    89ce9a07de9a9a7720b72a8204fdfc0b331ef5e8 (patch)
tree      d2f4556523a6b9805b30ed0c2d51d22498b924b2
parent    ff807880438eb0c480bda11c961764e155b49eb2 (diff)
download  trappy-89ce9a07de9a9a7720b72a8204fdfc0b331ef5e8.tar.gz
trappy/ftrace: merge primary and secondary DFs based on pivot
Forward propagate the secondary DF into the primary DF and return the merged DF.

Implements: https://github.com/ARM-software/trappy/issues/250

Change-Id: I312d77302bbca8bb13bfa598785ebc0cc879fe34
Signed-off-by: Joel Fernandes <joelaf@google.com>
-rw-r--r--    trappy/utils.py    55
1 file changed, 55 insertions(+), 0 deletions(-)
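The patch below forward-fills values from the most recent secondary event into every primary event row that shares the same pivot value. As a rough sketch of that semantics in plain pandas (the toy frames, the 'load'/'freq' columns and the 'cpu' pivot are invented for illustration and are not part of this patch):

import pandas as pd

# Hypothetical primary (e.g. a load-tracking event) and secondary (e.g. a
# frequency event) frames, both indexed by Time and sharing the pivot 'cpu'.
primary = pd.DataFrame({'cpu': [0, 0, 1], 'load': [10, 30, 20]},
                       index=pd.Index([1.0, 3.0, 4.0], name='Time'))
secondary = pd.DataFrame({'cpu': [0, 1], 'freq': [1000, 2000]},
                         index=pd.Index([0.5, 2.0], name='Time'))

# Concatenate, replay in time order, and forward-fill the secondary column
# separately for each pivot (cpu) value.
merged = pd.concat([primary, secondary]).sort_index()
merged['freq'] = merged.groupby('cpu')['freq'].ffill()

# Keep only the rows that originated from primary events.
merged = merged[merged['load'].notna()]
print(merged)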
diff --git a/trappy/utils.py b/trappy/utils.py
index eb73752..cee6f6c 100644
--- a/trappy/utils.py
+++ b/trappy/utils.py
@@ -13,6 +13,9 @@
# limitations under the License.
#
+import pandas as pd
+import numpy as np
+
"""Generic functions that can be used in multiple places in trappy
"""
@@ -102,3 +105,55 @@ def handle_duplicate_index(data,
dup_index_left += 1
return data.reindex(new_index)
+
+def merge_dfs(pr_df, sec_df, pivot):
+    # Keep track of the last secondary event for each pivot value
+    pivot_map = {}
+
+    # An array accumulating dicts with merged data
+    merged_data = []
+    def df_fn(data):
+        # Store the latest secondary info
+        if data['Time'][0] == 'secondary':
+            pivot_map[data[pivot]] = data
+            # Get rid of primary/secondary labels
+            data['Time'] = data['Time'][1]
+            return
+
+        # Propagate latest secondary info
+        for key, value in data.iteritems():
+            if key == pivot:
+                continue
+            try:
+                if np.isnan(value):
+                    data[key] = pivot_map[data[pivot]][key]
+            except:
+                pass
+
+        # Get rid of primary/secondary labels
+        data['Time'] = data['Time'][1]
+        merged_data.append(data)
+
+    # Iterate fast over all rows in a data frame and apply fn
+    def apply_callbacks(df, fn):
+        iters = df.itertuples()
+        event_tuple = iters.next()
+
+        # Column names beginning with underscore will not be preserved in tuples
+        # due to constraints on namedtuple field names, so store mappings from
+        # column name to column number for each trace event.
+        col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) }
+
+        while True:
+            if not event_tuple:
+                break
+            event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() }
+            fn(event_dict)
+            event_tuple = next(iters, None)
+
+    df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort(columns='__line')
+    apply_callbacks(df, df_fn)
+    merged_df = pd.DataFrame.from_dict(merged_data)
+    merged_df.set_index('Time', inplace=True)
+
+    return merged_df
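For reference, a usage sketch of the new helper; the toy frames, the 'load'/'freq' columns and the 'cpu' pivot are again illustrative only, and the code assumes the Python 2 / older-pandas environment trappy targeted at the time. It also assumes both inputs carry the '__line' column that trappy's parser attaches to every event, since merge_dfs sorts the concatenated frame on it:

import pandas as pd
from trappy.utils import merge_dfs

# Hypothetical primary and secondary event frames, indexed by Time, sharing
# the pivot column 'cpu' and trappy's '__line' ordering column.
primary = pd.DataFrame({'cpu': [0, 1], 'load': [10, 20], '__line': [2, 4]},
                       index=pd.Index([1.0, 2.0], name='Time'))
secondary = pd.DataFrame({'cpu': [0, 1], 'freq': [1000, 2000], '__line': [1, 3]},
                         index=pd.Index([0.5, 1.5], name='Time'))

# Returns the primary rows, indexed by Time, with the latest 'freq' seen for
# the same cpu filled into the cells that were NaN.
merged = merge_dfs(primary, secondary, pivot='cpu')
print(merged)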