From 658d4cc50a81cc0234bd4cdb3fa87f38782223b1 Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Wed, 14 Jun 2017 12:30:31 +0100
Subject: trappy/ftrace: add a line number column

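Add a "__line" column to the DataFrame of every parsed event, filled
from the parser's running line counter (self.lines). This is useful for
joining DataFrames that have timestamp collisions, or for iterating
through a group of DataFrames in line order.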

Signed-off-by: Joel Fernandes <joelaf@google.com>
Reviewed-by: KP Singh <kpsingh@google.com>
---
 tests/test_base.py |  8 ++++----
 trappy/base.py     | 12 ++++++++----
 trappy/ftrace.py   |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/test_base.py b/tests/test_base.py
index c186ecc..3311f11 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -85,7 +85,7 @@ class TestBase(utils_tests.SetupDirectory):
         in_data = """     kworker/4:1-397   [004]   720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61
      kworker/4:1-397   [004]   720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "cpus", "freq",
+        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq",
                                 "raw_cpu_power", "power"])
 
         with open("trace.txt", "w") as fout:
@@ -121,7 +121,7 @@ class TestBase(utils_tests.SetupDirectory):
                         timestamp
                         )
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "tag"])
+        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -145,7 +145,7 @@ class TestBase(utils_tests.SetupDirectory):
 
         in_data = """     rcu_preempt-7     [000]    73.604532: my_sched_stat_runtime:   comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]"""
 
-        expected_columns = set(["__comm", "__pid", "__cpu", "comm", "pid", "runtime", "vruntime"])
+        expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"])
 
         with open("trace.txt", "w") as fout:
             fout.write(in_data)
@@ -209,7 +209,7 @@ class TestBase(utils_tests.SetupDirectory):
 
         df = trace.equals_event.data_frame
         self.assertSetEqual(set(df.columns),
-                            set(["__comm", "__pid", "__cpu", "my_field"]))
+                            set(["__comm", "__pid", "__cpu", "__line", "my_field"]))
         self.assertListEqual(df["my_field"].tolist(),
                              ["foo", "foo=bar", "foo=bar=baz", 1,
                               "1=2", "1=foo", "1foo=2"])
diff --git a/trappy/base.py b/trappy/base.py
index 93ce60c..4502c77 100644
--- a/trappy/base.py
+++ b/trappy/base.py
@@ -106,6 +106,7 @@ class Base(object):
         self.fallback = fallback
         self.tracer = None
         self.data_frame = pd.DataFrame()
+        self.line_array = []
         self.data_array = []
         self.time_array = []
         self.comm_array = []
@@ -150,7 +151,7 @@ class Base(object):
 
         return ret
 
-    def append_data(self, time, comm, pid, cpu, data):
+    def append_data(self, time, comm, pid, cpu, line, data):
         """Append data parsed from a line to the corresponding arrays
 
         The :mod:`DataFrame` will be created from this when the whole trace
@@ -175,6 +176,7 @@ class Base(object):
         self.comm_array.append(comm)
         self.pid_array.append(pid)
         self.cpu_array.append(cpu)
+        self.line_array.append(line)
         self.data_array.append(data)
 
     def generate_data_dict(self, data_str):
@@ -205,9 +207,10 @@ class Base(object):
         check_memory_usage = True
         check_memory_count = 1
 
-        for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array,
-                                              self.cpu_array, self.data_array):
-            data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu}
+        for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array,
+                                              self.cpu_array, self.line_array,
+                                              self.data_array):
+            data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line}
             data_dict.update(self.generate_data_dict(data_str))
 
             # When running out of memory, Pandas has been observed to segfault
@@ -242,6 +245,7 @@ class Base(object):
         self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx)
 
         self.time_array = []
+        self.line_array = []
         self.comm_array = []
         self.pid_array = []
         self.cpu_array = []
diff --git a/trappy/ftrace.py b/trappy/ftrace.py
index 56d6199..23189d1 100644
--- a/trappy/ftrace.py
+++ b/trappy/ftrace.py
@@ -217,7 +217,7 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace."""
             # Remove empty arrays from the trace
             data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str)
 
-            trace_class.append_data(timestamp, comm, pid, cpu, data_str)
+            trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str)
             self.lines += 1
 
     def trace_hasnt_started(self):
-- 
cgit v1.2.3