From 658d4cc50a81cc0234bd4cdb3fa87f38782223b1 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 14 Jun 2017 12:30:31 +0100 Subject: trappy/ftrace: add a line number column Useful for joining DataFrames that have timestamp collisions or for iterating through a group of DataFrames in line order. Signed-off-by: Joel Fernandes Reviewed-by: KP Singh --- tests/test_base.py | 8 ++++---- trappy/base.py | 12 ++++++++---- trappy/ftrace.py | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/test_base.py b/tests/test_base.py index c186ecc..3311f11 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -85,7 +85,7 @@ class TestBase(utils_tests.SetupDirectory): in_data = """ kworker/4:1-397 [004] 720.741315: thermal_power_cpu_get: cpus=000000f0 freq=1900000 raw_cpu_power=1259 load={} power=61 kworker/4:1-397 [004] 720.741349: thermal_power_cpu_get: cpus=0000000f freq=1400000 raw_cpu_power=189 load={} power=14""" - expected_columns = set(["__comm", "__pid", "__cpu", "cpus", "freq", + expected_columns = set(["__comm", "__pid", "__cpu", "__line", "cpus", "freq", "raw_cpu_power", "power"]) with open("trace.txt", "w") as fout: @@ -121,7 +121,7 @@ class TestBase(utils_tests.SetupDirectory): timestamp ) - expected_columns = set(["__comm", "__pid", "__cpu", "tag"]) + expected_columns = set(["__comm", "__pid", "__cpu", "__line", "tag"]) with open("trace.txt", "w") as fout: fout.write(in_data) @@ -145,7 +145,7 @@ class TestBase(utils_tests.SetupDirectory): in_data = """ rcu_preempt-7 [000] 73.604532: my_sched_stat_runtime: comm=Space separated taskname pid=7 runtime=262875 [ns] vruntime=17096359856 [ns]""" - expected_columns = set(["__comm", "__pid", "__cpu", "comm", "pid", "runtime", "vruntime"]) + expected_columns = set(["__comm", "__pid", "__cpu", "__line", "comm", "pid", "runtime", "vruntime"]) with open("trace.txt", "w") as fout: fout.write(in_data) @@ -209,7 +209,7 @@ class TestBase(utils_tests.SetupDirectory): df = trace.equals_event.data_frame self.assertSetEqual(set(df.columns), - set(["__comm", "__pid", "__cpu", "my_field"])) + set(["__comm", "__pid", "__cpu", "__line", "my_field"])) self.assertListEqual(df["my_field"].tolist(), ["foo", "foo=bar", "foo=bar=baz", 1, "1=2", "1=foo", "1foo=2"]) diff --git a/trappy/base.py b/trappy/base.py index 93ce60c..4502c77 100644 --- a/trappy/base.py +++ b/trappy/base.py @@ -106,6 +106,7 @@ class Base(object): self.fallback = fallback self.tracer = None self.data_frame = pd.DataFrame() + self.line_array = [] self.data_array = [] self.time_array = [] self.comm_array = [] @@ -150,7 +151,7 @@ class Base(object): return ret - def append_data(self, time, comm, pid, cpu, data): + def append_data(self, time, comm, pid, cpu, line, data): """Append data parsed from a line to the corresponding arrays The :mod:`DataFrame` will be created from this when the whole trace @@ -175,6 +176,7 @@ class Base(object): self.comm_array.append(comm) self.pid_array.append(pid) self.cpu_array.append(cpu) + self.line_array.append(line) self.data_array.append(data) def generate_data_dict(self, data_str): @@ -205,9 +207,10 @@ class Base(object): check_memory_usage = True check_memory_count = 1 - for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array, - self.cpu_array, self.data_array): - data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu} + for (comm, pid, cpu, line, data_str) in zip(self.comm_array, self.pid_array, + self.cpu_array, self.line_array, + self.data_array): + data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu, "__line": line} data_dict.update(self.generate_data_dict(data_str)) # When running out of memory, Pandas has been observed to segfault @@ -242,6 +245,7 @@ class Base(object): self.data_frame = pd.DataFrame(self.generate_parsed_data(), index=time_idx) self.time_array = [] + self.line_array = [] self.comm_array = [] self.pid_array = [] self.cpu_array = [] diff --git a/trappy/ftrace.py b/trappy/ftrace.py index 56d6199..23189d1 100644 --- a/trappy/ftrace.py +++ b/trappy/ftrace.py @@ -217,7 +217,7 @@ subclassed by FTrace (for parsing FTrace coming from trace-cmd) and SysTrace.""" # Remove empty arrays from the trace data_str = re.sub(r"[A-Za-z0-9_]+=\{\} ", r"", data_str) - trace_class.append_data(timestamp, comm, pid, cpu, data_str) + trace_class.append_data(timestamp, comm, pid, cpu, self.lines, data_str) self.lines += 1 def trace_hasnt_started(self): -- cgit v1.2.3