aboutsummaryrefslogtreecommitdiff
path: root/opcontrol/opcontrol.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'opcontrol/opcontrol.cpp')
-rw-r--r--opcontrol/opcontrol.cpp453
1 files changed, 194 insertions, 259 deletions
diff --git a/opcontrol/opcontrol.cpp b/opcontrol/opcontrol.cpp
index 6b440e2..44eca77 100644
--- a/opcontrol/opcontrol.cpp
+++ b/opcontrol/opcontrol.cpp
@@ -36,21 +36,97 @@
#define verbose(fmt...) if (verbose_print) printf(fmt)
-/* Experiments found that using a small interval may hang the device, and the
- * more events tracked simultaneously, the longer the interval has to be.
- */
+struct event_info { /* One selectable profiling event. */
+ int id; /* event number; do_status() matches it against the value read from OP_DRIVER_BASE/<n>/event */
+ int counters; /* bitmask of CTR(n) values for the counters that may count this event; 0 means no restriction */
+ int um; /* presumably the unit mask ("um:" in the oprofile event lists) -- TODO confirm */
+ const char *name; /* symbolic name matched by find_event_idx_from_name() and printed by do_list_events() */
+ const char *explanation; /* human-readable description printed by do_list_events() */
+};
+
+#define CTR(n) (1<<(n))
#if defined(__i386__) || defined(__x86_64__)
+struct event_info event_info_arch_perfmon[] = {
+ #include "../events/i386/arch_perfmon/events.h"
+};
+
#define MAX_EVENTS 2
int min_count[MAX_EVENTS] = {60000, 100000};
-#elif !defined(WITH_ARM_V7_A)
+
+const char *default_event = "CPU_CLK_UNHALTED";
+#endif
+
+#if defined(__arm__)
+#if !defined(WITH_ARM_V7_A)
+struct event_info event_info_armv6[] = {
+ #include "../events/arm/armv6/events.h"
+};
+
#define MAX_EVENTS 3
int min_count[MAX_EVENTS] = {150000, 200000, 250000};
+
#else
+struct event_info event_info_armv7[] = {
+ #include "../events/arm/armv7/events.h"
+};
+
#define MAX_EVENTS 5
int min_count[MAX_EVENTS] = {150000, 20000, 25000, 30000, 35000};
#endif
+const char *default_event = "CPU_CYCLES";
+#endif
+
+#if defined(__mips__)
+struct event_info event_info_24K[] = {
+ #include "../events/mips/24K/events.h"
+};
+struct event_info event_info_34K[] = {
+ #include "../events/mips/34K/events.h"
+};
+struct event_info event_info_74K[] = {
+ #include "../events/mips/74K/events.h"
+};
+struct event_info event_info_1004K[] = {
+ #include "../events/mips/1004K/events.h"
+};
+
+#define MAX_EVENTS 4
+int min_count[MAX_EVENTS] = {150000, 20000, 25000, 30000};
+
+const char *default_event = "CYCLES";
+#endif /* defined(__mips__) */
+
+#define ARRAYSZ(x) (sizeof(x)/sizeof((x)[0]))
+
+struct cpuevents { /* Associates a CPU type string with its event table. */
+ const char *cpu; /* compared against the contents of OP_DRIVER_BASE/cpu_type in setup_device() (MIPS builds only) */
+ struct event_info *event_info; /* first entry of the event table for this CPU */
+ unsigned int nevents; /* number of entries in that table (ARRAYSZ of the array) */
+} cpuevents[] = { /* only the tables of the architecture being compiled are present */
+#if defined(__i386__) || defined(__x86_64__)
+ {"i386/arch_perfmon", event_info_arch_perfmon, ARRAYSZ(event_info_arch_perfmon)},
+#endif /* defined(__i386__) || defined(__x86_64__) */
+#if defined(__arm__)
+#if !defined(WITH_ARM_V7_A)
+ {"arm/armv6", event_info_armv6, ARRAYSZ(event_info_armv6)},
+#else
+ {"arm/armv7", event_info_armv7, ARRAYSZ(event_info_armv7)},
+#endif
+#endif /* defined(__arm__) */
+#if defined(__mips__)
+ {"mips/24K", event_info_24K, ARRAYSZ(event_info_24K)},
+ {"mips/34K", event_info_34K, ARRAYSZ(event_info_34K)},
+ {"mips/74K", event_info_74K, ARRAYSZ(event_info_74K)},
+ {"mips/1004K", event_info_1004K, ARRAYSZ(event_info_1004K)},
+#endif /* defined(__mips__) */
+};
+
+struct cpuevents *cpuevent; /* table entry selected by setup_device(); a null pointer until that succeeds */
+#define event_info cpuevent->event_info /* event table of the selected CPU. NOTE(review): from here on the macro also rewrites the tag in any "struct event_info", and it dereferences cpuevent unchecked */
+#define NEVENTS cpuevent->nevents /* number of events in the selected table; also dereferences cpuevent unchecked */
+
int verbose_print;
int list_events;
int show_usage;
@@ -64,6 +140,7 @@ int reset;
int selected_events[MAX_EVENTS];
int selected_counts[MAX_EVENTS];
+int max_events;
char callgraph[8];
char kernel_range[512];
@@ -90,205 +167,6 @@ struct option long_options[] = {
{0, 0, 0, 0},
};
-struct event_info {
- int id;
- int um;
- const char *name;
- const char *explanation;
-} event_info[] = {
-#if defined(__i386__) || defined(__x86_64__)
- /* INTEL_ARCH_PERFMON events */
-
- /* 0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED :
- * Clock cycles when not halted
- */
- {0x3c, 0, "CPU_CLK_UNHALTED",
- "Clock cycles when not halted" },
-
- /* event:0x3c counters:cpuid um:one minimum:6000 filter:2 name:UNHALTED_REFERENCE_CYCLES :
- * Unhalted reference cycles
- */
- {0x3c, 1, "UNHALTED_REFERENCE_CYCLES",
- "Unhalted reference cycles" },
-
- /* event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED :
- * number of instructions retired
- */
- {0xc0, 0, "INST_RETIRED",
- "number of instructions retired"},
-
- /* event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES :
- * Last level cache demand requests from this core that missed the LLC
- */
- {0x2e, 0x41, "LLC_MISSES",
- "Last level cache demand requests from this core that missed the LLC"},
-
- /* event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS :
- * Last level cache demand requests from this core
- */
- {0x2e, 0x4f, "LLC_REFS",
- "Last level cache demand requests from this core"},
-
- /* event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED :
- * number of branch instructions retired
- */
- {0xc4, 0, "BR_INST_RETIRED",
- "number of branch instructions retired"},
-
- /* event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED :
- * number of mispredicted branches retired (precise)
- */
- {0xc5, 0, "BR_MISS_PRED_RETIRED",
- "number of mispredicted branches retired (precise)"},
-
-#elif !defined(WITH_ARM_V7_A)
- /* ARM V6 events */
- {0x00, 0, "IFU_IFETCH_MISS",
- "number of instruction fetch misses"},
- {0x01, 0, "CYCLES_IFU_MEM_STALL",
- "cycles instruction fetch pipe is stalled"},
- {0x02, 0, "CYCLES_DATA_STALL",
- "cycles stall occurs for due to data dependency"},
- {0x03, 0, "ITLB_MISS",
- "number of Instruction MicroTLB misses"},
- {0x04, 0, "DTLB_MISS",
- "number of Data MicroTLB misses"},
- {0x05, 0, "BR_INST_EXECUTED",
- "branch instruction executed w/ or w/o program flow change"},
- {0x06, 0, "BR_INST_MISS_PRED",
- "branch mispredicted"},
- {0x07, 0, "INSN_EXECUTED",
- "instructions executed"},
- {0x09, 0, "DCACHE_ACCESS",
- "data cache access, cacheable locations"},
- {0x0a, 0, "DCACHE_ACCESS_ALL",
- "data cache access, all locations"},
- {0x0b, 0, "DCACHE_MISS",
- "data cache miss"},
- {0x0c, 0, "DCACHE_WB",
- "data cache writeback, 1 event for every half cacheline"},
- {0x0d, 0, "PC_CHANGE",
- "number of times the program counter was changed without a mode switch"},
- {0x0f, 0, "TLB_MISS",
- "Main TLB miss"},
- {0x10, 0, "EXP_EXTERNAL",
- "Explicit external data access"},
- {0x11, 0, "LSU_STALL",
- "cycles stalled because Load Store request queue is full"},
- {0x12, 0, "WRITE_DRAIN",
- "Times write buffer was drained"},
- {0xff, 0, "CPU_CYCLES",
- "clock cycles counter"},
-#else
- /* ARM V7 events */
- {0x00, 0, "PMNC_SW_INCR",
- "Software increment of PMNC registers"},
- {0x01, 0, "IFETCH_MISS",
- "Instruction fetch misses from cache or normal cacheable memory"},
- {0x02, 0, "ITLB_MISS",
- "Instruction fetch misses from TLB"},
- {0x03, 0, "DCACHE_REFILL",
- "Data R/W operation that causes a refill from cache or normal cacheable"
- "memory"},
- {0x04, 0, "DCACHE_ACCESS",
- "Data R/W from cache"},
- {0x05, 0, "DTLB_REFILL",
- "Data R/W that causes a TLB refill"},
- {0x06, 0, "DREAD",
- "Data read architecturally executed (note: architecturally executed = for"
- "instructions that are unconditional or that pass the condition code)"},
- {0x07, 0, "DWRITE",
- "Data write architecturally executed"},
- {0x08, 0, "INSTR_EXECUTED",
- "All executed instructions"},
- {0x09, 0, "EXC_TAKEN",
- "Exception taken"},
- {0x0A, 0, "EXC_EXECUTED",
- "Exception return architecturally executed"},
- {0x0B, 0, "CID_WRITE",
- "Instruction that writes to the Context ID Register architecturally"
- "executed"},
- {0x0C, 0, "PC_WRITE",
- "SW change of PC, architecturally executed (not by exceptions)"},
- {0x0D, 0, "PC_IMM_BRANCH",
- "Immediate branch instruction executed (taken or not)"},
- {0x0E, 0, "PC_PROC_RETURN",
- "Procedure return architecturally executed (not by exceptions)"},
- {0x0F, 0, "UNALIGNED_ACCESS",
- "Unaligned access architecturally executed"},
- {0x10, 0, "PC_BRANCH_MIS_PRED",
- "Branch mispredicted or not predicted. Counts pipeline flushes because of"
- "misprediction"},
- {0x12, 0, "PC_BRANCH_MIS_USED",
- "Branch or change in program flow that could have been predicted"},
- {0x40, 0, "WRITE_BUFFER_FULL",
- "Any write buffer full cycle"},
- {0x41, 0, "L2_STORE_MERGED",
- "Any store that is merged in L2 cache"},
- {0x42, 0, "L2_STORE_BUFF",
- "Any bufferable store from load/store to L2 cache"},
- {0x43, 0, "L2_ACCESS",
- "Any access to L2 cache"},
- {0x44, 0, "L2_CACH_MISS",
- "Any cacheable miss in L2 cache"},
- {0x45, 0, "AXI_READ_CYCLES",
- "Number of cycles for an active AXI read"},
- {0x46, 0, "AXI_WRITE_CYCLES",
- "Number of cycles for an active AXI write"},
- {0x47, 0, "MEMORY_REPLAY",
- "Any replay event in the memory subsystem"},
- {0x48, 0, "UNALIGNED_ACCESS_REPLAY",
- "Unaligned access that causes a replay"},
- {0x49, 0, "L1_DATA_MISS",
- "L1 data cache miss as a result of the hashing algorithm"},
- {0x4A, 0, "L1_INST_MISS",
- "L1 instruction cache miss as a result of the hashing algorithm"},
- {0x4B, 0, "L1_DATA_COLORING",
- "L1 data access in which a page coloring alias occurs"},
- {0x4C, 0, "L1_NEON_DATA",
- "NEON data access that hits L1 cache"},
- {0x4D, 0, "L1_NEON_CACH_DATA",
- "NEON cacheable data access that hits L1 cache"},
- {0x4E, 0, "L2_NEON",
- "L2 access as a result of NEON memory access"},
- {0x4F, 0, "L2_NEON_HIT",
- "Any NEON hit in L2 cache"},
- {0x50, 0, "L1_INST",
- "Any L1 instruction cache access, excluding CP15 cache accesses"},
- {0x51, 0, "PC_RETURN_MIS_PRED",
- "Return stack misprediction at return stack pop"
- "(incorrect target address)"},
- {0x52, 0, "PC_BRANCH_FAILED",
- "Branch prediction misprediction"},
- {0x53, 0, "PC_BRANCH_TAKEN",
- "Any predicted branch that is taken"},
- {0x54, 0, "PC_BRANCH_EXECUTED",
- "Any taken branch that is executed"},
- {0x55, 0, "OP_EXECUTED",
- "Number of operations executed"
- "(in instruction or mutli-cycle instruction)"},
- {0x56, 0, "CYCLES_INST_STALL",
- "Cycles where no instruction available"},
- {0x57, 0, "CYCLES_INST",
- "Number of instructions issued in a cycle"},
- {0x58, 0, "CYCLES_NEON_DATA_STALL",
- "Number of cycles the processor waits on MRC data from NEON"},
- {0x59, 0, "CYCLES_NEON_INST_STALL",
- "Number of cycles the processor waits on NEON instruction queue or"
- "NEON load queue"},
- {0x5A, 0, "NEON_CYCLES",
- "Number of cycles NEON and integer processors are not idle"},
- {0x70, 0, "PMU0_EVENTS",
- "Number of events from external input source PMUEXTIN[0]"},
- {0x71, 0, "PMU1_EVENTS",
- "Number of events from external input source PMUEXTIN[1]"},
- {0x72, 0, "PMU_EVENTS",
- "Number of events from both external input sources PMUEXTIN[0]"
- "and PMUEXTIN[1]"},
- {0xFF, 0, "CPU_CYCLES",
- "Number of CPU cycles"},
-#endif
-};
void usage()
{
@@ -301,15 +179,17 @@ void usage()
" --setup setup directories\n"
#if defined(__i386__) || defined(__x86_64__)
" --quick setup and select CPU_CLK_UNHALTED:60000\n"
-#else
+#elif defined(__arm__)
" --quick setup and select CPU_CYCLES:150000\n"
+#elif defined(__mips__)
+ " --quick setup and select CYCLES:150000\n"
#endif
" --timer timer-based profiling\n"
" --status show configuration\n"
" --start start data collection\n"
" --stop stop data collection\n"
" --reset clears out data from current session\n"
- " --shutdown kill the oprofile daeman\n"
+ " --shutdown kill the oprofile daemon\n"
" --callgraph=depth callgraph depth\n"
" --event=eventspec\n"
" Choose an event. May be specified multiple times.\n"
@@ -322,16 +202,67 @@ void usage()
);
}
-void setup_session_dir()
+int setup_device(void) /* Create and mount the oprofilefs directory, then set cpuevent and max_events. Returns 0 on success, -1 on failure. */
{
- int fd;
+ if (mkdir(OP_DRIVER_BASE, 0755)) {
+ if (errno != EEXIST) { /* an already existing directory is not an error */
+ fprintf(stderr, "Cannot create directory "OP_DRIVER_BASE": %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
- fd = open(OP_DATA_DIR, O_RDONLY);
- if (fd != -1) {
- system("rm -r "OP_DATA_DIR);
- close(fd);
+ if (access(OP_DRIVER_BASE"/stats", F_OK)) { /* stats entry not reachable, so try mounting oprofilefs */
+ if (system("mount -t oprofilefs nodev "OP_DRIVER_BASE)) {
+ return -1;
+ }
+ }
+
+ /* Selecting the event information by cpu_type has only been tested on MIPS */
+#if defined(__mips__)
+ /* Use cpu_type to select the events */
+ int fd = open(OP_DRIVER_BASE "/cpu_type", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, OP_DRIVER_BASE "/cpu_type: %s\n",
+ strerror(errno));
+ return -1;
}
+ char buf[512];
+ int n = read(fd, buf, sizeof(buf)-1); /* one byte is kept free for the terminating NUL */
+ close(fd); /* NOTE(review): close() may overwrite errno before the n < 0 branch reports it */
+ if (n < 0) {
+ fprintf(stderr, OP_DRIVER_BASE "/cpu_type: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ buf[n] = '\0';
+ for (unsigned int i = 0; i < ARRAYSZ(cpuevents); i++) {
+ if (strcmp(buf, cpuevents[i].cpu) == 0) { /* exact match only. NOTE(review): a trailing newline in cpu_type would defeat it -- confirm the driver emits none */
+ cpuevent = &cpuevents[i];
+ }
+ }
+ if (cpuevent == NULL) {
+ fprintf(stderr, "Unrecognised CPU type %s\n", buf);
+ return -1;
+ }
+ for (max_events = 0; max_events < MAX_EVENTS; max_events++) { /* count the entries OP_DRIVER_BASE/0, /1, ... that exist, capped at MAX_EVENTS */
+ snprintf(buf, sizeof(buf), OP_DRIVER_BASE"/%d", max_events);
+ if (access(buf, F_OK) < 0)
+ break;
+ }
+#else
+ max_events = MAX_EVENTS; /* other architectures: use the compile-time maximum */
+ cpuevent = &cpuevents[0]; /* and the first entry of cpuevents[] */
+#endif
+ return 0;
+}
+
+void setup_session_dir()
+{
+ if (access(OP_DATA_DIR, F_OK) == 0)
+ system("rm -r "OP_DATA_DIR);
+
if (mkdir(OP_DATA_DIR, 0755)) {
fprintf(stderr, "Cannot create directory \"%s\": %s\n",
OP_DATA_DIR, strerror(errno));
@@ -370,31 +301,36 @@ int do_setup()
setup_session_dir();
- if (mkdir(OP_DRIVER_BASE, 0755)) {
- if (errno != EEXIST) {
- fprintf(stderr, "Cannot create directory "OP_DRIVER_BASE": %s\n",
- strerror(errno));
- return -1;
- }
- }
+ return 0;
+}
- if (access(OP_DRIVER_BASE"/stats", F_OK)) {
- if (system("mount -t oprofilefs nodev "OP_DRIVER_BASE)) {
- return -1;
- }
+void stringify_counters(char *ctr_string, int ctr_mask) /* Write the numbers of the bits set in ctr_mask into ctr_string as a comma-separated decimal list; an empty string when no bit is set. */
+{
+ int i, n, len; /* NOTE(review): n and len are never used */
+ char *p = ctr_string;
+
+ *p = '\0';
+ for (i=0; i<32; ++i) {
+ if (ctr_mask & (1<<i)) { /* NOTE(review): 1<<31 overflows a 32-bit signed int; 1u<<i would avoid that */
+ p += sprintf(p, "%d,", i); /* unbounded write: the caller must size ctr_string for up to 32 entries */
+ }
+ }
+ if (p != ctr_string) {
+ *(p-1) = '\0'; /* erase the final comma */
}
- return 0;
}
void do_list_events() /* Print a table with one row per event of the selected CPU: usable counters, name, explanation. */
{
unsigned int i;
+ char ctrs[32*3+1]; /* 32 entries of at most two digits plus a comma, and the terminating NUL */
- printf("%-20s: %s\n", "name", "meaning");
+ printf("%-12s | %-30s: %s\n", "counter", "name", "meaning");
printf("----------------------------------------"
"--------------------------------------\n");
- for (i = 0; i < sizeof(event_info)/sizeof(struct event_info); i++) {
- printf("%-20s: %s\n", event_info[i].name, event_info[i].explanation);
+ for (i = 0; i < NEVENTS; i++) {
+ stringify_counters(ctrs, event_info[i].counters); /* an event with counters == 0 yields an empty counter column */
+ printf("%-12s | %-30s: %s\n", ctrs, event_info[i].name, event_info[i].explanation);
}
}
@@ -402,7 +338,7 @@ int find_event_idx_from_name(const char *name)
{
unsigned int i;
- for (i = 0; i < sizeof(event_info)/sizeof(struct event_info); i++) {
+ for (i = 0; i < NEVENTS; i++) {
if (!strcmp(name, event_info[i].name)) {
return i;
}
@@ -410,16 +346,16 @@ int find_event_idx_from_name(const char *name)
return -1;
}
-const char * find_event_name_from_id(int id)
+const char * find_event_name_from_id(int id, int mask) /* Return the name of the first event with this id that is unrestricted (counters == 0) or allowed on a counter in mask. */
{
unsigned int i;
- for (i = 0; i < sizeof(event_info)/sizeof(struct event_info); i++) {
- if (event_info[i].id == id) {
+ for (i = 0; i < NEVENTS; i++) {
+ if (event_info[i].id == id && (event_info[i].counters == 0 || (event_info[i].counters & mask))) {
return event_info[i].name;
}
}
- return NULL;
+ return "Undefined Event"; /* fallback text instead of NULL; do_status() prints the result with %s */
}
int process_event(const char *event_spec)
@@ -450,6 +386,20 @@ int process_event(const char *event_spec)
return -1;
}
+ /*
+ * check that the named event is valid for this event counter
+ * 'num_events' represents the cpu internal counter number
+ */
+ verbose("idx: %d, name: %s, mask: %02x, ctr#: %d\n",
+ event_idx, event_info[event_idx].name,
+ event_info[event_idx].counters, num_events);
+ if (event_info[event_idx].counters != 0 &&
+ (event_info[event_idx].counters & CTR(num_events)) == 0) {
+ fprintf(stderr, "Bad event name: %s for counter %d, see --list\n",
+ event_name, num_events);
+ return -1;
+ }
+
/* Use default count */
if (count_name[0] == 0) {
count_val = min_count[0];
@@ -501,7 +451,7 @@ void do_status()
printf("Driver directory: %s\n", OP_DRIVER_BASE);
printf("Session directory: %s\n", OP_DATA_DIR);
- for (i = 0; i < MAX_EVENTS; i++) {
+ for (i = 0; i < max_events; i++) {
sprintf(fullname, OP_DRIVER_BASE"/%d/enabled", i);
num = read_num(fullname);
if (num > 0) {
@@ -510,7 +460,7 @@ void do_status()
/* event name */
sprintf(fullname, OP_DRIVER_BASE"/%d/event", i);
num = read_num(fullname);
- printf(" name: %s\n", find_event_name_from_id(num));
+ printf(" name: %s\n", find_event_name_from_id(num, CTR(i)));
/* profile interval */
sprintf(fullname, OP_DRIVER_BASE"/%d/count", i);
@@ -524,17 +474,14 @@ void do_status()
num = read_num(OP_DATA_DIR"/lock");
if (num >= 0) {
- int fd;
/* Still needs to check if this lock is left-over */
sprintf(fullname, "/proc/%d", num);
- fd = open(fullname, O_RDONLY);
- if (fd == -1) {
+ if (access(fullname, R_OK) != 0) {
printf("OProfile daemon exited prematurely - redo setup"
" before you continue\n");
return;
}
else {
- close(fd);
printf("oprofiled pid: %d\n", num);
num = read_num(OP_DRIVER_BASE"/enable");
@@ -598,20 +545,12 @@ void do_reset()
* reset.
*/
#if 0
- int fd;
-
- fd = open(OP_DATA_DIR"/samples/current", O_RDONLY);
- if (fd == -1) {
- return;
- }
- close(fd);
- system("rm -r "OP_DATA_DIR"/samples/current");
- int num = read_num(OP_DATA_DIR"/lock");
+ int pid = read_num(OP_DATA_DIR"/lock");
+ if (pid >= 0)
+ kill(pid, SIGHUP); /* HUP makes oprofiled close its sample files */
- if (num >= 0) {
- printf("Signalling daemon...\n");
- kill(num, SIGHUP);
- }
+ if (access(OP_DATA_DIR"/samples/current", R_OK) == 0)
+ system("rm -r "OP_DATA_DIR"/samples/current");
#endif
}
@@ -625,6 +564,8 @@ int main(int argc, char * const argv[])
strcpy(vmlinux, "--no-vmlinux");
strcpy(kernel_range, "");
+ setup_device();
+
while (1) {
int c = getopt_long(argc, argv, "c:e:v:r:dhVtl:", long_options, &option_index);
if (c == -1) {
@@ -700,11 +641,7 @@ int main(int argc, char * const argv[])
}
if (quick) {
-#if defined(__i386__) || defined(__x86_64__)
- process_event("CPU_CLK_UNHALTED");
-#else
- process_event("CPU_CYCLES");
-#endif
+ process_event(default_event);
setup = 1;
}
@@ -739,9 +676,7 @@ int main(int argc, char * const argv[])
char* slash = strrchr(command, '/');
strcpy(slash ? slash + 1 : command, "oprofiled --session-dir="OP_DATA_DIR);
-#if defined(__i386__) || defined(__x86_64__)
- /* Nothing */
-#elif !defined(WITH_ARM_V7_A)
+#if defined(__arm__) && !defined(WITH_ARM_V7_A)
/* Since counter #3 can only handle CPU_CYCLES, check and shuffle the
* order a bit so that the maximal number of events can be profiled
* simultaneously
@@ -815,7 +750,7 @@ int main(int argc, char * const argv[])
if (timer == 0) {
/* If not in timer mode, disable unused counters */
- for (i = num_events; i < MAX_EVENTS; i++) {
+ for (i = num_events; i < max_events; i++) {
echo_dev("0", 0, "enabled", i);
}
} else {