diff options
author | Jean-Baptiste Queru <jbq@google.com> | 2009-11-15 12:06:02 -0800 |
---|---|---|
committer | Jean-Baptiste Queru <jbq@google.com> | 2009-11-15 12:06:02 -0800 |
commit | b088e0b51cccd0c6c67115c351f99311c668ee32 (patch) | |
tree | 2cf361b308f14d09ff39c755302376059c232bfb | |
parent | 004a95dd33d26fab20dea67063c39181f6cd486b (diff) | |
parent | 84862f9f1c18e2906bd17871ed0970751de7eeae (diff) | |
download | oprofile-b088e0b51cccd0c6c67115c351f99311c668ee32.tar.gz |
merge from eclair
44 files changed, 3269 insertions, 173 deletions
@@ -65,7 +65,7 @@ #define OP_BINDIR "/usr/local/bin/" /* package data directory */ -#define OP_DATADIR "/data/oprofile/" +#define OP_DATADIR "/usr/local/share/oprofile/" /* Name of package */ #define PACKAGE "oprofile" @@ -95,7 +95,7 @@ /* #undef TRUE_FALSE_ALREADY_DEFINED */ /* Version number of package */ -#define VERSION "0.9.4" +#define VERSION "0.9.1" /* Define to 1 if the X Window System is missing or not being used. */ /* #undef X_DISPLAY_MISSING */ diff --git a/daemon/Android.mk b/daemon/Android.mk index abee74c..322c76e 100644 --- a/daemon/Android.mk +++ b/daemon/Android.mk @@ -6,6 +6,9 @@ LOCAL_SRC_FILES:= \ opd_anon.c \ opd_cookie.c \ opd_events.c \ + opd_extended.c \ + opd_ibs.c \ + opd_ibs_trans.c \ opd_kernel.c \ opd_mangling.c \ opd_perfmon.c \ diff --git a/daemon/init.c b/daemon/init.c index be0b9da..b4a63cc 100644 --- a/daemon/init.c +++ b/daemon/init.c @@ -174,14 +174,14 @@ static void opd_do_jitdumps(void) sprintf(end_time_str, "%llu", end_time); sprintf(opjitconv_path, "%s/%s", OP_BINDIR, "opjitconv"); arg_num = 0; - exec_args[arg_num++] = opjitconv_path; + exec_args[arg_num++] = "opjitconv"; if (vmisc) exec_args[arg_num++] = "-d"; exec_args[arg_num++] = session_dir; exec_args[arg_num++] = start_time_str; exec_args[arg_num++] = end_time_str; exec_args[arg_num] = (char *) NULL; - execvp("opjitconv", exec_args); + execvp(opjitconv_path, exec_args); fprintf(stderr, "Failed to exec %s: %s\n", exec_args[0], strerror(errno)); /* We don't want any cleanup in the child */ diff --git a/daemon/opd_events.c b/daemon/opd_events.c index 81a87d2..b544fb3 100644 --- a/daemon/opd_events.c +++ b/daemon/opd_events.c @@ -13,6 +13,7 @@ #include "opd_events.h" #include "opd_printf.h" +#include "opd_extended.h" #include "oprofiled.h" #include "op_string.h" @@ -35,7 +36,7 @@ static double cpu_speed; static void malformed_events(void) { fprintf(stderr, "oprofiled: malformed events passed " - "on the command line\n"); + "on the command line\n"); exit(EXIT_FAILURE); } @@ 
-128,6 +129,12 @@ void opd_parse_events(char const * events) struct opd_event * find_counter_event(unsigned long counter) { size_t i; + struct opd_event * ret = NULL; + + if (counter >= OP_MAX_COUNTERS) { + if((ret = opd_ext_find_counter_event(counter)) != NULL) + return ret; + } for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) { if (counter == opd_events[i].counter) @@ -141,9 +148,9 @@ struct opd_event * find_counter_event(unsigned long counter) void fill_header(struct opd_header * header, unsigned long counter, - vma_t anon_start, vma_t cg_to_anon_start, - int is_kernel, int cg_to_is_kernel, - int spu_samples, uint64_t embed_offset, time_t mtime) + vma_t anon_start, vma_t cg_to_anon_start, + int is_kernel, int cg_to_is_kernel, + int spu_samples, uint64_t embed_offset, time_t mtime) { struct opd_event * event = find_counter_event(counter); diff --git a/daemon/opd_events.h b/daemon/opd_events.h index 3bd0106..1e8b801 100644 --- a/daemon/opd_events.h +++ b/daemon/opd_events.h @@ -40,8 +40,8 @@ struct opd_header; /** fill the sample file header with event info etc. */ void fill_header(struct opd_header * header, unsigned long counter, - vma_t anon_start, vma_t anon_end, - int is_kernel, int cg_to_is_kernel, + vma_t anon_start, vma_t anon_end, + int is_kernel, int cg_to_is_kernel, int spu_samples, uint64_t embed_offset, time_t mtime); #endif /* OPD_EVENTS_H */ diff --git a/daemon/opd_extended.c b/daemon/opd_extended.c new file mode 100644 index 0000000..d88c285 --- /dev/null +++ b/daemon/opd_extended.c @@ -0,0 +1,181 @@ +/** + * @file opd_extended.c + * OProfile Extended Feature + * + * @remark Copyright 2007-2009 OProfile authors + * @remark Read the file COPYING + * + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2009 Advanced Micro Devices, Inc. 
+ */ + +#include "opd_extended.h" +#include "op_string.h" + +#include <string.h> +#include <stdio.h> + +/* This global variable is >= 0 + * if extended feature is enabled */ +static int opd_ext_feat_index; + +extern struct opd_ext_handlers ibs_handlers; + +/** + * OProfile Extended Feature Table + * + * This table contains a list of extended features. + */ +static struct opd_ext_feature ext_feature_table[] = { + {"ibs", &ibs_handlers }, + { NULL, NULL } +}; + + +static int get_index_for_feature(char const * name) +{ + int ret = -1; + unsigned int i; + + if(!name) + return ret; + + for (i = 0 ; ext_feature_table[i].feature != NULL ; i++ ) { + if(!strncmp(name, ext_feature_table[i].feature, + strlen(ext_feature_table[i].feature))) { + ret = i; + break; + } + } + + return ret; +} + + +static inline int is_ext_enabled() +{ + if (opd_ext_feat_index >= 0 + && ext_feature_table[opd_ext_feat_index].handlers != NULL) + return 1; + else + return 0; +} + + +static inline int is_ext_sfile_enabled() +{ + if (opd_ext_feat_index >= 0 + && ext_feature_table[opd_ext_feat_index].handlers != NULL + && ext_feature_table[opd_ext_feat_index].handlers->ext_sfile != NULL) + return 1; + else + return 0; +} + + +/** + * Param "value" is the input from CML option with the format: + * + * <feature name>:<param1>:<param2>:<param3>:..... + * + * where param1,2.3,..n are optional. 
+ */ +int opd_ext_initialize(char const * value) +{ + int ret = EXIT_FAILURE; + char * tmp = NULL, * name = NULL, * args = NULL; + + if(!value) { + opd_ext_feat_index = -1; + return 0; + } + + tmp = op_xstrndup(value, strlen(value)); + + /* Parse feature name*/ + if((name = strtok_r(tmp, ":", &args)) == NULL) + goto err_out; + + if((opd_ext_feat_index = get_index_for_feature(name)) < 0) + goto err_out; + + ret = ext_feature_table[opd_ext_feat_index].handlers->ext_init(args); + + return ret; + +err_out: + fprintf(stderr,"opd_ext_initialize: Invalid extended feature option: %s\n", value); + return ret; +} + + +void opd_ext_print_stats() +{ + if (is_ext_enabled() + && ext_feature_table[opd_ext_feat_index].handlers->ext_print_stats != NULL) { + printf("\n-- OProfile Extended-Feature Statistics --\n"); + ext_feature_table[opd_ext_feat_index].handlers->ext_print_stats(); + } +} + + +/** + * opd_sfile extended APIs + */ +void opd_ext_sfile_create(struct sfile * sf) +{ + /* Creating ext sfile only if extended feature is enable*/ + if (is_ext_sfile_enabled() + && ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->create != NULL) + ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->create(sf); +} + + +void opd_ext_sfile_dup (struct sfile * to, struct sfile * from) +{ + /* Duplicate ext sfile only if extended feature is enable*/ + if (is_ext_sfile_enabled() + && ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->dup != NULL) + ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->dup(to, from); +} + + +void opd_ext_sfile_close (struct sfile * sf) +{ + /* Close ext sfile only if extended feature is enable*/ + if (is_ext_sfile_enabled() + && ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->close != NULL) + ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->close(sf); +} + + +void opd_ext_sfile_sync(struct sfile * sf) +{ + /* Sync ext sfile only if extended feature is enable*/ + if (is_ext_sfile_enabled() + && 
ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->sync != NULL) + ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->sync(sf); +} + + +odb_t * opd_ext_sfile_get(struct transient const * trans, int is_cg) +{ + /* Get ext sfile only if extended feature is enable*/ + if (is_ext_sfile_enabled() + && ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->get != NULL) + return ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->get(trans, is_cg); + + return NULL; +} + + +struct opd_event * opd_ext_find_counter_event(unsigned long counter) +{ + /* Only if extended feature is enable*/ + if (is_ext_sfile_enabled() + && ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->find_counter_event != NULL) + return ext_feature_table[opd_ext_feat_index].handlers->ext_sfile->find_counter_event(counter); + + return NULL; +} + diff --git a/daemon/opd_extended.h b/daemon/opd_extended.h new file mode 100644 index 0000000..715041d --- /dev/null +++ b/daemon/opd_extended.h @@ -0,0 +1,85 @@ +/** + * @file opd_extended.h + * OProfile Extended Feature + * + * @remark Copyright 2007-2009 OProfile authors + * @remark Read the file COPYING + * + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2009 Advanced Micro Devices, Inc. 
+ */ + +#ifndef OPD_EXTENDED_H +#define OPD_EXTENDED_H + +#include "opd_trans.h" +#include "odb.h" + +#include <stdlib.h> +#include <stdint.h> + + +/** + * OProfile Extended Feature Table Entry + */ +struct opd_ext_feature { + // Feature name + const char* feature; + // Feature handlers + struct opd_ext_handlers * handlers; +}; + +/** + * OProfile Extended handlers + */ +struct opd_ext_handlers { + // Extended init + int (*ext_init)(char const *); + // Extended statistics + int (*ext_print_stats)(); + // Extended sfile handlers + struct opd_ext_sfile_handlers * ext_sfile; +}; + +/** + * OProfile Extended sub-handlers (sfile) + */ +struct opd_ext_sfile_handlers { + int (*create)(struct sfile *); + int (*dup)(struct sfile *, struct sfile *); + int (*close)(struct sfile *); + int (*sync)(struct sfile *); + odb_t * (*get)(struct transient const *, int); + struct opd_event * (*find_counter_event)(unsigned long); +}; + +/** + * @param value: commandline input option string + * + * Parse the specified extended feature + */ +extern int opd_ext_initialize(char const * value); + +/** + * Print out extended feature statistics in oprofiled.log file + */ +extern void opd_ext_print_stats(); + +/** + * opd_sfile extended sfile handling functions + */ +extern void opd_ext_sfile_create(struct sfile * sf); +extern void opd_ext_sfile_dup (struct sfile * to, struct sfile * from); +extern void opd_ext_sfile_close(struct sfile * sf); +extern void opd_ext_sfile_sync(struct sfile * sf); +extern odb_t * opd_ext_sfile_get(struct transient const * trans, int is_cg); + +/** + * @param counter: counter index + * + * Get event struct opd_event from the counter index value. 
+ */ +extern struct opd_event * opd_ext_find_counter_event(unsigned long counter); + + +#endif diff --git a/daemon/opd_ibs.c b/daemon/opd_ibs.c new file mode 100644 index 0000000..c57554a --- /dev/null +++ b/daemon/opd_ibs.c @@ -0,0 +1,692 @@ +/** + * @file daemon/opd_ibs.c + * AMD Family10h Instruction Based Sampling (IBS) handling. + * + * @remark Copyright 2007 OProfile authors + * @remark Read the file COPYING + * + * @author Jason Yeh <jason.yeh@amd.com> + * @author Paul Drongowski <paul.drongowski@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2008 Advanced Micro Devices, Inc. + */ + +#include "op_hw_config.h" +#include "op_events.h" +#include "op_string.h" +#include "op_libiberty.h" +#include "opd_printf.h" +#include "opd_trans.h" +#include "opd_events.h" +#include "opd_kernel.h" +#include "opd_anon.h" +#include "opd_sfile.h" +#include "opd_interface.h" +#include "opd_mangling.h" +#include "opd_extended.h" +#include "opd_ibs.h" +#include "opd_ibs_trans.h" +#include "opd_ibs_macro.h" + +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> + +extern op_cpu cpu_type; +extern int no_event_ok; +extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2); +extern void sfile_dup(struct sfile * to, struct sfile * from); + +/* IBS Select Arrays/Counters */ +static unsigned int ibs_selected_size; +static unsigned int ibs_fetch_selected_flag; +static unsigned int ibs_fetch_selected_size; +static unsigned int ibs_op_selected_flag; +static unsigned int ibs_op_selected_size; +static unsigned int ibs_op_ls_selected_flag; +static unsigned int ibs_op_ls_selected_size; +static unsigned int ibs_op_nb_selected_flag; +static unsigned int ibs_op_nb_selected_size; + +/* IBS Statistics */ +static unsigned long ibs_fetch_sample_stats; +static unsigned long ibs_fetch_incomplete_stats; +static unsigned long ibs_op_sample_stats; +static unsigned long ibs_op_incomplete_stats; +static unsigned long 
ibs_derived_event_stats; + +/* + * IBS Virtual Counter + */ +struct opd_event ibs_vc[OP_MAX_IBS_COUNTERS]; + +/* IBS Virtual Counter Index(VCI) Map*/ +unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS]; + +/** + * This function converts IBS fetch event flags and values into + * derived events. If the tagged (sampled) fetched caused a derived + * event, the derived event is tallied. + */ +static void opd_log_ibs_fetch(struct transient * trans) +{ + struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch; + if (!trans_fetch) + return; + + trans_ibs_fetch(trans, ibs_fetch_selected_flag, ibs_fetch_selected_size); +} + + +/** + * This function translates the IBS op event flags and values into + * IBS op derived events. If an op derived event occured, it's tallied. + */ +static void opd_log_ibs_op(struct transient * trans) +{ + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + if (!trans_op) + return; + + trans_ibs_op(trans, ibs_op_selected_flag, ibs_op_selected_size); + trans_ibs_op_ls(trans, ibs_op_ls_selected_flag, ibs_op_ls_selected_size); + trans_ibs_op_nb(trans, ibs_op_nb_selected_flag, ibs_op_nb_selected_size); +} + + +static void opd_put_ibs_sample(struct transient * trans) +{ + unsigned long long event = 0; + struct kernel_image * k_image = NULL; + struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch; + + if (!enough_remaining(trans, 1)) { + trans->remaining = 0; + return; + } + + /* IBS can generate samples with invalid dcookie and + * in kernel address range. Map such samples to vmlinux + * only if the user either specifies a range, or vmlinux. 
+ */ + if (trans->cookie == INVALID_COOKIE + && (k_image = find_kernel_image(trans)) != NULL + && (k_image->start != 0 && k_image->end != 0) + && trans->in_kernel == 0) + trans->in_kernel = 1; + + if (trans->tracing != TRACING_ON) + trans->event = event; + + /* sfile can change at each sample for kernel */ + if (trans->in_kernel != 0) + clear_trans_current(trans); + + if (!trans->in_kernel && trans->cookie == NO_COOKIE) + trans->anon = find_anon_mapping(trans); + + /* get the current sfile if needed */ + if (!trans->current) + trans->current = sfile_find(trans); + + /* + * can happen if kernel sample falls through the cracks, or if + * it's a sample from an anon region we couldn't find + */ + if (!trans->current) + goto out; + + if (trans_fetch) + opd_log_ibs_fetch(trans); + else + opd_log_ibs_op(trans); +out: + /* switch to trace mode */ + if (trans->tracing == TRACING_START) + trans->tracing = TRACING_ON; + + update_trans_last(trans); +} + + +void code_ibs_fetch_sample(struct transient * trans) +{ + struct ibs_fetch_sample * trans_fetch = NULL; + + if (!enough_remaining(trans, 7)) { + verbprintf(vext, "not enough remaining\n"); + trans->remaining = 0; + ibs_fetch_incomplete_stats++; + return; + } + + ibs_fetch_sample_stats++; + + trans->ext = xmalloc(sizeof(struct ibs_sample)); + ((struct ibs_sample*)(trans->ext))->fetch = xmalloc(sizeof(struct ibs_fetch_sample)); + trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch; + + trans_fetch->rip = pop_buffer_value(trans); + + trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans); + trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans); + + trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans); + trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans); + trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans); + trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans); + + verbprintf(vsamples, + "FETCH_X CPU:%ld PID:%ld RIP:%lx CTL_H:%x LAT:%d P_HI:%x P_LO:%x L_HI:%x L_LO:%x\n", + 
trans->cpu, + (long)trans->tgid, + trans_fetch->rip, + (trans_fetch->ibs_fetch_ctl_high >> 16) & 0x3ff, + (trans_fetch->ibs_fetch_ctl_high) & 0xffff, + trans_fetch->ibs_fetch_phys_addr_high, + trans_fetch->ibs_fetch_phys_addr_low, + trans_fetch->ibs_fetch_lin_addr_high, + trans_fetch->ibs_fetch_lin_addr_low) ; + + /* Overwrite the trans->pc with the more accurate trans_fetch->rip */ + trans->pc = trans_fetch->rip; + + opd_put_ibs_sample(trans); + + free(trans_fetch); + free(trans->ext); + trans->ext = NULL; +} + + +void code_ibs_op_sample(struct transient * trans) +{ + struct ibs_op_sample * trans_op= NULL; + + if (!enough_remaining(trans, 13)) { + verbprintf(vext, "not enough remaining\n"); + trans->remaining = 0; + ibs_op_incomplete_stats++; + return; + } + + ibs_op_sample_stats++; + + trans->ext = xmalloc(sizeof(struct ibs_sample)); + ((struct ibs_sample*)(trans->ext))->op = xmalloc(sizeof(struct ibs_op_sample)); + trans_op = ((struct ibs_sample*)(trans->ext))->op; + + trans_op->rip = pop_buffer_value(trans); + + trans_op->ibs_op_lin_addr_low = pop_buffer_value(trans); + trans_op->ibs_op_lin_addr_high = pop_buffer_value(trans); + + trans_op->ibs_op_data1_low = pop_buffer_value(trans); + trans_op->ibs_op_data1_high = pop_buffer_value(trans); + trans_op->ibs_op_data2_low = pop_buffer_value(trans); + trans_op->ibs_op_data2_high = pop_buffer_value(trans); + trans_op->ibs_op_data3_low = pop_buffer_value(trans); + trans_op->ibs_op_data3_high = pop_buffer_value(trans); + trans_op->ibs_op_ldst_linaddr_low = pop_buffer_value(trans); + trans_op->ibs_op_ldst_linaddr_high = pop_buffer_value(trans); + trans_op->ibs_op_phys_addr_low = pop_buffer_value(trans); + trans_op->ibs_op_phys_addr_high = pop_buffer_value(trans); + + verbprintf(vsamples, + "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n", + trans->cpu, + trans->tgid, + trans_op->rip, + trans_op->ibs_op_data1_high, + trans_op->ibs_op_data1_low, + trans_op->ibs_op_data2_low, + 
trans_op->ibs_op_data3_high, + trans_op->ibs_op_data3_low, + trans_op->ibs_op_ldst_linaddr_low, + trans_op->ibs_op_phys_addr_low); + + /* Overwrite the trans->pc with the more accurate trans_op->rip */ + trans->pc = trans_op->rip; + + opd_put_ibs_sample(trans); + + free(trans_op); + free(trans->ext); + trans->ext = NULL; +} + + +/** Convert IBS event to value used for data structure indexing */ +static unsigned long ibs_event_to_counter(unsigned long x) +{ + unsigned long ret = ~0UL; + + if (IS_IBS_FETCH(x)) + ret = (x - IBS_FETCH_BASE); + else if (IS_IBS_OP(x)) + ret = (x - IBS_OP_BASE + IBS_FETCH_MAX); + else if (IS_IBS_OP_LS(x)) + ret = (x - IBS_OP_LS_BASE + IBS_OP_MAX + IBS_FETCH_MAX); + else if (IS_IBS_OP_NB(x)) + ret = (x - IBS_OP_NB_BASE + IBS_OP_LS_MAX + IBS_OP_MAX + IBS_FETCH_MAX); + + return (ret != ~0UL) ? ret + OP_MAX_COUNTERS : ret; +} + + +void opd_log_ibs_event(unsigned int event, + struct transient * trans) +{ + ibs_derived_event_stats++; + trans->event = event; + sfile_log_sample_count(trans, 1); +} + + +void opd_log_ibs_count(unsigned int event, + struct transient * trans, + unsigned int count) +{ + ibs_derived_event_stats++; + trans->event = event; + sfile_log_sample_count(trans, count); +} + + +static unsigned long get_ibs_vci_key(unsigned int event) +{ + unsigned long key = ibs_event_to_counter(event); + if (key == ~0UL || key < OP_MAX_COUNTERS) + return ~0UL; + + key = key - OP_MAX_COUNTERS; + + return key; +} + + +static int ibs_parse_and_set_events(char * str) +{ + char * tmp, * ptr, * tok1, * tok2 = NULL; + int is_done = 0; + struct op_event * event = NULL; + op_cpu cpu_type = CPU_NO_GOOD; + unsigned long key; + + if (!str) + return -1; + + cpu_type = op_get_cpu_type(); + op_events(cpu_type); + + tmp = op_xstrndup(str, strlen(str)); + ptr = tmp; + + while (is_done != 1 + && (tok1 = strtok_r(ptr, ",", &tok2)) != NULL) { + + if ((ptr = strstr(tok1, ":")) != NULL) { + *ptr = '\0'; + is_done = 1; + } + + // Resove event number + event = 
find_event_by_name(tok1, 0, 0); + if (!event) + return -1; + + // Grouping + if (IS_IBS_FETCH(event->val)) { + ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val); + ibs_fetch_selected_size++; + } else if (IS_IBS_OP(event->val)) { + ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val); + ibs_op_selected_size++; + } else if (IS_IBS_OP_LS(event->val)) { + ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val); + ibs_op_ls_selected_size++; + } else if (IS_IBS_OP_NB(event->val)) { + ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val); + ibs_op_nb_selected_size++; + } else { + return -1; + } + + key = get_ibs_vci_key(event->val); + if (key == ~0UL) + return -1; + + ibs_vci_map[key] = ibs_selected_size; + + /* Initialize part of ibs_vc */ + ibs_vc[ibs_selected_size].name = tok1; + ibs_vc[ibs_selected_size].value = event->val; + ibs_vc[ibs_selected_size].counter = ibs_selected_size + OP_MAX_COUNTERS; + ibs_vc[ibs_selected_size].kernel = 1; + ibs_vc[ibs_selected_size].user = 1; + + ibs_selected_size++; + + ptr = NULL; + } + + return 0; +} + + +static int ibs_parse_counts(char * str, unsigned long int * count) +{ + char * tmp, * tok1, * tok2 = NULL, *end = NULL; + if (!str) + return -1; + + tmp = op_xstrndup(str, strlen(str)); + tok1 = strtok_r(tmp, ":", &tok2); + *count = strtoul(tok1, &end, 10); + if ((end && *end) || *count == 0 + || errno == EINVAL || errno == ERANGE) { + fprintf(stderr,"Invalid count (%s)\n", str); + return -1; + } + + return 0; +} + + +static int ibs_parse_and_set_um_fetch(char const * str) +{ + if (!str) + return -1; + return 0; +} + + + +static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um) +{ + char * end = NULL; + if (!str) + return -1; + + *ibs_op_um = strtoul(str, &end, 16); + if ((end && *end) || errno == EINVAL || errno == ERANGE) { + fprintf(stderr,"Invalid unitmaks (%s)\n", str); + return -1; + } + return 0; +} + + +static int ibs_init(char const * argv) +{ + char * tmp, * ptr, * tok1, 
* tok2 = NULL; + unsigned int i = 0; + unsigned long int ibs_fetch_count = 0; + unsigned long int ibs_op_count = 0; + unsigned long int ibs_op_um = 0; + + if (!argv) + return -1; + + if (empty_line(argv) != 0) + return -1; + + tmp = op_xstrndup(argv, strlen(argv)); + ptr = (char *) skip_ws(tmp); + + // "fetch:event1,event2,....:count:um|op:event1,event2,.....:count:um" + tok1 = strtok_r(ptr, "|", &tok2); + + while (tok1 != NULL) { + + if (!strncmp("fetch:", tok1, strlen("fetch:"))) { + // Get to event section + tok1 = tok1 + strlen("fetch:"); + if (ibs_parse_and_set_events(tok1) == -1) + return -1; + + // Get to count section + while (tok1) { + if (*tok1 == '\0') + return -1; + if (*tok1 != ':') { + tok1++; + } else { + tok1++; + break; + } + } + + if (ibs_parse_counts(tok1, &ibs_fetch_count) == -1) + return -1; + + // Get to um section + while (tok1) { + if (*tok1 == '\0') + return -1; + if (*tok1 != ':') { + tok1++; + } else { + tok1++; + break; + } + } + + if (ibs_parse_and_set_um_fetch(tok1) == -1) + return -1; + + } else if (!strncmp("op:", tok1, strlen("op:"))) { + // Get to event section + tok1 = tok1 + strlen("op:"); + if (ibs_parse_and_set_events(tok1) == -1) + return -1; + + // Get to count section + while (tok1) { + if (*tok1 == '\0') + return -1; + if (*tok1 != ':') { + tok1++; + } else { + tok1++; + break; + } + } + + if (ibs_parse_counts(tok1, &ibs_op_count) == -1) + return -1; + + // Get to um section + while (tok1) { + if (*tok1 == '\0') + return -1; + if (*tok1 != ':') { + tok1++; + } else { + tok1++; + break; + } + } + + if (ibs_parse_and_set_um_op(tok1, &ibs_op_um)) + return -1; + + } else + return -1; + + tok1 = strtok_r(NULL, "|", &tok2); + } + + /* Initialize ibs_vc */ + for (i = 0 ; i < ibs_selected_size ; i++) + { + if (IS_IBS_FETCH(ibs_vc[i].value)) { + ibs_vc[i].count = ibs_fetch_count; + ibs_vc[i].um = 0; + } else { + ibs_vc[i].count = ibs_op_count; + ibs_vc[i].um = ibs_op_um; + } + } + + // Allow no event + no_event_ok = 1; + return 0; 
+} + + +static int ibs_print_stats() +{ + printf("Nr. IBS Fetch samples : %lu (%lu entries)\n", ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7)); + printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats); + printf("Nr. IBS Op samples : %lu (%lu entries)\n", ibs_op_sample_stats, (ibs_op_sample_stats * 13)); + printf("Nr. IBS Op incompletes : %lu\n", ibs_op_incomplete_stats); + printf("Nr. IBS derived events : %lu\n", ibs_derived_event_stats); + return 0; +} + + +static int ibs_sfile_create(struct sfile * sf) +{ + unsigned int i; + sf->ext_files = xmalloc(ibs_selected_size * sizeof(odb_t)); + for (i = 0 ; i < ibs_selected_size ; ++i) + odb_init(&sf->ext_files[i]); + + return 0; +} + + +static int ibs_sfile_dup (struct sfile * to, struct sfile * from) +{ + unsigned int i; + if (from->ext_files != NULL) { + to->ext_files = xmalloc(ibs_selected_size * sizeof(odb_t)); + for (i = 0 ; i < ibs_selected_size ; ++i) + odb_init(&to->ext_files[i]); + } else { + to->ext_files = NULL; + } + return 0; +} + +static int ibs_sfile_close(struct sfile * sf) +{ + unsigned int i; + if (sf->ext_files != NULL) { + for (i = 0; i < ibs_selected_size ; ++i) + odb_close(&sf->ext_files[i]); + + free(sf->ext_files); + sf->ext_files= NULL; + } + return 0; +} + +static int ibs_sfile_sync(struct sfile * sf) +{ + unsigned int i; + if (sf->ext_files != NULL) { + for (i = 0; i < ibs_selected_size ; ++i) + odb_sync(&sf->ext_files[i]); + } + return 0; +} + +static odb_t * ibs_sfile_get(struct transient const * trans, int is_cg) +{ + struct sfile * sf = trans->current; + struct sfile * last = trans->last; + struct cg_entry * cg; + struct list_head * pos; + unsigned long hash; + odb_t * file; + unsigned long counter, ibs_vci, key; + + /* Note: "trans->event" for IBS is not the same as traditional + * events. Here, it has the actual event (0xfxxx), while the + * traditional event has the event index. 
+ */ + key = get_ibs_vci_key(trans->event); + if (key == ~0UL) { + fprintf(stderr, "%s: Invalid IBS event %lu\n", __func__, trans->event); + abort(); + } + ibs_vci = ibs_vci_map[key]; + counter = ibs_vci + OP_MAX_COUNTERS; + + /* Creating IBS sfile if it not already exists */ + if (sf->ext_files == NULL) + ibs_sfile_create(sf); + + file = &(sf->ext_files[ibs_vci]); + if (!is_cg) + goto open; + + hash = last->hashval & (CG_HASH_SIZE - 1); + + /* Need to look for the right 'to'. Since we're looking for + * 'last', we use its hash. + */ + list_for_each(pos, &sf->cg_hash[hash]) { + cg = list_entry(pos, struct cg_entry, hash); + if (sfile_equal(last, &cg->to)) { + file = &(cg->to.ext_files[ibs_vci]); + goto open; + } + } + + cg = xmalloc(sizeof(struct cg_entry)); + sfile_dup(&cg->to, last); + list_add(&cg->hash, &sf->cg_hash[hash]); + file = &(cg->to.ext_files[ibs_vci]); + +open: + if (!odb_open_count(file)) + opd_open_sample_file(file, last, sf, counter, is_cg); + + /* Error is logged by opd_open_sample_file */ + if (!odb_open_count(file)) + return NULL; + + return file; +} + + +/** Filled opd_event structure with IBS derived event information + * from the given counter value. 
+ */ +static struct opd_event * ibs_sfile_find_counter_event(unsigned long counter) +{ + unsigned long ibs_vci; + + if (counter >= OP_MAX_COUNTERS + OP_MAX_IBS_COUNTERS + || counter < OP_MAX_COUNTERS) { + fprintf(stderr,"Error: find_ibs_counter_event : " + "invalid counter value %lu.\n", counter); + abort(); + } + + ibs_vci = counter - OP_MAX_COUNTERS; + return &ibs_vc[ibs_vci]; +} + + +struct opd_ext_sfile_handlers ibs_sfile_handlers = +{ + .create = &ibs_sfile_create, + .dup = &ibs_sfile_dup, + .close = &ibs_sfile_close, + .sync = &ibs_sfile_sync, + .get = &ibs_sfile_get, + .find_counter_event = &ibs_sfile_find_counter_event +}; + + +struct opd_ext_handlers ibs_handlers = +{ + .ext_init = &ibs_init, + .ext_print_stats = &ibs_print_stats, + .ext_sfile = &ibs_sfile_handlers +}; diff --git a/daemon/opd_ibs.h b/daemon/opd_ibs.h new file mode 100644 index 0000000..9ccc482 --- /dev/null +++ b/daemon/opd_ibs.h @@ -0,0 +1,137 @@ +/** + * @file daemon/opd_ibs.h + * AMD Family10h Instruction Based Sampling (IBS) handling. + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Jason Yeh <jason.yeh@amd.com> + * @author Paul Drongowski <paul.drongowski@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2008 Advanced Micro Devices, Inc. + */ + +#ifndef OPD_IBS_H +#define OPD_IBS_H + +#include <stdint.h> + +#include "opd_ibs_macro.h" + +struct transient; +struct opd_event; + +/** + * IBS information is processed in two steps. The first step decodes + * hardware-level IBS information and saves it in decoded form. The + * second step translates the decoded IBS information into IBS derived + * events. IBS information is tallied and is reported as derived events. + */ + +struct ibs_sample { + struct ibs_fetch_sample * fetch; + struct ibs_op_sample * op; +}; + +/** + * This struct represents the hardware-level IBS fetch information. + * Each field corresponds to a model-specific register (MSR.) 
See the + * BIOS and Kernel Developer's Guide for AMD Model Family 10h Processors + * for further details. + */ +struct ibs_fetch_sample { + unsigned long int rip; + /* MSRC001_1030 IBS Fetch Control Register */ + unsigned int ibs_fetch_ctl_low; + unsigned int ibs_fetch_ctl_high; + /* MSRC001_1031 IBS Fetch Linear Address Register */ + unsigned int ibs_fetch_lin_addr_low; + unsigned int ibs_fetch_lin_addr_high; + /* MSRC001_1032 IBS Fetch Physical Address Register */ + unsigned int ibs_fetch_phys_addr_low; + unsigned int ibs_fetch_phys_addr_high; + unsigned int dummy_event; +}; + + + +/** This struct represents the hardware-level IBS op information. */ +struct ibs_op_sample { + unsigned long int rip; + /* MSRC001_1034 IBS Op Logical Address Register */ + unsigned int ibs_op_lin_addr_low; + unsigned int ibs_op_lin_addr_high; + /* MSRC001_1035 IBS Op Data Register */ + unsigned int ibs_op_data1_low; + unsigned int ibs_op_data1_high; + /* MSRC001_1036 IBS Op Data 2 Register */ + unsigned int ibs_op_data2_low; + unsigned int ibs_op_data2_high; + /* MSRC001_1037 IBS Op Data 3 Register */ + unsigned int ibs_op_data3_low; + unsigned int ibs_op_data3_high; + unsigned int ibs_op_ldst_linaddr_low; + unsigned int ibs_op_ldst_linaddr_high; + unsigned int ibs_op_phys_addr_low; + unsigned int ibs_op_phys_addr_high; +}; + + +enum IBSL1PAGESIZE { + L1TLB4K = 0, + L1TLB2M, + L1TLB1G, + L1TLB_INVALID +}; + + +/** + * Handle an IBS fetch sample escape code sequence. An IBS fetch sample + * is represented as an escape code sequence. (See the comment for the + * function code_ibs_op_sample() for the sequence of entries in the event + * buffer.) When this function is called, the ESCAPE_CODE and IBS_FETCH_CODE + * have already been removed from the event buffer. Thus, 7 more event buffer + * entries are needed in order to process a complete IBS fetch sample. + */ +extern void code_ibs_fetch_sample(struct transient * trans); + +/** + * Handle an IBS op sample escape code sequence. 
An IBS op sample + * is represented as an escape code sequence: + * + * IBS fetch IBS op + * --------------- ---------------- + * ESCAPE_CODE ESCAPE_CODE + * IBS_FETCH_CODE IBS_OP_CODE + * Offset Offset + * IbsFetchLinAd low IbsOpRip low <-- Logical (virtual) RIP + * IbsFetchLinAd high IbsOpRip high <-- Logical (virtual) RIP + * IbsFetchCtl low IbsOpData low + * IbsFetchCtl high IbsOpData high + * IbsFetchPhysAd low IbsOpData2 low + * IbsFetchPhysAd high IbsOpData2 high + * IbsOpData3 low + * IbsOpData3 high + * IbsDcLinAd low + * IbsDcLinAd high + * IbsDcPhysAd low + * IbsDcPhysAd high + * + * When this function is called, the ESCAPE_CODE and IBS_OP_CODE have + * already been removed from the event buffer. Thus, 13 more event buffer + * entries are needed to process a complete IBS op sample. + * + * The IbsFetchLinAd and IbsOpRip are the linear (virtual) addresses + * that were generated by the IBS hardware. These addresses are mapped + * into the offset. + */ +extern void code_ibs_op_sample(struct transient * trans); + +/** Log the specified IBS derived event. */ +extern void opd_log_ibs_event(unsigned int event, struct transient * trans); + +/** Log the specified IBS cycle count. */ +extern void opd_log_ibs_count(unsigned int event, struct transient * trans, unsigned int count); + + +#endif /*OPD_IBS_H*/ diff --git a/daemon/opd_ibs_macro.h b/daemon/opd_ibs_macro.h new file mode 100644 index 0000000..565d22f --- /dev/null +++ b/daemon/opd_ibs_macro.h @@ -0,0 +1,366 @@ +/** + * @file daemon/opd_ibs_macro.h + * AMD Family10h Instruction Based Sampling (IBS) related macro. + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Jason Yeh <jason.yeh@amd.com> + * @author Paul Drongowski <paul.drongowski@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2008 Advanced Micro Devices, Inc. 
+ */ + +#ifndef OPD_IBS_MACRO_H +#define OPD_IBS_MACRO_H + +/** + * The following defines are bit masks that are used to select + * IBS fetch event flags and values at the MSR level. + */ +#define FETCH_MASK_LATENCY 0x0000ffff +#define FETCH_MASK_COMPLETE 0x00040000 +#define FETCH_MASK_IC_MISS 0x00080000 +#define FETCH_MASK_PHY_ADDR 0x00100000 +#define FETCH_MASK_PG_SIZE 0x00600000 +#define FETCH_MASK_L1_MISS 0x00800000 +#define FETCH_MASK_L2_MISS 0x01000000 +#define FETCH_MASK_KILLED \ + (FETCH_MASK_L1_MISS|FETCH_MASK_L2_MISS|FETCH_MASK_PHY_ADDR|\ + FETCH_MASK_COMPLETE|FETCH_MASK_IC_MISS) + + +/** + * The following defines are bit masks that are used to select + * IBS op event flags and values at the MSR level. + */ +#define BR_MASK_RETIRE 0x0000ffff +#define BR_MASK_BRN_RET 0x00000020 +#define BR_MASK_BRN_MISP 0x00000010 +#define BR_MASK_BRN_TAKEN 0x00000008 +#define BR_MASK_RETURN 0x00000004 +#define BR_MASK_MISP_RETURN 0x00000002 +#define BR_MASK_BRN_RESYNC 0x00000001 + +#define NB_MASK_L3_STATE 0x00000020 +#define NB_MASK_REQ_DST_PROC 0x00000010 +#define NB_MASK_REQ_DATA_SRC 0x00000007 + +#define DC_MASK_L2_HIT_1G 0x00080000 +#define DC_MASK_PHY_ADDR_VALID 0x00040000 +#define DC_MASK_LIN_ADDR_VALID 0x00020000 +#define DC_MASK_MAB_HIT 0x00010000 +#define DC_MASK_LOCKED_OP 0x00008000 +#define DC_MASK_WC_MEM_ACCESS 0x00004000 +#define DC_MASK_UC_MEM_ACCESS 0x00002000 +#define DC_MASK_ST_TO_LD_CANCEL 0x00001000 +#define DC_MASK_ST_TO_LD_FOR 0x00000800 +#define DC_MASK_ST_BANK_CONFLICT 0x00000400 +#define DC_MASK_LD_BANK_CONFLICT 0x00000200 +#define DC_MASK_MISALIGN_ACCESS 0x00000100 +#define DC_MASK_DC_MISS 0x00000080 +#define DC_MASK_L2_HIT_2M 0x00000040 +#define DC_MASK_L1_HIT_1G 0x00000020 +#define DC_MASK_L1_HIT_2M 0x00000010 +#define DC_MASK_L2_TLB_MISS 0x00000008 +#define DC_MASK_L1_TLB_MISS 0x00000004 +#define DC_MASK_STORE_OP 0x00000002 +#define DC_MASK_LOAD_OP 0x00000001 + + +/** + * IBS derived events: + * + * IBS derived events are identified by event 
select values which are + * similar to the event select values that identify performance monitoring + * counter (PMC) events. Event select values for IBS derived events begin + * at 0xf000. + * + * The definitions in this file *must* match definitions + * of IBS derived events in gh-events.xml and in the + * oprofile AMD Family 10h events file. More information + * about IBS derived events is given in the Software Optimization + * Guide for AMD Family 10h Processors. + */ + +/** + * The following defines associate a 16-bit select value with an IBS + * derived fetch event. + */ +#define DE_IBS_FETCH_ALL 0xf000 +#define DE_IBS_FETCH_KILLED 0xf001 +#define DE_IBS_FETCH_ATTEMPTED 0xf002 +#define DE_IBS_FETCH_COMPLETED 0xf003 +#define DE_IBS_FETCH_ABORTED 0xf004 +#define DE_IBS_L1_ITLB_HIT 0xf005 +#define DE_IBS_ITLB_L1M_L2H 0xf006 +#define DE_IBS_ITLB_L1M_L2M 0xf007 +#define DE_IBS_IC_MISS 0xf008 +#define DE_IBS_IC_HIT 0xf009 +#define DE_IBS_FETCH_4K_PAGE 0xf00a +#define DE_IBS_FETCH_2M_PAGE 0xf00b +#define DE_IBS_FETCH_1G_PAGE 0xf00c +#define DE_IBS_FETCH_XX_PAGE 0xf00d +#define DE_IBS_FETCH_LATENCY 0xf00e + +#define IBS_FETCH_BASE 0xf000 +#define IBS_FETCH_END 0xf00e +#define IBS_FETCH_MAX (IBS_FETCH_END - IBS_FETCH_BASE + 1) +#define IS_IBS_FETCH(x) (IBS_FETCH_BASE <= x && x <= IBS_FETCH_END) +#define IBS_FETCH_OFFSET(x) (x - IBS_FETCH_BASE) + +/** + * The following defines associate a 16-bit select value with an IBS + * derived branch/return macro-op event. 
+ */ +#define DE_IBS_OP_ALL 0xf100 +#define DE_IBS_OP_TAG_TO_RETIRE 0xf101 +#define DE_IBS_OP_COMP_TO_RETIRE 0xf102 +#define DE_IBS_BRANCH_RETIRED 0xf103 +#define DE_IBS_BRANCH_MISP 0xf104 +#define DE_IBS_BRANCH_TAKEN 0xf105 +#define DE_IBS_BRANCH_MISP_TAKEN 0xf106 +#define DE_IBS_RETURN 0xf107 +#define DE_IBS_RETURN_MISP 0xf108 +#define DE_IBS_RESYNC 0xf109 + +#define IBS_OP_BASE 0xf100 +#define IBS_OP_END 0xf109 +#define IBS_OP_MAX (IBS_OP_END - IBS_OP_BASE + 1) +#define IS_IBS_OP(x) (IBS_OP_BASE <= x && x <= IBS_OP_END) +#define IBS_OP_OFFSET(x) (x - IBS_OP_BASE) + +/** + * The following defines associate a 16-bit select value with an IBS + * derived load/store event. + */ +#define DE_IBS_LS_ALL_OP 0xf200 +#define DE_IBS_LS_LOAD_OP 0xf201 +#define DE_IBS_LS_STORE_OP 0xf202 +#define DE_IBS_LS_DTLB_L1H 0xf203 +#define DE_IBS_LS_DTLB_L1M_L2H 0xf204 +#define DE_IBS_LS_DTLB_L1M_L2M 0xf205 +#define DE_IBS_LS_DC_MISS 0xf206 +#define DE_IBS_LS_DC_HIT 0xf207 +#define DE_IBS_LS_MISALIGNED 0xf208 +#define DE_IBS_LS_BNK_CONF_LOAD 0xf209 +#define DE_IBS_LS_BNK_CONF_STORE 0xf20a +#define DE_IBS_LS_STL_FORWARDED 0xf20b +#define DE_IBS_LS_STL_CANCELLED 0xf20c +#define DE_IBS_LS_UC_MEM_ACCESS 0xf20d +#define DE_IBS_LS_WC_MEM_ACCESS 0xf20e +#define DE_IBS_LS_LOCKED_OP 0xf20f +#define DE_IBS_LS_MAB_HIT 0xf210 +#define DE_IBS_LS_L1_DTLB_4K 0xf211 +#define DE_IBS_LS_L1_DTLB_2M 0xf212 +#define DE_IBS_LS_L1_DTLB_1G 0xf213 +#define DE_IBS_LS_L1_DTLB_RES 0xf214 +#define DE_IBS_LS_L2_DTLB_4K 0xf215 +#define DE_IBS_LS_L2_DTLB_2M 0xf216 +#define DE_IBS_LS_L2_DTLB_1G 0xf217 +#define DE_IBS_LS_L2_DTLB_RES2 0xf218 +#define DE_IBS_LS_DC_LOAD_LAT 0xf219 + +#define IBS_OP_LS_BASE 0xf200 +#define IBS_OP_LS_END 0xf219 +#define IBS_OP_LS_MAX (IBS_OP_LS_END - IBS_OP_LS_BASE + 1) +#define IS_IBS_OP_LS(x) (IBS_OP_LS_BASE <= x && x <= IBS_OP_LS_END) +#define IBS_OP_LS_OFFSET(x) (x - IBS_OP_LS_BASE) + + +/** + * The following defines associate a 16-bit select value with an IBS + * derived Northbridge 
(NB) event. + */ +#define DE_IBS_NB_LOCAL 0xf240 +#define DE_IBS_NB_REMOTE 0xf241 +#define DE_IBS_NB_LOCAL_L3 0xf242 +#define DE_IBS_NB_LOCAL_CACHE 0xf243 +#define DE_IBS_NB_REMOTE_CACHE 0xf244 +#define DE_IBS_NB_LOCAL_DRAM 0xf245 +#define DE_IBS_NB_REMOTE_DRAM 0xf246 +#define DE_IBS_NB_LOCAL_OTHER 0xf247 +#define DE_IBS_NB_REMOTE_OTHER 0xf248 +#define DE_IBS_NB_CACHE_STATE_M 0xf249 +#define DE_IBS_NB_CACHE_STATE_O 0xf24a +#define DE_IBS_NB_LOCAL_LATENCY 0xf24b +#define DE_IBS_NB_REMOTE_LATENCY 0xf24c + +#define IBS_OP_NB_BASE 0xf240 +#define IBS_OP_NB_END 0xf24c +#define IBS_OP_NB_MAX (IBS_OP_NB_END - IBS_OP_NB_BASE + 1) +#define IS_IBS_OP_NB(x) (IBS_OP_NB_BASE <= x && x <= IBS_OP_NB_END) +#define IBS_OP_NB_OFFSET(x) (x - IBS_OP_NB_BASE) + + +#define OP_MAX_IBS_COUNTERS (IBS_FETCH_MAX + IBS_OP_MAX + IBS_OP_LS_MAX + IBS_OP_NB_MAX) + + +/** + * These macros decode IBS hardware-level event flags and fields. + * Translation results are either zero (false) or non-zero (true), except + * the fetch latency, which is a 16-bit cycle count, and the fetch page size + * field, which is a 2-bit unsigned integer. + */ + +/** Bits 47:32 IbsFetchLat: instruction fetch latency */ +#define IBS_FETCH_FETCH_LATENCY(x) ((unsigned short)(x->ibs_fetch_ctl_high & FETCH_MASK_LATENCY)) + +/** Bit 50 IbsFetchComp: instruction fetch complete. */ +#define IBS_FETCH_FETCH_COMPLETION(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_COMPLETE) != 0) + +/** Bit 51 IbsIcMiss: instruction cache miss. */ +#define IBS_FETCH_INST_CACHE_MISS(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_IC_MISS) != 0) + +/** Bit 52 IbsPhyAddrValid: instruction fetch physical address valid. */ +#define IBS_FETCH_PHYS_ADDR_VALID(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_PHY_ADDR) != 0) + +/** Bits 54:53 IbsL1TlbPgSz: instruction cache L1TLB page size. */ +#define IBS_FETCH_TLB_PAGE_SIZE(x) ((unsigned short)((x->ibs_fetch_ctl_high >> 21) & 0x3)) + +/** Bit 55 IbsL1TlbMiss: instruction cache L1TLB miss. 
*/ +#define IBS_FETCH_M_L1_TLB_MISS(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_L1_MISS) != 0) + +/** Bit 56 IbsL2TlbMiss: instruction cache L2TLB miss. */ +#define IBS_FETCH_L2_TLB_MISS(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_L2_MISS) != 0) + +/** A fetch is a killed fetch if all the masked bits are clear */ +#define IBS_FETCH_KILLED(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_KILLED) == 0) + +#define IBS_FETCH_INST_CACHE_HIT(x) (IBS_FETCH_FETCH_COMPLETION(x) && !IBS_FETCH_INST_CACHE_MISS(x)) + +#define IBS_FETCH_L1_TLB_HIT(x) (!IBS_FETCH_M_L1_TLB_MISS(x) && IBS_FETCH_PHYS_ADDR_VALID(x)) + +#define IBS_FETCH_ITLB_L1M_L2H(x) (IBS_FETCH_M_L1_TLB_MISS(x) && !IBS_FETCH_L2_TLB_MISS(x)) + +#define IBS_FETCH_ITLB_L1M_L2M(x) (IBS_FETCH_M_L1_TLB_MISS(x) && IBS_FETCH_L2_TLB_MISS(x)) + + +/** + * These macros translate IBS op event data from its hardware-level + * representation. They hide the MSR layout of IBS op data. + */ + +/** + * MSRC001_1035 IBS OP Data Register (IbsOpData) + * + * 15:0 IbsCompToRetCtr: macro-op completion to retire count + */ +#define IBS_OP_COM_TO_RETIRE_CYCLES(x) ((unsigned short)(x->ibs_op_data1_low & BR_MASK_RETIRE)) + +/** 31:16 tag_to_retire_cycles : macro-op tag to retire count. */ +#define IBS_OP_TAG_TO_RETIRE_CYCLES(x) ((unsigned short)((x->ibs_op_data1_low >> 16) & BR_MASK_RETIRE)) + +/** 32 op_branch_resync : resync macro-op. */ +#define IBS_OP_OP_BRANCH_RESYNC(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0) + +/** 33 op_mispredict_return : mispredicted return macro-op. */ +#define IBS_OP_OP_MISPREDICT_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0) + +/** 34 IbsOpReturn: return macro-op. */ +#define IBS_OP_OP_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0) + +/** 35 IbsOpBrnTaken: taken branch macro-op. */ +#define IBS_OP_OP_BRANCH_TAKEN(x) ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0) + +/** 36 IbsOpBrnMisp: mispredicted branch macro-op. 
*/ +#define IBS_OP_OP_BRANCH_MISPREDICT(x) ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0) + +/** 37 IbsOpBrnRet: branch macro-op retired. */ +#define IBS_OP_OP_BRANCH_RETIRED(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0) + +/** + * MSRC001_1036 IBS Op Data 2 Register (IbsOpData2) + * + * 5 NbIbsReqCacheHitSt: IBS L3 cache state + */ +#define IBS_OP_NB_IBS_CACHE_HIT_ST(x) ((x->ibs_op_data2_low & NB_MASK_L3_STATE) != 0) + +/** 4 NbIbsReqDstProc: IBS request destination processor */ +#define IBS_OP_NB_IBS_REQ_DST_PROC(x) ((x->ibs_op_data2_low & NB_MASK_REQ_DST_PROC) != 0) + +/** 2:0 NbIbsReqSrc: Northbridge IBS request data source */ +#define IBS_OP_NB_IBS_REQ_SRC(x) ((unsigned char)(x->ibs_op_data2_low & NB_MASK_REQ_DATA_SRC)) + +/** + * MSRC001_1037 IBS Op Data3 Register + * + * Bits 47:32 IbsDcMissLat + */ +#define IBS_OP_DC_MISS_LATENCY(x) ((unsigned short)(x->ibs_op_data3_high & 0xffff)) + +/** 0 IbsLdOp: Load op */ +#define IBS_OP_IBS_LD_OP(x) ((x->ibs_op_data3_low & DC_MASK_LOAD_OP) != 0) + +/** 1 IbsStOp: Store op */ +#define IBS_OP_IBS_ST_OP(x) ((x->ibs_op_data3_low & DC_MASK_STORE_OP) != 0) + +/** 2 ibs_dc_l1_tlb_miss: Data cache L1TLB miss */ +#define IBS_OP_IBS_DC_L1_TLB_MISS(x) ((x->ibs_op_data3_low & DC_MASK_L1_TLB_MISS) != 0) + +/** 3 ibs_dc_l2_tlb_miss: Data cache L2TLB miss */ +#define IBS_OP_IBS_DC_L2_TLB_MISS(x) ((x->ibs_op_data3_low & DC_MASK_L2_TLB_MISS) != 0) + +/** 4 IbsDcL1tlbHit2M: Data cache L1TLB hit in 2M page */ +#define IBS_OP_IBS_DC_L1_TLB_HIT_2MB(x) ((x->ibs_op_data3_low & DC_MASK_L1_HIT_2M) != 0) + +/** 5 ibs_dc_l1_tlb_hit_1gb: Data cache L1TLB hit in 1G page */ +#define IBS_OP_IBS_DC_L1_TLB_HIT_1GB(x) ((x->ibs_op_data3_low & DC_MASK_L1_HIT_1G) != 0) + +/** 6 ibs_dc_l2_tlb_hit_2mb: Data cache L2TLB hit in 2M page */ +#define IBS_OP_IBS_DC_L2_TLB_HIT_2MB(x) ((x->ibs_op_data3_low & DC_MASK_L2_HIT_2M) != 0) + +/** 7 ibs_dc_miss: Data cache miss */ +#define IBS_OP_IBS_DC_MISS(x) ((x->ibs_op_data3_low & DC_MASK_DC_MISS) != 0) + +/** 
8 ibs_dc_miss_acc: Misaligned access */ +#define IBS_OP_IBS_DC_MISS_ACC(x) ((x->ibs_op_data3_low & DC_MASK_MISALIGN_ACCESS) != 0) + +/** 9 ibs_dc_ld_bnk_con: Bank conflict on load operation */ +#define IBS_OP_IBS_DC_LD_BNK_CON(x) ((x->ibs_op_data3_low & DC_MASK_LD_BANK_CONFLICT) != 0) + +/** 10 ibs_dc_st_bnk_con: Bank conflict on store operation */ +#define IBS_OP_IBS_DC_ST_BNK_CON(x) ((x->ibs_op_data3_low & DC_MASK_ST_BANK_CONFLICT) != 0) + +/** 11 ibs_dc_st_to_ld_fwd : Data forwarded from store to load operation */ +#define IBS_OP_IBS_DC_ST_TO_LD_FWD(x) ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_FOR) != 0) + +/** 12 ibs_dc_st_to_ld_can: Data forwarding from store to load operation cancelled */ +#define IBS_OP_IBS_DC_ST_TO_LD_CAN(x) ((x->ibs_op_data3_low & DC_MASK_ST_TO_LD_CANCEL) != 0) + +/** 13 ibs_dc_uc_mem_acc: UC memory access */ +#define IBS_OP_IBS_DC_UC_MEM_ACC(x) ((x->ibs_op_data3_low & DC_MASK_UC_MEM_ACCESS) != 0) + +/** 14 ibs_dc_wc_mem_acc : WC memory access */ +#define IBS_OP_IBS_DC_WC_MEM_ACC(x) ((x->ibs_op_data3_low & DC_MASK_WC_MEM_ACCESS) != 0) + +/** 15 ibs_locked_op: Locked operation */ +#define IBS_OP_IBS_LOCKED_OP(x) ((x->ibs_op_data3_low & DC_MASK_LOCKED_OP) != 0) + +/** 16 ibs_dc_mab_hit : MAB hit */ +#define IBS_OP_IBS_DC_MAB_HIT(x) ((x->ibs_op_data3_low & DC_MASK_MAB_HIT) != 0) + +/** 17 IbsDcLinAddrValid: Data cache linear address valid */ +#define IBS_OP_IBS_DC_LIN_ADDR_VALID(x) ((x->ibs_op_data3_low & DC_MASK_LIN_ADDR_VALID) != 0) + +/** 18 ibs_dc_phy_addr_valid: Data cache physical address valid */ +#define IBS_OP_IBS_DC_PHY_ADDR_VALID(x) ((x->ibs_op_data3_low & DC_MASK_PHY_ADDR_VALID) != 0) + +/** 19 ibs_dc_l2_tlb_hit_1gb: Data cache L2TLB hit in 1G page */ +#define IBS_OP_IBS_DC_L2_TLB_HIT_1GB(x) ((x->ibs_op_data3_low & DC_MASK_L2_HIT_1G) != 0) + + +/** + * Aggregate the IBS derived event. Increase the + * derived event count by one. 
+ */ +#define AGG_IBS_EVENT(EV) opd_log_ibs_event(EV, trans) + +/** + * Aggregate the IBS latency/cycle counts. Increase the + * derived event count by the specified count value. + */ +#define AGG_IBS_COUNT(EV, COUNT) opd_log_ibs_count(EV, trans, COUNT) + + +#endif /*OPD_IBS_MACRO_H*/ diff --git a/daemon/opd_ibs_trans.c b/daemon/opd_ibs_trans.c new file mode 100644 index 0000000..3b2c2f8 --- /dev/null +++ b/daemon/opd_ibs_trans.c @@ -0,0 +1,554 @@ +/** + * @file daemon/opd_ibs_trans.c + * AMD Family10h Instruction Based Sampling (IBS) translation. + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Jason Yeh <jason.yeh@amd.com> + * @author Paul Drongowski <paul.drongowski@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2008 Advanced Micro Devices, Inc. + */ + +#include "opd_ibs.h" +#include "opd_ibs_macro.h" +#include "opd_ibs_trans.h" +#include "opd_trans.h" +#include "opd_printf.h" + +#include <stdlib.h> +#include <stdio.h> + +#define MAX_EVENTS_PER_GROUP 32 + +/* + * --------------------- OP DERIVED FUNCTION + */ +void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size) +{ + struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch; + unsigned int i, j, mask = 1; + + for (i = IBS_FETCH_BASE, j =0 ; i <= IBS_FETCH_END && j < size ; i++, mask = mask << 1) { + + if ((selected_flag & mask) == 0) + continue; + + j++; + + switch (i) { + + case DE_IBS_FETCH_ALL: + /* IBS all fetch samples (kills + attempts) */ + AGG_IBS_EVENT(DE_IBS_FETCH_ALL); + break; + + case DE_IBS_FETCH_KILLED: + /* IBS killed fetches ("case 0") -- All interesting event + * flags are clear */ + if (IBS_FETCH_KILLED(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_FETCH_KILLED); + break; + + case DE_IBS_FETCH_ATTEMPTED: + /* Any non-killed fetch is an attempted fetch */ + AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED); + break; + + case DE_IBS_FETCH_COMPLETED: + if 
(IBS_FETCH_FETCH_COMPLETION(trans_fetch)) + /* IBS Fetch Completed */ + AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED); + break; + + case DE_IBS_FETCH_ABORTED: + if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch)) + /* IBS Fetch Aborted */ + AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED); + break; + + case DE_IBS_L1_ITLB_HIT: + /* IBS L1 ITLB hit */ + if (IBS_FETCH_L1_TLB_HIT(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT); + break; + + case DE_IBS_ITLB_L1M_L2H: + /* IBS L1 ITLB miss and L2 ITLB hit */ + if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H); + break; + + case DE_IBS_ITLB_L1M_L2M: + /* IBS L1 & L2 ITLB miss; complete ITLB miss */ + if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M); + break; + + case DE_IBS_IC_MISS: + /* IBS instruction cache miss */ + if (IBS_FETCH_INST_CACHE_MISS(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_IC_MISS); + break; + + case DE_IBS_IC_HIT: + /* IBS instruction cache hit */ + if (IBS_FETCH_INST_CACHE_HIT(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_IC_HIT); + break; + + case DE_IBS_FETCH_4K_PAGE: + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) + && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB4K) + AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE); + break; + + case DE_IBS_FETCH_2M_PAGE: + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) + && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB2M) + AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE); + break; + + case DE_IBS_FETCH_1G_PAGE: + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) + && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB1G) + AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE); + break; + + case DE_IBS_FETCH_XX_PAGE: + break; + + case DE_IBS_FETCH_LATENCY: + if (IBS_FETCH_FETCH_LATENCY(trans_fetch)) + AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY, + IBS_FETCH_FETCH_LATENCY(trans_fetch)); + break; + default: + break; + } + } +} + +/* + * --------------------- OP DERIVED FUNCTION + */ +void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size) +{ + struct ibs_op_sample * trans_op = 
((struct ibs_sample*)(trans->ext))->op; + unsigned int i, j, mask = 1; + + for (i = IBS_OP_BASE, j =0 ; i <= IBS_OP_END && j < size ; i++, mask = mask << 1) { + + if ((selected_flag & mask) == 0) + continue; + + j++; + + switch (i) { + + case DE_IBS_OP_ALL: + /* All IBS op samples */ + AGG_IBS_EVENT(DE_IBS_OP_ALL); + break; + + case DE_IBS_OP_TAG_TO_RETIRE: + /* Tally retire cycle counts for all sampled macro-ops + * IBS tag to retire cycles */ + if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)) + AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE, + IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)); + break; + + case DE_IBS_OP_COMP_TO_RETIRE: + /* IBS completion to retire cycles */ + if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)) + AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE, + IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)); + break; + + case DE_IBS_BRANCH_RETIRED: + if (IBS_OP_OP_BRANCH_RETIRED(trans_op)) + /* IBS Branch retired op */ + AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ; + break; + + case DE_IBS_BRANCH_MISP: + if (IBS_OP_OP_BRANCH_RETIRED(trans_op) + /* Test branch-specific event flags */ + /* IBS mispredicted Branch op */ + && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) + AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ; + break; + + case DE_IBS_BRANCH_TAKEN: + if (IBS_OP_OP_BRANCH_RETIRED(trans_op) + /* IBS taken Branch op */ + && IBS_OP_OP_BRANCH_TAKEN(trans_op)) + AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN); + break; + + case DE_IBS_BRANCH_MISP_TAKEN: + if (IBS_OP_OP_BRANCH_RETIRED(trans_op) + /* IBS mispredicted taken branch op */ + && IBS_OP_OP_BRANCH_TAKEN(trans_op) + && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) + AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN); + break; + + case DE_IBS_RETURN: + if (IBS_OP_OP_BRANCH_RETIRED(trans_op) + /* IBS return op */ + && IBS_OP_OP_RETURN(trans_op)) + AGG_IBS_EVENT(DE_IBS_RETURN); + break; + + case DE_IBS_RETURN_MISP: + if (IBS_OP_OP_BRANCH_RETIRED(trans_op) + /* IBS mispredicted return op */ + && IBS_OP_OP_RETURN(trans_op) + && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) + 
AGG_IBS_EVENT(DE_IBS_RETURN_MISP); + break; + + case DE_IBS_RESYNC: + /* Test for a resync macro-op */ + if (IBS_OP_OP_BRANCH_RESYNC(trans_op)) + AGG_IBS_EVENT(DE_IBS_RESYNC); + break; + default: + break; + } + } +} + + +/* + * --------------------- OP LS DERIVED FUNCTION + */ +void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size) +{ + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + unsigned int i, j, mask = 1; + + /* Preliminary check */ + if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op)) + return; + + + for (i = IBS_OP_LS_BASE, j =0 ; i <= IBS_OP_LS_END && j < size ; i++, mask = mask << 1) { + + if ((selected_flag & mask) == 0) + continue; + + j++; + + switch (i) { + + case DE_IBS_LS_ALL_OP: + /* Count the number of LS op samples */ + AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ; + break; + + case DE_IBS_LS_LOAD_OP: + if (IBS_OP_IBS_LD_OP(trans_op)) + /* TALLy an IBS load derived event */ + AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ; + break; + + case DE_IBS_LS_STORE_OP: + if (IBS_OP_IBS_ST_OP(trans_op)) + /* Count and handle store operations */ + AGG_IBS_EVENT(DE_IBS_LS_STORE_OP); + break; + + case DE_IBS_LS_DTLB_L1H: + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)) + /* L1 DTLB hit -- This is the most frequent case */ + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H); + break; + + case DE_IBS_LS_DTLB_L1M_L2H: + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) + /* L1 DTLB miss, L2 DTLB hit */ + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H); + break; + + case DE_IBS_LS_DTLB_L1M_L2M: + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) + /* L1 DTLB miss, L2 DTLB miss */ + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M); + break; + + case DE_IBS_LS_DC_MISS: + if (IBS_OP_IBS_DC_MISS(trans_op)) + 
AGG_IBS_EVENT(DE_IBS_LS_DC_MISS); + break; + + case DE_IBS_LS_DC_HIT: + if (!IBS_OP_IBS_DC_MISS(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_DC_HIT); + break; + + case DE_IBS_LS_MISALIGNED: + if (IBS_OP_IBS_DC_MISS_ACC(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED); + break; + + case DE_IBS_LS_BNK_CONF_LOAD: + if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD); + break; + + case DE_IBS_LS_BNK_CONF_STORE: + if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE); + break; + + case DE_IBS_LS_STL_FORWARDED: + if (IBS_OP_IBS_LD_OP(trans_op) + /* Data forwarding info are valid only for load ops */ + && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ; + break; + + case DE_IBS_LS_STL_CANCELLED: + if (IBS_OP_IBS_LD_OP(trans_op)) + if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ; + break; + + case DE_IBS_LS_UC_MEM_ACCESS: + if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS); + break; + + case DE_IBS_LS_WC_MEM_ACCESS: + if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS); + break; + + case DE_IBS_LS_LOCKED_OP: + if (IBS_OP_IBS_LOCKED_OP(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP); + break; + + case DE_IBS_LS_MAB_HIT: + if (IBS_OP_IBS_DC_MAB_HIT(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT); + break; + + case DE_IBS_LS_L1_DTLB_4K: + /* l1_translation */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + + && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) + /* This is the most common case, unfortunately */ + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ; + break; + + case DE_IBS_LS_L1_DTLB_2M: + /* l1_translation */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + + && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)) + /* 2M L1 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M); + break; + 
+ + case DE_IBS_LS_L1_DTLB_1G: + /* l1_translation */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + + && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) + && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) + /* 1G L1 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G); + break; + + case DE_IBS_LS_L1_DTLB_RES: + break; + + case DE_IBS_LS_L2_DTLB_4K: + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) + + /* L2 DTLB page translation */ + && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) + /* 4K L2 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K); + break; + + case DE_IBS_LS_L2_DTLB_2M: + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) + + /* L2 DTLB page translation */ + && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) + /* 2M L2 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M); + break; + + case DE_IBS_LS_L2_DTLB_1G: + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) + + /* L2 DTLB page translation */ + && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) + && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) + /* 1G L2 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G); + break; + + case DE_IBS_LS_L2_DTLB_RES2: + break; + + case DE_IBS_LS_DC_LOAD_LAT: + if (IBS_OP_IBS_LD_OP(trans_op) + /* If the load missed in DC, tally the DC load miss latency */ + && IBS_OP_IBS_DC_MISS(trans_op)) + /* DC load miss latency is only reliable for load ops */ + AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT, + IBS_OP_DC_MISS_LATENCY(trans_op)) ; + break; + + default: + break; + } + } +} + +/* + * --------------------- OP NB DERIVED FUNCTION + 
* + * NB data is only guaranteed reliable for load operations + * that miss in L1 and L2 cache. NB data arrives too late + * to be reliable for store operations + */ +void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size) +{ + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + unsigned int i, j, mask = 1; + + /* Preliminary check */ + if (!IBS_OP_IBS_LD_OP(trans_op)) + return; + + if (!IBS_OP_IBS_DC_MISS(trans_op)) + return; + + if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0) + return; + + for (i = IBS_OP_NB_BASE, j =0 ; i <= IBS_OP_NB_END && j < size ; i++, mask = mask << 1) { + + if ((selected_flag & mask) == 0) + continue; + + j++; + + switch (i) { + + case DE_IBS_NB_LOCAL: + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by local processor */ + AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ; + break; + + case DE_IBS_NB_REMOTE: + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by remote processor */ + AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ; + break; + + case DE_IBS_NB_LOCAL_L3: + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x1)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3); + break; + + case DE_IBS_NB_LOCAL_CACHE: + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE); + break; + + case DE_IBS_NB_REMOTE_CACHE: + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)) + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ; + break; + + case DE_IBS_NB_LOCAL_DRAM: + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM); + break; + + case DE_IBS_NB_REMOTE_DRAM: + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3)) + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ; + break; + + case DE_IBS_NB_LOCAL_OTHER: + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && 
(IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER); + break; + + case DE_IBS_NB_REMOTE_OTHER: + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7)) + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ; + break; + + case DE_IBS_NB_CACHE_STATE_M: + if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2) + && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ; + break; + + case DE_IBS_NB_CACHE_STATE_O: + if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2) + && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ; + break; + + case DE_IBS_NB_LOCAL_LATENCY: + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by local processor */ + AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY, + IBS_OP_DC_MISS_LATENCY(trans_op)); + break; + + case DE_IBS_NB_REMOTE_LATENCY: + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by remote processor */ + AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY, + IBS_OP_DC_MISS_LATENCY(trans_op)); + break; + + default: + break; + } + } +} diff --git a/daemon/opd_ibs_trans.h b/daemon/opd_ibs_trans.h new file mode 100644 index 0000000..d01e3d9 --- /dev/null +++ b/daemon/opd_ibs_trans.h @@ -0,0 +1,31 @@ +/** + * @file daemon/opd_ibs_trans.h + * AMD Family10h Instruction Based Sampling (IBS) translation. + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Jason Yeh <jason.yeh@amd.com> + * @author Paul Drongowski <paul.drongowski@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + * Copyright (c) 2008 Advanced Micro Devices, Inc. 
+ */ + +#ifndef OPD_IBS_TRANS_H +#define OPD_IBS_TRANS_H + +struct ibs_fetch_sample; +struct ibs_op_sample; +struct transient; + +struct ibs_translation_table { + unsigned int event; + void (*translator)(struct transient *); +}; + + +extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size); +extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size); +extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size); +extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size); +#endif // OPD_IBS_TRANS_H diff --git a/daemon/opd_interface.h b/daemon/opd_interface.h index c876830..ef3b02c 100644 --- a/daemon/opd_interface.h +++ b/daemon/opd_interface.h @@ -35,11 +35,14 @@ #if defined(__powerpc__) #define SPU_PROFILING_CODE 11 #define SPU_CTX_SWITCH_CODE 12 -#define DOMAIN_SWITCH_CODE 13 -#define LAST_CODE 14 #else #define DOMAIN_SWITCH_CODE 11 -#define LAST_CODE 12 +/* Code 12 is now considered an unknown escape code */ #endif + +/* AMD's Instruction-Based Sampling (IBS) escape code */ +#define IBS_FETCH_SAMPLE 13 +#define IBS_OP_SAMPLE 14 +#define LAST_CODE 15 #endif /* OPD_INTERFACE_H */ diff --git a/daemon/opd_mangling.c b/daemon/opd_mangling.c index 08a6079..b4768a6 100644 --- a/daemon/opd_mangling.c +++ b/daemon/opd_mangling.c @@ -138,7 +138,7 @@ mangle_filename(struct sfile * last, struct sfile const * sf, int counter, int c } -int opd_open_sample_file(odb_t * file, struct sfile * last, +int opd_open_sample_file(odb_t *file, struct sfile *last, struct sfile * sf, int counter, int cg) { char * mangled; diff --git a/daemon/opd_mangling.h b/daemon/opd_mangling.h index 0e46ec4..d1b2a78 100644 --- a/daemon/opd_mangling.h +++ b/daemon/opd_mangling.h @@ -27,7 +27,7 @@ struct sfile; * * Returns 0 on success. 
*/ -int opd_open_sample_file(odb_t * file, struct sfile * last, +int opd_open_sample_file(odb_t *file, struct sfile *last, struct sfile * sf, int counter, int cg); #endif /* OPD_MANGLING_H */ diff --git a/daemon/opd_printf.h b/daemon/opd_printf.h index e1f8476..09df07f 100644 --- a/daemon/opd_printf.h +++ b/daemon/opd_printf.h @@ -22,6 +22,8 @@ extern int vsamples; extern int varcs; /// kernel module handling extern int vmodule; +/// extended feature +extern int vext; /// all others not fitting in above category, not voluminous. extern int vmisc; diff --git a/daemon/opd_sfile.c b/daemon/opd_sfile.c index 03ebf55..c2dea20 100644 --- a/daemon/opd_sfile.c +++ b/daemon/opd_sfile.c @@ -17,6 +17,7 @@ #include "opd_anon.h" #include "opd_printf.h" #include "opd_stats.h" +#include "opd_extended.h" #include "oprofiled.h" #include "op_libiberty.h" @@ -126,7 +127,7 @@ trans_match(struct transient const * trans, struct sfile const * sfile, } -static int +int sfile_equal(struct sfile const * sf, struct sfile const * sf2) { return do_match(sf, sf2->cookie, sf2->app_cookie, sf2->kernel, @@ -183,6 +184,11 @@ create_sfile(unsigned long hash, struct transient const * trans, for (i = 0 ; i < op_nr_counters ; ++i) odb_init(&sf->files[i]); + if (trans->ext) + opd_ext_sfile_create(sf); + else + sf->ext_files = NULL; + for (i = 0; i < CG_HASH_SIZE; ++i) list_init(&sf->cg_hash[i]); @@ -269,7 +275,7 @@ lru: } -static void sfile_dup(struct sfile * to, struct sfile * from) +void sfile_dup(struct sfile * to, struct sfile * from) { size_t i; @@ -278,6 +284,8 @@ static void sfile_dup(struct sfile * to, struct sfile * from) for (i = 0 ; i < op_nr_counters ; ++i) odb_init(&to->files[i]); + opd_ext_sfile_dup(to, from); + for (i = 0; i < CG_HASH_SIZE; ++i) list_init(&to->cg_hash[i]); @@ -295,6 +303,9 @@ static odb_t * get_file(struct transient const * trans, int is_cg) unsigned long hash; odb_t * file; + if ((trans->ext) != NULL) + return opd_ext_sfile_get(trans, is_cg); + if (trans->event >= 
op_nr_counters) { fprintf(stderr, "%s: Invalid counter %lu\n", __FUNCTION__, trans->event); @@ -417,6 +428,13 @@ static void sfile_log_arc(struct transient const * trans) void sfile_log_sample(struct transient const * trans) { + sfile_log_sample_count(trans, 1); +} + + +void sfile_log_sample_count(struct transient const * trans, + unsigned long int count) +{ int err; vma_t pc = trans->pc; odb_t * file; @@ -437,7 +455,7 @@ void sfile_log_sample(struct transient const * trans) if (trans->current->anon) pc -= trans->current->anon->start; - + if (vsamples) verbose_sample(trans, pc); @@ -446,7 +464,9 @@ void sfile_log_sample(struct transient const * trans) return; } - err = odb_update_node(file, (uint64_t)pc); + err = odb_update_node_with_offset(file, + (odb_key_t)pc, + count); if (err) { fprintf(stderr, "%s: %s\n", __FUNCTION__, strerror(err)); abort(); @@ -462,6 +482,8 @@ static int close_sfile(struct sfile * sf, void * data __attribute__((unused))) for (i = 0; i < op_nr_counters; ++i) odb_close(&sf->files[i]); + opd_ext_sfile_close(sf); + return 0; } @@ -481,6 +503,8 @@ static int sync_sfile(struct sfile * sf, void * data __attribute__((unused))) for (i = 0; i < op_nr_counters; ++i) odb_sync(&sf->files[i]); + opd_ext_sfile_sync(sf); + return 0; } diff --git a/daemon/opd_sfile.h b/daemon/opd_sfile.h index 86d5025..76e5e63 100644 --- a/daemon/opd_sfile.h +++ b/daemon/opd_sfile.h @@ -62,6 +62,8 @@ struct sfile { int ignored; /** opened sample files */ odb_t files[OP_MAX_COUNTERS]; + /** extended sample files */ + odb_t * ext_files; /** hash table of opened cg sample files */ struct list_head cg_hash[CG_HASH_SIZE]; }; @@ -107,6 +109,10 @@ struct sfile * sfile_find(struct transient const * trans); /** Log the sample in a previously located sfile. 
*/ void sfile_log_sample(struct transient const * trans); +/** Log the event/cycle count in a previously located sfile */ +void sfile_log_sample_count(struct transient const * trans, + unsigned long int count); + /** initialise hashes */ void sfile_init(void); diff --git a/daemon/opd_stats.c b/daemon/opd_stats.c index ddb1940..e7af72b 100644 --- a/daemon/opd_stats.c +++ b/daemon/opd_stats.c @@ -10,6 +10,7 @@ */ #include "opd_stats.h" +#include "opd_extended.h" #include "oprofiled.h" #include "op_get_time.h" @@ -40,6 +41,7 @@ void opd_print_stats(void) struct dirent * dirent; printf("\n%s\n", op_get_time()); + printf("\n-- OProfile Statistics --\n"); printf("Nr. sample dumps: %lu\n", opd_stats[OPD_DUMP_COUNT]); printf("Nr. non-backtrace samples: %lu\n", opd_stats[OPD_SAMPLES]); printf("Nr. kernel samples: %lu\n", opd_stats[OPD_KERNEL]); @@ -59,6 +61,8 @@ void opd_print_stats(void) print_if("Nr. samples lost due to no mm: %u\n", "/dev/oprofile/stats", "sample_lost_no_mm", 1); + opd_ext_print_stats(); + if (!(dir = opendir("/dev/oprofile/stats/"))) goto out; while ((dirent = readdir(dir))) { @@ -68,6 +72,7 @@ void opd_print_stats(void) continue; snprintf(path, 256, "/dev/oprofile/stats/%s", dirent->d_name); + printf("\n---- Statistics for cpu : %d\n", cpu_nr); print_if("Nr. samples lost cpu buffer overflow: %u\n", path, "sample_lost_overflow", 1); print_if("Nr. 
samples lost task exit: %u\n", diff --git a/daemon/opd_trans.c b/daemon/opd_trans.c index 871e6e6..76296a0 100644 --- a/daemon/opd_trans.c +++ b/daemon/opd_trans.c @@ -194,7 +194,7 @@ static void code_cookie_switch(struct transient * trans) if (vmisc) { char const * name = verbose_cookie(trans->cookie); verbprintf(vmisc, "COOKIE_SWITCH to cookie %s(%llx)\n", - name, trans->cookie); + name, trans->cookie); } } @@ -246,11 +246,11 @@ static void code_xen_enter(struct transient * trans) verbprintf(vmisc, "XEN_ENTER_SWITCH to xen\n"); trans->in_kernel = 1; trans->current = NULL; - /* subtlety: we must keep trans->cookie cached, even though it's - * meaningless for Xen - we won't necessarily get a cookie switch - * on Xen exit. See comments in opd_sfile.c. It seems that we can - * get away with in_kernel = 1 as long as we supply the correct - * Xen image, and its address range in startup find_kernel_image + /* subtlety: we must keep trans->cookie cached, even though it's + * meaningless for Xen - we won't necessarily get a cookie switch + * on Xen exit. See comments in opd_sfile.c. 
It seems that we can + * get away with in_kernel = 1 as long as we supply the correct + * Xen image, and its address range in startup find_kernel_image * is modified to look in the Xen image also */ } @@ -258,24 +258,31 @@ static void code_xen_enter(struct transient * trans) extern void code_spu_profiling(struct transient * trans); extern void code_spu_ctx_switch(struct transient * trans); +extern void code_ibs_fetch_sample(struct transient * trans); +extern void code_ibs_op_sample(struct transient * trans); + handler_t handlers[LAST_CODE + 1] = { &code_unknown, &code_ctx_switch, &code_cpu_switch, &code_cookie_switch, &code_kernel_enter, - &code_user_enter, + &code_user_enter, &code_module_loaded, /* tgid handled differently */ &code_unknown, &code_trace_begin, &code_unknown, - &code_xen_enter, + &code_xen_enter, #if defined(__powerpc__) &code_spu_profiling, &code_spu_ctx_switch, -#endif +#else &code_unknown, + &code_unknown, +#endif + &code_ibs_fetch_sample, + &code_ibs_op_sample, }; extern void (*special_processor)(struct transient *); @@ -299,7 +306,8 @@ void opd_process_samples(char const * buffer, size_t count) .cpu = -1, .tid = -1, .embedded_offset = UNUSED_EMBEDDED_OFFSET, - .tgid = -1 + .tgid = -1, + .ext = NULL }; /* FIXME: was uint64_t but it can't compile on alpha where uint64_t @@ -313,17 +321,9 @@ void opd_process_samples(char const * buffer, size_t count) return; } - int i; - - for (i = 0; i < count && i < 200; i++) { - verbprintf(vmisc, "buffer[%d] is %x\n", i, buffer[i]); - } - while (trans.remaining) { code = pop_buffer_value(&trans); - verbprintf(vmisc, "In opd_process_samples (code is %lld)\n", code); - if (!is_escape_code(code)) { opd_put_sample(&trans, code); continue; @@ -338,7 +338,6 @@ void opd_process_samples(char const * buffer, size_t count) // started with ESCAPE_CODE, next is type code = pop_buffer_value(&trans); - verbprintf(vmisc, "next code is %lld\n", code); if (code >= LAST_CODE) { fprintf(stderr, "Unknown code %llu\n", code); 
abort(); diff --git a/daemon/opd_trans.h b/daemon/opd_trans.h index ab4e816..c0a868b 100644 --- a/daemon/opd_trans.h +++ b/daemon/opd_trans.h @@ -54,6 +54,7 @@ struct transient { pid_t tid; pid_t tgid; uint64_t embedded_offset; + void * ext; }; typedef void (*handler_t)(struct transient *); diff --git a/daemon/oprofiled.c b/daemon/oprofiled.c index ec2ea1b..173d972 100644 --- a/daemon/oprofiled.c +++ b/daemon/oprofiled.c @@ -17,6 +17,7 @@ #include "oprofiled.h" #include "opd_printf.h" #include "opd_events.h" +#include "opd_extended.h" #include "op_config.h" #include "op_version.h" @@ -52,11 +53,13 @@ sig_atomic_t signal_usr2; uint op_nr_counters; op_cpu cpu_type; +int no_event_ok; int vsfile; int vsamples; int varcs; int vmodule; int vmisc; +int vext; int separate_lib; int separate_kernel; int separate_thread; @@ -71,6 +74,7 @@ char * xen_range; static char * verbose; static char * binary_name_filter; static char * events; +static char * ext_feature; static int showvers; static struct oprofiled_ops * opd_ops; extern struct oprofiled_ops opd_24_ops; @@ -94,6 +98,7 @@ static struct poptOption options[] = { { "events", 'e', POPT_ARG_STRING, &events, 0, "events list", "[events]" }, { "version", 'v', POPT_ARG_NONE, &showvers, 0, "show version", NULL, }, { "verbose", 'V', POPT_ARG_STRING, &verbose, 0, "be verbose in log file", "all,sfile,arcs,samples,module,misc", }, + { "ext-feature", 'x', POPT_ARG_STRING, &ext_feature, 1, "enable extended feature", "<extended-feature-name>:[args]", }, POPT_AUTOHELP { NULL, 0, 0, NULL, 0, NULL, NULL, }, }; @@ -353,6 +358,7 @@ static void opd_handle_verbose_option(char const * name) varcs = 1; vmodule = 1; vmisc = 1; + vext= 1; } else if (!strcmp(name, "sfile")) { vsfile = 1; } else if (!strcmp(name, "arcs")) { @@ -363,6 +369,8 @@ static void opd_handle_verbose_option(char const * name) vmodule = 1; } else if (!strcmp(name, "misc")) { vmisc = 1; + } else if (!strcmp(name, "ext")) { + vext= 1; } else { fprintf(stderr, "unknown verbose 
options\n"); exit(EXIT_FAILURE); @@ -426,7 +434,10 @@ static void opd_options(int argc, char const * argv[]) } } - if (events == NULL) { + if(opd_ext_initialize(ext_feature) != EXIT_SUCCESS) + exit(EXIT_FAILURE); + + if (events == NULL && no_event_ok == 0) { fprintf(stderr, "oprofiled: no events specified.\n"); poptPrintHelp(optcon, stderr, 0); exit(EXIT_FAILURE); @@ -451,7 +462,8 @@ static void opd_options(int argc, char const * argv[]) } } - opd_parse_events(events); + if (events != NULL) + opd_parse_events(events); opd_parse_image_filter(); diff --git a/libdb/db_insert.c b/libdb/db_insert.c index 018c294..6bbd71f 100644 --- a/libdb/db_insert.c +++ b/libdb/db_insert.c @@ -51,6 +51,13 @@ static inline int add_node(odb_data_t * data, odb_key_t key, odb_value_t value) int odb_update_node(odb_t * odb, odb_key_t key) { + return odb_update_node_with_offset(odb, key, 1); +} + +int odb_update_node_with_offset(odb_t * odb, + odb_key_t key, + unsigned long int offset) +{ odb_index_t index; odb_node_t * node; odb_data_t * data; @@ -60,8 +67,8 @@ int odb_update_node(odb_t * odb, odb_key_t key) while (index) { node = &data->node_base[index]; if (node->key == key) { - if (node->value + 1 != 0) { - node->value += 1; + if (node->value + offset != 0) { + node->value += offset; } else { /* post profile tools must handle overflow */ /* FIXME: the tricky way will be just to add @@ -92,7 +99,7 @@ int odb_update_node(odb_t * odb, odb_key_t key) index = node->next; } - return add_node(data, key, 1); + return add_node(data, key, offset); } diff --git a/libdb/db_manage.c b/libdb/db_manage.c index d8a6fcb..17a0be5 100644 --- a/libdb/db_manage.c +++ b/libdb/db_manage.c @@ -11,10 +11,10 @@ #define _GNU_SOURCE #include <stdlib.h> -#ifndef ANDROID -#include <sys/fcntl.h> -#else +#ifdef ANDROID #include <fcntl.h> +#else +#include <sys/fcntl.h> #endif #include <sys/mman.h> #include <sys/types.h> diff --git a/libdb/odb.h b/libdb/odb.h index c190b57..9ad1da2 100644 --- a/libdb/odb.h +++ 
b/libdb/odb.h @@ -180,6 +180,22 @@ void odb_hash_free_stat(odb_hash_stat_t * stats); */ int odb_update_node(odb_t * odb, odb_key_t key); +/** + * odb_update_node_with_offset + * @param odb the data base object to setup + * @param key the hash key + * @param offset the offset to be added + * + * update info at key by adding the specified offset to its associated value, + * if the key does not exist a new node is created and the value associated + * is set to offset. + * + * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure + */ +int odb_update_node_with_offset(odb_t * odb, + odb_key_t key, + unsigned long int offset); + /** Add a new node w/o regarding if a node with the same key already exists * * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure diff --git a/libop/Android.mk b/libop/Android.mk index 8fbd1e6..e935a45 100644 --- a/libop/Android.mk +++ b/libop/Android.mk @@ -8,7 +8,9 @@ LOCAL_SRC_FILES:= \ op_events.c \ op_get_interface.c \ op_mangle.c \ - op_parse_event.c + op_parse_event.c \ + op_xml_events.c \ + op_xml_out.c LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/.. \ diff --git a/libop/op_alloc_counter.c b/libop/op_alloc_counter.c index 353100a..bb2bd6e 100644 --- a/libop/op_alloc_counter.c +++ b/libop/op_alloc_counter.c @@ -113,6 +113,9 @@ static void delete_counter_arc(counter_arc_head * ctr_arc, int nr_events) * a bitmask of already allocated counter. Walking through node is done in * preorder left to right. * + * In case of extended events (required no phisical counters), the associated + * counter_map entry will be -1. + * * Possible improvment if neccessary: partition counters in class of counter, * two counter belong to the same class if they allow exactly the same set of * event. 
Now using a variant of the backtrack algo can works on class of @@ -128,18 +131,27 @@ allocate_counter(counter_arc_head const * ctr_arc, int max_depth, int depth, if (depth == max_depth) return 1; - list_for_each(pos, &ctr_arc[depth].next) { - counter_arc const * arc = list_entry(pos, counter_arc, next); - - if (allocated_mask & (1 << arc->counter)) - continue; - - counter_map[depth] = arc->counter; - + /* If ctr_arc is not available, counter_map is -1 */ + if((&ctr_arc[depth].next)->next == &ctr_arc[depth].next) { + counter_map[depth] = -1; if (allocate_counter(ctr_arc, max_depth, depth + 1, - allocated_mask | (1 << arc->counter), + allocated_mask, counter_map)) return 1; + } else { + list_for_each(pos, &ctr_arc[depth].next) { + counter_arc const * arc = list_entry(pos, counter_arc, next); + + if (allocated_mask & (1 << arc->counter)) + continue; + + counter_map[depth] = arc->counter; + + if (allocate_counter(ctr_arc, max_depth, depth + 1, + allocated_mask | (1 << arc->counter), + counter_map)) + return 1; + } } return 0; @@ -167,7 +179,8 @@ static int op_get_counter_mask(u32 * mask) /* assume nothing is available */ u32 available=0; - count = scandir("/dev/oprofile", &counterlist, perfcounterdir, alphasort); + count = scandir("/dev/oprofile", &counterlist, perfcounterdir, + alphasort); if (count < 0) /* unable to determine bit mask */ return -1; @@ -186,21 +199,36 @@ size_t * map_event_to_counter(struct op_event const * pev[], int nr_events, { counter_arc_head * ctr_arc; size_t * counter_map; - int nr_counters; + int i, nr_counters, nr_pmc_events; + op_cpu curr_cpu_type; u32 unavailable_counters = 0; - nr_counters = op_get_counter_mask(&unavailable_counters); + /* Either ophelp or one of the libop tests may invoke this + * function with a non-native cpu_type. If so, we should not + * call op_get_counter_mask because that will look for real counter + * information in oprofilefs. 
+ */ + curr_cpu_type = op_get_cpu_type(); + if (cpu_type != curr_cpu_type) + nr_counters = op_get_nr_counters(cpu_type); + else + nr_counters = op_get_counter_mask(&unavailable_counters); + /* no counters then probably perfmon managing perfmon hw */ if (nr_counters <= 0) { nr_counters = op_get_nr_counters(cpu_type); unavailable_counters = (~0) << nr_counters; } - if (nr_counters < nr_events) - return 0; + + /* Check to see if we have enough physical counters to map events*/ + for (i = 0, nr_pmc_events = 0; i < nr_events; i++) + if(pev[i]->ext == NULL) + if (++nr_pmc_events > nr_counters) + return 0; ctr_arc = build_counter_arc(pev, nr_events); - counter_map = xmalloc(nr_counters * sizeof(size_t)); + counter_map = xmalloc(nr_events * sizeof(size_t)); if (!allocate_counter(ctr_arc, nr_events, 0, unavailable_counters, counter_map)) { diff --git a/libop/op_config.h b/libop/op_config.h index b384497..12e4b96 100644 --- a/libop/op_config.h +++ b/libop/op_config.h @@ -25,6 +25,10 @@ extern "C" { */ void init_op_config_dirs(char const * session_dir); +#ifndef ANDROID +#define OP_SESSION_DIR_DEFAULT "/var/lib/oprofile/" +#endif + /* * various paths, corresponding to opcontrol, that should be * initialized by init_op_config_dirs() above. 
@@ -37,8 +41,10 @@ extern char op_log_file[]; extern char op_pipe_file[]; extern char op_dump_status[]; +#if ANDROID #define OP_DRIVER_BASE "/dev/oprofile" #define OP_DATA_DIR "/data/oprofile" +#endif /* Global directory that stores debug files */ #ifndef DEBUGDIR diff --git a/libop/op_cpu_type.c b/libop/op_cpu_type.c index b9d13de..e168b43 100644 --- a/libop/op_cpu_type.c +++ b/libop/op_cpu_type.c @@ -14,6 +14,7 @@ #include <string.h> #include "op_cpu_type.h" +#include "op_hw_specific.h" struct cpu_descr { char const * pretty; @@ -74,6 +75,13 @@ static struct cpu_descr const cpu_descrs[MAX_CPU_TYPE] = { { "ppc64 POWER5++", "ppc64/power5++", CPU_PPC64_POWER5pp, 6 }, { "e300", "ppc/e300", CPU_PPC_E300, 4 }, { "AVR32", "avr32", CPU_AVR32, 3 }, + { "ARM V7 PMNC", "arm/armv7", CPU_ARM_V7, 5 }, + { "Intel Architectural Perfmon", "i386/arch_perfmon", CPU_ARCH_PERFMON, 0}, + { "AMD64 family11h", "x86-64/family11h", CPU_FAMILY11H, 4 }, + { "ppc64 POWER7", "ppc64/power7", CPU_PPC64_POWER7, 6 }, + { "ppc64 compat version 1", "ppc64/ibm-compat-v1", CPU_PPC64_IBM_COMPAT_V1, 4 }, + { "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 }, + { "Intel Atom", "i386/atom", CPU_ATOM, 2 }, }; static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr); @@ -151,8 +159,14 @@ char const * op_get_cpu_name(op_cpu cpu_type) int op_get_nr_counters(op_cpu cpu_type) { + int cnt; + if (cpu_type <= CPU_NO_GOOD || cpu_type >= MAX_CPU_TYPE) return 0; + cnt = arch_num_counters(cpu_type); + if (cnt >= 0) + return cnt; + return cpu_descrs[cpu_type].nr_counters; } diff --git a/libop/op_cpu_type.h b/libop/op_cpu_type.h index be95ae2..133a4f8 100644 --- a/libop/op_cpu_type.h +++ b/libop/op_cpu_type.h @@ -72,6 +72,13 @@ typedef enum { CPU_PPC64_POWER5pp, /**< ppc64 Power5++ family */ CPU_PPC_E300, /**< e300 */ CPU_AVR32, /**< AVR32 */ + CPU_ARM_V7, /**< ARM V7 */ + CPU_ARCH_PERFMON, /**< Intel architectural perfmon */ + CPU_FAMILY11H, /**< AMD family 11h */ + CPU_PPC64_POWER7, /**< 
ppc64 POWER7 family */ + CPU_PPC64_IBM_COMPAT_V1, /**< IBM PPC64 processor compat mode version 1 */ + CPU_CORE_I7, /* Intel Core i7, Nehalem */ + CPU_ATOM, /* First generation Intel Atom */ MAX_CPU_TYPE } op_cpu; diff --git a/libop/op_events.c b/libop/op_events.c index b4a10e7..ad95d86 100644 --- a/libop/op_events.c +++ b/libop/op_events.c @@ -16,6 +16,7 @@ #include "op_fileio.h" #include "op_string.h" #include "op_cpufreq.h" +#include "op_hw_specific.h" #include <string.h> #include <stdlib.h> @@ -27,6 +28,24 @@ static LIST_HEAD(um_list); static char const * filename; static unsigned int line_nr; +static void delete_event(struct op_event * event); +static void read_events(char const * file); +static void read_unit_masks(char const * file); +static void free_unit_mask(struct op_unit_mask * um); + +static char *build_fn(const char *cpu_name, const char *fn) +{ + char *s; + static const char *dir; + if (dir == NULL) + dir = getenv("OPROFILE_EVENTS_DIR"); + if (dir == NULL) + dir = OP_DATADIR; + s = xmalloc(strlen(dir) + strlen(cpu_name) + strlen(fn) + 5); + sprintf(s, "%s/%s/%s", dir, cpu_name, fn); + return s; +} + static void parse_error(char const * context) { fprintf(stderr, "oprofile: parse error in %s, line %u\n", @@ -69,6 +88,23 @@ static u64 parse_long_hex(char const * str) return value; } +static void include_um(const char *start, const char *end) +{ + char *s; + char cpu[end - start + 1]; + int old_line_nr; + const char *old_filename; + + strncpy(cpu, start, end - start); + cpu[end - start] = 0; + s = build_fn(cpu, "unit_masks"); + old_line_nr = line_nr; + old_filename = filename; + read_unit_masks(s); + line_nr = old_line_nr; + filename = old_filename; + free(s); +} /* name:MESI type:bitmask default:0x0f */ static void parse_um(struct op_unit_mask * um, char const * line) @@ -94,6 +130,14 @@ static void parse_um(struct op_unit_mask * um, char const * line) ++tagend; + if (strisprefix(start, "include")) { + if (seen_name + seen_type + seen_default > 0) + 
parse_error("include must be on its own"); + free_unit_mask(um); + include_um(tagend, valueend); + return; + } + if (strisprefix(start, "name")) { if (seen_name) parse_error("duplicate name: tag"); @@ -125,6 +169,11 @@ static void parse_um(struct op_unit_mask * um, char const * line) tagend = valueend; start = valueend; } + + if (!um->name) + parse_error("Missing name for unit mask"); + if (!seen_type) + parse_error("Missing type for unit mask"); } @@ -158,6 +207,11 @@ static struct op_unit_mask * new_unit_mask(void) return um; } +static void free_unit_mask(struct op_unit_mask * um) +{ + list_del(&um->um_next); + free(um); +} /* * name:zero type:mandatory default:0x0 @@ -227,21 +281,68 @@ static u32 parse_counter_mask(char const * str) return mask; } - -static struct op_unit_mask * find_um(char const * value) +static struct op_unit_mask * try_find_um(char const * value) { struct list_head * pos; list_for_each(pos, &um_list) { struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); - if (strcmp(value, um->name) == 0) + if (strcmp(value, um->name) == 0) { + um->used = 1; return um; + } } + return NULL; +} +static struct op_unit_mask * find_um(char const * value) +{ + struct op_unit_mask * um = try_find_um(value); + if (um) + return um; fprintf(stderr, "oprofile: could not find unit mask %s\n", value); exit(EXIT_FAILURE); } +/* um:a,b,c,d merge multiple unit masks */ +static struct op_unit_mask * merge_um(char * value) +{ + int num; + char *s; + struct op_unit_mask *new, *um; + enum unit_mask_type type = -1U; + + um = try_find_um(value); + if (um) + return um; + + new = new_unit_mask(); + new->name = xstrdup(value); + new->used = 1; + num = 0; + while ((s = strsep(&value, ",")) != NULL) { + unsigned c; + um = find_um(s); + if (type == -1U) + type = um->unit_type_mask; + if (um->unit_type_mask != type) + parse_error("combined unit mask must be all the same types"); + if (type != utm_bitmask && type != utm_exclusive) + parse_error("combined unit mask 
must be all bitmasks or exclusive"); + new->default_mask |= um->default_mask; + new->num += um->num; + if (new->num > MAX_UNIT_MASK) + parse_error("too many members in combined unit mask"); + for (c = 0; c < um->num; c++, num++) { + new->um[num] = um->um[c]; + new->um[num].desc = xstrdup(new->um[num].desc); + } + } + if (type == -1U) + parse_error("Empty unit mask"); + new->unit_type_mask = type; + return new; +} /* parse either a "tag:value" or a ": trailing description string" */ static int next_token(char const ** cp, char ** name, char ** value) @@ -287,6 +388,20 @@ static int next_token(char const ** cp, char ** name, char ** value) return 1; } +static void include_events (char *value) +{ + char * event_file; + const char *old_filename; + int old_line_nr; + + event_file = build_fn(value, "events"); + old_line_nr = line_nr; + old_filename = filename; + read_events(event_file); + line_nr = old_line_nr; + filename = old_filename; + free(event_file); +} static struct op_event * new_event(void) { @@ -297,8 +412,14 @@ static struct op_event * new_event(void) return event; } +static void free_event(struct op_event * event) +{ + list_del(&event->event_next); + free(event); +} /* event:0x00 counters:0 um:zero minimum:4096 name:ISSUES : Total issues */ +/* event:0x00 ext:xxxxxx um:zero minimum:4096 name:ISSUES : Total issues */ static void read_events(char const * file) { struct op_event * event = NULL; @@ -306,8 +427,9 @@ static void read_events(char const * file) char * name; char * value; char const * c; - int seen_event, seen_counters, seen_um, seen_minimum, seen_name; + int seen_event, seen_counters, seen_um, seen_minimum, seen_name, seen_ext; FILE * fp = fopen(file, "r"); + int tags; if (!fp) { fprintf(stderr, "oprofile: could not open event description file %s\n", file); @@ -323,13 +445,17 @@ static void read_events(char const * file) if (empty_line(line) || comment_line(line)) goto next; + tags = 0; seen_name = 0; seen_event = 0; seen_counters = 0; + seen_ext = 
0; seen_um = 0; seen_minimum = 0; event = new_event(); - + event->filter = -1; + event->ext = NULL; + c = line; while (next_token(&c, &name, &value)) { if (strcmp(name, "name") == 0) { @@ -351,14 +477,24 @@ static void read_events(char const * file) if (seen_counters) parse_error("duplicate counters: tag"); seen_counters = 1; - event->counter_mask = parse_counter_mask(value); + if (!strcmp(value, "cpuid")) + event->counter_mask = arch_get_counter_mask(); + else + event->counter_mask = parse_counter_mask(value); free(value); + } else if (strcmp(name, "ext") == 0) { + if (seen_ext) + parse_error("duplicate ext: tag"); + seen_ext = 1; + event->ext = value; } else if (strcmp(name, "um") == 0) { if (seen_um) parse_error("duplicate um: tag"); seen_um = 1; - event->unit = find_um(value); - event->unit->used = 1; + if (strchr(value, ',')) + event->unit = merge_um(value); + else + event->unit = find_um(value); free(value); } else if (strcmp(name, "minimum") == 0) { if (seen_minimum) @@ -368,9 +504,22 @@ static void read_events(char const * file) free(value); } else if (strcmp(name, "desc") == 0) { event->desc = value; + } else if (strcmp(name, "filter") == 0) { + event->filter = parse_int(value); + free(value); + } else if (strcmp(name, "include") == 0) { + if (tags > 0) + parse_error("tags before include:"); + free_event(event); + include_events(value); + free(value); + c = skip_ws(c); + if (*c != '\0' && *c != '#') + parse_error("non whitespace after include:"); } else { parse_error("unknown tag"); } + tags++; free(name); } @@ -385,20 +534,21 @@ next: /* usefull for make check */ -static void check_unit_mask(struct op_unit_mask const * um, +static int check_unit_mask(struct op_unit_mask const * um, char const * cpu_name) { u32 i; + int err = 0; if (!um->used) { fprintf(stderr, "um %s is not used\n", um->name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } if (um->unit_type_mask == utm_mandatory && um->num != 1) { fprintf(stderr, "mandatory um %s doesn't contain exactly 
one " "entry (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } else if (um->unit_type_mask == utm_bitmask) { u32 default_mask = um->default_mask; for (i = 0; i < um->num; ++i) @@ -407,7 +557,7 @@ static void check_unit_mask(struct op_unit_mask const * um, if (default_mask) { fprintf(stderr, "um %s default mask is not valid " "(%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } else { for (i = 0; i < um->num; ++i) { @@ -418,63 +568,66 @@ static void check_unit_mask(struct op_unit_mask const * um, if (i == um->num) { fprintf(stderr, "exclusive um %s default value is not " "valid (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } + return err; } +static void arch_filter_events(op_cpu cpu_type) +{ + struct list_head * pos, * pos2; + unsigned filter = arch_get_filter(cpu_type); + if (!filter) + return; + list_for_each_safe (pos, pos2, &events_list) { + struct op_event * event = list_entry(pos, struct op_event, event_next); + if (event->filter >= 0 && ((1U << event->filter) & filter)) + delete_event(event); + } +} -static void load_events(op_cpu cpu_type) +static void load_events_name(const char *cpu_name) { - char const * cpu_name = op_get_cpu_name(cpu_type); - char * event_dir; char * event_file; char * um_file; - char * dir; - struct list_head * pos; - if (!list_empty(&events_list)) - return; + event_file = build_fn(cpu_name, "events"); + um_file = build_fn(cpu_name, "unit_masks"); - dir = getenv("OPROFILE_EVENTS_DIR"); - if (dir == NULL) - dir = OP_DATADIR; - - event_dir = xmalloc(strlen(dir) + strlen("/") + strlen(cpu_name) + - strlen("/") + 1); - strcpy(event_dir, dir); - strcat(event_dir, "/"); + read_unit_masks(um_file); + read_events(event_file); + + free(um_file); + free(event_file); +} - strcat(event_dir, cpu_name); - strcat(event_dir, "/"); +static void load_events(op_cpu cpu_type) +{ + const char * cpu_name = op_get_cpu_name(cpu_type); + struct list_head * pos; + int err = 0; - 
event_file = xmalloc(strlen(event_dir) + strlen("events") + 1); - strcpy(event_file, event_dir); - strcat(event_file, "events"); + if (!list_empty(&events_list)) + return; - um_file = xmalloc(strlen(event_dir) + strlen("unit_masks") + 1); - strcpy(um_file, event_dir); - strcat(um_file, "unit_masks"); + load_events_name(cpu_name); - read_unit_masks(um_file); - read_events(event_file); + arch_filter_events(cpu_type); /* sanity check: all unit mask must be used */ list_for_each(pos, &um_list) { struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); - - check_unit_mask(um, cpu_name); + err |= check_unit_mask(um, cpu_name); } - - free(um_file); - free(event_file); - free(event_dir); + if (err) + exit(err); } - struct list_head * op_events(op_cpu cpu_type) { load_events(cpu_type); + arch_filter_events(cpu_type); return &events_list; } @@ -521,8 +674,8 @@ void op_free_events(void) } } - -static struct op_event * find_event(u32 nr) +/* There can be actually multiple events here, so this is not quite correct */ +static struct op_event * find_event_any(u32 nr) { struct list_head * pos; @@ -535,8 +688,25 @@ static struct op_event * find_event(u32 nr) return NULL; } +static struct op_event * find_event_um(u32 nr, u32 um) +{ + struct list_head * pos; + unsigned int i; -static FILE * open_event_mapping_file(char const * cpu_name) + list_for_each(pos, &events_list) { + struct op_event * event = list_entry(pos, struct op_event, event_next); + if (event->val == nr) { + for (i = 0; i < event->unit->num; i++) { + if (event->unit->um[i].value == um) + return event; + } + } + } + + return NULL; +} + +static FILE * open_event_mapping_file(char const * cpu_name) { char * ev_map_file; char * dir; @@ -560,7 +730,7 @@ static FILE * open_event_mapping_file(char const * cpu_name) /** * This function is PPC64-specific. 
*/ -static char const * get_mapping(u32 nr, FILE * fp) +static char const * get_mapping(u32 nr, FILE * fp) { char * line; char * name; @@ -655,6 +825,8 @@ char const * find_mapping_for_event(u32 nr, op_cpu cpu_type) case CPU_PPC64_POWER5p: case CPU_PPC64_POWER5pp: case CPU_PPC64_POWER6: + case CPU_PPC64_POWER7: + case CPU_PPC64_IBM_COMPAT_V1: if (!fp) { fprintf(stderr, "oprofile: could not open event mapping file %s\n", filename); exit(EXIT_FAILURE); @@ -672,67 +844,102 @@ char const * find_mapping_for_event(u32 nr, op_cpu cpu_type) return map; } +static int match_event(int i, struct op_event *event, unsigned um) +{ + unsigned v = event->unit->um[i].value; + + switch (event->unit->unit_type_mask) { + case utm_exclusive: + case utm_mandatory: + return v == um; -struct op_event * find_event_by_name(char const * name) + case utm_bitmask: + return (v & um) || (!v && v == 0); + } + + abort(); +} + +struct op_event * find_event_by_name(char const * name, unsigned um, int um_valid) { struct list_head * pos; list_for_each(pos, &events_list) { struct op_event * event = list_entry(pos, struct op_event, event_next); - if (strcmp(event->name, name) == 0) + if (strcmp(event->name, name) == 0) { + if (um_valid) { + unsigned i; + + for (i = 0; i < event->unit->num; i++) + if (match_event(i, event, um)) + return event; + continue; + } return event; + } } return NULL; } -struct op_event * op_find_event(op_cpu cpu_type, u32 nr) +struct op_event * op_find_event(op_cpu cpu_type, u32 nr, u32 um) { struct op_event * event; load_events(cpu_type); - event = find_event(nr); + event = find_event_um(nr, um); return event; } +struct op_event * op_find_event_any(op_cpu cpu_type, u32 nr) +{ + load_events(cpu_type); + + return find_event_any(nr); +} int op_check_events(int ctr, u32 nr, u32 um, op_cpu cpu_type) { - int ret = OP_OK_EVENT; - struct op_event * event; + int ret = OP_INVALID_EVENT; size_t i; u32 ctr_mask = 1 << ctr; + struct list_head * pos; load_events(cpu_type); - event = 
find_event(nr); + list_for_each(pos, &events_list) { + struct op_event * event = list_entry(pos, struct op_event, event_next); + if (event->val != nr) + continue; - if (!event) { - ret |= OP_INVALID_EVENT; - return ret; - } + ret = OP_OK_EVENT; - if ((event->counter_mask & ctr_mask) == 0) - ret |= OP_INVALID_COUNTER; + if ((event->counter_mask & ctr_mask) == 0) + ret |= OP_INVALID_COUNTER; - if (event->unit->unit_type_mask == utm_bitmask) { - for (i = 0; i < event->unit->num; ++i) - um &= ~(event->unit->um[i].value); - - if (um) - ret |= OP_INVALID_UM; + if (event->unit->unit_type_mask == utm_bitmask) { + for (i = 0; i < event->unit->num; ++i) + um &= ~(event->unit->um[i].value); + + if (um) + ret |= OP_INVALID_UM; + + } else { + for (i = 0; i < event->unit->num; ++i) { + if (event->unit->um[i].value == um) + break; + } + + if (i == event->unit->num) + ret |= OP_INVALID_UM; - } else { - for (i = 0; i < event->unit->num; ++i) { - if (event->unit->um[i].value == um) - break; } - if (i == event->unit->num) - ret |= OP_INVALID_UM; + if (ret == OP_OK_EVENT) + return ret; } return ret; @@ -759,6 +966,10 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr) case CPU_ATHLON: case CPU_HAMMER: case CPU_FAMILY10: + case CPU_ARCH_PERFMON: + case CPU_FAMILY11H: + case CPU_ATOM: + case CPU_CORE_I7: descr->name = "CPU_CLK_UNHALTED"; break; @@ -793,6 +1004,7 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr) case CPU_ARM_XSCALE2: case CPU_ARM_MPCORE: case CPU_ARM_V6: + case CPU_ARM_V7: case CPU_AVR32: descr->name = "CPU_CYCLES"; break; @@ -807,6 +1019,8 @@ void op_default_event(op_cpu cpu_type, struct op_default_event_descr * descr) case CPU_PPC64_POWER5p: case CPU_PPC64_POWER5pp: case CPU_PPC64_CELL: + case CPU_PPC64_POWER7: + case CPU_PPC64_IBM_COMPAT_V1: descr->name = "CYCLES"; break; diff --git a/libop/op_events.h b/libop/op_events.h index f6462fc..9ffdc49 100644 --- a/libop/op_events.h +++ b/libop/op_events.h @@ -56,6 
+56,8 @@ struct op_event { char * name; /**< the event name */ char * desc; /**< the event description */ int min_count; /**< minimum counter value allowed */ + int filter; /**< architecture specific filter or -1 */ + char * ext; /**< extended events */ struct list_head event_next; /**< next event in list */ }; @@ -63,10 +65,12 @@ struct op_event { struct list_head * op_events(op_cpu cpu_type); /** Find a given event, returns NULL on error */ -struct op_event * op_find_event(op_cpu cpu_type, u32 nr); +struct op_event * op_find_event(op_cpu cpu_type, u32 nr, u32 um); +struct op_event * op_find_event_any(op_cpu cpu_type, u32 nr); /** Find a given event by name */ -struct op_event * find_event_by_name(char const * name); +struct op_event * find_event_by_name(char const * name, unsigned um, + int um_valid); /** * Find a mapping for a given event ID for architectures requiring additional information diff --git a/libop/op_hw_specific.h b/libop/op_hw_specific.h new file mode 100644 index 0000000..35080ad --- /dev/null +++ b/libop/op_hw_specific.h @@ -0,0 +1,107 @@ +/* + * @file architecture specific interfaces + * @remark Copyright 2008 Intel Corporation + * @remark Read the file COPYING + * @author Andi Kleen + */ + +#if defined(__i386__) || defined(__x86_64__) + +/* Assume we run on the same host as the profilee */ + +#define num_to_mask(x) ((1U << (x)) - 1) + +static inline int cpuid_vendor(char *vnd) +{ + union { + struct { + unsigned b,d,c; + }; + char v[12]; + } v; + unsigned eax; + asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0)); + return !strncmp(v.v, vnd, 12); +} + +/* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates + unhalted reference cycle architectural event is supported. We assume + steppings after C0 report correct data in CPUID. 
*/ +static inline void workaround_nehalem_aaj79(unsigned *ebx) +{ + union { + unsigned eax; + struct { + unsigned stepping : 4; + unsigned model : 4; + unsigned family : 4; + unsigned type : 2; + unsigned res : 2; + unsigned ext_model : 4; + unsigned ext_family : 8; + unsigned res2 : 4; + }; + } v; + unsigned model; + + if (!cpuid_vendor("GenuineIntel")) + return; + asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx"); + model = (v.ext_model << 4) + v.model; + if (v.family != 6 || model != 26 || v.stepping > 4) + return; + *ebx |= (1 << 2); /* disable unsupported event */ +} + +static inline unsigned arch_get_filter(op_cpu cpu_type) +{ + if (cpu_type == CPU_ARCH_PERFMON) { + unsigned ebx, eax; + asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx"); + workaround_nehalem_aaj79(&ebx); + return ebx & num_to_mask(eax >> 24); + } + return -1U; +} + +static inline int arch_num_counters(op_cpu cpu_type) +{ + if (cpu_type == CPU_ARCH_PERFMON) { + unsigned v; + asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx"); + return (v >> 8) & 0xff; + } + return -1; +} + +static inline unsigned arch_get_counter_mask(void) +{ + unsigned v; + asm("cpuid" : "=a" (v) : "0" (0xa) : "ebx","ecx","edx"); + return num_to_mask((v >> 8) & 0xff); +} + +#else + +static inline unsigned arch_get_filter(op_cpu cpu_type) +{ + /* Do something with passed arg to shut up the compiler warning */ + if (cpu_type != CPU_NO_GOOD) + return 0; + return 0; +} + +static inline int arch_num_counters(op_cpu cpu_type) +{ + /* Do something with passed arg to shut up the compiler warning */ + if (cpu_type != CPU_NO_GOOD) + return -1; + return -1; +} + +static inline unsigned arch_get_counter_mask(void) +{ + return 0; +} + +#endif diff --git a/libop/op_parse_event.c b/libop/op_parse_event.c index 920d617..eb99a20 100644 --- a/libop/op_parse_event.c +++ b/libop/op_parse_event.c @@ -93,6 +93,7 @@ size_t parse_events(struct parsed_event * parsed_events, size_t max_events, part = next_part(&cp); if 
(part) { + parsed_events[i].unit_mask_valid = 1; parsed_events[i].unit_mask = parse_ulong(part); free(part); } diff --git a/libop/op_parse_event.h b/libop/op_parse_event.h index 247a355..c8d4144 100644 --- a/libop/op_parse_event.h +++ b/libop/op_parse_event.h @@ -22,6 +22,7 @@ struct parsed_event { int unit_mask; int kernel; int user; + int unit_mask_valid; }; /** diff --git a/libop/op_xml_events.c b/libop/op_xml_events.c new file mode 100644 index 0000000..5b9ac7d --- /dev/null +++ b/libop/op_xml_events.c @@ -0,0 +1,93 @@ +/** + * @file op_xml_events.c + * routines for generating event files in XML + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Dave Nomura + */ + +#include <stdio.h> +#include <string.h> +#include "op_events.h" +#include "op_list.h" +#include "op_cpu_type.h" +#include "op_xml_out.h" + +static op_cpu cpu_type; +#define MAX_BUFFER 4096 +void open_xml_events(char const * title, char const * doc, op_cpu the_cpu_type) +{ + char const * schema_version = "1.0"; + char buffer[MAX_BUFFER]; + + buffer[0] = '\0'; + cpu_type = the_cpu_type; + open_xml_element(HELP_EVENTS, 0, buffer); + open_xml_element(HELP_HEADER, 1, buffer); + init_xml_str_attr(HELP_TITLE, title, buffer); + init_xml_str_attr(SCHEMA_VERSION, schema_version, buffer); + init_xml_str_attr(HELP_DOC, doc, buffer); + close_xml_element(NONE, 0, buffer); + printf("%s", buffer); +} + +void close_xml_events(void) +{ + char buffer[MAX_BUFFER]; + + buffer[0] = '\0'; + close_xml_element(HELP_EVENTS, 0, buffer); + printf("%s", buffer); +} + +static void xml_do_arch_specific_event_help(struct op_event const * event, + char * buffer) +{ + switch (cpu_type) { + case CPU_PPC64_CELL: + init_xml_int_attr(HELP_EVENT_GROUP, event->val / 100, buffer); + break; + default: + break; + } +} + + +void xml_help_for_event(struct op_event const * event) +{ + uint i; + int nr_counters; + int has_nested = strcmp(event->unit->name, "zero"); + char buffer[MAX_BUFFER]; + + 
buffer[0] = '\0'; + open_xml_element(HELP_EVENT, 1, buffer); + init_xml_str_attr(HELP_EVENT_NAME, event->name, buffer); + xml_do_arch_specific_event_help(event, buffer); + init_xml_str_attr(HELP_EVENT_DESC, event->desc, buffer); + + nr_counters = op_get_nr_counters(cpu_type); + init_xml_int_attr(HELP_COUNTER_MASK, event->counter_mask, buffer); + init_xml_int_attr(HELP_MIN_COUNT, event->min_count, buffer); + + if (has_nested) { + close_xml_element(NONE, 1, buffer); + open_xml_element(HELP_UNIT_MASKS, 1, buffer); + init_xml_int_attr(HELP_DEFAULT_MASK, event->unit->default_mask, buffer); + close_xml_element(NONE, 1, buffer); + for (i = 0; i < event->unit->num; i++) { + open_xml_element(HELP_UNIT_MASK, 1, buffer); + init_xml_int_attr(HELP_UNIT_MASK_VALUE, + event->unit->um[i].value, buffer); + init_xml_str_attr(HELP_UNIT_MASK_DESC, + event->unit->um[i].desc, buffer); + close_xml_element(NONE, 0, buffer); + } + close_xml_element(HELP_UNIT_MASKS, 0, buffer); + } + close_xml_element(has_nested ? 
HELP_EVENT : NONE, has_nested, buffer); + printf("%s", buffer); +} + diff --git a/libop/op_xml_events.h b/libop/op_xml_events.h new file mode 100644 index 0000000..e1e092e --- /dev/null +++ b/libop/op_xml_events.h @@ -0,0 +1,20 @@ +/** + * @file op_xml_events.h + * routines for generating event files in XML + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Dave Nomura + */ + +#ifndef OP_XML_EVENTS_H +#define OP_XML_EVENTS_H + +#include "op_events.h" + +void xml_help_for_event(struct op_event const * event); +void open_xml_events(char const * title, char const * doc, op_cpu cpu_type); +void close_xml_events(void); + +#endif /* OP_XML_EVENTS_H */ diff --git a/libop/op_xml_out.c b/libop/op_xml_out.c new file mode 100644 index 0000000..d779c45 --- /dev/null +++ b/libop/op_xml_out.c @@ -0,0 +1,233 @@ +/** + * @file op_xml_out.c + * C utility routines for writing XML + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Dave Nomura + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "op_xml_out.h" + +char const * xml_tag_map[] = { + "NONE", + "id", + "profile", + "processor", + "cputype", + "title", + "schemaversion", + "mhz", + "setup", + "timersetup", + "rtcinterrupts", + "eventsetup", + "eventname", + "unitmask", + "setupcount", + "separatedcpus", + "options", + "session", "debuginfo", "details", "excludedependent", + "excludesymbols", "imagepath", "includesymbols", "merge", + "classes", + "class", + "cpu", + "event", + "mask", + "process", + "pid", + "thread", + "tid", + "binary", + "module", + "name", + "callers", + "callees", + "symbol", + "idref", + "self", + "detaillo", + "detailhi", + "symboltable", + "symboldata", + "startingaddr", + "file", + "line", + "codelength", + "summarydata", + "sampledata", + "count", + "detailtable", + "symboldetails", + "detaildata", + "vmaoffset", + "bytestable", + "bytes", + "help_events", + "header", + "title", +
"doc", + "event", + "event_name", + "group", + "desc", + "counter_mask", + "min_count", + "unit_masks", + "default", + "unit_mask", + "mask", + "desc" +}; + +#define MAX_BUF_LEN 2048 +char const * xml_tag_name(tag_t tag) +{ + return xml_tag_map[tag]; +} + + +void open_xml_element(tag_t tag, int with_attrs, char * buffer) +{ + char const * tag_name = xml_tag_name(tag); + unsigned int const max_len = strlen(tag_name) + 3; + char tmp_buf[MAX_BUF_LEN]; + + if (max_len >= sizeof(tmp_buf)) + fprintf(stderr,"Warning: open_xml_element: buffer overflow %d\n", max_len); + + if (snprintf(tmp_buf, sizeof(tmp_buf), "<%s%s", tag_name, + (with_attrs ? " " : ">\n")) < 0) { + fprintf(stderr,"open_xml_element: snprintf failed\n"); + exit(EXIT_FAILURE); + } + strncat(buffer, tmp_buf, sizeof(tmp_buf)); +} + + +void close_xml_element(tag_t tag, int has_nested, char * buffer) +{ + char const * tag_name = xml_tag_name(tag); + unsigned int const max_len = strlen(tag_name) + 3; + char tmp_buf[MAX_BUF_LEN]; + + if (max_len >= sizeof(tmp_buf)) + fprintf(stderr,"Warning: close_xml_element: buffer overflow %d\n", max_len); + + if (tag == NONE) { + if (snprintf(tmp_buf, sizeof(tmp_buf), "%s\n", (has_nested ? 
">" : "/>")) < 0) { + fprintf(stderr, "close_xml_element: snprintf failed\n"); + exit(EXIT_FAILURE); + } + } else { + if (snprintf(tmp_buf, sizeof(tmp_buf), "</%s>\n", tag_name) < 0) { + fprintf(stderr, "close_xml_element: snprintf failed\n"); + exit(EXIT_FAILURE); + } + } + strncat(buffer, tmp_buf, sizeof(tmp_buf)); +} + + +void init_xml_int_attr(tag_t attr, int value, char * buffer) +{ + char const * attr_name = xml_tag_name(attr); + char tmp_buf[MAX_BUF_LEN]; + unsigned int const max_len = strlen(attr_name) + 50; + + if (max_len >= sizeof(tmp_buf)) { + fprintf(stderr, + "Warning: init_xml_int_attr: buffer overflow %d\n", max_len); + } + + + if (snprintf(tmp_buf, sizeof(tmp_buf), " %s=\"%d\"", attr_name, value) < 0) { + fprintf(stderr,"init_xml_int_attr: snprintf failed\n"); + exit(EXIT_FAILURE); + } + strncat(buffer, tmp_buf, sizeof(tmp_buf)); +} + + +void init_xml_dbl_attr(tag_t attr, double value, char * buffer) +{ + char const * attr_name = xml_tag_name(attr); + unsigned int const max_len = strlen(attr_name) + 50; + char tmp_buf[MAX_BUF_LEN]; + + if (max_len >= sizeof(tmp_buf)) + fprintf(stderr, "Warning: init_xml_dbl_attr: buffer overflow %d\n", max_len); + + if (snprintf(tmp_buf, sizeof(tmp_buf), " %s=\"%.2f\"", attr_name, value) < 0) { + fprintf(stderr, "init_xml_dbl_attr: snprintf failed\n"); + exit(EXIT_FAILURE); + } + strncat(buffer, tmp_buf, sizeof(tmp_buf)); +} + + +static char * xml_quote(char const * str, char * quote_buf) +{ + int i; + int pos = 0; + int len = strlen(str); + + + quote_buf[pos++] = '"'; + + for (i = 0; i < len; i++) { + if (pos >= MAX_BUF_LEN - 10) { + fprintf(stderr,"quote_str: buffer overflow %d\n", pos); + exit(EXIT_FAILURE); + } + + switch(str[i]) { + case '&': + strncpy(quote_buf + pos, "&amp;", 5); + pos += 5; + break; + case '<': + strncpy(quote_buf + pos, "&lt;", 4); + pos += 4; + break; + case '>': + strncpy(quote_buf + pos, "&gt;", 4); + pos += 4; + break; + case '"': + strncpy(quote_buf + pos, "&quot;", 6); + pos += 6; + break; +
default: + quote_buf[pos++] = str[i]; + break; + } + } + + quote_buf[pos++] = '"'; + quote_buf[pos++] = '\0'; + return quote_buf; +} + + +void init_xml_str_attr(tag_t attr, char const * str, char * buffer) +{ + char tmp_buf[MAX_BUF_LEN]; + char quote_buf[MAX_BUF_LEN]; + char const * attr_name = xml_tag_name(attr); + char const * quote_str = xml_quote(str, quote_buf); + const unsigned int max_len = strlen(attr_name) + strlen(quote_str) + 10; + + if (max_len >= sizeof(tmp_buf)) + fprintf(stderr, "Warning: init_xml_str_attr: buffer overflow %d\n", max_len); + + if (snprintf(tmp_buf, sizeof(tmp_buf), " %s=""%s""", attr_name, quote_str) < 0) { + fprintf(stderr,"init_xml_str_attr: snprintf failed\n"); + exit(EXIT_FAILURE); + } + strncat(buffer, tmp_buf, sizeof(tmp_buf)); +} diff --git a/libop/op_xml_out.h b/libop/op_xml_out.h new file mode 100644 index 0000000..52e8d8f --- /dev/null +++ b/libop/op_xml_out.h @@ -0,0 +1,72 @@ +/** + * @file op_xml_out.h + * utility routines for writing XML + * + * @remark Copyright 2008 OProfile authors + * @remark Read the file COPYING + * + * @author Dave Nomura + */ + +#ifndef OP_XML_OUT_H +#define OP_XML_OUT_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + NONE=0, TABLE_ID, PROFILE, + PROCESSOR, CPU_NAME, TITLE, SCHEMA_VERSION, MHZ, + SETUP, + TIMER_SETUP, RTC_INTERRUPTS, + EVENT_SETUP, EVENT_NAME, UNIT_MASK, SETUP_COUNT, SEPARATED_CPUS, + OPTIONS, SESSION, DEBUG_INFO, DETAILS, EXCLUDE_DEPENDENT, EXCLUDE_SYMBOLS, + IMAGE_PATH, INCLUDE_SYMBOLS, MERGE, + CLASSES, + CLASS, + CPU_NUM, + EVENT_NUM, + EVENT_MASK, + PROCESS, PROC_ID, + THREAD, THREAD_ID, + BINARY, + MODULE, NAME, + CALLERS, CALLEES, + SYMBOL, ID_REF, SELFREF, DETAIL_LO, DETAIL_HI, + SYMBOL_TABLE, + SYMBOL_DATA, STARTING_ADDR, + SOURCE_FILE, SOURCE_LINE, CODE_LENGTH, + SUMMARY, SAMPLE, + COUNT, + DETAIL_TABLE, SYMBOL_DETAILS, DETAIL_DATA, VMA, + BYTES_TABLE, BYTES, + HELP_EVENTS, + HELP_HEADER, + HELP_TITLE, + HELP_DOC, + HELP_EVENT, + HELP_EVENT_NAME, + 
HELP_EVENT_GROUP, + HELP_EVENT_DESC, + HELP_COUNTER_MASK, + HELP_MIN_COUNT, + HELP_UNIT_MASKS, + HELP_DEFAULT_MASK, + HELP_UNIT_MASK, + HELP_UNIT_MASK_VALUE, + HELP_UNIT_MASK_DESC + } tag_t; + +char const * xml_tag_name(tag_t tag); +void open_xml_element(tag_t tag, int with_attrs, char * result); +void close_xml_element(tag_t tag, int has_nested, char * result); +void init_xml_int_attr(tag_t attr, int value, char * result); +void init_xml_dbl_attr(tag_t attr, double value, char * result); +void init_xml_str_attr(tag_t attr, char const * str, char * result); + +#ifdef __cplusplus +} +#endif + +#endif /* OP_XML_OUT_H */ diff --git a/libutil/op_file.c b/libutil/op_file.c index e3e6cb6..fd5995a 100644 --- a/libutil/op_file.c +++ b/libutil/op_file.c @@ -94,7 +94,17 @@ static char * make_pathname_from_dirent(char const * basedir, name_len = strlen(basedir) + strlen("/") + strlen(ent->d_name) + 1; name = xmalloc(name_len); sprintf(name, "%s/%s", basedir, ent->d_name); - if (stat(name, st_buf) != 0) { + if (stat(name, st_buf) != 0) + { + struct stat lstat_buf; + int err = errno; + if (lstat(name, &lstat_buf) == 0 && + S_ISLNK(lstat_buf.st_mode)) { + // dangling symlink -- silently ignore + } else { + fprintf(stderr, "stat failed for %s (%s)\n", + name, strerror(err)); + } free(name); name = NULL; } @@ -147,13 +157,14 @@ int get_matching_pathnames(void * name_list, get_pathname_callback getpathname, case MATCH_ANY_ENTRY_RECURSION + MATCH: name = make_pathname_from_dirent(base_dir, ent, &stat_buffer); - if (name && S_ISDIR(stat_buffer.st_mode) && - !S_ISLNK(stat_buffer.st_mode)) { - get_matching_pathnames( - name_list, getpathname, - name, filter, recursion); - } else { - getpathname(name, name_list); + if (name) { + if (S_ISDIR(stat_buffer.st_mode)) { + get_matching_pathnames( + name_list, getpathname, + name, filter, recursion); + } else { + getpathname(name, name_list); + } } free(name); break; @@ -161,8 +172,7 @@ int get_matching_pathnames(void * name_list, 
get_pathname_callback getpathname, case MATCH_DIR_ONLY_RECURSION + MATCH: name = make_pathname_from_dirent(base_dir, ent, &stat_buffer); - if (name && S_ISDIR(stat_buffer.st_mode) && - !S_ISLNK(stat_buffer.st_mode)) { + if (name && S_ISDIR(stat_buffer.st_mode)) { /* Check if full directory name contains * match to the filter; if so, add it to * name_list and quit; else, recurse. diff --git a/libutil/op_libiberty.h b/libutil/op_libiberty.h index ea02a50..ef2f386 100644 --- a/libutil/op_libiberty.h +++ b/libutil/op_libiberty.h @@ -34,7 +34,6 @@ extern "C" { /* some system have a libiberty.a but no libiberty.h so we must provide * ourself the missing proto */ #ifndef HAVE_LIBIBERTY_H - /* Set the program name used by xmalloc. */ void xmalloc_set_program_name(char const *); @@ -71,7 +70,6 @@ void * xmemdup(void const *, size_t, size_t) OP_ATTRIB_MALLOC; #define xmalloc_set_program_name(n) #endif - #ifdef __cplusplus } #endif diff --git a/opcontrol/Android.mk b/opcontrol/Android.mk index 56211ad..8f04f3a 100644 --- a/opcontrol/Android.mk +++ b/opcontrol/Android.mk @@ -1,6 +1,10 @@ LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) +ifeq ($(TARGET_ARCH_VARIANT), armv7-a) + LOCAL_CFLAGS += -DWITH_ARM_V7_A +endif + LOCAL_SRC_FILES:= \ opcontrol.cpp diff --git a/opcontrol/opcontrol.cpp b/opcontrol/opcontrol.cpp index 2d9cb2f..2558760 100644 --- a/opcontrol/opcontrol.cpp +++ b/opcontrol/opcontrol.cpp @@ -39,7 +39,15 @@ /* Experiments found that using a small interval may hang the device, and the * more events tracked simultaneously, the longer the interval has to be. 
*/ -int min_count[3] = {150000, 200000, 250000}; + +#if !defined(WITH_ARM_V7_A) +#define MAX_EVENTS 3 +int min_count[MAX_EVENTS] = {150000, 200000, 250000}; +#else +#define MAX_EVENTS 5 +int min_count[MAX_EVENTS] = {150000, 200000, 250000, 300000, 350000}; +#endif + int list_events; int show_usage; int setup; @@ -49,8 +57,8 @@ int start; int stop; int reset; -int selected_events[3]; -int selected_counts[3]; +int selected_events[MAX_EVENTS]; +int selected_counts[MAX_EVENTS]; char kernel_range[512]; char vmlinux[512]; @@ -76,6 +84,8 @@ struct event_info { const char *name; const char *explanation; } event_info[] = { +#if !defined(WITH_ARM_V7_A) + /* ARM V6 events */ {0x00, "IFU_IFETCH_MISS", "number of instruction fetch misses"}, {0x01, "CYCLES_IFU_MEM_STALL", @@ -112,9 +122,119 @@ struct event_info { "Times write buffer was drained"}, {0xff, "CPU_CYCLES", "clock cycles counter"}, +#else + /* ARM V7 events */ + {0x00, "PMNC_SW_INCR", + "Software increment of PMNC registers"}, + {0x01, "IFETCH_MISS", + "Instruction fetch misses from cache or normal cacheable memory"}, + {0x02, "ITLB_MISS", + "Instruction fetch misses from TLB"}, + {0x03, "DCACHE_REFILL", + "Data R/W operation that causes a refill from cache or normal cacheable" + "memory"}, + {0x04, "DCACHE_ACCESS", + "Data R/W from cache"}, + {0x05, "DTLB_REFILL", + "Data R/W that causes a TLB refill"}, + {0x06, "DREAD", + "Data read architecturally executed (note: architecturally executed = for" + "instructions that are unconditional or that pass the condition code)"}, + {0x07, "DWRITE", + "Data write architecturally executed"}, + {0x08, "INSTR_EXECUTED", + "All executed instructions"}, + {0x09, "EXC_TAKEN", + "Exception taken"}, + {0x0A, "EXC_EXECUTED", + "Exception return architecturally executed"}, + {0x0B, "CID_WRITE", + "Instruction that writes to the Context ID Register architecturally" + "executed"}, + {0x0C, "PC_WRITE", + "SW change of PC, architecturally executed (not by exceptions)"}, + {0x0D, 
"PC_IMM_BRANCH", + "Immediate branch instruction executed (taken or not)"}, + {0x0E, "PC_PROC_RETURN", + "Procedure return architecturally executed (not by exceptions)"}, + {0x0F, "UNALIGNED_ACCESS", + "Unaligned access architecturally executed"}, + {0x10, "PC_BRANCH_MIS_PRED", + "Branch mispredicted or not predicted. Counts pipeline flushes because of" + "misprediction"}, + {0x12, "PC_BRANCH_MIS_USED", + "Branch or change in program flow that could have been predicted"}, + {0x40, "WRITE_BUFFER_FULL", + "Any write buffer full cycle"}, + {0x41, "L2_STORE_MERGED", + "Any store that is merged in L2 cache"}, + {0x42, "L2_STORE_BUFF", + "Any bufferable store from load/store to L2 cache"}, + {0x43, "L2_ACCESS", + "Any access to L2 cache"}, + {0x44, "L2_CACH_MISS", + "Any cacheable miss in L2 cache"}, + {0x45, "AXI_READ_CYCLES", + "Number of cycles for an active AXI read"}, + {0x46, "AXI_WRITE_CYCLES", + "Number of cycles for an active AXI write"}, + {0x47, "MEMORY_REPLAY", + "Any replay event in the memory subsystem"}, + {0x48, "UNALIGNED_ACCESS_REPLAY", + "Unaligned access that causes a replay"}, + {0x49, "L1_DATA_MISS", + "L1 data cache miss as a result of the hashing algorithm"}, + {0x4A, "L1_INST_MISS", + "L1 instruction cache miss as a result of the hashing algorithm"}, + {0x4B, "L1_DATA_COLORING", + "L1 data access in which a page coloring alias occurs"}, + {0x4C, "L1_NEON_DATA", + "NEON data access that hits L1 cache"}, + {0x4D, "L1_NEON_CACH_DATA", + "NEON cacheable data access that hits L1 cache"}, + {0x4E, "L2_NEON", + "L2 access as a result of NEON memory access"}, + {0x4F, "L2_NEON_HIT", + "Any NEON hit in L2 cache"}, + {0x50, "L1_INST", + "Any L1 instruction cache access, excluding CP15 cache accesses"}, + {0x51, "PC_RETURN_MIS_PRED", + "Return stack misprediction at return stack pop" + "(incorrect target address)"}, + {0x52, "PC_BRANCH_FAILED", + "Branch prediction misprediction"}, + {0x53, "PC_BRANCH_TAKEN", + "Any predicted branch that is taken"}, + 
{0x54, "PC_BRANCH_EXECUTED", + "Any taken branch that is executed"}, + {0x55, "OP_EXECUTED", + "Number of operations executed" + "(in instruction or mutli-cycle instruction)"}, + {0x56, "CYCLES_INST_STALL", + "Cycles where no instruction available"}, + {0x57, "CYCLES_INST", + "Number of instructions issued in a cycle"}, + {0x58, "CYCLES_NEON_DATA_STALL", + "Number of cycles the processor waits on MRC data from NEON"}, + {0x59, "CYCLES_NEON_INST_STALL", + "Number of cycles the processor waits on NEON instruction queue or" + "NEON load queue"}, + {0x5A, "NEON_CYCLES", + "Number of cycles NEON and integer processors are not idle"}, + {0x70, "PMU0_EVENTS", + "Number of events from external input source PMUEXTIN[0]"}, + {0x71, "PMU1_EVENTS", + "Number of events from external input source PMUEXTIN[1]"}, + {0x72, "PMU_EVENTS", + "Number of events from both external input sources PMUEXTIN[0]" + "and PMUEXTIN[1]"}, + {0xFF, "CPU_CYCLES", + "Number of CPU cycles"}, +#endif }; -void usage() { +void usage() +{ printf("\nopcontrol: usage:\n" " --list-events list event types\n" " --help this message\n" @@ -136,7 +256,8 @@ void usage() { ); } -void setup_session_dir() { +void setup_session_dir() +{ int fd; fd = open(OP_DATA_DIR, O_RDONLY); @@ -155,7 +276,8 @@ void setup_session_dir() { } } -int do_setup() { +int do_setup() +{ char dir[1024]; setup_session_dir(); @@ -183,7 +305,8 @@ void do_list_events() } } -int find_event_id_from_name(const char *name) { +int find_event_idx_from_name(const char *name) +{ unsigned int i; for (i = 0; i < sizeof(event_info)/sizeof(struct event_info); i++) { @@ -194,7 +317,8 @@ int find_event_id_from_name(const char *name) { return -1; } -const char * find_event_name_from_id(int id) { +const char * find_event_name_from_id(int id) +{ unsigned int i; for (i = 0; i < sizeof(event_info)/sizeof(struct event_info); i++) { @@ -205,11 +329,12 @@ const char * find_event_name_from_id(int id) { return NULL; } -int process_event(const char *event_spec) { +int 
process_event(const char *event_spec) +{ char event_name[512]; char count_name[512]; unsigned int i; - int event_id; + int event_idx; int count_val; strncpy(event_name, event_spec, 512); @@ -226,8 +351,8 @@ int process_event(const char *event_spec) { break; } } - event_id = find_event_id_from_name(event_name); - if (event_id == -1) { + event_idx = find_event_idx_from_name(event_name); + if (event_idx == -1) { fprintf(stderr, "Unknown event name: %s\n", event_name); return -1; } @@ -239,9 +364,9 @@ int process_event(const char *event_spec) { count_val = atoi(count_name); } - selected_events[num_events] = event_id; + selected_events[num_events] = event_idx; selected_counts[num_events++] = count_val; - verbose("event_id is %d\n", event_id); + verbose("event_id is %d\n", event_info[event_idx].id); verbose("count_val is %d\n", count_val); return 0; } @@ -293,7 +418,7 @@ void do_status() printf("Driver directory: %s\n", OP_DRIVER_BASE); printf("Session directory: %s\n", OP_DATA_DIR); - for (i = 0; i < 3; i++) { + for (i = 0; i < MAX_EVENTS; i++) { sprintf(fullname, OP_DRIVER_BASE"/%d/enabled", i); num = read_num(fullname); if (num > 0) { @@ -379,8 +504,9 @@ int main(int argc, char * const argv[]) break; /* --event */ case 'e': - if (num_events == 3) { - fprintf(stderr, "More than 3 events specified\n"); + if (num_events == MAX_EVENTS) { + fprintf(stderr, "More than %d events specified\n", + MAX_EVENTS); exit(1); } if (process_event(optarg)) { @@ -445,6 +571,7 @@ int main(int argc, char * const argv[]) strcpy(command, "oprofiled --session-dir="OP_DATA_DIR); +#if !defined(WITH_ARM_V7_A) /* Since counter #3 can only handle CPU_CYCLES, check and shuffle the * order a bit so that the maximal number of events can be profiled * simultaneously @@ -477,6 +604,7 @@ int main(int argc, char * const argv[]) selected_counts[i] = temp; } } +#endif /* Configure the counters and enable them */ @@ -518,7 +646,7 @@ int main(int argc, char * const argv[]) } /* Disable the unused counters */ 
- for (i = num_events; i < 3; i++) { + for (i = num_events; i < MAX_EVENTS; i++) { echo_dev("0", 0, "enabled", i); } diff --git a/opimport_pull b/opimport_pull index 7dbac4a..bc443ec 100755 --- a/opimport_pull +++ b/opimport_pull @@ -5,26 +5,48 @@ import re import sys def PrintUsage(): - print "Usage:" + sys.argv[0] + " dir" + print "Usage:" + sys.argv[0] + " [-r] dir" + print " -r : reuse the directory if it already exists" print " dir: directory on the host to store profile results" -if (len(sys.argv) != 2): +if (len(sys.argv) > 3): PrintUsage() sys.exit(1) +# identify 32-bit vs 64-bit platform +stream = os.popen("uname -m") +arch_name = stream.readline().rstrip("\n"); +stream.close() + +# default path is prebuilt/linux-x86/oprofile +# for 64-bit OS, use prebuilt/linux-x86_64/oprofile instead +if arch_name == "x86_64": + arch_path = "/../../linux-x86_64/oprofile" +else: + arch_path = "" + try: oprofile_event_dir = os.environ['OPROFILE_EVENTS_DIR'] except: print "OPROFILE_EVENTS_DIR not set. Run \". 
envsetup.sh\" first" sys.exit(1) -output_dir = sys.argv[1]; +if sys.argv[1] == "-r" : + replace_dir = 1 + output_dir = sys.argv[2] +else: + replace_dir = 0 + output_dir = sys.argv[1] + +if (os.path.exists(output_dir) and (replace_dir == 1)): + os.system("rm -fr " + output_dir) try: os.makedirs(output_dir) except: if os.path.exists(output_dir): print "Directory already exists:", output_dir + print "Try \"" + sys.argv[0] + " -r " + output_dir + "\"" else: print "Cannot create", output_dir sys.exit(1) @@ -60,11 +82,12 @@ for line in stream: if not os.path.exists(dir): os.makedirs(dir) - cmd = oprofile_event_dir + "/bin/opimport -a " + oprofile_event_dir + \ + cmd = oprofile_event_dir + arch_path + "/bin/opimport -a " + \ + oprofile_event_dir + \ "/abi/arm_abi -o samples" + middle_part + "/" + file_name + " " + line os.system(cmd) stream.close() # short summary of profiling results -os.system(oprofile_event_dir + "/bin/opreport --session-dir=.") +os.system(oprofile_event_dir + arch_path + "/bin/opreport --session-dir=.") |