author    weidendo <weidendo@a5019735-40e9-0310-863c-91ae7b9d1cf9>  2009-06-15 00:16:36 +0000
committer weidendo <weidendo@a5019735-40e9-0310-863c-91ae7b9d1cf9>  2009-06-15 00:16:36 +0000
commit    0a1951d64df79f98f885426671fc9d3982647a6b (patch)
tree      f9a44bf8c444e9da77016303bdbf1a3ee1ab61dd
parent    0b23d6eb63a4146dfa17304a2e76ce91f4d5e001 (diff)
download  valgrind-0a1951d64df79f98f885426671fc9d3982647a6b.tar.gz
Callgrind: fix instrumentation for arbitrary events per guest instruction
(should fix bug 169505)

This uses the same event queue scheme as Cachegrind and Lackey, and the same kinds of helpers (1/2/3 Ir events, Ir+Dr, Dr, Ir+Dw, Dw). Note that in contrast to Cachegrind, Callgrind interprets a modify event as Dw (otherwise the cache model generating write-back events would not work).

Callgrind uses per-(guest)instruction event sets for cost counters. A per-instruction event set is incrementally extended as events for the same guest instruction are flushed. Event sets always start with Ir counters; depending on the order of Dr/Dw afterwards, there are IrDr(Dw) and IrDw(Dr) variants. Per-instruction event sets are now consistently named according to event ordering.

Event set "sim" was a subset of "full"; it was never used and has been removed.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@10321 a5019735-40e9-0310-863c-91ae7b9d1cf9
-rw-r--r--  callgrind/bbcc.c   |    2
-rw-r--r--  callgrind/debug.c  |    6
-rw-r--r--  callgrind/global.h |   29
-rw-r--r--  callgrind/main.c   | 1090
-rw-r--r--  callgrind/sim.c    |  472
5 files changed, 933 insertions, 666 deletions
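For illustration only (not part of the commit): a minimal standalone C sketch of the event-set naming scheme described in the commit message above. It shows how a per-instruction event set grows from UIr as Dr/Dw events for the same guest instruction are flushed; the function and the example queues are invented for this sketch.

#include <stdio.h>

typedef enum { EV_DR, EV_DW } DataEv;

/* Start from "UIr"; the first Dr/Dw seen picks UIrDr or UIrDw, and the
   other kind, if it follows, extends that to UIrDrDw or UIrDwDr.
   Repeated events of a kind already in the set change nothing. */
static const char* eventset_for(const DataEv* evs, int n)
{
    int seen_dr = 0, seen_dw = 0;
    const char* name = "UIr";   /* every guest instruction has an Ir event */
    for (int i = 0; i < n; i++) {
        if (evs[i] == EV_DR && !seen_dr) {
            seen_dr = 1;
            name = seen_dw ? "UIrDwDr" : "UIrDr";
        }
        if (evs[i] == EV_DW && !seen_dw) {
            seen_dw = 1;
            name = seen_dr ? "UIrDrDw" : "UIrDw";
        }
    }
    return name;
}

int main(void)
{
    DataEv load_then_store[] = { EV_DR, EV_DW };
    DataEv store_only[]      = { EV_DW };
    printf("%s\n", eventset_for(load_then_store, 2)); /* UIrDrDw */
    printf("%s\n", eventset_for(store_only, 1));      /* UIrDw   */
    return 0;
}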
diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c
index dfe737b2f..7917c2526 100644
--- a/callgrind/bbcc.c
+++ b/callgrind/bbcc.c
@@ -601,7 +601,7 @@ void CLG_(setup_bbcc)(BB* bb)
if (!CLG_(clo).simulate_cache) {
/* update Ir cost */
int instr_count = last_bb->jmp[passed].instr+1;
- CLG_(current_state).cost[CLG_(sets).off_sim_Ir] += instr_count;
+ CLG_(current_state).cost[CLG_(sets).off_full_Ir] += instr_count;
}
}
diff --git a/callgrind/debug.c b/callgrind/debug.c
index f04bab414..2ac38a297 100644
--- a/callgrind/debug.c
+++ b/callgrind/debug.c
@@ -217,9 +217,9 @@ void CLG_(print_short_jcc)(jCC* jcc)
bb_jmpaddr(jcc->from->bb),
bb_addr(jcc->to->bb),
jcc->call_counter,
- jcc->cost ? jcc->cost[CLG_(sets).off_sim_Ir]:0,
- jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dr]:0,
- jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dw]:0);
+ jcc->cost ? jcc->cost[CLG_(sets).off_full_Ir]:0,
+ jcc->cost ? jcc->cost[CLG_(sets).off_full_Dr]:0,
+ jcc->cost ? jcc->cost[CLG_(sets).off_full_Dw]:0);
else
VG_(printf)("[Skipped JCC]");
}
diff --git a/callgrind/global.h b/callgrind/global.h
index 461218a8c..367f2d7d3 100644
--- a/callgrind/global.h
+++ b/callgrind/global.h
@@ -270,7 +270,6 @@ typedef struct _InstrInfo InstrInfo;
struct _InstrInfo {
UInt instr_offset;
UInt instr_size;
- UInt data_size;
UInt cost_offset;
EventSet* eventset;
};
@@ -657,19 +656,19 @@ struct cachesim_if
void (*finish)(void);
void (*log_1I0D)(InstrInfo*) VG_REGPARM(1);
+ void (*log_2I0D)(InstrInfo*, InstrInfo*) VG_REGPARM(2);
+ void (*log_3I0D)(InstrInfo*, InstrInfo*, InstrInfo*) VG_REGPARM(3);
- void (*log_1I1Dr)(InstrInfo*, Addr) VG_REGPARM(2);
- void (*log_1I1Dw)(InstrInfo*, Addr) VG_REGPARM(2);
- void (*log_1I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3);
+ void (*log_1I1Dr)(InstrInfo*, Addr, Word) VG_REGPARM(3);
+ void (*log_1I1Dw)(InstrInfo*, Addr, Word) VG_REGPARM(3);
- void (*log_0I1Dr)(InstrInfo*, Addr) VG_REGPARM(2);
- void (*log_0I1Dw)(InstrInfo*, Addr) VG_REGPARM(2);
- void (*log_0I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3);
+ void (*log_0I1Dr)(InstrInfo*, Addr, Word) VG_REGPARM(3);
+ void (*log_0I1Dw)(InstrInfo*, Addr, Word) VG_REGPARM(3);
// function names of helpers (for debugging generated code)
- Char *log_1I0D_name;
- Char *log_1I1Dr_name, *log_1I1Dw_name, *log_1I2D_name;
- Char *log_0I1Dr_name, *log_0I1Dw_name, *log_0I2D_name;
+ Char *log_1I0D_name, *log_2I0D_name, *log_3I0D_name;
+ Char *log_1I1Dr_name, *log_1I1Dw_name;
+ Char *log_0I1Dr_name, *log_0I1Dw_name;
};
@@ -687,15 +686,13 @@ void CLG_(print_debug_usage)(void);
/* from sim.c */
struct event_sets {
- EventSet *use, *Ir, *Dr, *Dw;
- EventSet *D0, *D1r, *D1w, *D2;
- EventSet *sim;
- EventSet *full; /* sim plus user events */
+ EventSet *Use, *Ir, *Dr, *Dw;
+ EventSet *UIr, *UIrDr, *UIrDrDw, *UIrDw, *UIrDwDr;
+ EventSet *full;
/* offsets into eventsets */
- Int off_sim_Ir, off_sim_Dr, off_sim_Dw;
Int off_full_Ir, off_full_Dr, off_full_Dw;
- Int off_full_user, off_full_alloc, off_full_systime;
+ Int off_full_alloc, off_full_systime;
};
extern struct event_sets CLG_(sets);
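The interface change above drops the combined 2D helpers and passes the access size as an explicit Word argument (data_size no longer lives in InstrInfo). A hypothetical usage sketch, assuming the types from this header; the wrapper function and the access sizes are invented for illustration:

/* Invented wrapper: log an 8-byte load followed by a 4-byte store for
   one guest instruction through the new explicit-size interface. */
static void example_log_two_accesses(struct cachesim_if* cs, InstrInfo* ii,
                                     Addr load_ea, Addr store_ea)
{
    /* the first data event of the insn also accounts the insn fetch */
    if (cs->log_1I1Dr) cs->log_1I1Dr(ii, load_ea,  /* data_size */ 8);
    /* further data events of the same insn use the 0I variants */
    if (cs->log_0I1Dw) cs->log_0I1Dw(ii, store_ea, /* data_size */ 4);
}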
diff --git a/callgrind/main.c b/callgrind/main.c
index 68d13814d..f2d125037 100644
--- a/callgrind/main.c
+++ b/callgrind/main.c
@@ -94,224 +94,490 @@ static void CLG_(init_statistics)(Statistics* s)
}
-
-
/*------------------------------------------------------------*/
-/*--- Cache simulation instrumentation phase ---*/
+/*--- Instrumentation structures and event queue handling ---*/
/*------------------------------------------------------------*/
+/* Maintain an ordered list of memory events which are outstanding, in
+ the sense that no IR has yet been generated to do the relevant
+ helper calls. The BB is scanned top to bottom and memory events
+ are added to the end of the list, merging with the most recent
+ notified event where possible (Dw immediately following Dr and
+ having the same size and EA can be merged).
+
+ This merging is done so that for architectures which have
+ load-op-store instructions (x86, amd64), the insn is treated as if
+ it makes just one memory reference (a modify), rather than two (a
+ read followed by a write at the same address).
+
+ At various points the list will need to be flushed, that is, IR
+ generated from it. That must happen before any possible exit from
+ the block (the end, or an IRStmt_Exit). Flushing also takes place
+ when there is no space to add a new event.
+
+ If we require the simulation statistics to be up to date with
+ respect to possible memory exceptions, then the list would have to
+ be flushed before each memory reference. That would however lose
+ performance by inhibiting event-merging during flushing.
+
+ Flushing the list consists of walking it start to end and emitting
+ instrumentation IR for each event, in the order in which they
+ appear. It may be possible to emit a single call for two adjacent
+ events in order to reduce the number of helper function calls made.
+ For example, it could well be profitable to handle two adjacent Ir
+ events with a single helper call. */
+
+typedef
+ IRExpr
+ IRAtom;
+
+typedef
+ enum {
+ Ev_Ir, // Instruction read
+ Ev_Dr, // Data read
+ Ev_Dw, // Data write
+ Ev_Dm, // Data modify (read then write)
+ }
+ EventTag;
+
+typedef
+ struct {
+ EventTag tag;
+ InstrInfo* inode;
+ union {
+ struct {
+ } Ir;
+ struct {
+ IRAtom* ea;
+ Int szB;
+ } Dr;
+ struct {
+ IRAtom* ea;
+ Int szB;
+ } Dw;
+ struct {
+ IRAtom* ea;
+ Int szB;
+ } Dm;
+ } Ev;
+ }
+ Event;
+
+static void init_Event ( Event* ev ) {
+ VG_(memset)(ev, 0, sizeof(Event));
+}
+
+static IRAtom* get_Event_dea ( Event* ev ) {
+ switch (ev->tag) {
+ case Ev_Dr: return ev->Ev.Dr.ea;
+ case Ev_Dw: return ev->Ev.Dw.ea;
+ case Ev_Dm: return ev->Ev.Dm.ea;
+ default: tl_assert(0);
+ }
+}
+
+static Int get_Event_dszB ( Event* ev ) {
+ switch (ev->tag) {
+ case Ev_Dr: return ev->Ev.Dr.szB;
+ case Ev_Dw: return ev->Ev.Dw.szB;
+ case Ev_Dm: return ev->Ev.Dm.szB;
+ default: tl_assert(0);
+ }
+}
+
+
+/* Up to this many unnotified events are allowed. Number is
+ arbitrary. Larger numbers allow more event merging to occur, but
+ potentially induce more spilling due to extending live ranges of
+ address temporaries. */
+#define N_EVENTS 16
+
+
+/* A struct which holds all the running state during instrumentation.
+ Mostly to avoid passing loads of parameters everywhere. */
+typedef struct {
+ /* The current outstanding-memory-event list. */
+ Event events[N_EVENTS];
+ Int events_used;
+
+ /* The array of InstrInfo's is part of BB struct. */
+ BB* bb;
-static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+ /* BB seen before (i.e. re-instrumentation) */
+ Bool seen_before;
+
+ /* Number of InstrInfo bins 'used' so far. */
+ UInt ii_index;
+
+ // current offset of guest instructions from BB start
+ UInt instr_offset;
+
+ /* The output SB being constructed. */
+ IRSB* sbOut;
+} ClgState;
+
+
+static void showEvent ( Event* ev )
{
- // I'm assuming that for 'modify' instructions, that Vex always makes
- // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp
- // expressions, or both Const expressions.
- CLG_ASSERT(isIRAtom(loadAddrExpr));
- CLG_ASSERT(isIRAtom(storeAddrExpr));
- return eqIRAtom(loadAddrExpr, storeAddrExpr);
+ switch (ev->tag) {
+ case Ev_Ir:
+ VG_(printf)("Ir (InstrInfo %p) at +%d\n",
+ ev->inode, ev->inode->instr_offset);
+ break;
+ case Ev_Dr:
+ VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
+ ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
+ ppIRExpr(ev->Ev.Dr.ea);
+ VG_(printf)("\n");
+ break;
+ case Ev_Dw:
+ VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
+ ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
+ ppIRExpr(ev->Ev.Dw.ea);
+ VG_(printf)("\n");
+ break;
+ case Ev_Dm:
+ VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
+ ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
+ ppIRExpr(ev->Ev.Dm.ea);
+ VG_(printf)("\n");
+ break;
+ default:
+ tl_assert(0);
+ break;
+ }
}
-static
-EventSet* insert_simcall(IRSB* bbOut, InstrInfo* ii, UInt dataSize,
- Bool instrIssued,
- IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+/* Generate code for all outstanding memory events, and mark the queue
+ empty. Code is generated into clgs->sbOut, and this activity
+ 'consumes' slots in clgs->bb. */
+
+static void flushEvents ( ClgState* clgs )
{
- HChar* helperName;
- void* helperAddr;
- Int argc;
- EventSet* es;
- IRExpr *arg1, *arg2 = 0, *arg3 = 0, **argv;
- IRDirty* di;
-
- /* Check type of original instruction regarding memory access,
- * and collect info to be able to generate fitting helper call
- */
- if (!loadAddrExpr && !storeAddrExpr) {
- // no load/store
- CLG_ASSERT(0 == dataSize);
- if (instrIssued) {
- helperName = 0;
- helperAddr = 0;
- }
- else {
- helperName = CLG_(cachesim).log_1I0D_name;
- helperAddr = CLG_(cachesim).log_1I0D;
- }
- argc = 1;
- es = CLG_(sets).D0;
-
- } else if (loadAddrExpr && !storeAddrExpr) {
- // load
- CLG_ASSERT( isIRAtom(loadAddrExpr) );
- if (instrIssued) {
- helperName = CLG_(cachesim).log_0I1Dr_name;
- helperAddr = CLG_(cachesim).log_0I1Dr;
- }
- else {
- helperName = CLG_(cachesim).log_1I1Dr_name;
- helperAddr = CLG_(cachesim).log_1I1Dr;
- }
- argc = 2;
- arg2 = loadAddrExpr;
- es = CLG_(sets).D1r;
-
- } else if (!loadAddrExpr && storeAddrExpr) {
- // store
- CLG_ASSERT( isIRAtom(storeAddrExpr) );
- if (instrIssued) {
- helperName = CLG_(cachesim).log_0I1Dw_name;
- helperAddr = CLG_(cachesim).log_0I1Dw;
- }
- else {
- helperName = CLG_(cachesim).log_1I1Dw_name;
- helperAddr = CLG_(cachesim).log_1I1Dw;
- }
- argc = 2;
- arg2 = storeAddrExpr;
- es = CLG_(sets).D1w;
-
- } else {
- CLG_ASSERT( loadAddrExpr && storeAddrExpr );
- CLG_ASSERT( isIRAtom(loadAddrExpr) );
- CLG_ASSERT( isIRAtom(storeAddrExpr) );
-
- if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) {
- /* modify: suppose write access, as this is
- * more resource consuming (as in callgrind for VG2)
- * Cachegrind does a read here (!)
- * DISCUSS: Best way depends on simulation model?
- */
- if (instrIssued) {
- helperName = CLG_(cachesim).log_0I1Dw_name;
- helperAddr = CLG_(cachesim).log_0I1Dw;
+ Int i, regparms, inew;
+ Char* helperName;
+ void* helperAddr;
+ IRExpr** argv;
+ IRExpr* i_node_expr;
+ IRDirty* di;
+ Event* ev;
+ Event* ev2;
+ Event* ev3;
+
+ if (!clgs->seen_before) {
+ // extend event sets as needed
+ // available sets: UIr UIrDr UIrDrDw UIrDw UIrDwDr
+ for(i=0; i<clgs->events_used; i++) {
+ ev = &clgs->events[i];
+ switch(ev->tag) {
+ case Ev_Ir:
+ // an Ir event always comes first for a guest instruction
+ CLG_ASSERT(ev->inode->eventset == 0);
+ ev->inode->eventset = CLG_(sets).UIr;
+ break;
+ case Ev_Dr:
+ // extend event set by Dr counter
+ if ((ev->inode->eventset == CLG_(sets).UIrDr) ||
+ (ev->inode->eventset == CLG_(sets).UIrDrDw) ||
+ (ev->inode->eventset == CLG_(sets).UIrDwDr))
+ break;
+ if (ev->inode->eventset == CLG_(sets).UIrDw) {
+ ev->inode->eventset = CLG_(sets).UIrDwDr;
+ break;
+ }
+ CLG_ASSERT(ev->inode->eventset == CLG_(sets).UIr);
+ ev->inode->eventset = CLG_(sets).UIrDr;
+ break;
+ case Ev_Dw:
+ case Ev_Dm:
+ // extend event set by Dw counter
+ if ((ev->inode->eventset == CLG_(sets).UIrDw) ||
+ (ev->inode->eventset == CLG_(sets).UIrDwDr) ||
+ (ev->inode->eventset == CLG_(sets).UIrDrDw))
+ break;
+ if (ev->inode->eventset == CLG_(sets).UIrDr) {
+ ev->inode->eventset = CLG_(sets).UIrDrDw;
+ break;
+ }
+ CLG_ASSERT(ev->inode->eventset == CLG_(sets).UIr);
+ ev->inode->eventset = CLG_(sets).UIrDw;
+ break;
+ default:
+ tl_assert(0);
+ }
+ }
+ }
+
+ for(i = 0; i < clgs->events_used; i = inew) {
+
+ helperName = NULL;
+ helperAddr = NULL;
+ argv = NULL;
+ regparms = 0;
+
+ /* generate IR to notify event i and possibly the ones
+ immediately following it. */
+ tl_assert(i >= 0 && i < clgs->events_used);
+
+ ev = &clgs->events[i];
+ ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
+ ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
+
+ CLG_DEBUGIF(5) {
+ VG_(printf)(" flush ");
+ showEvent( ev );
+ }
+
+ i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
+
+ /* Decide on helper fn to call and args to pass it, and advance
+ i appropriately.
+ Dm events have the same effect as Dw events */
+ switch (ev->tag) {
+ case Ev_Ir:
+ /* Merge an Ir with a following Dr. */
+ if (ev2 && ev2->tag == Ev_Dr) {
+ /* Why is this true? It's because we're merging an Ir
+ with a following Dr. The Ir derives from the
+ instruction's IMark and the Dr from data
+ references which follow it. In short it holds
+ because each insn starts with an IMark, hence an
+ Ev_Ir, and so these Dr must pertain to the
+ immediately preceding Ir. Same applies to analogous
+ assertions in the subsequent cases. */
+ tl_assert(ev2->inode == ev->inode);
+ helperName = CLG_(cachesim).log_1I1Dr_name;
+ helperAddr = CLG_(cachesim).log_1I1Dr;
+ argv = mkIRExprVec_3( i_node_expr,
+ get_Event_dea(ev2),
+ mkIRExpr_HWord( get_Event_dszB(ev2) ) );
+ regparms = 3;
+ inew = i+2;
}
- else {
- helperName = CLG_(cachesim).log_1I1Dw_name;
- helperAddr = CLG_(cachesim).log_1I1Dw;
+ /* Merge an Ir with a following Dw/Dm. */
+ else
+ if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
+ tl_assert(ev2->inode == ev->inode);
+ helperName = CLG_(cachesim).log_1I1Dw_name;
+ helperAddr = CLG_(cachesim).log_1I1Dw;
+ argv = mkIRExprVec_3( i_node_expr,
+ get_Event_dea(ev2),
+ mkIRExpr_HWord( get_Event_dszB(ev2) ) );
+ regparms = 3;
+ inew = i+2;
}
- argc = 2;
- arg2 = storeAddrExpr;
- es = CLG_(sets).D1w;
-
- } else {
- // load/store
- if (instrIssued) {
- helperName = CLG_(cachesim).log_0I2D_name;
- helperAddr = CLG_(cachesim).log_0I2D;
+ /* Merge an Ir with two following Irs. */
+ else
+ if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
+ helperName = CLG_(cachesim).log_3I0D_name;
+ helperAddr = CLG_(cachesim).log_3I0D;
+ argv = mkIRExprVec_3( i_node_expr,
+ mkIRExpr_HWord( (HWord)ev2->inode ),
+ mkIRExpr_HWord( (HWord)ev3->inode ) );
+ regparms = 3;
+ inew = i+3;
}
+ /* Merge an Ir with one following Ir. */
+ else
+ if (ev2 && ev2->tag == Ev_Ir) {
+ helperName = CLG_(cachesim).log_2I0D_name;
+ helperAddr = CLG_(cachesim).log_2I0D;
+ argv = mkIRExprVec_2( i_node_expr,
+ mkIRExpr_HWord( (HWord)ev2->inode ) );
+ regparms = 2;
+ inew = i+2;
+ }
+ /* No merging possible; emit as-is. */
else {
- helperName = CLG_(cachesim).log_1I2D_name;
- helperAddr = CLG_(cachesim).log_1I2D;
+ helperName = CLG_(cachesim).log_1I0D_name;
+ helperAddr = CLG_(cachesim).log_1I0D;
+ argv = mkIRExprVec_1( i_node_expr );
+ regparms = 1;
+ inew = i+1;
}
- argc = 3;
- arg2 = loadAddrExpr;
- arg3 = storeAddrExpr;
- es = CLG_(sets).D2;
- }
- }
+ break;
+ case Ev_Dr:
+ /* Data read (Dm is handled as Dw below) */
+ helperName = CLG_(cachesim).log_0I1Dr_name;
+ helperAddr = CLG_(cachesim).log_0I1Dr;
+ argv = mkIRExprVec_3( i_node_expr,
+ get_Event_dea(ev),
+ mkIRExpr_HWord( get_Event_dszB(ev) ) );
+ regparms = 3;
+ inew = i+1;
+ break;
+ case Ev_Dw:
+ case Ev_Dm:
+ /* Data write */
+ helperName = CLG_(cachesim).log_0I1Dw_name;
+ helperAddr = CLG_(cachesim).log_0I1Dw;
+ argv = mkIRExprVec_3( i_node_expr,
+ get_Event_dea(ev),
+ mkIRExpr_HWord( get_Event_dszB(ev) ) );
+ regparms = 3;
+ inew = i+1;
+ break;
+ default:
+ tl_assert(0);
+ }
- /* helper could be unset depending on the simulator used */
- if (helperAddr == 0) return 0;
-
- /* Setup 1st arg: InstrInfo */
- arg1 = mkIRExpr_HWord( (HWord)ii );
-
- // Add call to the instrumentation function
- if (argc == 1)
- argv = mkIRExprVec_1(arg1);
- else if (argc == 2)
- argv = mkIRExprVec_2(arg1, arg2);
- else if (argc == 3)
- argv = mkIRExprVec_3(arg1, arg2, arg3);
- else
- VG_(tool_panic)("argc... not 1 or 2 or 3?");
-
- di = unsafeIRDirty_0_N( argc, helperName,
- VG_(fnptr_to_fnentry)( helperAddr ), argv);
- addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
+ CLG_DEBUGIF(5) {
+ if (inew > i+1) {
+ VG_(printf)(" merge ");
+ showEvent( ev2 );
+ }
+ if (inew > i+2) {
+ VG_(printf)(" merge ");
+ showEvent( ev3 );
+ }
+ if (helperAddr)
+ VG_(printf)(" call %s (%p)\n",
+ helperName, helperAddr);
+ }
+
+ /* helper could be unset depending on the simulator used */
+ if (helperAddr == 0) continue;
+
+ /* Add the helper. */
+ tl_assert(helperName);
+ tl_assert(helperAddr);
+ tl_assert(argv);
+ di = unsafeIRDirty_0_N( regparms,
+ helperName, VG_(fnptr_to_fnentry)( helperAddr ),
+ argv );
+ addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
+ }
- return es;
+ clgs->events_used = 0;
}
+static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
+{
+ Event* evt;
+ tl_assert(clgs->seen_before || (inode->eventset == 0));
+ if (!CLG_(clo).simulate_cache) return;
+
+ if (clgs->events_used == N_EVENTS)
+ flushEvents(clgs);
+ tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+ evt = &clgs->events[clgs->events_used];
+ init_Event(evt);
+ evt->tag = Ev_Ir;
+ evt->inode = inode;
+ clgs->events_used++;
+}
+
+static
+void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
+{
+ Event* evt;
+ tl_assert(isIRAtom(ea));
+ tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ if (!CLG_(clo).simulate_cache) return;
+
+ if (clgs->events_used == N_EVENTS)
+ flushEvents(clgs);
+ tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+ evt = &clgs->events[clgs->events_used];
+ init_Event(evt);
+ evt->tag = Ev_Dr;
+ evt->inode = inode;
+ evt->Ev.Dr.szB = datasize;
+ evt->Ev.Dr.ea = ea;
+ clgs->events_used++;
+}
-/* Instrumentation before a conditional jump or at the end
- * of each original instruction.
- * Fills the InstrInfo struct if not seen before
- */
static
-void endOfInstr(IRSB* bbOut, InstrInfo* ii, Bool bb_seen_before,
- UInt instr_offset, UInt instrLen, UInt dataSize,
- UInt* cost_offset, Bool instrIssued,
- IRExpr* loadAddrExpr, IRExpr* storeAddrExpr)
+void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
- IRType wordTy;
- EventSet* es;
-
- // Stay sane ...
- CLG_ASSERT(sizeof(HWord) == sizeof(void*));
- if (sizeof(HWord) == 4) {
- wordTy = Ity_I32;
- } else
- if (sizeof(HWord) == 8) {
- wordTy = Ity_I64;
- } else {
- VG_(tool_panic)("endOfInstr: strange word size");
+ Event* lastEvt;
+ Event* evt;
+ tl_assert(isIRAtom(ea));
+ tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ if (!CLG_(clo).simulate_cache) return;
+
+ /* Is it possible to merge this write with the preceding read? */
+ lastEvt = &clgs->events[clgs->events_used-1];
+ if (clgs->events_used > 0
+ && lastEvt->tag == Ev_Dr
+ && lastEvt->Ev.Dr.szB == datasize
+ && lastEvt->inode == inode
+ && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
+ {
+ lastEvt->tag = Ev_Dm;
+ return;
}
- if (loadAddrExpr)
- CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr));
- if (storeAddrExpr)
- CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr));
-
- // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be
- // done inaccurately, but they're very rare and this avoids errors from
- // hitting more than two cache lines in the simulation.
- if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE;
-
- /* returns 0 if simulator needs no instrumentation */
- es = insert_simcall(bbOut, ii, dataSize, instrIssued,
- loadAddrExpr, storeAddrExpr);
-
- CLG_DEBUG(5, " Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n",
- instr_offset, instrLen, dataSize,
- es ? es->name : (Char*)"(no instrumentation)",
- es ? es->size : 0);
-
- if (bb_seen_before) {
- CLG_DEBUG(5, " before: Instr +%2d (Size %d, DSize %d)\n",
- ii->instr_offset, ii->instr_size, ii->data_size);
-
- CLG_ASSERT(ii->instr_offset == instr_offset);
- CLG_ASSERT(ii->instr_size == instrLen);
- CLG_ASSERT(ii->cost_offset == *cost_offset);
- CLG_ASSERT(ii->eventset == es);
-
- /* Only check size if data size >0.
- * This is needed: e.g. for rep or cmov x86 instructions, the same InstrInfo
- * is used both for 2 simulator calls: for the pure instruction fetch and
- * separately for an memory access (which may not happen depending on flags).
- * If checked always, this triggers an assertion failure on retranslation.
- */
- if (dataSize>0) CLG_ASSERT(ii->data_size == dataSize);
+ /* No. Add as normal. */
+ if (clgs->events_used == N_EVENTS)
+ flushEvents(clgs);
+ tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
+ evt = &clgs->events[clgs->events_used];
+ init_Event(evt);
+ evt->tag = Ev_Dw;
+ evt->inode = inode;
+ evt->Ev.Dw.szB = datasize;
+ evt->Ev.Dw.ea = ea;
+ clgs->events_used++;
+}
+/* Initialise, or check if already seen before, an InstrInfo for the next
+ insn. We can only set instr_offset/instr_size here. The required event
+ set and the resulting cost offset depend on the events (Ir/Dr/Dw/Dm) of
+ the guest instruction. The event set is extended as required when the
+ event queue is flushed (once Dm events have been determined); cost
+ offsets are determined at the end of BB instrumentation. */
+static
+InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
+{
+ InstrInfo* ii;
+ tl_assert(clgs->ii_index >= 0);
+ tl_assert(clgs->ii_index < clgs->bb->instr_count);
+ ii = &clgs->bb->instr[ clgs->ii_index ];
+
+ if (clgs->seen_before) {
+ CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
+ CLG_ASSERT(ii->instr_size == instr_size);
}
else {
- ii->instr_offset = instr_offset;
- ii->instr_size = instrLen;
- ii->cost_offset = *cost_offset;
- ii->eventset = es;
-
- /* data size only relevant if >0 */
- if (dataSize > 0) ii->data_size = dataSize;
+ ii->instr_offset = clgs->instr_offset;
+ ii->instr_size = instr_size;
+ ii->cost_offset = 0;
+ ii->eventset = 0;
+ }
+ clgs->ii_index++;
+ clgs->instr_offset += instr_size;
+ CLG_(stat).distinct_instrs++;
- CLG_(stat).distinct_instrs++;
- }
+ return ii;
+}
- *cost_offset += es ? es->size : 0;
+// return total number of cost values needed for this BB
+static
+UInt update_cost_offsets( ClgState* clgs )
+{
+ Int i;
+ InstrInfo* ii;
+ UInt cost_offset = 0;
+
+ CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
+ for(i=0; i<clgs->ii_index; i++) {
+ ii = &clgs->bb->instr[i];
+ if (clgs->seen_before) {
+ CLG_ASSERT(ii->cost_offset == cost_offset);
+ } else
+ ii->cost_offset = cost_offset;
+ cost_offset += ii->eventset ? ii->eventset->size : 0;
+ }
+ return cost_offset;
}
+/*------------------------------------------------------------*/
+/*--- Instrumentation ---*/
+/*------------------------------------------------------------*/
+
#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
@@ -344,7 +610,7 @@ Addr IRConst2Addr(IRConst* con)
*
* Called from CLG_(get_bb)
*/
-void CLG_(collectBlockInfo)(IRSB* bbIn,
+void CLG_(collectBlockInfo)(IRSB* sbIn,
/*INOUT*/ UInt* instrs,
/*INOUT*/ UInt* cjmps,
/*INOUT*/ Bool* cjmp_inverted)
@@ -360,10 +626,10 @@ void CLG_(collectBlockInfo)(IRSB* bbIn,
// nothing to do with client code
Bool inPreamble = True;
- if (!bbIn) return;
+ if (!sbIn) return;
- for (i = 0; i < bbIn->stmts_used; i++) {
- st = bbIn->stmts[i];
+ for (i = 0; i < sbIn->stmts_used; i++) {
+ st = sbIn->stmts[i];
if (Ist_IMark == st->tag) {
inPreamble = False;
@@ -377,7 +643,7 @@ void CLG_(collectBlockInfo)(IRSB* bbIn,
if (Ist_Exit == st->tag) {
jumpDst = IRConst2Addr(st->Ist.Exit.dst);
toNextInstr = (jumpDst == instrAddr + instrLen);
-
+
(*cjmps)++;
}
}
@@ -389,98 +655,6 @@ void CLG_(collectBlockInfo)(IRSB* bbIn,
}
static
-void collectStatementInfo(IRTypeEnv* tyenv, IRStmt* st,
- Addr* instrAddr, UInt* instrLen,
- IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
- UInt* dataSize)
-{
- CLG_ASSERT(isFlatIRStmt(st));
-
- switch (st->tag) {
- case Ist_NoOp:
- break;
-
- case Ist_AbiHint:
- /* ABI hints aren't interesting. Ignore. */
- break;
-
- case Ist_IMark:
- /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this
- to the host's native pointer type; if that is 32 bits then it
- discards the upper 32 bits. If we are cachegrinding on a
- 32-bit host then we are also ensured that the guest word size
- is 32 bits, due to the assertion in cg_instrument that the
- host and guest word sizes must be the same. Hence
- st->Ist.IMark.addr will have been derived from a 32-bit guest
- code address and truncation of it is safe. I believe this
- assignment should be correct for both 32- and 64-bit
- machines. */
- *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
- *instrLen = st->Ist.IMark.len;
- break;
-
- case Ist_WrTmp: {
- IRExpr* data = st->Ist.WrTmp.data;
- if (data->tag == Iex_Load) {
- IRExpr* aexpr = data->Iex.Load.addr;
- CLG_ASSERT( isIRAtom(aexpr) );
- // Note also, endianness info is ignored. I guess that's not
- // interesting.
- // XXX: repe cmpsb does two loads... the first one is ignored here!
- //tl_assert( NULL == *loadAddrExpr ); // XXX: ???
- *loadAddrExpr = aexpr;
- *dataSize = sizeofIRType(data->Iex.Load.ty);
- }
- break;
- }
-
- case Ist_Store: {
- IRExpr* data = st->Ist.Store.data;
- IRExpr* aexpr = st->Ist.Store.addr;
- CLG_ASSERT( isIRAtom(aexpr) );
- if ( NULL == *storeAddrExpr ) {
- /* this is a kludge: ignore all except the first store from
- an instruction. */
- *storeAddrExpr = aexpr;
- *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
- }
- break;
- }
-
- case Ist_Dirty: {
- IRDirty* d = st->Ist.Dirty.details;
- if (d->mFx != Ifx_None) {
- /* This dirty helper accesses memory. Collect the
- details. */
- CLG_ASSERT(d->mAddr != NULL);
- CLG_ASSERT(d->mSize != 0);
- *dataSize = d->mSize;
- if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
- *loadAddrExpr = d->mAddr;
- if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
- *storeAddrExpr = d->mAddr;
- } else {
- CLG_ASSERT(d->mAddr == NULL);
- CLG_ASSERT(d->mSize == 0);
- }
- break;
- }
-
- case Ist_Put:
- case Ist_PutI:
- case Ist_MBE:
- case Ist_Exit:
- break;
-
- default:
- VG_(printf)("\n");
- ppIRStmt(st);
- VG_(printf)("\n");
- VG_(tool_panic)("Callgrind: unhandled IRStmt");
- }
-}
-
-static
void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
{
addStmtToIRSB( bbOut,
@@ -491,29 +665,56 @@ void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
IRExpr_Const(IRConst_U32(val)) ));
}
+
+/* add helper call to setup_bbcc, with pointer to BB struct as argument
+ *
+ * precondition for setup_bbcc:
+ * - jmps_passed has the number of conditional jumps passed in the last
+ *   executed BB
+ * - current_bbcc has a pointer to the BBCC of the last executed BB
+ *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
+ *       current_bbcc->bb->jmp_addr
+ *   gives the address of the jump source.
+ *
+ * The setup does two things:
+ * - trace call:
+ *   * unwind our own call stack, i.e. sync our ESP with the real ESP;
+ *     this is for ESP manipulation (longjmps, C++ exception handling) and RET
+ *   * for CALLs or JMPs crossing objects, record the call argument and
+ *     push a frame on our own call stack
+ *
+ * - prepare for the cache log functions:
+ *   set current_bbcc to the BBCC that gets the costs for this BB execution
+ *   attached
+ */
+static
+void addBBSetupCall(ClgState* clgs)
+{
+ IRDirty* di;
+ IRExpr *arg1, **argv;
+
+ arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
+ argv = mkIRExprVec_1(arg1);
+ di = unsafeIRDirty_0_N( 1, "setup_bbcc",
+ VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
+ argv);
+ addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
+}
+
+
static
IRSB* CLG_(instrument)( VgCallbackClosure* closure,
- IRSB* bbIn,
+ IRSB* sbIn,
VexGuestLayout* layout,
VexGuestExtents* vge,
IRType gWordTy, IRType hWordTy )
{
- Int i;
- IRSB* bbOut;
- IRStmt* st, *stnext;
- Addr instrAddr, origAddr;
- UInt instrLen = 0, dataSize;
- UInt instrCount, costOffset;
- IRExpr *loadAddrExpr, *storeAddrExpr;
-
- BB* bb;
+ Int i, isize;
+ IRStmt* st;
+ Addr origAddr;
+ InstrInfo* curr_inode = NULL;
+ ClgState clgs;
+ UInt cJumps = 0;
- IRDirty* di;
- IRExpr *arg1, **argv;
-
- Bool bb_seen_before = False;
- UInt cJumps = 0, cJumpsCorrected;
- Bool beforeIBoundary, instrIssued;
if (gWordTy != hWordTy) {
/* We don't currently support this case. */
@@ -524,173 +725,206 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure,
if (! CLG_(instrument_state)) {
CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
(Addr)closure->readdr);
- return bbIn;
+ return sbIn;
}
CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
/* Set up SB for instrumented IR */
- bbOut = deepCopyIRSBExceptStmts(bbIn);
+ clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
// Copy verbatim any IR preamble preceding the first IMark
i = 0;
- while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) {
- addStmtToIRSB( bbOut, bbIn->stmts[i] );
+ while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
+ addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
i++;
}
// Get the first statement, and origAddr from it
- CLG_ASSERT(bbIn->stmts_used > 0);
- st = bbIn->stmts[i];
+ CLG_ASSERT(sbIn->stmts_used >0);
+ CLG_ASSERT(i < sbIn->stmts_used);
+ st = sbIn->stmts[i];
CLG_ASSERT(Ist_IMark == st->tag);
- instrAddr = origAddr = (Addr)st->Ist.IMark.addr;
+
+ origAddr = (Addr)st->Ist.IMark.addr;
CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow
- /* Get BB (creating if necessary).
+ /* Get BB struct (creating if necessary).
* JS: The hash table is keyed with orig_addr_noredir -- important!
* JW: Why? If it is because of different chasing of the redirection,
* this is not needed, as chasing is switched off in callgrind
*/
- bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before);
- //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before);
-
- /*
- * Precondition:
- * - jmps_passed has number of cond.jumps passed in last executed BB
- * - current_bbcc has a pointer to the BBCC of the last executed BB
- * Thus, if bbcc_jmpkind is != -1 (JmpNone),
- * current_bbcc->bb->jmp_addr
- * gives the address of the jump source.
- *
- * The BBCC setup does 2 things:
- * - trace call:
- * * Unwind own call stack, i.e sync our ESP with real ESP
- * This is for ESP manipulation (longjmps, C++ exec handling) and RET
- * * For CALLs or JMPs crossing objects, record call arg +
- * push are on own call stack
- *
- * - prepare for cache log functions:
- * Set current_bbcc to BBCC that gets the costs for this BB execution
- * attached
- */
+ clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
+
+ addBBSetupCall(&clgs);
+
+ // Set up running state
+ clgs.events_used = 0;
+ clgs.ii_index = 0;
+ clgs.instr_offset = 0;
+
+ for (/*use current i*/; i < sbIn->stmts_used; i++) {
+
+ st = sbIn->stmts[i];
+ CLG_ASSERT(isFlatIRStmt(st));
+
+ switch (st->tag) {
+ case Ist_NoOp:
+ case Ist_AbiHint:
+ case Ist_Put:
+ case Ist_PutI:
+ case Ist_MBE:
+ break;
+
+ case Ist_IMark: {
+ CLG_ASSERT(clgs.instr_offset == (Addr)st->Ist.IMark.addr - origAddr);
+ isize = st->Ist.IMark.len;
+ // If Vex fails to decode an instruction, the size will be zero.
+ // Pretend otherwise.
+ if (isize == 0) isize = VG_MIN_INSTR_SZB;
+
+ // Sanity-check size.
+ tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
+ || VG_CLREQ_SZB == isize );
+
+ // Init the inode, record it as the current one.
+ // Subsequent Dr/Dw/Dm events from the same instruction will
+ // also use it.
+ curr_inode = next_InstrInfo (&clgs, isize);
+
+ addEvent_Ir( &clgs, curr_inode );
+ break;
+ }
+
+ case Ist_WrTmp: {
+ IRExpr* data = st->Ist.WrTmp.data;
+ if (data->tag == Iex_Load) {
+ IRExpr* aexpr = data->Iex.Load.addr;
+ // Note also, endianness info is ignored. I guess
+ // that's not interesting.
+ addEvent_Dr( &clgs, curr_inode,
+ sizeofIRType(data->Iex.Load.ty), aexpr );
+ }
+ break;
+ }
+
+ case Ist_Store: {
+ IRExpr* data = st->Ist.Store.data;
+ IRExpr* aexpr = st->Ist.Store.addr;
+ addEvent_Dw( &clgs, curr_inode,
+ sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
+ break;
+ }
+
+ case Ist_Dirty: {
+ Int dataSize;
+ IRDirty* d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None) {
+ /* This dirty helper accesses memory. Collect the details. */
+ tl_assert(d->mAddr != NULL);
+ tl_assert(d->mSize != 0);
+ dataSize = d->mSize;
+ // Large (eg. 28B, 108B, 512B on x86) data-sized
+ // instructions will be done inaccurately, but they're
+ // very rare and this avoids errors from hitting more
+ // than two cache lines in the simulation.
+ if (dataSize > MIN_LINE_SIZE)
+ dataSize = MIN_LINE_SIZE;
+ if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
+ addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
+ if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
+ addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
+ } else {
+ tl_assert(d->mAddr == NULL);
+ tl_assert(d->mSize == 0);
+ }
+ break;
+ }
- // helper call to setup_bbcc, with pointer to basic block info struct as argument
- arg1 = mkIRExpr_HWord( (HWord)bb );
- argv = mkIRExprVec_1(arg1);
- di = unsafeIRDirty_0_N( 1, "setup_bbcc",
- VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
- argv);
- addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
-
- instrCount = 0;
- costOffset = 0;
-
- // loop for each host instruction (starting from 'i')
- do {
-
- // We should be at an IMark statement
- CLG_ASSERT(Ist_IMark == st->tag);
-
- // Reset stuff for this original instruction
- loadAddrExpr = storeAddrExpr = NULL;
- instrIssued = False;
- dataSize = 0;
-
- // Process all the statements for this original instruction (ie. until
- // the next IMark statement, or the end of the block)
- do {
- i++;
- stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL );
- beforeIBoundary = !stnext || (Ist_IMark == stnext->tag);
- collectStatementInfo(bbIn->tyenv, st, &instrAddr, &instrLen,
- &loadAddrExpr, &storeAddrExpr, &dataSize);
-
- // instrument a simulator call before conditional jumps
- if (st->tag == Ist_Exit) {
- // Nb: instrLen will be zero if Vex failed to decode it.
- // Also Client requests can appear to be very large (eg. 18
- // bytes on x86) because they are really multiple instructions.
- CLG_ASSERT( 0 == instrLen ||
- bbIn->jumpkind == Ijk_ClientReq ||
- (instrLen >= VG_MIN_INSTR_SZB &&
- instrLen <= VG_MAX_INSTR_SZB) );
-
- // Add instrumentation before this statement
- endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
- instrAddr - origAddr, instrLen, dataSize, &costOffset,
- instrIssued, loadAddrExpr, storeAddrExpr);
-
- // prepare for a possible further simcall in same host instr
- loadAddrExpr = storeAddrExpr = NULL;
- instrIssued = True;
-
- if (!bb_seen_before) {
- bb->jmp[cJumps].instr = instrCount;
- bb->jmp[cJumps].skip = False;
- }
-
- /* Update global variable jmps_passed (this is before the jump!)
- * A correction is needed if VEX inverted the last jump condition
- */
- cJumpsCorrected = cJumps;
- if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++;
- addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
- cJumpsCorrected, hWordTy);
-
- cJumps++;
- }
+ case Ist_Exit: {
+ UInt jmps_passed;
+
+ /* We may never reach the next statement, so need to flush
+ all outstanding transactions now. */
+ flushEvents( &clgs );
- addStmtToIRSB( bbOut, st );
- st = stnext;
- }
- while (!beforeIBoundary);
+ CLG_ASSERT(clgs.ii_index>0);
+ if (!clgs.seen_before) {
+ clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
+ clgs.bb->jmp[cJumps].skip = False;
+ }
+
+ /* Update global variable jmps_passed before the jump
+ * A correction is needed if VEX inverted the last jump condition
+ */
+ jmps_passed = cJumps;
+ if ((cJumps+1 == clgs.bb->cjmp_count) && clgs.bb->cjmp_inverted)
+ jmps_passed++;
+ addConstMemStoreStmt( clgs.sbOut,
+ (UWord) &CLG_(current_state).jmps_passed,
+ jmps_passed, hWordTy);
+ cJumps++;
+
+ break;
+ }
+
+ default:
+ tl_assert(0);
+ break;
+ }
- // Add instrumentation for this original instruction.
- if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr !=0))
- endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before,
- instrAddr - origAddr, instrLen, dataSize, &costOffset,
- instrIssued, loadAddrExpr, storeAddrExpr);
+ /* Copy the original statement */
+ addStmtToIRSB( clgs.sbOut, st );
- instrCount++;
+ CLG_DEBUGIF(5) {
+ VG_(printf)(" pass ");
+ ppIRStmt(st);
+ VG_(printf)("\n");
+ }
}
- while (st);
- /* Always update global variable jmps_passed (at end of BB)
+ /* At the end of the bb. Flush outstandings. */
+ flushEvents( &clgs );
+
+ /* Always update global variable jmps_passed at end of bb.
* A correction is needed if VEX inverted the last jump condition
*/
- cJumpsCorrected = cJumps;
- if (bb->cjmp_inverted) cJumpsCorrected--;
- addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed,
- cJumpsCorrected, hWordTy);
+ {
+ UInt jmps_passed = cJumps;
+ if (clgs.bb->cjmp_inverted) jmps_passed--;
+ addConstMemStoreStmt( clgs.sbOut,
+ (UWord) &CLG_(current_state).jmps_passed,
+ jmps_passed, hWordTy);
+ }
+ CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
+ CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);
/* This stores the instr of the call/ret at BB end */
- bb->jmp[cJumps].instr = instrCount-1;
+ clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
- CLG_ASSERT(bb->cjmp_count == cJumps);
- CLG_ASSERT(bb->instr_count == instrCount);
-
- instrAddr += instrLen;
- if (bb_seen_before) {
- CLG_ASSERT(bb->instr_len == instrAddr - origAddr);
- CLG_ASSERT(bb->cost_count == costOffset);
- CLG_ASSERT(bb->jmpkind == bbIn->jumpkind);
+ if (clgs.seen_before) {
+ CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
+ CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
+ CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind);
}
else {
- bb->instr_len = instrAddr - origAddr;
- bb->cost_count = costOffset;
- bb->jmpkind = bbIn->jumpkind;
+ clgs.bb->cost_count = update_cost_offsets(&clgs);
+ clgs.bb->instr_len = clgs.instr_offset;
+ clgs.bb->jmpkind = sbIn->jumpkind;
}
-
+
CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
- origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count);
+ origAddr, clgs.bb->instr_len,
+ clgs.bb->cjmp_count, clgs.bb->cost_count);
if (cJumps>0) {
CLG_DEBUG(3, " [ ");
for (i=0;i<cJumps;i++)
- CLG_DEBUG(3, "%d ", bb->jmp[i].instr);
- CLG_DEBUG(3, "], last inverted: %s \n", bb->cjmp_inverted ? "yes":"no");
+ CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
+ CLG_DEBUG(3, "], last inverted: %s \n",
+ clgs.bb->cjmp_inverted ? "yes":"no");
}
- return bbOut;
+ return clgs.sbOut;
}
/*--------------------------------------------------------------------*/
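A standalone sketch (not part of the patch) of the helper-selection order that flushEvents above applies to the queued events. The tags and helper names mirror the patch; everything else is invented for illustration:

#include <stdio.h>

typedef enum { IR, DR, DW, DM } Tag;

/* Return the helper name for the event at position i and how many
   queued events that single call consumes (mirrors flushEvents:
   Ir+Dr, Ir+Dw/Dm, Ir+Ir+Ir, Ir+Ir, then the unmerged cases). */
static const char* pick_helper(const Tag* ev, int n, int i, int* consumed)
{
    int has2 = i + 1 < n, has3 = i + 2 < n;
    if (ev[i] == IR) {
        if (has2 && ev[i+1] == DR) { *consumed = 2; return "log_1I1Dr"; }
        if (has2 && (ev[i+1] == DW || ev[i+1] == DM))
                                   { *consumed = 2; return "log_1I1Dw"; }
        if (has3 && ev[i+1] == IR && ev[i+2] == IR)
                                   { *consumed = 3; return "log_3I0D"; }
        if (has2 && ev[i+1] == IR) { *consumed = 2; return "log_2I0D"; }
        *consumed = 1; return "log_1I0D";
    }
    *consumed = 1;
    return (ev[i] == DR) ? "log_0I1Dr" : "log_0I1Dw";
}

int main(void)
{
    Tag queue[] = { IR, DR, IR, IR, IR, DM };
    int used;
    for (int i = 0; i < 6; i += used)
        printf("%s\n", pick_helper(queue, 6, i, &used));
    return 0; /* prints log_1I1Dr, log_3I0D, log_0I1Dw */
}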
diff --git a/callgrind/sim.c b/callgrind/sim.c
index 9edbecc17..9e53f8916 100644
--- a/callgrind/sim.c
+++ b/callgrind/sim.c
@@ -113,22 +113,21 @@ static Bool clo_collect_cacheuse = False;
* - BBCC* nonskipped (only != 0 when in a function not skipped)
*/
-/* Offset to events in event set, used in log_* functions */
-static Int off_D0_Ir;
-static Int off_D1r_Ir;
-static Int off_D1r_Dr;
-static Int off_D1w_Ir;
-static Int off_D1w_Dw;
-static Int off_D2_Ir;
-static Int off_D2_Dr;
-static Int off_D2_Dw;
+/* Offsets to events in an event set, used in the log_* functions.
+ * off_<EventSet>_<BasicEventSet>: offset where the basic set is found
+ */
+static Int off_UIr_Ir;
+static Int off_UIrDr_Ir, off_UIrDr_Dr;
+static Int off_UIrDrDw_Ir, off_UIrDrDw_Dr, off_UIrDrDw_Dw;
+static Int off_UIrDw_Ir, off_UIrDw_Dw;
+static Int off_UIrDwDr_Ir, off_UIrDwDr_Dr, off_UIrDwDr_Dw;
static Addr bb_base;
static ULong* cost_base;
static InstrInfo* current_ii;
/* Cache use offsets */
-/* FIXME: The offsets are only correct because all eventsets get
+/* The offsets are only correct because all per-instruction event sets get
* the "Use" set added first !
*/
static Int off_I1_AcCost = 0;
@@ -984,13 +983,13 @@ static
void cacheuse_finish(void)
{
int i;
- InstrInfo ii = { 0,0,0,0,0 };
+ InstrInfo ii = { 0,0,0,0 };
if (!CLG_(current_state).collect) return;
bb_base = 0;
current_ii = &ii;
- cost_base = 0;
+ cost_base = 0;
/* update usage counters */
if (I1.use)
@@ -1043,6 +1042,19 @@ void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)
}
}
+static
+Char* cacheRes(CacheModelResult r)
+{
+ switch(r) {
+ case L1_Hit: return "L1 Hit ";
+ case L2_Hit: return "L2 Hit ";
+ case MemAccess: return "L2 Miss";
+ case WriteBackMemAccess: return "L2 Miss (dirty)";
+ default:
+ tl_assert(0);
+ }
+ return "??";
+}
VG_REGPARM(1)
static void log_1I0D(InstrInfo* ii)
@@ -1052,37 +1064,101 @@ static void log_1I0D(InstrInfo* ii)
current_ii = ii;
IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
- CLG_DEBUG(6, "log_1I0D: Ir=%#lx/%u => Ir %d\n",
- bb_base + ii->instr_offset, ii->instr_size, IrRes);
+ CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n",
+ bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes));
if (CLG_(current_state).collect) {
ULong* cost_Ir;
-
+
if (CLG_(current_state).nonskipped)
cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
else
- cost_Ir = cost_base + ii->cost_offset + off_D0_Ir;
+ cost_Ir = cost_base + ii->cost_offset + off_UIr_Ir;
inc_costs(IrRes, cost_Ir,
CLG_(current_state).cost + CLG_(sets).off_full_Ir );
}
}
+VG_REGPARM(2)
+static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2)
+{
+ CacheModelResult Ir1Res, Ir2Res;
+ ULong *global_cost_Ir;
+
+ current_ii = ii1;
+ Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size);
+ current_ii = ii2;
+ Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size);
+
+ CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n",
+ bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
+ bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) );
+
+ if (!CLG_(current_state).collect) return;
+
+ global_cost_Ir = CLG_(current_state).cost + CLG_(sets).off_full_Ir;
+ if (CLG_(current_state).nonskipped) {
+ ULong* skipped_cost_Ir = CLG_(current_state).nonskipped->skipped +
+ CLG_(sets).off_full_Ir;
+ inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
+ inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
+ return;
+ }
+
+ inc_costs(Ir1Res, global_cost_Ir, cost_base + ii1->cost_offset + off_UIr_Ir);
+ inc_costs(Ir2Res, global_cost_Ir, cost_base + ii2->cost_offset + off_UIr_Ir);
+}
+
+VG_REGPARM(3)
+static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3)
+{
+ CacheModelResult Ir1Res, Ir2Res, Ir3Res;
+ ULong *global_cost_Ir;
+
+ current_ii = ii1;
+ Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size);
+ current_ii = ii2;
+ Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size);
+ current_ii = ii3;
+ Ir3Res = (*simulator.I1_Read)(bb_base + ii3->instr_offset, ii3->instr_size);
+
+ CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n",
+ bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res),
+ bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res),
+ bb_base + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) );
+
+ if (!CLG_(current_state).collect) return;
+
+ global_cost_Ir = CLG_(current_state).cost + CLG_(sets).off_full_Ir;
+ if (CLG_(current_state).nonskipped) {
+ ULong* skipped_cost_Ir = CLG_(current_state).nonskipped->skipped +
+ CLG_(sets).off_full_Ir;
+ inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir);
+ inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir);
+ inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir);
+ return;
+ }
+
+ inc_costs(Ir1Res, global_cost_Ir, cost_base + ii1->cost_offset + off_UIr_Ir);
+ inc_costs(Ir2Res, global_cost_Ir, cost_base + ii2->cost_offset + off_UIr_Ir);
+ inc_costs(Ir3Res, global_cost_Ir, cost_base + ii3->cost_offset + off_UIr_Ir);
+}
/* Instruction doing a read access */
-VG_REGPARM(2)
-static void log_1I1Dr(InstrInfo* ii, Addr data)
+VG_REGPARM(3)
+static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
{
CacheModelResult IrRes, DrRes;
current_ii = ii;
IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
- DrRes = (*simulator.D1_Read)(data, ii->data_size);
+ DrRes = (*simulator.D1_Read)(data_addr, data_size);
- CLG_DEBUG(6, "log_1I1Dr: Ir=%#lx/%u, Dr=%#lx/%u => Ir %d, Dr %d\n",
- bb_base + ii->instr_offset, ii->instr_size,
- data, ii->data_size, IrRes, DrRes);
+ CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n",
+ bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
+ data_addr, data_size, cacheRes(DrRes));
if (CLG_(current_state).collect) {
ULong *cost_Ir, *cost_Dr;
@@ -1092,8 +1168,11 @@ static void log_1I1Dr(InstrInfo* ii, Addr data)
cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
}
else {
- cost_Ir = cost_base + ii->cost_offset + off_D1r_Ir;
- cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr;
+ // event set must be UIrDr or extension
+ CLG_ASSERT((ii->eventset == CLG_(sets).UIrDr) ||
+ (ii->eventset == CLG_(sets).UIrDrDw));
+ cost_Ir = cost_base + ii->cost_offset + off_UIrDr_Ir;
+ cost_Dr = cost_base + ii->cost_offset + off_UIrDr_Dr;
}
inc_costs(IrRes, cost_Ir,
@@ -1104,16 +1183,16 @@ static void log_1I1Dr(InstrInfo* ii, Addr data)
}
-VG_REGPARM(2)
-static void log_0I1Dr(InstrInfo* ii, Addr data)
+VG_REGPARM(3)
+static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
{
CacheModelResult DrRes;
current_ii = ii;
- DrRes = (*simulator.D1_Read)(data, ii->data_size);
+ DrRes = (*simulator.D1_Read)(data_addr, data_size);
- CLG_DEBUG(6, "log_0I1Dr: Dr=%#lx/%u => Dr %d\n",
- data, ii->data_size, DrRes);
+ CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n",
+ data_addr, data_size, cacheRes(DrRes));
if (CLG_(current_state).collect) {
ULong *cost_Dr;
@@ -1122,9 +1201,15 @@ static void log_0I1Dr(InstrInfo* ii, Addr data)
cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
}
else {
- cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr;
+ Int off_Dr;
+ if (ii->eventset == CLG_(sets).UIrDr) off_Dr = off_UIrDr_Dr;
+ else if (ii->eventset == CLG_(sets).UIrDrDw) off_Dr = off_UIrDrDw_Dr;
+ else if (ii->eventset == CLG_(sets).UIrDwDr) off_Dr = off_UIrDwDr_Dr;
+ else CLG_ASSERT(0);
+
+ cost_Dr = cost_base + ii->cost_offset + off_Dr;
}
-
+
inc_costs(DrRes, cost_Dr,
CLG_(current_state).cost + CLG_(sets).off_full_Dr );
}
@@ -1133,29 +1218,33 @@ static void log_0I1Dr(InstrInfo* ii, Addr data)
/* Instruction doing a write access */
-VG_REGPARM(2)
-static void log_1I1Dw(InstrInfo* ii, Addr data)
+VG_REGPARM(3)
+static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
{
CacheModelResult IrRes, DwRes;
current_ii = ii;
IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
- DwRes = (*simulator.D1_Write)(data, ii->data_size);
+ DwRes = (*simulator.D1_Write)(data_addr, data_size);
- CLG_DEBUG(6, "log_1I1Dw: Ir=%#lx/%u, Dw=%#lx/%u => Ir %d, Dw %d\n",
- bb_base + ii->instr_offset, ii->instr_size,
- data, ii->data_size, IrRes, DwRes);
+ CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n",
+ bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes),
+ data_addr, data_size, cacheRes(DwRes));
if (CLG_(current_state).collect) {
ULong *cost_Ir, *cost_Dw;
if (CLG_(current_state).nonskipped) {
- cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir;
- cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw;
+ cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
+ cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
}
else {
- cost_Ir = cost_base + ii->cost_offset + off_D1w_Ir;
- cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw;
+ // This helper is called when a Dw event follows Ir;
+ // event set must be UIrDw or an extension
+ CLG_ASSERT((ii->eventset == CLG_(sets).UIrDw) ||
+ (ii->eventset == CLG_(sets).UIrDwDr));
+ cost_Ir = cost_base + ii->cost_offset + off_UIrDw_Ir;
+ cost_Dw = cost_base + ii->cost_offset + off_UIrDw_Dw;
}
inc_costs(IrRes, cost_Ir,
@@ -1165,16 +1254,16 @@ static void log_1I1Dw(InstrInfo* ii, Addr data)
}
}
-VG_REGPARM(2)
-static void log_0I1Dw(InstrInfo* ii, Addr data)
+VG_REGPARM(3)
+static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size)
{
CacheModelResult DwRes;
current_ii = ii;
- DwRes = (*simulator.D1_Write)(data, ii->data_size);
+ DwRes = (*simulator.D1_Write)(data_addr, data_size);
- CLG_DEBUG(6, "log_0I1Dw: Dw=%#lx/%u => Dw %d\n",
- data, ii->data_size, DwRes);
+ CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n",
+ data_addr, data_size, cacheRes(DwRes));
if (CLG_(current_state).collect) {
ULong *cost_Dw;
@@ -1183,7 +1272,13 @@ static void log_0I1Dw(InstrInfo* ii, Addr data)
cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
}
else {
- cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw;
+ Int off_Dw;
+ if (ii->eventset == CLG_(sets).UIrDw) off_Dw = off_UIrDw_Dw;
+ else if (ii->eventset == CLG_(sets).UIrDwDr) off_Dw = off_UIrDwDr_Dw;
+ else if (ii->eventset == CLG_(sets).UIrDrDw) off_Dw = off_UIrDrDw_Dw;
+ else CLG_ASSERT(0);
+
+ cost_Dw = cost_base + ii->cost_offset + off_Dw;
}
inc_costs(DwRes, cost_Dw,
@@ -1191,77 +1286,6 @@ static void log_0I1Dw(InstrInfo* ii, Addr data)
}
}
-/* Instruction doing a read and a write access */
-
-VG_REGPARM(3)
-static void log_1I2D(InstrInfo* ii, Addr data1, Addr data2)
-{
- CacheModelResult IrRes, DrRes, DwRes;
-
- current_ii = ii;
- IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
- DrRes = (*simulator.D1_Read)(data1, ii->data_size);
- DwRes = (*simulator.D1_Write)(data2, ii->data_size);
-
- CLG_DEBUG(6,
- "log_1I2D: Ir=%#lx/%u, Dr=%#lx/%u, Dw=%#lx/%u => Ir %d, Dr %d, Dw %d\n",
- bb_base + ii->instr_offset, ii->instr_size,
- data1, ii->data_size, data2, ii->data_size, IrRes, DrRes, DwRes);
-
- if (CLG_(current_state).collect) {
- ULong *cost_Ir, *cost_Dr, *cost_Dw;
-
- if (CLG_(current_state).nonskipped) {
- cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir;
- cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr;
- cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw;
- }
- else {
- cost_Ir = cost_base + ii->cost_offset + off_D2_Ir;
- cost_Dr = cost_base + ii->cost_offset + off_D2_Dr;
- cost_Dw = cost_base + ii->cost_offset + off_D2_Dw;
- }
-
- inc_costs(IrRes, cost_Ir,
- CLG_(current_state).cost + CLG_(sets).off_full_Ir );
- inc_costs(DrRes, cost_Dr,
- CLG_(current_state).cost + CLG_(sets).off_full_Dr );
- inc_costs(DwRes, cost_Dw,
- CLG_(current_state).cost + CLG_(sets).off_full_Dw );
- }
-}
-
-VG_REGPARM(3)
-static void log_0I2D(InstrInfo* ii, Addr data1, Addr data2)
-{
- CacheModelResult DrRes, DwRes;
-
- current_ii = ii;
- DrRes = (*simulator.D1_Read)(data1, ii->data_size);
- DwRes = (*simulator.D1_Write)(data2, ii->data_size);
-
- CLG_DEBUG(6,
- "log_0D2D: Dr=%#lx/%u, Dw=%#lx/%u => Dr %d, Dw %d\n",
- data1, ii->data_size, data2, ii->data_size, DrRes, DwRes);
-
- if (CLG_(current_state).collect) {
- ULong *cost_Dr, *cost_Dw;
-
- if (CLG_(current_state).nonskipped) {
- cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr;
- cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw;
- }
- else {
- cost_Dr = cost_base + ii->cost_offset + off_D2_Dr;
- cost_Dw = cost_base + ii->cost_offset + off_D2_Dw;
- }
-
- inc_costs(DrRes, cost_Dr,
- CLG_(current_state).cost + CLG_(sets).off_full_Dr );
- inc_costs(DwRes, cost_Dw,
- CLG_(current_state).cost + CLG_(sets).off_full_Dw );
- }
-}
/*------------------------------------------------------------*/
@@ -1369,20 +1393,20 @@ static void cachesim_post_clo_init(void)
if (!CLG_(clo).simulate_cache) {
CLG_(cachesim).log_1I0D = 0;
CLG_(cachesim).log_1I0D_name = "(no function)";
+ CLG_(cachesim).log_2I0D = 0;
+ CLG_(cachesim).log_2I0D_name = "(no function)";
+ CLG_(cachesim).log_3I0D = 0;
+ CLG_(cachesim).log_3I0D_name = "(no function)";
CLG_(cachesim).log_1I1Dr = 0;
- CLG_(cachesim).log_1I1Dw = 0;
- CLG_(cachesim).log_1I2D = 0;
CLG_(cachesim).log_1I1Dr_name = "(no function)";
+ CLG_(cachesim).log_1I1Dw = 0;
CLG_(cachesim).log_1I1Dw_name = "(no function)";
- CLG_(cachesim).log_1I2D_name = "(no function)";
CLG_(cachesim).log_0I1Dr = 0;
- CLG_(cachesim).log_0I1Dw = 0;
- CLG_(cachesim).log_0I2D = 0;
CLG_(cachesim).log_0I1Dr_name = "(no function)";
+ CLG_(cachesim).log_0I1Dw = 0;
CLG_(cachesim).log_0I1Dw_name = "(no function)";
- CLG_(cachesim).log_0I2D_name = "(no function)";
return;
}
@@ -1402,20 +1426,20 @@ static void cachesim_post_clo_init(void)
CLG_(cachesim).log_1I0D = log_1I0D;
CLG_(cachesim).log_1I0D_name = "log_1I0D";
+ CLG_(cachesim).log_2I0D = log_2I0D;
+ CLG_(cachesim).log_2I0D_name = "log_2I0D";
+ CLG_(cachesim).log_3I0D = log_3I0D;
+ CLG_(cachesim).log_3I0D_name = "log_3I0D";
CLG_(cachesim).log_1I1Dr = log_1I1Dr;
CLG_(cachesim).log_1I1Dw = log_1I1Dw;
- CLG_(cachesim).log_1I2D = log_1I2D;
CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";
CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";
- CLG_(cachesim).log_1I2D_name = "log_1I2D";
CLG_(cachesim).log_0I1Dr = log_0I1Dr;
CLG_(cachesim).log_0I1Dw = log_0I1Dw;
- CLG_(cachesim).log_0I2D = log_0I2D;
CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";
CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";
- CLG_(cachesim).log_0I2D_name = "log_0I2D";
if (clo_collect_cacheuse) {
@@ -1763,26 +1787,29 @@ struct event_sets CLG_(sets);
void CLG_(init_eventsets)(Int max_user)
{
EventType * e1, *e2, *e3, *e4;
- EventSet *Ir, *Dr, *Dw;
- EventSet *D0, *D1r, *D1w, *D2;
- EventSet *sim, *full;
- EventSet *use;
+ // Basic event sets from which others are composed
+ EventSet *Use, *Ir, *Dr, *Dw;
+ // Compositions of basic sets used for per-instruction counters
+ EventSet *UIr, *UIrDr, *UIrDrDw, *UIrDw, *UIrDwDr;
+ // Composition used for global counters and aggregation
+ EventSet *full;
int sizeOfUseIr;
- use = CLG_(get_eventset)("Use", 4);
+ // the "Use" events types only are used with "cacheuse" simulation
+ Use = CLG_(get_eventset)("Use", 4);
if (clo_collect_cacheuse) {
/* if TUse is 0, there was never a load, and no loss, too */
e1 = CLG_(register_eventtype)("AcCost1");
- CLG_(add_eventtype)(use, e1);
+ CLG_(add_eventtype)(Use, e1);
e1 = CLG_(register_eventtype)("SpLoss1");
- CLG_(add_eventtype)(use, e1);
+ CLG_(add_eventtype)(Use, e1);
e1 = CLG_(register_eventtype)("AcCost2");
- CLG_(add_eventtype)(use, e1);
+ CLG_(add_eventtype)(Use, e1);
e1 = CLG_(register_eventtype)("SpLoss2");
- CLG_(add_eventtype)(use, e1);
+ CLG_(add_eventtype)(Use, e1);
}
- Ir = CLG_(get_eventset)("Ir", 4);
+ Ir = CLG_(get_eventset)("Ir", 4);
Dr = CLG_(get_eventset)("Dr", 4);
Dw = CLG_(get_eventset)("Dw", 4);
if (CLG_(clo).simulate_cache) {
@@ -1822,74 +1849,76 @@ void CLG_(init_eventsets)(Int max_user)
CLG_(add_eventtype)(Ir, e1);
}
- sizeOfUseIr = use->size + Ir->size;
- D0 = CLG_(get_eventset)("D0", sizeOfUseIr);
- CLG_(add_eventset)(D0, use);
- off_D0_Ir = CLG_(add_eventset)(D0, Ir);
-
- D1r = CLG_(get_eventset)("D1r", sizeOfUseIr + Dr->size);
- CLG_(add_eventset)(D1r, use);
- off_D1r_Ir = CLG_(add_eventset)(D1r, Ir);
- off_D1r_Dr = CLG_(add_eventset)(D1r, Dr);
-
- D1w = CLG_(get_eventset)("D1w", sizeOfUseIr + Dw->size);
- CLG_(add_eventset)(D1w, use);
- off_D1w_Ir = CLG_(add_eventset)(D1w, Ir);
- off_D1w_Dw = CLG_(add_eventset)(D1w, Dw);
-
- D2 = CLG_(get_eventset)("D2", sizeOfUseIr + Dr->size + Dw->size);
- CLG_(add_eventset)(D2, use);
- off_D2_Ir = CLG_(add_eventset)(D2, Ir);
- off_D2_Dr = CLG_(add_eventset)(D2, Dr);
- off_D2_Dw = CLG_(add_eventset)(D2, Dw);
-
- sim = CLG_(get_eventset)("sim", sizeOfUseIr + Dr->size + Dw->size);
- CLG_(add_eventset)(sim, use);
- CLG_(sets).off_sim_Ir = CLG_(add_eventset)(sim, Ir);
- CLG_(sets).off_sim_Dr = CLG_(add_eventset)(sim, Dr);
- CLG_(sets).off_sim_Dw = CLG_(add_eventset)(sim, Dw);
+ // Self-cost event sets per guest instruction (U is only used for cacheuse).
+ // Each basic event set appears only once, as e.g. multiple different Dr's
+ // in one guest instruction are counted in the same counter.
- if (CLG_(clo).collect_alloc) max_user += 2;
- if (CLG_(clo).collect_systime) max_user += 2;
+ sizeOfUseIr = Use->size + Ir->size;
+ UIr = CLG_(get_eventset)("UIr", sizeOfUseIr);
+ CLG_(add_eventset)(UIr, Use);
+ off_UIr_Ir = CLG_(add_eventset)(UIr, Ir);
- full = CLG_(get_eventset)("full", sim->size + max_user);
- CLG_(add_eventset)(full, sim);
- CLG_(sets).off_full_Ir = CLG_(sets).off_sim_Ir;
- CLG_(sets).off_full_Dr = CLG_(sets).off_sim_Dr;
- CLG_(sets).off_full_Dw = CLG_(sets).off_sim_Dw;
+ UIrDr = CLG_(get_eventset)("UIrDr", sizeOfUseIr + Dr->size);
+ CLG_(add_eventset)(UIrDr, Use);
+ off_UIrDr_Ir = CLG_(add_eventset)(UIrDr, Ir);
+ off_UIrDr_Dr = CLG_(add_eventset)(UIrDr, Dr);
- CLG_(sets).use = use;
- CLG_(sets).Ir = Ir;
- CLG_(sets).Dr = Dr;
- CLG_(sets).Dw = Dw;
+ UIrDrDw = CLG_(get_eventset)("UIrDrDw", sizeOfUseIr + Dr->size + Dw->size);
+ CLG_(add_eventset)(UIrDrDw, Use);
+ off_UIrDrDw_Ir = CLG_(add_eventset)(UIrDrDw, Ir);
+ off_UIrDrDw_Dr = CLG_(add_eventset)(UIrDrDw, Dr);
+ off_UIrDrDw_Dw = CLG_(add_eventset)(UIrDrDw, Dw);
- CLG_(sets).D0 = D0;
- CLG_(sets).D1r = D1r;
- CLG_(sets).D1w = D1w;
- CLG_(sets).D2 = D2;
+ UIrDw = CLG_(get_eventset)("UIrDw", sizeOfUseIr + Dw->size);
+ CLG_(add_eventset)(UIrDw, Use);
+ off_UIrDw_Ir = CLG_(add_eventset)(UIrDw, Ir);
+ off_UIrDw_Dw = CLG_(add_eventset)(UIrDw, Dw);
+
+ UIrDwDr = CLG_(get_eventset)("UIrDwDr", sizeOfUseIr + Dw->size + Dr->size);
+ CLG_(add_eventset)(UIrDwDr, Use);
+ off_UIrDwDr_Ir = CLG_(add_eventset)(UIrDwDr, Ir);
+ off_UIrDwDr_Dw = CLG_(add_eventset)(UIrDwDr, Dw);
+ off_UIrDwDr_Dr = CLG_(add_eventset)(UIrDwDr, Dr);
- CLG_(sets).sim = sim;
- CLG_(sets).full = full;
+ // the "full" event set is used as global counter and for aggregation
+ if (CLG_(clo).collect_alloc) max_user += 2;
+ if (CLG_(clo).collect_systime) max_user += 2;
+ full = CLG_(get_eventset)("full",
+ sizeOfUseIr + Dr->size + Dw->size + max_user);
+ CLG_(add_eventset)(full, Use);
+ CLG_(sets).off_full_Ir = CLG_(add_eventset)(full, Ir);
+ CLG_(sets).off_full_Dr = CLG_(add_eventset)(full, Dr);
+ CLG_(sets).off_full_Dw = CLG_(add_eventset)(full, Dw);
if (CLG_(clo).collect_alloc) {
- e1 = CLG_(register_eventtype)("allocCount");
- e2 = CLG_(register_eventtype)("allocSize");
- CLG_(sets).off_full_user = CLG_(add_dep_event2)(full, e1,e2);
+ e1 = CLG_(register_eventtype)("allocCount");
+ e2 = CLG_(register_eventtype)("allocSize");
+ CLG_(sets).off_full_alloc = CLG_(add_dep_event2)(full, e1,e2);
}
-
if (CLG_(clo).collect_systime) {
- e1 = CLG_(register_eventtype)("sysCount");
- e2 = CLG_(register_eventtype)("sysTime");
- CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2);
+ e1 = CLG_(register_eventtype)("sysCount");
+ e2 = CLG_(register_eventtype)("sysTime");
+ CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2);
}
+ CLG_(sets).Use = Use;
+ CLG_(sets).Ir = Ir;
+ CLG_(sets).Dr = Dr;
+ CLG_(sets).Dw = Dw;
+ CLG_(sets).UIr = UIr;
+ CLG_(sets).UIrDr = UIrDr;
+ CLG_(sets).UIrDrDw = UIrDrDw;
+ CLG_(sets).UIrDw = UIrDw;
+ CLG_(sets).UIrDwDr = UIrDwDr;
+ CLG_(sets).full = full;
+
+
CLG_DEBUGIF(1) {
CLG_DEBUG(1, "EventSets:\n");
- CLG_(print_eventset)(-2, use);
+ CLG_(print_eventset)(-2, Use);
CLG_(print_eventset)(-2, Ir);
CLG_(print_eventset)(-2, Dr);
CLG_(print_eventset)(-2, Dw);
- CLG_(print_eventset)(-2, sim);
CLG_(print_eventset)(-2, full);
}
@@ -1924,34 +1953,41 @@ static
void add_and_zero_Dx(EventSet* es, SimCost dst, ULong* cost)
{
/* if eventset use is defined, it is always first (hardcoded!) */
- CLG_(add_and_zero_cost)( CLG_(sets).use, dst, cost);
+ CLG_(add_and_zero_cost)( CLG_(sets).Use, dst, cost);
- /* FIXME: This is hardcoded... */
- if (es == CLG_(sets).D0) {
- CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
- cost + off_D0_Ir);
+ if (es == CLG_(sets).UIr) {
+ CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
+ cost + off_UIr_Ir);
}
- else if (es == CLG_(sets).D1r) {
- CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
- cost + off_D1r_Ir);
- CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr,
- cost + off_D1r_Dr);
+ else if (es == CLG_(sets).UIrDr) {
+ CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
+ cost + off_UIrDr_Ir);
+ CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr,
+ cost + off_UIrDr_Dr);
}
- else if (es == CLG_(sets).D1w) {
- CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
- cost + off_D1w_Ir);
- CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw,
- cost + off_D1w_Dw);
+ else if (es == CLG_(sets).UIrDrDw) {
+ CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
+ cost + off_UIrDrDw_Ir);
+ CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr,
+ cost + off_UIrDrDw_Dr);
+ CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw,
+ cost + off_UIrDrDw_Dw);
}
- else {
- CLG_ASSERT(es == CLG_(sets).D2);
- CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,
- cost + off_D2_Ir);
- CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr,
- cost + off_D2_Dr);
- CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw,
- cost + off_D2_Dw);
+ else if (es == CLG_(sets).UIrDw) {
+ CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
+ cost + off_UIrDw_Ir);
+ CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw,
+ cost + off_UIrDw_Dw);
+ }
+ else if (es == CLG_(sets).UIrDwDr) {
+ CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir,
+ cost + off_UIrDwDr_Ir);
+ CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw,
+ cost + off_UIrDwDr_Dw);
+ CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr,
+ cost + off_UIrDwDr_Dr);
}
+ else CLG_ASSERT(0);
}
/* this is called at dump time for every instruction executed */
@@ -1959,7 +1995,7 @@ static void cachesim_add_icost(SimCost cost, BBCC* bbcc,
InstrInfo* ii, ULong exe_count)
{
if (!CLG_(clo).simulate_cache)
- cost[CLG_(sets).off_sim_Ir] += exe_count;
+ cost[CLG_(sets).off_full_Ir] += exe_count;
else {
#if 0
@@ -2019,24 +2055,24 @@ struct cachesim_if CLG_(cachesim) = {
/* these will be set by cachesim_post_clo_init */
.log_1I0D = 0,
+ .log_2I0D = 0,
+ .log_3I0D = 0,
.log_1I1Dr = 0,
.log_1I1Dw = 0,
- .log_1I2D = 0,
.log_0I1Dr = 0,
.log_0I1Dw = 0,
- .log_0I2D = 0,
.log_1I0D_name = "(no function)",
+ .log_2I0D_name = "(no function)",
+ .log_3I0D_name = "(no function)",
.log_1I1Dr_name = "(no function)",
.log_1I1Dw_name = "(no function)",
- .log_1I2D_name = "(no function)",
.log_0I1Dr_name = "(no function)",
.log_0I1Dw_name = "(no function)",
- .log_0I2D_name = "(no function)"
};
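For illustration only: how update_cost_offsets (main.c above) lays out a BB's cost array from the per-instruction event sets built here. The BB and the per-set counter counts are hypothetical; real sizes depend on the simulation options:

#include <stdio.h>

int main(void)
{
    /* invented 3-instruction BB: insn 0 only fetches (UIr), insn 1
       also loads (UIrDr), insn 2 loads and stores (UIrDrDw) */
    const char* es_name[] = { "UIr", "UIrDr", "UIrDrDw" };
    int es_size[] = { 1, 2, 3 };  /* assumed counters per event set */
    int offset = 0;
    for (int i = 0; i < 3; i++) {
        printf("insn %d: eventset %-7s cost_offset %d\n",
               i, es_name[i], offset);
        offset += es_size[i];
    }
    printf("bb->cost_count = %d\n", offset);
    return 0;
}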