author     weidendo <weidendo@a5019735-40e9-0310-863c-91ae7b9d1cf9>   2009-06-15 00:16:36 +0000
committer  weidendo <weidendo@a5019735-40e9-0310-863c-91ae7b9d1cf9>   2009-06-15 00:16:36 +0000
commit     0a1951d64df79f98f885426671fc9d3982647a6b (patch)
tree       f9a44bf8c444e9da77016303bdbf1a3ee1ab61dd
parent     0b23d6eb63a4146dfa17304a2e76ce91f4d5e001 (diff)
download   valgrind-0a1951d64df79f98f885426671fc9d3982647a6b.tar.gz
Callgrind: fix instrumentation for arbitrary events per guest instruction
(should fix bug 169505)
This uses the same event queue scheme as Cachegrind and Lackey, and the
same kind of helpers (1/2/3 Ir events, Ir+Dr, Dr, Ir+Dw, Dw).
Note that in contrast to Cachegrind, Callgrind interprets a modify event
as Dw (otherwise the cache model generating write-back events would not work).
Callgrind uses per-(guest)instruction event sets for cost counters.
A per-instruction event set is incrementally extended as events for the
same guest instruction are flushed. Event sets always start with Ir counters;
depending on the order of the Dr/Dw events that follow, the sets IrDr(Dw)
and IrDw(Dr) exist.
Per-instruction event sets are now consistently named according to event
ordering. The event set "sim" was a subset of "full"; it was never used
and has been removed.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@10321 a5019735-40e9-0310-863c-91ae7b9d1cf9
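
To make the per-instruction event-set extension rule above concrete, here is a
minimal, self-contained C sketch. The enum values mirror the
UIr/UIrDr/UIrDw/UIrDrDw/UIrDwDr sets introduced by the patch, but the extend()
helper and its types are hypothetical stand-ins for illustration, not
Callgrind API:

    #include <assert.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for Callgrind's per-instruction event sets
     * (CLG_(sets).UIr etc.); names follow the patch, logic is a sketch. */
    typedef enum { ES_UIr, ES_UIrDr, ES_UIrDw, ES_UIrDrDw, ES_UIrDwDr } ESet;

    /* Extend an instruction's event set when a Dr or Dw/Dm event for it
     * is flushed. Sets always start with Ir; the first data event decides
     * between UIrDr and UIrDw, a second of the other kind then yields
     * UIrDrDw or UIrDwDr. */
    static ESet extend(ESet es, int is_write)
    {
        if (is_write) {
            switch (es) {
            case ES_UIr:   return ES_UIrDw;    /* first data event: write */
            case ES_UIrDr: return ES_UIrDrDw;  /* read seen first, add Dw */
            default:       return es;          /* Dw counter already there */
            }
        } else {
            switch (es) {
            case ES_UIr:   return ES_UIrDr;    /* first data event: read  */
            case ES_UIrDw: return ES_UIrDwDr;  /* write seen first, add Dr */
            default:       return es;          /* Dr counter already there */
            }
        }
    }

    int main(void)
    {
        /* A load-op-store insn: Ir, then Dr, then Dw => UIrDrDw. */
        ESet es = ES_UIr;
        es = extend(es, 0);
        es = extend(es, 1);
        assert(es == ES_UIrDrDw);
        printf("event set: UIrDrDw\n");
        return 0;
    }

A load-op-store instruction thus ends up with UIrDrDw, while an instruction
whose first data event is a write gets UIrDw (or UIrDwDr if a read follows).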
-rw-r--r--  callgrind/bbcc.c   |    2
-rw-r--r--  callgrind/debug.c  |    6
-rw-r--r--  callgrind/global.h |   29
-rw-r--r--  callgrind/main.c   | 1090
-rw-r--r--  callgrind/sim.c    |  472
5 files changed, 933 insertions, 666 deletions
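
The main.c rework below queues memory events per superblock and merges a Dw
that immediately follows a Dr of the same instruction, size, and address into
a single Dm (modify) event, so a load-op-store instruction counts as one
memory reference. A minimal sketch of that merge check, with simplified
stand-in types (the real code compares IR address expressions via eqIRAtom()):

    #include <stdbool.h>

    /* Simplified stand-ins for Callgrind's Event queue entries. */
    typedef enum { Ev_Ir, Ev_Dr, Ev_Dw, Ev_Dm } EventTag;

    typedef struct {
        EventTag      tag;
        const void*   inode;  /* InstrInfo of the originating insn      */
        unsigned long ea;     /* effective address (stands in for IRAtom*) */
        int           szB;    /* access size in bytes                   */
    } Event;

    /* On a new write: if the most recent queued event is a read by the
     * same instruction, with the same size and address, turn it into a
     * modify instead of appending a second event (mirrors addEvent_Dw). */
    static bool try_merge_write(Event* q, int used,
                                const void* inode, unsigned long ea, int szB)
    {
        if (used > 0) {
            Event* last = &q[used - 1];
            if (last->tag == Ev_Dr && last->inode == inode
                && last->szB == szB && last->ea == ea) {
                last->tag = Ev_Dm;  /* read + write at same EA => modify */
                return true;
            }
        }
        return false;               /* caller appends a normal Ev_Dw */
    }

    int main(void)
    {
        Event q[4] = { { Ev_Ir, (void*)1, 0, 0 },
                       { Ev_Dr, (void*)1, 0x1000, 4 } };
        int used = 2;
        /* write by the same insn to the same 4-byte location: merged */
        if (!try_merge_write(q, used, (void*)1, 0x1000, 4)) used++;
        return q[1].tag == Ev_Dm ? 0 : 1;
    }

If the check fails, the caller appends an ordinary Ev_Dw entry, flushing the
queue first when it is full.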
diff --git a/callgrind/bbcc.c b/callgrind/bbcc.c index dfe737b2f..7917c2526 100644 --- a/callgrind/bbcc.c +++ b/callgrind/bbcc.c @@ -601,7 +601,7 @@ void CLG_(setup_bbcc)(BB* bb) if (!CLG_(clo).simulate_cache) { /* update Ir cost */ int instr_count = last_bb->jmp[passed].instr+1; - CLG_(current_state).cost[CLG_(sets).off_sim_Ir] += instr_count; + CLG_(current_state).cost[CLG_(sets).off_full_Ir] += instr_count; } } diff --git a/callgrind/debug.c b/callgrind/debug.c index f04bab414..2ac38a297 100644 --- a/callgrind/debug.c +++ b/callgrind/debug.c @@ -217,9 +217,9 @@ void CLG_(print_short_jcc)(jCC* jcc) bb_jmpaddr(jcc->from->bb), bb_addr(jcc->to->bb), jcc->call_counter, - jcc->cost ? jcc->cost[CLG_(sets).off_sim_Ir]:0, - jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dr]:0, - jcc->cost ? jcc->cost[CLG_(sets).off_sim_Dw]:0); + jcc->cost ? jcc->cost[CLG_(sets).off_full_Ir]:0, + jcc->cost ? jcc->cost[CLG_(sets).off_full_Dr]:0, + jcc->cost ? jcc->cost[CLG_(sets).off_full_Dw]:0); else VG_(printf)("[Skipped JCC]"); } diff --git a/callgrind/global.h b/callgrind/global.h index 461218a8c..367f2d7d3 100644 --- a/callgrind/global.h +++ b/callgrind/global.h @@ -270,7 +270,6 @@ typedef struct _InstrInfo InstrInfo; struct _InstrInfo { UInt instr_offset; UInt instr_size; - UInt data_size; UInt cost_offset; EventSet* eventset; }; @@ -657,19 +656,19 @@ struct cachesim_if void (*finish)(void); void (*log_1I0D)(InstrInfo*) VG_REGPARM(1); + void (*log_2I0D)(InstrInfo*, InstrInfo*) VG_REGPARM(2); + void (*log_3I0D)(InstrInfo*, InstrInfo*, InstrInfo*) VG_REGPARM(3); - void (*log_1I1Dr)(InstrInfo*, Addr) VG_REGPARM(2); - void (*log_1I1Dw)(InstrInfo*, Addr) VG_REGPARM(2); - void (*log_1I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3); + void (*log_1I1Dr)(InstrInfo*, Addr, Word) VG_REGPARM(3); + void (*log_1I1Dw)(InstrInfo*, Addr, Word) VG_REGPARM(3); - void (*log_0I1Dr)(InstrInfo*, Addr) VG_REGPARM(2); - void (*log_0I1Dw)(InstrInfo*, Addr) VG_REGPARM(2); - void (*log_0I2D)(InstrInfo*, Addr, Addr) VG_REGPARM(3); + void (*log_0I1Dr)(InstrInfo*, Addr, Word) VG_REGPARM(3); + void (*log_0I1Dw)(InstrInfo*, Addr, Word) VG_REGPARM(3); // function names of helpers (for debugging generated code) - Char *log_1I0D_name; - Char *log_1I1Dr_name, *log_1I1Dw_name, *log_1I2D_name; - Char *log_0I1Dr_name, *log_0I1Dw_name, *log_0I2D_name; + Char *log_1I0D_name, *log_2I0D_name, *log_3I0D_name; + Char *log_1I1Dr_name, *log_1I1Dw_name; + Char *log_0I1Dr_name, *log_0I1Dw_name; }; @@ -687,15 +686,13 @@ void CLG_(print_debug_usage)(void); /* from sim.c */ struct event_sets { - EventSet *use, *Ir, *Dr, *Dw; - EventSet *D0, *D1r, *D1w, *D2; - EventSet *sim; - EventSet *full; /* sim plus user events */ + EventSet *Use, *Ir, *Dr, *Dw; + EventSet *UIr, *UIrDr, *UIrDrDw, *UIrDw, *UIrDwDr; + EventSet *full; /* offsets into eventsets */ - Int off_sim_Ir, off_sim_Dr, off_sim_Dw; Int off_full_Ir, off_full_Dr, off_full_Dw; - Int off_full_user, off_full_alloc, off_full_systime; + Int off_full_alloc, off_full_systime; }; extern struct event_sets CLG_(sets); diff --git a/callgrind/main.c b/callgrind/main.c index 68d13814d..f2d125037 100644 --- a/callgrind/main.c +++ b/callgrind/main.c @@ -94,224 +94,490 @@ static void CLG_(init_statistics)(Statistics* s) } - - /*------------------------------------------------------------*/ -/*--- Cache simulation instrumentation phase ---*/ +/*--- Instrumentation structures and event queue handling ---*/ /*------------------------------------------------------------*/ +/* Maintain an ordered list of memory events which are 
outstanding, in + the sense that no IR has yet been generated to do the relevant + helper calls. The BB is scanned top to bottom and memory events + are added to the end of the list, merging with the most recent + notified event where possible (Dw immediately following Dr and + having the same size and EA can be merged). + + This merging is done so that for architectures which have + load-op-store instructions (x86, amd64), the insn is treated as if + it makes just one memory reference (a modify), rather than two (a + read followed by a write at the same address). + + At various points the list will need to be flushed, that is, IR + generated from it. That must happen before any possible exit from + the block (the end, or an IRStmt_Exit). Flushing also takes place + when there is no space to add a new event. + + If we require the simulation statistics to be up to date with + respect to possible memory exceptions, then the list would have to + be flushed before each memory reference. That would however lose + performance by inhibiting event-merging during flushing. + + Flushing the list consists of walking it start to end and emitting + instrumentation IR for each event, in the order in which they + appear. It may be possible to emit a single call for two adjacent + events in order to reduce the number of helper function calls made. + For example, it could well be profitable to handle two adjacent Ir + events with a single helper call. */ + +typedef + IRExpr + IRAtom; + +typedef + enum { + Ev_Ir, // Instruction read + Ev_Dr, // Data read + Ev_Dw, // Data write + Ev_Dm, // Data modify (read then write) + } + EventTag; + +typedef + struct { + EventTag tag; + InstrInfo* inode; + union { + struct { + } Ir; + struct { + IRAtom* ea; + Int szB; + } Dr; + struct { + IRAtom* ea; + Int szB; + } Dw; + struct { + IRAtom* ea; + Int szB; + } Dm; + } Ev; + } + Event; + +static void init_Event ( Event* ev ) { + VG_(memset)(ev, 0, sizeof(Event)); +} + +static IRAtom* get_Event_dea ( Event* ev ) { + switch (ev->tag) { + case Ev_Dr: return ev->Ev.Dr.ea; + case Ev_Dw: return ev->Ev.Dw.ea; + case Ev_Dm: return ev->Ev.Dm.ea; + default: tl_assert(0); + } +} + +static Int get_Event_dszB ( Event* ev ) { + switch (ev->tag) { + case Ev_Dr: return ev->Ev.Dr.szB; + case Ev_Dw: return ev->Ev.Dw.szB; + case Ev_Dm: return ev->Ev.Dm.szB; + default: tl_assert(0); + } +} + + +/* Up to this many unnotified events are allowed. Number is + arbitrary. Larger numbers allow more event merging to occur, but + potentially induce more spilling due to extending live ranges of + address temporaries. */ +#define N_EVENTS 16 + + +/* A struct which holds all the running state during instrumentation. + Mostly to avoid passing loads of parameters everywhere. */ +typedef struct { + /* The current outstanding-memory-event list. */ + Event events[N_EVENTS]; + Int events_used; + + /* The array of InstrInfo's is part of BB struct. */ + BB* bb; -static Bool loadStoreAddrsMatch(IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) + /* BB seen before (ie. re-instrumentation) */ + Bool seen_before; + + /* Number InstrInfo bins 'used' so far. */ + UInt ii_index; + + // current offset of guest instructions from BB start + UInt instr_offset; + + /* The output SB being constructed. */ + IRSB* sbOut; +} ClgState; + + +static void showEvent ( Event* ev ) { - // I'm assuming that for 'modify' instructions, that Vex always makes - // the loadAddrExpr and storeAddrExpr be of the same type, ie. both Tmp - // expressions, or both Const expressions. 
- CLG_ASSERT(isIRAtom(loadAddrExpr)); - CLG_ASSERT(isIRAtom(storeAddrExpr)); - return eqIRAtom(loadAddrExpr, storeAddrExpr); + switch (ev->tag) { + case Ev_Ir: + VG_(printf)("Ir (InstrInfo %p) at +%d\n", + ev->inode, ev->inode->instr_offset); + break; + case Ev_Dr: + VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=", + ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB); + ppIRExpr(ev->Ev.Dr.ea); + VG_(printf)("\n"); + break; + case Ev_Dw: + VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=", + ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB); + ppIRExpr(ev->Ev.Dw.ea); + VG_(printf)("\n"); + break; + case Ev_Dm: + VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=", + ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB); + ppIRExpr(ev->Ev.Dm.ea); + VG_(printf)("\n"); + break; + default: + tl_assert(0); + break; + } } -static -EventSet* insert_simcall(IRSB* bbOut, InstrInfo* ii, UInt dataSize, - Bool instrIssued, - IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) +/* Generate code for all outstanding memory events, and mark the queue + empty. Code is generated into cgs->sbOut, and this activity + 'consumes' slots in cgs->bb. */ + +static void flushEvents ( ClgState* clgs ) { - HChar* helperName; - void* helperAddr; - Int argc; - EventSet* es; - IRExpr *arg1, *arg2 = 0, *arg3 = 0, **argv; - IRDirty* di; - - /* Check type of original instruction regarding memory access, - * and collect info to be able to generate fitting helper call - */ - if (!loadAddrExpr && !storeAddrExpr) { - // no load/store - CLG_ASSERT(0 == dataSize); - if (instrIssued) { - helperName = 0; - helperAddr = 0; - } - else { - helperName = CLG_(cachesim).log_1I0D_name; - helperAddr = CLG_(cachesim).log_1I0D; - } - argc = 1; - es = CLG_(sets).D0; - - } else if (loadAddrExpr && !storeAddrExpr) { - // load - CLG_ASSERT( isIRAtom(loadAddrExpr) ); - if (instrIssued) { - helperName = CLG_(cachesim).log_0I1Dr_name; - helperAddr = CLG_(cachesim).log_0I1Dr; - } - else { - helperName = CLG_(cachesim).log_1I1Dr_name; - helperAddr = CLG_(cachesim).log_1I1Dr; - } - argc = 2; - arg2 = loadAddrExpr; - es = CLG_(sets).D1r; - - } else if (!loadAddrExpr && storeAddrExpr) { - // store - CLG_ASSERT( isIRAtom(storeAddrExpr) ); - if (instrIssued) { - helperName = CLG_(cachesim).log_0I1Dw_name; - helperAddr = CLG_(cachesim).log_0I1Dw; - } - else { - helperName = CLG_(cachesim).log_1I1Dw_name; - helperAddr = CLG_(cachesim).log_1I1Dw; - } - argc = 2; - arg2 = storeAddrExpr; - es = CLG_(sets).D1w; - - } else { - CLG_ASSERT( loadAddrExpr && storeAddrExpr ); - CLG_ASSERT( isIRAtom(loadAddrExpr) ); - CLG_ASSERT( isIRAtom(storeAddrExpr) ); - - if ( loadStoreAddrsMatch(loadAddrExpr, storeAddrExpr) ) { - /* modify: suppose write access, as this is - * more resource consuming (as in callgrind for VG2) - * Cachegrind does a read here (!) - * DISCUSS: Best way depends on simulation model? 
- */ - if (instrIssued) { - helperName = CLG_(cachesim).log_0I1Dw_name; - helperAddr = CLG_(cachesim).log_0I1Dw; + Int i, regparms, inew; + Char* helperName; + void* helperAddr; + IRExpr** argv; + IRExpr* i_node_expr; + IRDirty* di; + Event* ev; + Event* ev2; + Event* ev3; + + if (!clgs->seen_before) { + // extend event sets as needed + // available sets: D0 Dr + for(i=0; i<clgs->events_used; i++) { + ev = &clgs->events[i]; + switch(ev->tag) { + case Ev_Ir: + // Ir event always is first for a guest instruction + CLG_ASSERT(ev->inode->eventset == 0); + ev->inode->eventset = CLG_(sets).UIr; + break; + case Ev_Dr: + // extend event set by Dr counter + if ((ev->inode->eventset == CLG_(sets).UIrDr) || + (ev->inode->eventset == CLG_(sets).UIrDrDw) || + (ev->inode->eventset == CLG_(sets).UIrDwDr)) + break; + if (ev->inode->eventset == CLG_(sets).UIrDw) { + ev->inode->eventset = CLG_(sets).UIrDwDr; + break; + } + CLG_ASSERT(ev->inode->eventset == CLG_(sets).UIr); + ev->inode->eventset = CLG_(sets).UIrDr; + break; + case Ev_Dw: + case Ev_Dm: + // extend event set by Dw counter + if ((ev->inode->eventset == CLG_(sets).UIrDw) || + (ev->inode->eventset == CLG_(sets).UIrDwDr) || + (ev->inode->eventset == CLG_(sets).UIrDrDw)) + break; + if (ev->inode->eventset == CLG_(sets).UIrDr) { + ev->inode->eventset = CLG_(sets).UIrDrDw; + break; + } + CLG_ASSERT(ev->inode->eventset == CLG_(sets).UIr); + ev->inode->eventset = CLG_(sets).UIrDw; + break; + default: + tl_assert(0); + } + } + } + + for(i = 0; i < clgs->events_used; i = inew) { + + helperName = NULL; + helperAddr = NULL; + argv = NULL; + regparms = 0; + + /* generate IR to notify event i and possibly the ones + immediately following it. */ + tl_assert(i >= 0 && i < clgs->events_used); + + ev = &clgs->events[i]; + ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL ); + ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL ); + + CLG_DEBUGIF(5) { + VG_(printf)(" flush "); + showEvent( ev ); + } + + i_node_expr = mkIRExpr_HWord( (HWord)ev->inode ); + + /* Decide on helper fn to call and args to pass it, and advance + i appropriately. + Dm events have same effect as Dw events */ + switch (ev->tag) { + case Ev_Ir: + /* Merge an Ir with a following Dr. */ + if (ev2 && ev2->tag == Ev_Dr) { + /* Why is this true? It's because we're merging an Ir + with a following Dr. The Ir derives from the + instruction's IMark and the Dr from data + references which follow it. In short it holds + because each insn starts with an IMark, hence an + Ev_Ir, and so these Dr must pertain to the + immediately preceding Ir. Same applies to analogous + assertions in the subsequent cases. */ + tl_assert(ev2->inode == ev->inode); + helperName = CLG_(cachesim).log_1I1Dr_name; + helperAddr = CLG_(cachesim).log_1I1Dr; + argv = mkIRExprVec_3( i_node_expr, + get_Event_dea(ev2), + mkIRExpr_HWord( get_Event_dszB(ev2) ) ); + regparms = 3; + inew = i+2; } - else { - helperName = CLG_(cachesim).log_1I1Dw_name; - helperAddr = CLG_(cachesim).log_1I1Dw; + /* Merge an Ir with a following Dw/Dm. 
*/ + else + if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) { + tl_assert(ev2->inode == ev->inode); + helperName = CLG_(cachesim).log_1I1Dw_name; + helperAddr = CLG_(cachesim).log_1I1Dw; + argv = mkIRExprVec_3( i_node_expr, + get_Event_dea(ev2), + mkIRExpr_HWord( get_Event_dszB(ev2) ) ); + regparms = 3; + inew = i+2; } - argc = 2; - arg2 = storeAddrExpr; - es = CLG_(sets).D1w; - - } else { - // load/store - if (instrIssued) { - helperName = CLG_(cachesim).log_0I2D_name; - helperAddr = CLG_(cachesim).log_0I2D; + /* Merge an Ir with two following Irs. */ + else + if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) { + helperName = CLG_(cachesim).log_3I0D_name; + helperAddr = CLG_(cachesim).log_3I0D; + argv = mkIRExprVec_3( i_node_expr, + mkIRExpr_HWord( (HWord)ev2->inode ), + mkIRExpr_HWord( (HWord)ev3->inode ) ); + regparms = 3; + inew = i+3; } + /* Merge an Ir with one following Ir. */ + else + if (ev2 && ev2->tag == Ev_Ir) { + helperName = CLG_(cachesim).log_2I0D_name; + helperAddr = CLG_(cachesim).log_2I0D; + argv = mkIRExprVec_2( i_node_expr, + mkIRExpr_HWord( (HWord)ev2->inode ) ); + regparms = 2; + inew = i+2; + } + /* No merging possible; emit as-is. */ else { - helperName = CLG_(cachesim).log_1I2D_name; - helperAddr = CLG_(cachesim).log_1I2D; + helperName = CLG_(cachesim).log_1I0D_name; + helperAddr = CLG_(cachesim).log_1I0D; + argv = mkIRExprVec_1( i_node_expr ); + regparms = 1; + inew = i+1; } - argc = 3; - arg2 = loadAddrExpr; - arg3 = storeAddrExpr; - es = CLG_(sets).D2; - } - } + break; + case Ev_Dr: + /* Data read or modify */ + helperName = CLG_(cachesim).log_0I1Dr_name; + helperAddr = CLG_(cachesim).log_0I1Dr; + argv = mkIRExprVec_3( i_node_expr, + get_Event_dea(ev), + mkIRExpr_HWord( get_Event_dszB(ev) ) ); + regparms = 3; + inew = i+1; + break; + case Ev_Dw: + case Ev_Dm: + /* Data write */ + helperName = CLG_(cachesim).log_0I1Dw_name; + helperAddr = CLG_(cachesim).log_0I1Dw; + argv = mkIRExprVec_3( i_node_expr, + get_Event_dea(ev), + mkIRExpr_HWord( get_Event_dszB(ev) ) ); + regparms = 3; + inew = i+1; + break; + default: + tl_assert(0); + } - /* helper could be unset depending on the simulator used */ - if (helperAddr == 0) return 0; - - /* Setup 1st arg: InstrInfo */ - arg1 = mkIRExpr_HWord( (HWord)ii ); - - // Add call to the instrumentation function - if (argc == 1) - argv = mkIRExprVec_1(arg1); - else if (argc == 2) - argv = mkIRExprVec_2(arg1, arg2); - else if (argc == 3) - argv = mkIRExprVec_3(arg1, arg2, arg3); - else - VG_(tool_panic)("argc... not 1 or 2 or 3?"); - - di = unsafeIRDirty_0_N( argc, helperName, - VG_(fnptr_to_fnentry)( helperAddr ), argv); - addStmtToIRSB( bbOut, IRStmt_Dirty(di) ); + CLG_DEBUGIF(5) { + if (inew > i+1) { + VG_(printf)(" merge "); + showEvent( ev2 ); + } + if (inew > i+2) { + VG_(printf)(" merge "); + showEvent( ev3 ); + } + if (helperAddr) + VG_(printf)(" call %s (%p)\n", + helperName, helperAddr); + } + + /* helper could be unset depending on the simulator used */ + if (helperAddr == 0) continue; + + /* Add the helper. 
*/ + tl_assert(helperName); + tl_assert(helperAddr); + tl_assert(argv); + di = unsafeIRDirty_0_N( regparms, + helperName, VG_(fnptr_to_fnentry)( helperAddr ), + argv ); + addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); + } - return es; + clgs->events_used = 0; } +static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode ) +{ + Event* evt; + tl_assert(clgs->seen_before || (inode->eventset == 0)); + if (!CLG_(clo).simulate_cache) return; + + if (clgs->events_used == N_EVENTS) + flushEvents(clgs); + tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); + evt = &clgs->events[clgs->events_used]; + init_Event(evt); + evt->tag = Ev_Ir; + evt->inode = inode; + clgs->events_used++; +} + +static +void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) +{ + Event* evt; + tl_assert(isIRAtom(ea)); + tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE); + if (!CLG_(clo).simulate_cache) return; + + if (clgs->events_used == N_EVENTS) + flushEvents(clgs); + tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); + evt = &clgs->events[clgs->events_used]; + init_Event(evt); + evt->tag = Ev_Dr; + evt->inode = inode; + evt->Ev.Dr.szB = datasize; + evt->Ev.Dr.ea = ea; + clgs->events_used++; +} -/* Instrumentation before a conditional jump or at the end - * of each original instruction. - * Fills the InstrInfo struct if not seen before - */ static -void endOfInstr(IRSB* bbOut, InstrInfo* ii, Bool bb_seen_before, - UInt instr_offset, UInt instrLen, UInt dataSize, - UInt* cost_offset, Bool instrIssued, - IRExpr* loadAddrExpr, IRExpr* storeAddrExpr) +void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea ) { - IRType wordTy; - EventSet* es; - - // Stay sane ... - CLG_ASSERT(sizeof(HWord) == sizeof(void*)); - if (sizeof(HWord) == 4) { - wordTy = Ity_I32; - } else - if (sizeof(HWord) == 8) { - wordTy = Ity_I64; - } else { - VG_(tool_panic)("endOfInstr: strange word size"); + Event* lastEvt; + Event* evt; + tl_assert(isIRAtom(ea)); + tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE); + if (!CLG_(clo).simulate_cache) return; + + /* Is it possible to merge this write with the preceding read? */ + lastEvt = &clgs->events[clgs->events_used-1]; + if (clgs->events_used > 0 + && lastEvt->tag == Ev_Dr + && lastEvt->Ev.Dr.szB == datasize + && lastEvt->inode == inode + && eqIRAtom(lastEvt->Ev.Dr.ea, ea)) + { + lastEvt->tag = Ev_Dm; + return; } - if (loadAddrExpr) - CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, loadAddrExpr)); - if (storeAddrExpr) - CLG_ASSERT(wordTy == typeOfIRExpr(bbOut->tyenv, storeAddrExpr)); - - // Large (eg. 28B, 108B, 512B on x86) data-sized instructions will be - // done inaccurately, but they're very rare and this avoids errors from - // hitting more than two cache lines in the simulation. - if (dataSize > MIN_LINE_SIZE) dataSize = MIN_LINE_SIZE; - - /* returns 0 if simulator needs no instrumentation */ - es = insert_simcall(bbOut, ii, dataSize, instrIssued, - loadAddrExpr, storeAddrExpr); - - CLG_DEBUG(5, " Instr +%2d (Size %d, DSize %d): ESet %s (Size %d)\n", - instr_offset, instrLen, dataSize, - es ? es->name : (Char*)"(no instrumentation)", - es ? es->size : 0); - - if (bb_seen_before) { - CLG_DEBUG(5, " before: Instr +%2d (Size %d, DSize %d)\n", - ii->instr_offset, ii->instr_size, ii->data_size); - - CLG_ASSERT(ii->instr_offset == instr_offset); - CLG_ASSERT(ii->instr_size == instrLen); - CLG_ASSERT(ii->cost_offset == *cost_offset); - CLG_ASSERT(ii->eventset == es); - - /* Only check size if data size >0. 
- * This is needed: e.g. for rep or cmov x86 instructions, the same InstrInfo - * is used both for 2 simulator calls: for the pure instruction fetch and - * separately for an memory access (which may not happen depending on flags). - * If checked always, this triggers an assertion failure on retranslation. - */ - if (dataSize>0) CLG_ASSERT(ii->data_size == dataSize); + /* No. Add as normal. */ + if (clgs->events_used == N_EVENTS) + flushEvents(clgs); + tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS); + evt = &clgs->events[clgs->events_used]; + init_Event(evt); + evt->tag = Ev_Dw; + evt->inode = inode; + evt->Ev.Dw.szB = datasize; + evt->Ev.Dw.ea = ea; + clgs->events_used++; +} +/* Initialise or check (if already seen before) an InstrInfo for next insn. + We only can set instr_offset/instr_size here. The required event set and + resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest + instructions. The event set is extended as required on flush of the event + queue (when Dm events were determined), cost offsets are determined at + end of BB instrumentation. */ +static +InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size ) +{ + InstrInfo* ii; + tl_assert(clgs->ii_index >= 0); + tl_assert(clgs->ii_index < clgs->bb->instr_count); + ii = &clgs->bb->instr[ clgs->ii_index ]; + + if (clgs->seen_before) { + CLG_ASSERT(ii->instr_offset == clgs->instr_offset); + CLG_ASSERT(ii->instr_size == instr_size); } else { - ii->instr_offset = instr_offset; - ii->instr_size = instrLen; - ii->cost_offset = *cost_offset; - ii->eventset = es; - - /* data size only relevant if >0 */ - if (dataSize > 0) ii->data_size = dataSize; + ii->instr_offset = clgs->instr_offset; + ii->instr_size = instr_size; + ii->cost_offset = 0; + ii->eventset = 0; + } + clgs->ii_index++; + clgs->instr_offset += instr_size; + CLG_(stat).distinct_instrs++; - CLG_(stat).distinct_instrs++; - } + return ii; +} - *cost_offset += es ? es->size : 0; +// return total number of cost values needed for this BB +static +UInt update_cost_offsets( ClgState* clgs ) +{ + Int i; + InstrInfo* ii; + UInt cost_offset = 0; + + CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index); + for(i=0; i<clgs->ii_index; i++) { + ii = &clgs->bb->instr[i]; + if (clgs->seen_before) { + CLG_ASSERT(ii->cost_offset == cost_offset); + } else + ii->cost_offset = cost_offset; + cost_offset += ii->eventset ? 
ii->eventset->size : 0; + } + return cost_offset; } +/*------------------------------------------------------------*/ +/*--- Instrumentation ---*/ +/*------------------------------------------------------------*/ + #if defined(VG_BIGENDIAN) # define CLGEndness Iend_BE #elif defined(VG_LITTLEENDIAN) @@ -344,7 +610,7 @@ Addr IRConst2Addr(IRConst* con) * * Called from CLG_(get_bb) */ -void CLG_(collectBlockInfo)(IRSB* bbIn, +void CLG_(collectBlockInfo)(IRSB* sbIn, /*INOUT*/ UInt* instrs, /*INOUT*/ UInt* cjmps, /*INOUT*/ Bool* cjmp_inverted) @@ -360,10 +626,10 @@ void CLG_(collectBlockInfo)(IRSB* bbIn, // nothing to do with client code Bool inPreamble = True; - if (!bbIn) return; + if (!sbIn) return; - for (i = 0; i < bbIn->stmts_used; i++) { - st = bbIn->stmts[i]; + for (i = 0; i < sbIn->stmts_used; i++) { + st = sbIn->stmts[i]; if (Ist_IMark == st->tag) { inPreamble = False; @@ -377,7 +643,7 @@ void CLG_(collectBlockInfo)(IRSB* bbIn, if (Ist_Exit == st->tag) { jumpDst = IRConst2Addr(st->Ist.Exit.dst); toNextInstr = (jumpDst == instrAddr + instrLen); - + (*cjmps)++; } } @@ -389,98 +655,6 @@ void CLG_(collectBlockInfo)(IRSB* bbIn, } static -void collectStatementInfo(IRTypeEnv* tyenv, IRStmt* st, - Addr* instrAddr, UInt* instrLen, - IRExpr** loadAddrExpr, IRExpr** storeAddrExpr, - UInt* dataSize) -{ - CLG_ASSERT(isFlatIRStmt(st)); - - switch (st->tag) { - case Ist_NoOp: - break; - - case Ist_AbiHint: - /* ABI hints aren't interesting. Ignore. */ - break; - - case Ist_IMark: - /* st->Ist.IMark.addr is a 64-bit int. ULong_to_Ptr casts this - to the host's native pointer type; if that is 32 bits then it - discards the upper 32 bits. If we are cachegrinding on a - 32-bit host then we are also ensured that the guest word size - is 32 bits, due to the assertion in cg_instrument that the - host and guest word sizes must be the same. Hence - st->Ist.IMark.addr will have been derived from a 32-bit guest - code address and truncation of it is safe. I believe this - assignment should be correct for both 32- and 64-bit - machines. */ - *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr); - *instrLen = st->Ist.IMark.len; - break; - - case Ist_WrTmp: { - IRExpr* data = st->Ist.WrTmp.data; - if (data->tag == Iex_Load) { - IRExpr* aexpr = data->Iex.Load.addr; - CLG_ASSERT( isIRAtom(aexpr) ); - // Note also, endianness info is ignored. I guess that's not - // interesting. - // XXX: repe cmpsb does two loads... the first one is ignored here! - //tl_assert( NULL == *loadAddrExpr ); // XXX: ??? - *loadAddrExpr = aexpr; - *dataSize = sizeofIRType(data->Iex.Load.ty); - } - break; - } - - case Ist_Store: { - IRExpr* data = st->Ist.Store.data; - IRExpr* aexpr = st->Ist.Store.addr; - CLG_ASSERT( isIRAtom(aexpr) ); - if ( NULL == *storeAddrExpr ) { - /* this is a kludge: ignore all except the first store from - an instruction. */ - *storeAddrExpr = aexpr; - *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data)); - } - break; - } - - case Ist_Dirty: { - IRDirty* d = st->Ist.Dirty.details; - if (d->mFx != Ifx_None) { - /* This dirty helper accesses memory. Collect the - details. 
*/ - CLG_ASSERT(d->mAddr != NULL); - CLG_ASSERT(d->mSize != 0); - *dataSize = d->mSize; - if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) - *loadAddrExpr = d->mAddr; - if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) - *storeAddrExpr = d->mAddr; - } else { - CLG_ASSERT(d->mAddr == NULL); - CLG_ASSERT(d->mSize == 0); - } - break; - } - - case Ist_Put: - case Ist_PutI: - case Ist_MBE: - case Ist_Exit: - break; - - default: - VG_(printf)("\n"); - ppIRStmt(st); - VG_(printf)("\n"); - VG_(tool_panic)("Callgrind: unhandled IRStmt"); - } -} - -static void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy) { addStmtToIRSB( bbOut, @@ -491,29 +665,56 @@ void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy) IRExpr_Const(IRConst_U32(val)) )); } + +/* add helper call to setup_bbcc, with pointer to BB struct as argument + * + * precondition for setup_bbcc: + * - jmps_passed has number of cond.jumps passed in last executed BB + * - current_bbcc has a pointer to the BBCC of the last executed BB + * Thus, if bbcc_jmpkind is != -1 (JmpNone), + * current_bbcc->bb->jmp_addr + * gives the address of the jump source. + * + * the setup does 2 things: + * - trace call: + * * Unwind own call stack, i.e sync our ESP with real ESP + * This is for ESP manipulation (longjmps, C++ exec handling) and RET + * * For CALLs or JMPs crossing objects, record call arg + + * push are on own call stack + * + * - prepare for cache log functions: + * set current_bbcc to BBCC that gets the costs for this BB execution + * attached + */ +static +void addBBSetupCall(ClgState* clgs) +{ + IRDirty* di; + IRExpr *arg1, **argv; + + arg1 = mkIRExpr_HWord( (HWord)clgs->bb ); + argv = mkIRExprVec_1(arg1); + di = unsafeIRDirty_0_N( 1, "setup_bbcc", + VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ), + argv); + addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) ); +} + + static IRSB* CLG_(instrument)( VgCallbackClosure* closure, - IRSB* bbIn, + IRSB* sbIn, VexGuestLayout* layout, VexGuestExtents* vge, IRType gWordTy, IRType hWordTy ) { - Int i; - IRSB* bbOut; - IRStmt* st, *stnext; - Addr instrAddr, origAddr; - UInt instrLen = 0, dataSize; - UInt instrCount, costOffset; - IRExpr *loadAddrExpr, *storeAddrExpr; - - BB* bb; + Int i, isize; + IRStmt* st; + Addr origAddr; + InstrInfo* curr_inode = NULL; + ClgState clgs; + UInt cJumps = 0; - IRDirty* di; - IRExpr *arg1, **argv; - - Bool bb_seen_before = False; - UInt cJumps = 0, cJumpsCorrected; - Bool beforeIBoundary, instrIssued; if (gWordTy != hWordTy) { /* We don't currently support this case. */ @@ -524,173 +725,206 @@ IRSB* CLG_(instrument)( VgCallbackClosure* closure, if (! 
CLG_(instrument_state)) { CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n", (Addr)closure->readdr); - return bbIn; + return sbIn; } CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr); /* Set up SB for instrumented IR */ - bbOut = deepCopyIRSBExceptStmts(bbIn); + clgs.sbOut = deepCopyIRSBExceptStmts(sbIn); // Copy verbatim any IR preamble preceding the first IMark i = 0; - while (i < bbIn->stmts_used && bbIn->stmts[i]->tag != Ist_IMark) { - addStmtToIRSB( bbOut, bbIn->stmts[i] ); + while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) { + addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] ); i++; } // Get the first statement, and origAddr from it - CLG_ASSERT(bbIn->stmts_used > 0); - st = bbIn->stmts[i]; + CLG_ASSERT(sbIn->stmts_used >0); + CLG_ASSERT(i < sbIn->stmts_used); + st = sbIn->stmts[i]; CLG_ASSERT(Ist_IMark == st->tag); - instrAddr = origAddr = (Addr)st->Ist.IMark.addr; + + origAddr = (Addr)st->Ist.IMark.addr; CLG_ASSERT(origAddr == st->Ist.IMark.addr); // XXX: check no overflow - /* Get BB (creating if necessary). + /* Get BB struct (creating if necessary). * JS: The hash table is keyed with orig_addr_noredir -- important! * JW: Why? If it is because of different chasing of the redirection, * this is not needed, as chasing is switched off in callgrind */ - bb = CLG_(get_bb)(origAddr, bbIn, &bb_seen_before); - //bb = CLG_(get_bb)(orig_addr_noredir, bbIn, &bb_seen_before); - - /* - * Precondition: - * - jmps_passed has number of cond.jumps passed in last executed BB - * - current_bbcc has a pointer to the BBCC of the last executed BB - * Thus, if bbcc_jmpkind is != -1 (JmpNone), - * current_bbcc->bb->jmp_addr - * gives the address of the jump source. - * - * The BBCC setup does 2 things: - * - trace call: - * * Unwind own call stack, i.e sync our ESP with real ESP - * This is for ESP manipulation (longjmps, C++ exec handling) and RET - * * For CALLs or JMPs crossing objects, record call arg + - * push are on own call stack - * - * - prepare for cache log functions: - * Set current_bbcc to BBCC that gets the costs for this BB execution - * attached - */ + clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before)); + + addBBSetupCall(&clgs); + + // Set up running state + clgs.events_used = 0; + clgs.ii_index = 0; + clgs.instr_offset = 0; + + for (/*use current i*/; i < sbIn->stmts_used; i++) { + + st = sbIn->stmts[i]; + CLG_ASSERT(isFlatIRStmt(st)); + + switch (st->tag) { + case Ist_NoOp: + case Ist_AbiHint: + case Ist_Put: + case Ist_PutI: + case Ist_MBE: + break; + + case Ist_IMark: { + CLG_ASSERT(clgs.instr_offset == (Addr)st->Ist.IMark.addr - origAddr); + isize = st->Ist.IMark.len; + // If Vex fails to decode an instruction, the size will be zero. + // Pretend otherwise. + if (isize == 0) isize = VG_MIN_INSTR_SZB; + + // Sanity-check size. + tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) + || VG_CLREQ_SZB == isize ); + + // Init the inode, record it as the current one. + // Subsequent Dr/Dw/Dm events from the same instruction will + // also use it. + curr_inode = next_InstrInfo (&clgs, isize); + + addEvent_Ir( &clgs, curr_inode ); + break; + } + + case Ist_WrTmp: { + IRExpr* data = st->Ist.WrTmp.data; + if (data->tag == Iex_Load) { + IRExpr* aexpr = data->Iex.Load.addr; + // Note also, endianness info is ignored. I guess + // that's not interesting. 
+ addEvent_Dr( &clgs, curr_inode, + sizeofIRType(data->Iex.Load.ty), aexpr ); + } + break; + } + + case Ist_Store: { + IRExpr* data = st->Ist.Store.data; + IRExpr* aexpr = st->Ist.Store.addr; + addEvent_Dw( &clgs, curr_inode, + sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr ); + break; + } + + case Ist_Dirty: { + Int dataSize; + IRDirty* d = st->Ist.Dirty.details; + if (d->mFx != Ifx_None) { + /* This dirty helper accesses memory. Collect the details. */ + tl_assert(d->mAddr != NULL); + tl_assert(d->mSize != 0); + dataSize = d->mSize; + // Large (eg. 28B, 108B, 512B on x86) data-sized + // instructions will be done inaccurately, but they're + // very rare and this avoids errors from hitting more + // than two cache lines in the simulation. + if (dataSize > MIN_LINE_SIZE) + dataSize = MIN_LINE_SIZE; + if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) + addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr ); + if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) + addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr ); + } else { + tl_assert(d->mAddr == NULL); + tl_assert(d->mSize == 0); + } + break; + } - // helper call to setup_bbcc, with pointer to basic block info struct as argument - arg1 = mkIRExpr_HWord( (HWord)bb ); - argv = mkIRExprVec_1(arg1); - di = unsafeIRDirty_0_N( 1, "setup_bbcc", - VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ), - argv); - addStmtToIRSB( bbOut, IRStmt_Dirty(di) ); - - instrCount = 0; - costOffset = 0; - - // loop for each host instruction (starting from 'i') - do { - - // We should be at an IMark statement - CLG_ASSERT(Ist_IMark == st->tag); - - // Reset stuff for this original instruction - loadAddrExpr = storeAddrExpr = NULL; - instrIssued = False; - dataSize = 0; - - // Process all the statements for this original instruction (ie. until - // the next IMark statement, or the end of the block) - do { - i++; - stnext = ( i < bbIn->stmts_used ? bbIn->stmts[i] : NULL ); - beforeIBoundary = !stnext || (Ist_IMark == stnext->tag); - collectStatementInfo(bbIn->tyenv, st, &instrAddr, &instrLen, - &loadAddrExpr, &storeAddrExpr, &dataSize); - - // instrument a simulator call before conditional jumps - if (st->tag == Ist_Exit) { - // Nb: instrLen will be zero if Vex failed to decode it. - // Also Client requests can appear to be very large (eg. 18 - // bytes on x86) because they are really multiple instructions. - CLG_ASSERT( 0 == instrLen || - bbIn->jumpkind == Ijk_ClientReq || - (instrLen >= VG_MIN_INSTR_SZB && - instrLen <= VG_MAX_INSTR_SZB) ); - - // Add instrumentation before this statement - endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before, - instrAddr - origAddr, instrLen, dataSize, &costOffset, - instrIssued, loadAddrExpr, storeAddrExpr); - - // prepare for a possible further simcall in same host instr - loadAddrExpr = storeAddrExpr = NULL; - instrIssued = True; - - if (!bb_seen_before) { - bb->jmp[cJumps].instr = instrCount; - bb->jmp[cJumps].skip = False; - } - - /* Update global variable jmps_passed (this is before the jump!) - * A correction is needed if VEX inverted the last jump condition - */ - cJumpsCorrected = cJumps; - if ((cJumps+1 == bb->cjmp_count) && bb->cjmp_inverted) cJumpsCorrected++; - addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed, - cJumpsCorrected, hWordTy); - - cJumps++; - } + case Ist_Exit: { + UInt jmps_passed; + + /* We may never reach the next statement, so need to flush + all outstanding transactions now. 
*/ + flushEvents( &clgs ); - addStmtToIRSB( bbOut, st ); - st = stnext; - } - while (!beforeIBoundary); + CLG_ASSERT(clgs.ii_index>0); + if (!clgs.seen_before) { + clgs.bb->jmp[cJumps].instr = clgs.ii_index-1; + clgs.bb->jmp[cJumps].skip = False; + } + + /* Update global variable jmps_passed before the jump + * A correction is needed if VEX inverted the last jump condition + */ + jmps_passed = cJumps; + if ((cJumps+1 == clgs.bb->cjmp_count) && clgs.bb->cjmp_inverted) + jmps_passed++; + addConstMemStoreStmt( clgs.sbOut, + (UWord) &CLG_(current_state).jmps_passed, + jmps_passed, hWordTy); + cJumps++; + + break; + } + + default: + tl_assert(0); + break; + } - // Add instrumentation for this original instruction. - if (!instrIssued || (loadAddrExpr != 0) || (storeAddrExpr !=0)) - endOfInstr(bbOut, &(bb->instr[instrCount]), bb_seen_before, - instrAddr - origAddr, instrLen, dataSize, &costOffset, - instrIssued, loadAddrExpr, storeAddrExpr); + /* Copy the original statement */ + addStmtToIRSB( clgs.sbOut, st ); - instrCount++; + CLG_DEBUGIF(5) { + VG_(printf)(" pass "); + ppIRStmt(st); + VG_(printf)("\n"); + } } - while (st); - /* Always update global variable jmps_passed (at end of BB) + /* At the end of the bb. Flush outstandings. */ + flushEvents( &clgs ); + + /* Always update global variable jmps_passed at end of bb. * A correction is needed if VEX inverted the last jump condition */ - cJumpsCorrected = cJumps; - if (bb->cjmp_inverted) cJumpsCorrected--; - addConstMemStoreStmt( bbOut, (UWord) &CLG_(current_state).jmps_passed, - cJumpsCorrected, hWordTy); + { + UInt jmps_passed = cJumps; + if (clgs.bb->cjmp_inverted) jmps_passed--; + addConstMemStoreStmt( clgs.sbOut, + (UWord) &CLG_(current_state).jmps_passed, + jmps_passed, hWordTy); + } + CLG_ASSERT(clgs.bb->cjmp_count == cJumps); + CLG_ASSERT(clgs.bb->instr_count = clgs.ii_index); /* This stores the instr of the call/ret at BB end */ - bb->jmp[cJumps].instr = instrCount-1; + clgs.bb->jmp[cJumps].instr = clgs.ii_index-1; - CLG_ASSERT(bb->cjmp_count == cJumps); - CLG_ASSERT(bb->instr_count == instrCount); - - instrAddr += instrLen; - if (bb_seen_before) { - CLG_ASSERT(bb->instr_len == instrAddr - origAddr); - CLG_ASSERT(bb->cost_count == costOffset); - CLG_ASSERT(bb->jmpkind == bbIn->jumpkind); + if (clgs.seen_before) { + CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs)); + CLG_ASSERT(clgs.bb->instr_len = clgs.instr_offset); + CLG_ASSERT(clgs.bb->jmpkind == sbIn->jumpkind); } else { - bb->instr_len = instrAddr - origAddr; - bb->cost_count = costOffset; - bb->jmpkind = bbIn->jumpkind; + clgs.bb->cost_count = update_cost_offsets(&clgs); + clgs.bb->instr_len = clgs.instr_offset; + clgs.bb->jmpkind = sbIn->jumpkind; } - + CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n", - origAddr, bb->instr_len, bb->cjmp_count, bb->cost_count); + origAddr, clgs.bb->instr_len, + clgs.bb->cjmp_count, clgs.bb->cost_count); if (cJumps>0) { CLG_DEBUG(3, " [ "); for (i=0;i<cJumps;i++) - CLG_DEBUG(3, "%d ", bb->jmp[i].instr); - CLG_DEBUG(3, "], last inverted: %s \n", bb->cjmp_inverted ? "yes":"no"); + CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr); + CLG_DEBUG(3, "], last inverted: %s \n", + clgs.bb->cjmp_inverted ? 
"yes":"no"); } - return bbOut; + return clgs.sbOut; } /*--------------------------------------------------------------------*/ diff --git a/callgrind/sim.c b/callgrind/sim.c index 9edbecc17..9e53f8916 100644 --- a/callgrind/sim.c +++ b/callgrind/sim.c @@ -113,22 +113,21 @@ static Bool clo_collect_cacheuse = False; * - BBCC* nonskipped (only != 0 when in a function not skipped) */ -/* Offset to events in event set, used in log_* functions */ -static Int off_D0_Ir; -static Int off_D1r_Ir; -static Int off_D1r_Dr; -static Int off_D1w_Ir; -static Int off_D1w_Dw; -static Int off_D2_Ir; -static Int off_D2_Dr; -static Int off_D2_Dw; +/* Offset to events in event set, used in log_* functions + * <off_EventSet_BasicEventSet>: offset where basic set is found + */ +static Int off_UIr_Ir; +static Int off_UIrDr_Ir, off_UIrDr_Dr; +static Int off_UIrDrDw_Ir, off_UIrDrDw_Dr, off_UIrDrDw_Dw; +static Int off_UIrDw_Ir, off_UIrDw_Dw; +static Int off_UIrDwDr_Ir, off_UIrDwDr_Dr, off_UIrDwDr_Dw; static Addr bb_base; static ULong* cost_base; static InstrInfo* current_ii; /* Cache use offsets */ -/* FIXME: The offsets are only correct because all eventsets get +/* The offsets are only correct because all per-instruction event sets get * the "Use" set added first ! */ static Int off_I1_AcCost = 0; @@ -984,13 +983,13 @@ static void cacheuse_finish(void) { int i; - InstrInfo ii = { 0,0,0,0,0 }; + InstrInfo ii = { 0,0,0,0 }; if (!CLG_(current_state).collect) return; bb_base = 0; current_ii = ⅈ - cost_base = 0; + cost_base = 0; /* update usage counters */ if (I1.use) @@ -1043,6 +1042,19 @@ void inc_costs(CacheModelResult r, ULong* c1, ULong* c2) } } +static +Char* cacheRes(CacheModelResult r) +{ + switch(r) { + case L1_Hit: return "L1 Hit "; + case L2_Hit: return "L2 Hit "; + case MemAccess: return "L2 Miss"; + case WriteBackMemAccess: return "L2 Miss (dirty)"; + default: + tl_assert(0); + } + return "??"; +} VG_REGPARM(1) static void log_1I0D(InstrInfo* ii) @@ -1052,37 +1064,101 @@ static void log_1I0D(InstrInfo* ii) current_ii = ii; IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); - CLG_DEBUG(6, "log_1I0D: Ir=%#lx/%u => Ir %d\n", - bb_base + ii->instr_offset, ii->instr_size, IrRes); + CLG_DEBUG(6, "log_1I0D: Ir %#lx/%u => %s\n", + bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes)); if (CLG_(current_state).collect) { ULong* cost_Ir; - + if (CLG_(current_state).nonskipped) cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir; else - cost_Ir = cost_base + ii->cost_offset + off_D0_Ir; + cost_Ir = cost_base + ii->cost_offset + off_UIr_Ir; inc_costs(IrRes, cost_Ir, CLG_(current_state).cost + CLG_(sets).off_full_Ir ); } } +VG_REGPARM(2) +static void log_2I0D(InstrInfo* ii1, InstrInfo* ii2) +{ + CacheModelResult Ir1Res, Ir2Res; + ULong *global_cost_Ir; + + current_ii = ii1; + Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size); + current_ii = ii2; + Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size); + + CLG_DEBUG(6, "log_2I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s\n", + bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res), + bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res) ); + + if (!CLG_(current_state).collect) return; + + global_cost_Ir = CLG_(current_state).cost + CLG_(sets).off_full_Ir; + if (CLG_(current_state).nonskipped) { + ULong* skipped_cost_Ir = CLG_(current_state).nonskipped->skipped + + CLG_(sets).off_full_Ir; + inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir); + inc_costs(Ir2Res, 
global_cost_Ir, skipped_cost_Ir); + return; + } + + inc_costs(Ir1Res, global_cost_Ir, cost_base + ii1->cost_offset + off_UIr_Ir); + inc_costs(Ir2Res, global_cost_Ir, cost_base + ii2->cost_offset + off_UIr_Ir); +} + +VG_REGPARM(3) +static void log_3I0D(InstrInfo* ii1, InstrInfo* ii2, InstrInfo* ii3) +{ + CacheModelResult Ir1Res, Ir2Res, Ir3Res; + ULong *global_cost_Ir; + + current_ii = ii1; + Ir1Res = (*simulator.I1_Read)(bb_base + ii1->instr_offset, ii1->instr_size); + current_ii = ii2; + Ir2Res = (*simulator.I1_Read)(bb_base + ii2->instr_offset, ii2->instr_size); + current_ii = ii3; + Ir3Res = (*simulator.I1_Read)(bb_base + ii3->instr_offset, ii3->instr_size); + + CLG_DEBUG(6, "log_3I0D: Ir1 %#lx/%u => %s, Ir2 %#lx/%u => %s, Ir3 %#lx/%u => %s\n", + bb_base + ii1->instr_offset, ii1->instr_size, cacheRes(Ir1Res), + bb_base + ii2->instr_offset, ii2->instr_size, cacheRes(Ir2Res), + bb_base + ii3->instr_offset, ii3->instr_size, cacheRes(Ir3Res) ); + + if (!CLG_(current_state).collect) return; + + global_cost_Ir = CLG_(current_state).cost + CLG_(sets).off_full_Ir; + if (CLG_(current_state).nonskipped) { + ULong* skipped_cost_Ir = CLG_(current_state).nonskipped->skipped + + CLG_(sets).off_full_Ir; + inc_costs(Ir1Res, global_cost_Ir, skipped_cost_Ir); + inc_costs(Ir2Res, global_cost_Ir, skipped_cost_Ir); + inc_costs(Ir3Res, global_cost_Ir, skipped_cost_Ir); + return; + } + + inc_costs(Ir1Res, global_cost_Ir, cost_base + ii1->cost_offset + off_UIr_Ir); + inc_costs(Ir2Res, global_cost_Ir, cost_base + ii2->cost_offset + off_UIr_Ir); + inc_costs(Ir3Res, global_cost_Ir, cost_base + ii3->cost_offset + off_UIr_Ir); +} /* Instruction doing a read access */ -VG_REGPARM(2) -static void log_1I1Dr(InstrInfo* ii, Addr data) +VG_REGPARM(3) +static void log_1I1Dr(InstrInfo* ii, Addr data_addr, Word data_size) { CacheModelResult IrRes, DrRes; current_ii = ii; IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); - DrRes = (*simulator.D1_Read)(data, ii->data_size); + DrRes = (*simulator.D1_Read)(data_addr, data_size); - CLG_DEBUG(6, "log_1I1Dr: Ir=%#lx/%u, Dr=%#lx/%u => Ir %d, Dr %d\n", - bb_base + ii->instr_offset, ii->instr_size, - data, ii->data_size, IrRes, DrRes); + CLG_DEBUG(6, "log_1I1Dr: Ir %#lx/%u => %s, Dr %#lx/%lu => %s\n", + bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes), + data_addr, data_size, cacheRes(DrRes)); if (CLG_(current_state).collect) { ULong *cost_Ir, *cost_Dr; @@ -1092,8 +1168,11 @@ static void log_1I1Dr(InstrInfo* ii, Addr data) cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr; } else { - cost_Ir = cost_base + ii->cost_offset + off_D1r_Ir; - cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr; + // event set must be UIrDr or extension + CLG_ASSERT((ii->eventset == CLG_(sets).UIrDr) || + (ii->eventset == CLG_(sets).UIrDrDw)); + cost_Ir = cost_base + ii->cost_offset + off_UIrDr_Ir; + cost_Dr = cost_base + ii->cost_offset + off_UIrDr_Dr; } inc_costs(IrRes, cost_Ir, @@ -1104,16 +1183,16 @@ static void log_1I1Dr(InstrInfo* ii, Addr data) } -VG_REGPARM(2) -static void log_0I1Dr(InstrInfo* ii, Addr data) +VG_REGPARM(3) +static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size) { CacheModelResult DrRes; current_ii = ii; - DrRes = (*simulator.D1_Read)(data, ii->data_size); + DrRes = (*simulator.D1_Read)(data_addr, data_size); - CLG_DEBUG(6, "log_0I1Dr: Dr=%#lx/%u => Dr %d\n", - data, ii->data_size, DrRes); + CLG_DEBUG(6, "log_0I1Dr: Dr %#lx/%lu => %s\n", + data_addr, data_size, cacheRes(DrRes)); if (CLG_(current_state).collect) 
{ ULong *cost_Dr; @@ -1122,9 +1201,15 @@ static void log_0I1Dr(InstrInfo* ii, Addr data) cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr; } else { - cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr; + Int off_Dr; + if (ii->eventset == CLG_(sets).UIrDr) off_Dr = off_UIrDr_Dr; + else if (ii->eventset == CLG_(sets).UIrDrDw) off_Dr = off_UIrDrDw_Dr; + else if (ii->eventset == CLG_(sets).UIrDwDr) off_Dr = off_UIrDwDr_Dr; + else CLG_ASSERT(0); + + cost_Dr = cost_base + ii->cost_offset + off_Dr; } - + inc_costs(DrRes, cost_Dr, CLG_(current_state).cost + CLG_(sets).off_full_Dr ); } @@ -1133,29 +1218,33 @@ static void log_0I1Dr(InstrInfo* ii, Addr data) /* Instruction doing a write access */ -VG_REGPARM(2) -static void log_1I1Dw(InstrInfo* ii, Addr data) +VG_REGPARM(3) +static void log_1I1Dw(InstrInfo* ii, Addr data_addr, Word data_size) { CacheModelResult IrRes, DwRes; current_ii = ii; IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); - DwRes = (*simulator.D1_Write)(data, ii->data_size); + DwRes = (*simulator.D1_Write)(data_addr, data_size); - CLG_DEBUG(6, "log_1I1Dw: Ir=%#lx/%u, Dw=%#lx/%u => Ir %d, Dw %d\n", - bb_base + ii->instr_offset, ii->instr_size, - data, ii->data_size, IrRes, DwRes); + CLG_DEBUG(6, "log_1I1Dw: Ir %#lx/%u => %s, Dw %#lx/%lu => %s\n", + bb_base + ii->instr_offset, ii->instr_size, cacheRes(IrRes), + data_addr, data_size, cacheRes(DwRes)); if (CLG_(current_state).collect) { ULong *cost_Ir, *cost_Dw; if (CLG_(current_state).nonskipped) { - cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir; - cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw; + cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir; + cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw; } else { - cost_Ir = cost_base + ii->cost_offset + off_D1w_Ir; - cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw; + // This helper is called when a Dr event follows Ir; + // Event set must be UIrDw or extension + CLG_ASSERT((ii->eventset == CLG_(sets).UIrDw) || + (ii->eventset == CLG_(sets).UIrDwDr)); + cost_Ir = cost_base + ii->cost_offset + off_UIrDw_Ir; + cost_Dw = cost_base + ii->cost_offset + off_UIrDw_Dw; } inc_costs(IrRes, cost_Ir, @@ -1165,16 +1254,16 @@ static void log_1I1Dw(InstrInfo* ii, Addr data) } } -VG_REGPARM(2) -static void log_0I1Dw(InstrInfo* ii, Addr data) +VG_REGPARM(3) +static void log_0I1Dw(InstrInfo* ii, Addr data_addr, Word data_size) { CacheModelResult DwRes; current_ii = ii; - DwRes = (*simulator.D1_Write)(data, ii->data_size); + DwRes = (*simulator.D1_Write)(data_addr, data_size); - CLG_DEBUG(6, "log_0I1Dw: Dw=%#lx/%u => Dw %d\n", - data, ii->data_size, DwRes); + CLG_DEBUG(6, "log_0I1Dw: Dw %#lx/%lu => %s\n", + data_addr, data_size, cacheRes(DwRes)); if (CLG_(current_state).collect) { ULong *cost_Dw; @@ -1183,7 +1272,13 @@ static void log_0I1Dw(InstrInfo* ii, Addr data) cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw; } else { - cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw; + Int off_Dw; + if (ii->eventset == CLG_(sets).UIrDw) off_Dw = off_UIrDw_Dw; + else if (ii->eventset == CLG_(sets).UIrDwDr) off_Dw = off_UIrDwDr_Dw; + else if (ii->eventset == CLG_(sets).UIrDrDw) off_Dw = off_UIrDrDw_Dw; + else CLG_ASSERT(0); + + cost_Dw = cost_base + ii->cost_offset + off_Dw; } inc_costs(DwRes, cost_Dw, @@ -1191,77 +1286,6 @@ static void log_0I1Dw(InstrInfo* ii, Addr data) } } -/* Instruction doing a read and a write access */ - 
-VG_REGPARM(3) -static void log_1I2D(InstrInfo* ii, Addr data1, Addr data2) -{ - CacheModelResult IrRes, DrRes, DwRes; - - current_ii = ii; - IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size); - DrRes = (*simulator.D1_Read)(data1, ii->data_size); - DwRes = (*simulator.D1_Write)(data2, ii->data_size); - - CLG_DEBUG(6, - "log_1I2D: Ir=%#lx/%u, Dr=%#lx/%u, Dw=%#lx/%u => Ir %d, Dr %d, Dw %d\n", - bb_base + ii->instr_offset, ii->instr_size, - data1, ii->data_size, data2, ii->data_size, IrRes, DrRes, DwRes); - - if (CLG_(current_state).collect) { - ULong *cost_Ir, *cost_Dr, *cost_Dw; - - if (CLG_(current_state).nonskipped) { - cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir; - cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr; - cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw; - } - else { - cost_Ir = cost_base + ii->cost_offset + off_D2_Ir; - cost_Dr = cost_base + ii->cost_offset + off_D2_Dr; - cost_Dw = cost_base + ii->cost_offset + off_D2_Dw; - } - - inc_costs(IrRes, cost_Ir, - CLG_(current_state).cost + CLG_(sets).off_full_Ir ); - inc_costs(DrRes, cost_Dr, - CLG_(current_state).cost + CLG_(sets).off_full_Dr ); - inc_costs(DwRes, cost_Dw, - CLG_(current_state).cost + CLG_(sets).off_full_Dw ); - } -} - -VG_REGPARM(3) -static void log_0I2D(InstrInfo* ii, Addr data1, Addr data2) -{ - CacheModelResult DrRes, DwRes; - - current_ii = ii; - DrRes = (*simulator.D1_Read)(data1, ii->data_size); - DwRes = (*simulator.D1_Write)(data2, ii->data_size); - - CLG_DEBUG(6, - "log_0D2D: Dr=%#lx/%u, Dw=%#lx/%u => Dr %d, Dw %d\n", - data1, ii->data_size, data2, ii->data_size, DrRes, DwRes); - - if (CLG_(current_state).collect) { - ULong *cost_Dr, *cost_Dw; - - if (CLG_(current_state).nonskipped) { - cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr; - cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw; - } - else { - cost_Dr = cost_base + ii->cost_offset + off_D2_Dr; - cost_Dw = cost_base + ii->cost_offset + off_D2_Dw; - } - - inc_costs(DrRes, cost_Dr, - CLG_(current_state).cost + CLG_(sets).off_full_Dr ); - inc_costs(DwRes, cost_Dw, - CLG_(current_state).cost + CLG_(sets).off_full_Dw ); - } -} /*------------------------------------------------------------*/ @@ -1369,20 +1393,20 @@ static void cachesim_post_clo_init(void) if (!CLG_(clo).simulate_cache) { CLG_(cachesim).log_1I0D = 0; CLG_(cachesim).log_1I0D_name = "(no function)"; + CLG_(cachesim).log_2I0D = 0; + CLG_(cachesim).log_2I0D_name = "(no function)"; + CLG_(cachesim).log_3I0D = 0; + CLG_(cachesim).log_3I0D_name = "(no function)"; CLG_(cachesim).log_1I1Dr = 0; - CLG_(cachesim).log_1I1Dw = 0; - CLG_(cachesim).log_1I2D = 0; CLG_(cachesim).log_1I1Dr_name = "(no function)"; + CLG_(cachesim).log_1I1Dw = 0; CLG_(cachesim).log_1I1Dw_name = "(no function)"; - CLG_(cachesim).log_1I2D_name = "(no function)"; CLG_(cachesim).log_0I1Dr = 0; - CLG_(cachesim).log_0I1Dw = 0; - CLG_(cachesim).log_0I2D = 0; CLG_(cachesim).log_0I1Dr_name = "(no function)"; + CLG_(cachesim).log_0I1Dw = 0; CLG_(cachesim).log_0I1Dw_name = "(no function)"; - CLG_(cachesim).log_0I2D_name = "(no function)"; return; } @@ -1402,20 +1426,20 @@ static void cachesim_post_clo_init(void) CLG_(cachesim).log_1I0D = log_1I0D; CLG_(cachesim).log_1I0D_name = "log_1I0D"; + CLG_(cachesim).log_2I0D = log_2I0D; + CLG_(cachesim).log_2I0D_name = "log_2I0D"; + CLG_(cachesim).log_3I0D = log_3I0D; + CLG_(cachesim).log_3I0D_name = "log_3I0D"; CLG_(cachesim).log_1I1Dr = 
log_1I1Dr; CLG_(cachesim).log_1I1Dw = log_1I1Dw; - CLG_(cachesim).log_1I2D = log_1I2D; CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr"; CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw"; - CLG_(cachesim).log_1I2D_name = "log_1I2D"; CLG_(cachesim).log_0I1Dr = log_0I1Dr; CLG_(cachesim).log_0I1Dw = log_0I1Dw; - CLG_(cachesim).log_0I2D = log_0I2D; CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr"; CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw"; - CLG_(cachesim).log_0I2D_name = "log_0I2D"; if (clo_collect_cacheuse) { @@ -1763,26 +1787,29 @@ struct event_sets CLG_(sets); void CLG_(init_eventsets)(Int max_user) { EventType * e1, *e2, *e3, *e4; - EventSet *Ir, *Dr, *Dw; - EventSet *D0, *D1r, *D1w, *D2; - EventSet *sim, *full; - EventSet *use; + // Basic event sets from which others are composed + EventSet *Use, *Ir, *Dr, *Dw; + // Compositions of basic sets used for per-instruction counters + EventSet *UIr, *UIrDr, *UIrDrDw, *UIrDw, *UIrDwDr; + // Composition used for global counters and aggregation + EventSet *full; int sizeOfUseIr; - use = CLG_(get_eventset)("Use", 4); + // the "Use" events types only are used with "cacheuse" simulation + Use = CLG_(get_eventset)("Use", 4); if (clo_collect_cacheuse) { /* if TUse is 0, there was never a load, and no loss, too */ e1 = CLG_(register_eventtype)("AcCost1"); - CLG_(add_eventtype)(use, e1); + CLG_(add_eventtype)(Use, e1); e1 = CLG_(register_eventtype)("SpLoss1"); - CLG_(add_eventtype)(use, e1); + CLG_(add_eventtype)(Use, e1); e1 = CLG_(register_eventtype)("AcCost2"); - CLG_(add_eventtype)(use, e1); + CLG_(add_eventtype)(Use, e1); e1 = CLG_(register_eventtype)("SpLoss2"); - CLG_(add_eventtype)(use, e1); + CLG_(add_eventtype)(Use, e1); } - Ir = CLG_(get_eventset)("Ir", 4); + Ir = CLG_(get_eventset)("Ir", 4); Dr = CLG_(get_eventset)("Dr", 4); Dw = CLG_(get_eventset)("Dw", 4); if (CLG_(clo).simulate_cache) { @@ -1822,74 +1849,76 @@ void CLG_(init_eventsets)(Int max_user) CLG_(add_eventtype)(Ir, e1); } - sizeOfUseIr = use->size + Ir->size; - D0 = CLG_(get_eventset)("D0", sizeOfUseIr); - CLG_(add_eventset)(D0, use); - off_D0_Ir = CLG_(add_eventset)(D0, Ir); - - D1r = CLG_(get_eventset)("D1r", sizeOfUseIr + Dr->size); - CLG_(add_eventset)(D1r, use); - off_D1r_Ir = CLG_(add_eventset)(D1r, Ir); - off_D1r_Dr = CLG_(add_eventset)(D1r, Dr); - - D1w = CLG_(get_eventset)("D1w", sizeOfUseIr + Dw->size); - CLG_(add_eventset)(D1w, use); - off_D1w_Ir = CLG_(add_eventset)(D1w, Ir); - off_D1w_Dw = CLG_(add_eventset)(D1w, Dw); - - D2 = CLG_(get_eventset)("D2", sizeOfUseIr + Dr->size + Dw->size); - CLG_(add_eventset)(D2, use); - off_D2_Ir = CLG_(add_eventset)(D2, Ir); - off_D2_Dr = CLG_(add_eventset)(D2, Dr); - off_D2_Dw = CLG_(add_eventset)(D2, Dw); - - sim = CLG_(get_eventset)("sim", sizeOfUseIr + Dr->size + Dw->size); - CLG_(add_eventset)(sim, use); - CLG_(sets).off_sim_Ir = CLG_(add_eventset)(sim, Ir); - CLG_(sets).off_sim_Dr = CLG_(add_eventset)(sim, Dr); - CLG_(sets).off_sim_Dw = CLG_(add_eventset)(sim, Dw); + // Self cost event sets per guest instruction (U used only for cacheUse). + // Each basic event set only appears once, as eg. multiple different Dr's + // in one guest instruction are counted in the same counter. 
- if (CLG_(clo).collect_alloc) max_user += 2; - if (CLG_(clo).collect_systime) max_user += 2; + sizeOfUseIr = Use->size + Ir->size; + UIr = CLG_(get_eventset)("UIr", sizeOfUseIr); + CLG_(add_eventset)(UIr, Use); + off_UIr_Ir = CLG_(add_eventset)(UIr, Ir); - full = CLG_(get_eventset)("full", sim->size + max_user); - CLG_(add_eventset)(full, sim); - CLG_(sets).off_full_Ir = CLG_(sets).off_sim_Ir; - CLG_(sets).off_full_Dr = CLG_(sets).off_sim_Dr; - CLG_(sets).off_full_Dw = CLG_(sets).off_sim_Dw; + UIrDr = CLG_(get_eventset)("UIrDr", sizeOfUseIr + Dr->size); + CLG_(add_eventset)(UIrDr, Use); + off_UIrDr_Ir = CLG_(add_eventset)(UIrDr, Ir); + off_UIrDr_Dr = CLG_(add_eventset)(UIrDr, Dr); - CLG_(sets).use = use; - CLG_(sets).Ir = Ir; - CLG_(sets).Dr = Dr; - CLG_(sets).Dw = Dw; + UIrDrDw = CLG_(get_eventset)("IrDrDw", sizeOfUseIr + Dr->size + Dw->size); + CLG_(add_eventset)(UIrDrDw, Use); + off_UIrDrDw_Ir = CLG_(add_eventset)(UIrDrDw, Ir); + off_UIrDrDw_Dr = CLG_(add_eventset)(UIrDrDw, Dr); + off_UIrDrDw_Dw = CLG_(add_eventset)(UIrDrDw, Dw); - CLG_(sets).D0 = D0; - CLG_(sets).D1r = D1r; - CLG_(sets).D1w = D1w; - CLG_(sets).D2 = D2; + UIrDw = CLG_(get_eventset)("UIrDw", sizeOfUseIr + Dw->size); + CLG_(add_eventset)(UIrDw, Use); + off_UIrDw_Ir = CLG_(add_eventset)(UIrDw, Ir); + off_UIrDw_Dw = CLG_(add_eventset)(UIrDw, Dw); + + UIrDwDr = CLG_(get_eventset)("IrDwDr", sizeOfUseIr + Dw->size + Dr->size); + CLG_(add_eventset)(UIrDwDr, Use); + off_UIrDwDr_Ir = CLG_(add_eventset)(UIrDrDw, Ir); + off_UIrDwDr_Dw = CLG_(add_eventset)(UIrDrDw, Dw); + off_UIrDwDr_Dr = CLG_(add_eventset)(UIrDrDw, Dr); - CLG_(sets).sim = sim; - CLG_(sets).full = full; + // the "full" event set is used as global counter and for aggregation + if (CLG_(clo).collect_alloc) max_user += 2; + if (CLG_(clo).collect_systime) max_user += 2; + full = CLG_(get_eventset)("full", + sizeOfUseIr + Dr->size + Dw->size + max_user); + CLG_(add_eventset)(full, Use); + CLG_(sets).off_full_Ir = CLG_(add_eventset)(full, Ir); + CLG_(sets).off_full_Dr = CLG_(add_eventset)(full, Dr); + CLG_(sets).off_full_Dw = CLG_(add_eventset)(full, Dw); if (CLG_(clo).collect_alloc) { - e1 = CLG_(register_eventtype)("allocCount"); - e2 = CLG_(register_eventtype)("allocSize"); - CLG_(sets).off_full_user = CLG_(add_dep_event2)(full, e1,e2); + e1 = CLG_(register_eventtype)("allocCount"); + e2 = CLG_(register_eventtype)("allocSize"); + CLG_(sets).off_full_alloc = CLG_(add_dep_event2)(full, e1,e2); } - if (CLG_(clo).collect_systime) { - e1 = CLG_(register_eventtype)("sysCount"); - e2 = CLG_(register_eventtype)("sysTime"); - CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2); + e1 = CLG_(register_eventtype)("sysCount"); + e2 = CLG_(register_eventtype)("sysTime"); + CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2); } + CLG_(sets).Use = Use; + CLG_(sets).Ir = Ir; + CLG_(sets).Dr = Dr; + CLG_(sets).Dw = Dw; + CLG_(sets).UIr = UIr; + CLG_(sets).UIrDr = UIrDr; + CLG_(sets).UIrDrDw = UIrDrDw; + CLG_(sets).UIrDw = UIrDw; + CLG_(sets).UIrDwDr = UIrDwDr; + CLG_(sets).full = full; + + CLG_DEBUGIF(1) { CLG_DEBUG(1, "EventSets:\n"); - CLG_(print_eventset)(-2, use); + CLG_(print_eventset)(-2, Use); CLG_(print_eventset)(-2, Ir); CLG_(print_eventset)(-2, Dr); CLG_(print_eventset)(-2, Dw); - CLG_(print_eventset)(-2, sim); CLG_(print_eventset)(-2, full); } @@ -1924,34 +1953,41 @@ static void add_and_zero_Dx(EventSet* es, SimCost dst, ULong* cost) { /* if eventset use is defined, it is always first (hardcoded!) 
*/ - CLG_(add_and_zero_cost)( CLG_(sets).use, dst, cost); + CLG_(add_and_zero_cost)( CLG_(sets).Use, dst, cost); - /* FIXME: This is hardcoded... */ - if (es == CLG_(sets).D0) { - CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, - cost + off_D0_Ir); + if (es == CLG_(sets).UIr) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir, + cost + off_UIr_Ir); } - else if (es == CLG_(sets).D1r) { - CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, - cost + off_D1r_Ir); - CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr, - cost + off_D1r_Dr); + else if (es == CLG_(sets).UIrDr) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir, + cost + off_UIrDr_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr, + cost + off_UIrDr_Dr); } - else if (es == CLG_(sets).D1w) { - CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, - cost + off_D1w_Ir); - CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw, - cost + off_D1w_Dw); + else if (es == CLG_(sets).UIrDrDw) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir, + cost + off_UIrDrDw_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr, + cost + off_UIrDrDw_Dr); + CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw, + cost + off_UIrDrDw_Dw); } - else { - CLG_ASSERT(es == CLG_(sets).D2); - CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir, - cost + off_D2_Ir); - CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr, - cost + off_D2_Dr); - CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw, - cost + off_D2_Dw); + else if (es == CLG_(sets).UIrDw) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir, + cost + off_UIrDw_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw, + cost + off_UIrDw_Dw); + } + else if (es == CLG_(sets).UIrDwDr) { + CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_full_Ir, + cost + off_UIrDwDr_Ir); + CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_full_Dw, + cost + off_UIrDwDr_Dw); + CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_full_Dr, + cost + off_UIrDwDr_Dr); } + else CLG_ASSERT(0); } /* this is called at dump time for every instruction executed */ @@ -1959,7 +1995,7 @@ static void cachesim_add_icost(SimCost cost, BBCC* bbcc, InstrInfo* ii, ULong exe_count) { if (!CLG_(clo).simulate_cache) - cost[CLG_(sets).off_sim_Ir] += exe_count; + cost[CLG_(sets).off_full_Ir] += exe_count; else { #if 0 @@ -2019,24 +2055,24 @@ struct cachesim_if CLG_(cachesim) = { /* these will be set by cachesim_post_clo_init */ .log_1I0D = 0, + .log_2I0D = 0, + .log_3I0D = 0, .log_1I1Dr = 0, .log_1I1Dw = 0, - .log_1I2D = 0, .log_0I1Dr = 0, .log_0I1Dw = 0, - .log_0I2D = 0, .log_1I0D_name = "(no function)", + .log_2I0D_name = "(no function)", + .log_3I0D_name = "(no function)", .log_1I1Dr_name = "(no function)", .log_1I1Dw_name = "(no function)", - .log_1I2D_name = "(no function)", .log_0I1Dr_name = "(no function)", .log_0I1Dw_name = "(no function)", - .log_0I2D_name = "(no function)" }; |
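
A side note on the interface change running through sim.c: the log_* helpers
now receive the access size as an explicit third argument (hence
VG_REGPARM(3)) instead of reading ii->data_size, which is why data_size could
be dropped from InstrInfo. A hedged sketch of the new shape, with mocked-up
types rather than the real Valgrind declarations:

    #include <stdio.h>

    /* Hypothetical mock of the reworked helper interface: the access size
     * travels as a call argument instead of living in InstrInfo, so one
     * InstrInfo can serve data accesses of different sizes. */
    typedef unsigned long Addr;
    typedef long Word;

    typedef struct { unsigned instr_offset, instr_size; } InstrInfo;

    /* Old shape (pre-patch):  void log_0I1Dr(InstrInfo* ii, Addr a);
     *                         with the size read from ii->data_size.
     * New shape (this patch): the size is explicit per call. */
    static void log_0I1Dr(InstrInfo* ii, Addr data_addr, Word data_size)
    {
        printf("Dr at insn+%u: addr=%#lx size=%ld\n",
               ii->instr_offset, data_addr, data_size);
    }

    int main(void)
    {
        InstrInfo ii = { 12, 3 };
        /* e.g. a rep-style insn issuing two reads of different widths */
        log_0I1Dr(&ii, 0x1000, 4);
        log_0I1Dr(&ii, 0x2000, 8);
        return 0;
    }

With the size passed per call, one InstrInfo can back several accesses of
different widths (e.g. rep-prefixed instructions), which the old
per-InstrInfo data_size field could not express.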