diff options
Diffstat (limited to 'doc/examples/runemomniaggdemo.sh')
-rwxr-xr-x | doc/examples/runemomniaggdemo.sh | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/doc/examples/runemomniaggdemo.sh b/doc/examples/runemomniaggdemo.sh new file mode 100755 index 0000000..317ff5a --- /dev/null +++ b/doc/examples/runemomniaggdemo.sh @@ -0,0 +1,251 @@ +# this is a quick and dirty migration of runemomniagg2.sh to the +# --enable-demo mode of aggregate testing +function kill_netperfs { + pkill -ALRM netperf + + pgrep -P 1 -f netperf > /dev/null + while [ $? -eq 0 ] + do + sleep 1 + pgrep -P 1 -f netperf > /dev/null + done +} + +function run_cmd { + + NOW=`date +%s.%N` + echo "Starting netperfs at $NOW for $TEST" | tee $TESTLOG + i=0; + +# the starting point for our load level pauses + PAUSE_AT=1 + + + while [ $i -lt $MAX_INSTANCES ] + do + TARGET=${REMOTE_HOSTS[`expr $i % $NUM_REMOTE_HOSTS`]} + echo "Starting netperfs on localhost targeting ${TARGET} for $TEST" | tee -a $TESTLOG + id=`printf "%.5d" $i` + $NETPERF -H $TARGET $NETPERF_CMD 2>&1 > netperf_${TEST}_${id}_to_${TARGET}.out & + + # give it a moment to get going + sleep 1 + + i=`expr $i + 1` + + if [ $i -eq $PAUSE_AT ] && [ $i -ne $MAX_INSTANCES ] + then + NOW=`date +%s.%N` + echo "Pausing for $DURATION seconds at $NOW with $i netperfs running for $TEST" | tee -a $TESTLOG + sleep $DURATION + PAUSE_AT=`expr $PAUSE_AT \* 2` + NOW=`date +%s.%N` + echo "Resuming at $NOW for $TEST" | tee -a $TESTLOG + fi + done + + NOW=`date +%s.%N` + echo "Netperfs started by $NOW for $TEST" | tee -a $TESTLOG + +#wait for our test duration + sleep $DURATION + +#kludgey but this sleep should mean that another interim result will be emitted + sleep 3 + +# stop all the netperfs + NOW=`date +%s.%N` + echo "Netperfs stopping $NOW for $TEST" | tee -a $TESTLOG + kill_netperfs + + NOW=`date +%s.%N` + echo "Netperfs stopped $NOW for $TEST" | tee -a $TESTLOG + +} + +# very much like run_cmd, but it runs the tests one at a time rather +# than in parallel. We keep the same logging strings to be compatible +# (hopefully) with the post processing script, even though they don't +# make all that much sense :) + +function run_cmd_serial { + + NOW=`date +%s.%N` + echo "Starting netperfs at $NOW for $TEST" | tee $TESTLOG + i=0; + +# the starting point for our load level pauses + PAUSE_AT=1 + + + while [ $i -lt $NUM_REMOTE_HOSTS ] + do + TARGET=${REMOTE_HOSTS[`expr $i % $NUM_REMOTE_HOSTS`]} + echo "Starting netperfs on localhost targeting ${TARGET} for $TEST" | tee -a $TESTLOG + id=`printf "%.5d" $i` + $NETPERF -H $TARGET $NETPERF_CMD 2>&1 > netperf_${TEST}_${id}_to_${TARGET}.out & + + # give it a moment to get going + sleep 1 + + i=`expr $i + 1` + + NOW=`date +%s.%N` + echo "Pausing for $DURATION seconds at $NOW with $i netperfs running for $TEST" | tee -a $TESTLOG + # the plus two is to make sure we have a full set of interim + # results. probably not necessary here but we want to be + # certain + sleep `expr $DURATION + 1` + kill_netperfs + NOW=`date +%s.%N` + THEN=`echo $NOW | awk -F "." '{printf("%d.%d",$1-1,$2)}'` + echo "Resuming at $THEN for $TEST" | tee -a $TESTLOG + + done + + NOW=`date +%s.%N` + echo "Netperfs started by $NOW for $TEST" | tee -a $TESTLOG + +# stop all the netperfs - of course actually they have all been +# stopped already, we just want the log entries + NOW=`date +%s.%N` + echo "Netperfs stopping $NOW for $TEST" | tee -a $TESTLOG + kill_netperfs + NOW=`date +%s.%N` + echo "Netperfs stopped $NOW for $TEST" | tee -a $TESTLOG +} + +# here then is the "main" part + +if [ ! -f ./remote_hosts ] +then + echo "This script requires a remote_hosts file" + exit -1 +fi +. ./remote_hosts + +# how many processors are there on this system +NUM_CPUS=`grep processor /proc/cpuinfo | wc -l` + +# the number of netperf instances we will run will be up to 2x the +# number of CPUs +MAX_INSTANCES=`expr $NUM_CPUS \* 2` + +# but at least as many as there are entries in remote_hosts +if [ $MAX_INSTANCES -lt $NUM_REMOTE_HOSTS ] +then + MAX_INSTANCES=$NUM_REMOTE_HOSTS +fi + +# allow the netperf binary to be used to be overridden +NETPERF=${NETPERF:="netperf"} + +if [ $NUM_REMOTE_HOSTS -lt 2 ] +then + echo "The list of remote hosts is too short. There must be at least 2." + exit -1 +fi + +# we assume that netservers are already running on all the load generators + +DURATION=120 +# do not have a uuidgen? then use the one in netperf +MY_UUID=`uuidgen` +# with top-of-trunk we could make this 0 and run forever +# but two hours is something of a failsafe if the signals +# get lost +LENGTH="-l 7200" +OUTPUT="-o all" + +DO_STREAM=1; +DO_MAERTS=1; +# NOTE! The Bidir test depends on being able to set a socket buffer +# size greater than 13 * 64KB or 832 KB or there is a risk of the test +# hanging. If you are running linux, make certain that +# net.core.[r|w]mem_max are sufficiently large +DO_BIDIR=1; +DO_RRAGG=1; +DO_RR=1; +DO_ANCILLARY=1; + +# UDP_RR for TPC/PPS using single-byte transactions. we do not use +# TCP_RR any longer because any packet losses or other matters +# affecting the congestion window will break our desire that there be +# a one to one correspondence between requests/responses and packets. +if [ $DO_RRAGG -eq 1 ]; then + BURST=`find_max_burst.sh ${REMOTE_HOSTS[0]}` + if [ $BURST -eq -1 ]; then + # use a value that find_max_burst will not have picked + BURST=9 + echo "find_max_burst.sh returned -1 so picking a burst of $BURST" + fi + TEST="tps" + TESTLOG="netperf_tps.log" + NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -b $BURST -e 1 -T udp -u $MY_UUID $OUTPUT" + run_cmd +fi + +# Bidirectional using burst-mode TCP_RR and large request/response size +if [ $DO_BIDIR -eq 1 ]; then + TEST="bidirectional" + TESTLOG="netperf_bidirectional.log" + NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -r 64K -s 1M -S 1M -b 12 -u $MY_UUID $OUTPUT" + run_cmd +fi + +# TCP_STREAM aka outbound with a 64K send size +# the netperf command is everything but netperf -H mumble +if [ $DO_STREAM -eq 1 ];then + TEST="outbound" + TESTLOG="netperf_outbound.log" + NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m 64K -u $MY_UUID $OUTPUT" + run_cmd +fi + +# TCP_MAERTS aka inbound with a 64K send size - why is this one last? +# because presently when I pkill the netperf of a "MAERTS" test, the +# netserver does not behave well and it may not be possible to get it +# to behave well. but we will still have all the interim results even +# if we don't get the final results, the useful parts of which will be +# the same as the other tests anyway +if [ $DO_MAERTS -eq 1 ]; then + TEST="inbound" + TESTLOG="netperf_inbound.log" + NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m ,64K -u $MY_UUID $OUTPUT" + run_cmd +fi + +# A single-stream of synchronous, no-burst TCP_RR in an "aggregate" +# script? Yes, because the way the aggregate tests work, while there +# is a way to see what the performance of a single bulk transfer was, +# there is no way to see a basic latency - by the time +# find_max_burst.sh has completed, we are past a burst size of 0 +if [ $DO_RR -eq 1 ]; then + if [ $DURATION -lt 60 ]; then + DURATION=60 + fi + TEST="sync_tps" + TESTLOG="netperf_sync_tps.log" + NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -u $MY_UUID $OUTPUT" + run_cmd_serial +fi + + +# now some ancillary things which may nor may not work on your platform +if [ $DO_ANCILLARY -eq 1 ];then + dmidecode 2>&1 > dmidecode.txt + uname -a 2>&1 > uname.txt + cat /proc/cpuinfo 2>&1 > cpuinfo.txt + cat /proc/meminfo 2>&1 > meminfo.txt + ifconfig -a 2>&1 > ifconfig.txt + netstat -rn 2>&1 > netstat.txt + dpkg -l 2>&1 > dpkg.txt + rpm -qa 2>&1 > rpm.txt + cat /proc/interrupts 2>&1 > interrupts.txt + i=0 + while [ $i -lt `expr $NUM_REMOTE_HOSTS - 1` ] + do + traceroute ${REMOTE_HOSTS[$i]} > traceroute_${REMOTE_HOSTS[$i]}.txt + i=`expr $i + 1` + done +fi |