aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick Sanders <nsanders@google.com> 2015-09-15 12:41:37 -0700
committer Nick Sanders <nsanders@google.com> 2015-09-15 12:41:37 -0700
commit 241f33a3e958842e3db803c03300764bd2ee9c19 (patch)
tree d38035fadbfff8dbe02121f11658dd3fe1540df8
parent 279816931fafe7dbffb0169185e9ac360144aad7 (diff)
download stressapptest-241f33a3e958842e3db803c03300764bd2ee9c19.tar.gz
Update to stressapptest 1.0.7 from upstream
https://github.com/stressapptest/stressapptest

Change-Id: I6307bcfad2e67392b4e0308680c708546e9a15a3
Signed-off-by: Nick Sanders <nsanders@google.com>
-rw-r--r-- Android.mk 8
-rw-r--r-- Makefile.am 3
-rw-r--r-- Makefile.in 103
-rwxr-xr-x configure 158
-rw-r--r-- configure.ac 75
-rw-r--r-- src/Makefile.am 3
-rw-r--r-- src/Makefile.in 75
-rw-r--r-- src/adler32memcpy.cc 119
-rw-r--r-- src/clock.h 29
-rw-r--r-- src/disk_blocks.cc 187
-rw-r--r-- src/disk_blocks.h 157
-rw-r--r-- src/findmask.c 140
-rw-r--r-- src/findmask.inc 4
-rw-r--r-- src/logger.cc 56
-rw-r--r-- src/logger.h 17
-rw-r--r-- src/os.cc 260
-rw-r--r-- src/os.h 150
-rw-r--r-- src/sat.cc 224
-rw-r--r-- src/sat.h 23
-rw-r--r-- src/sattypes.h 60
-rw-r--r-- src/stressapptest_config.h.in 3
-rw-r--r-- src/stressapptest_config_android.h 15
-rw-r--r-- src/worker.cc 441
-rw-r--r-- src/worker.h 112
-rw-r--r-- stressapptest.1 7
25 files changed, 1757 insertions, 672 deletions
diff --git a/Android.mk b/Android.mk
index e026a36..43127e9 100644
--- a/Android.mk
+++ b/Android.mk
@@ -16,11 +16,17 @@ LOCAL_SRC_FILES := \
src/queue.cc \
src/sat.cc \
src/sat_factory.cc \
- src/worker.cc \
+ src/worker.cc
LOCAL_MODULE:= stressapptest
LOCAL_MODULE_TAGS := optional
+
LOCAL_CFLAGS := -DHAVE_CONFIG_H -DANDROID -DNDEBUG -UDEBUG -DCHECKOPTS
+
+LOCAL_C_INCLUDES := \
+ bionic \
+ libc++
+
LOCAL_CPP_EXTENSION := .cc
LOCAL_CXX_STL := libc++
diff --git a/Makefile.am b/Makefile.am
index c476e5f..5b1998f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,2 +1,3 @@
SUBDIRS = src
-dist_doc_DATA = COPYING stressapptest.1
\ No newline at end of file
+dist_man_MANS = stressapptest.1
+
diff --git a/Makefile.in b/Makefile.in
index 718866a..e0386c7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -14,7 +14,6 @@
# PARTICULAR PURPOSE.
@SET_MAKE@
-
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
@@ -34,9 +33,8 @@ PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-target_triplet = @target@
subdir = .
-DIST_COMMON = $(am__configure_deps) $(dist_doc_DATA) \
+DIST_COMMON = $(am__configure_deps) $(dist_man_MANS) \
$(srcdir)/Makefile.am $(srcdir)/Makefile.in \
$(top_srcdir)/configure COPYING config.guess config.sub \
depcomp install-sh missing
@@ -80,8 +78,10 @@ am__nobase_list = $(am__nobase_strip_setup); \
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__installdirs = "$(DESTDIR)$(docdir)"
-DATA = $(dist_doc_DATA)
+man1dir = $(mandir)/man1
+am__installdirs = "$(DESTDIR)$(man1dir)"
+NROFF = nroff
+MANS = $(dist_man_MANS)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
@@ -220,16 +220,12 @@ sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
-target = @target@
target_alias = @target_alias@
-target_cpu = @target_cpu@
-target_os = @target_os@
-target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = src
-dist_doc_DATA = COPYING stressapptest.1
+dist_man_MANS = stressapptest.1
all: all-recursive
.SUFFIXES:
@@ -267,26 +263,44 @@ $(top_srcdir)/configure: $(am__configure_deps)
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
$(am__aclocal_m4_deps):
-install-dist_docDATA: $(dist_doc_DATA)
+install-man1: $(dist_man_MANS)
@$(NORMAL_INSTALL)
- test -z "$(docdir)" || $(MKDIR_P) "$(DESTDIR)$(docdir)"
- @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- echo "$$d$$p"; \
- done | $(am__base_list) | \
+ test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)"
+ @list=''; test -n "$(man1dir)" || exit 0; \
+ { for i in $$list; do echo "$$i"; done; \
+ l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \
+ sed -n '/\.1[a-z]*$$/p'; \
+ } | while read p; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; echo "$$p"; \
+ done | \
+ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+ sed 'N;N;s,\n, ,g' | { \
+ list=; while read file base inst; do \
+ if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+ fi; \
+ done; \
+ for i in $$list; do echo "$$i"; done | $(am__base_list) | \
while read files; do \
- echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(docdir)'"; \
- $(INSTALL_DATA) $$files "$(DESTDIR)$(docdir)" || exit $$?; \
- done
+ test -z "$$files" || { \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+ done; }
-uninstall-dist_docDATA:
+uninstall-man1:
@$(NORMAL_UNINSTALL)
- @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \
- files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- test -n "$$files" || exit 0; \
- echo " ( cd '$(DESTDIR)$(docdir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(docdir)" && rm -f $$files
+ @list=''; test -n "$(man1dir)" || exit 0; \
+ files=`{ for i in $$list; do echo "$$i"; done; \
+ l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \
+ sed -n '/\.1[a-z]*$$/p'; \
+ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+ test -z "$$files" || { \
+ echo " ( cd '$(DESTDIR)$(man1dir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(man1dir)" && rm -f $$files; }
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
@@ -424,6 +438,19 @@ distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
+ @list='$(MANS)'; if test -n "$$list"; then \
+ list=`for p in $$list; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
+ if test -n "$$list" && \
+ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
+ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
+ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
+ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
+ echo " typically \`make maintainer-clean' will remove them" >&2; \
+ exit 1; \
+ else :; fi; \
+ else :; fi
$(am__remove_distdir)
test -d "$(distdir)" || mkdir "$(distdir)"
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
@@ -600,10 +627,10 @@ distcleancheck: distclean
exit 1; } >&2
check-am: all-am
check: check-recursive
-all-am: Makefile $(DATA)
+all-am: Makefile $(MANS)
installdirs: installdirs-recursive
installdirs-am:
- for dir in "$(DESTDIR)$(docdir)"; do \
+ for dir in "$(DESTDIR)$(man1dir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
@@ -652,7 +679,7 @@ info: info-recursive
info-am:
-install-data-am: install-dist_docDATA
+install-data-am: install-man
install-dvi: install-dvi-recursive
@@ -668,7 +695,7 @@ install-info: install-info-recursive
install-info-am:
-install-man:
+install-man: install-man1
install-pdf: install-pdf-recursive
@@ -698,7 +725,9 @@ ps: ps-recursive
ps-am:
-uninstall-am: uninstall-dist_docDATA
+uninstall-am: uninstall-man
+
+uninstall-man: uninstall-man1
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
install-am install-strip tags-recursive
@@ -710,14 +739,14 @@ uninstall-am: uninstall-dist_docDATA
distclean distclean-generic distclean-tags distcleancheck \
distdir distuninstallcheck dvi dvi-am html html-am info \
info-am install install-am install-data install-data-am \
- install-dist_docDATA install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs installdirs-am maintainer-clean \
+ install-dvi install-dvi-am install-exec install-exec-am \
+ install-html install-html-am install-info install-info-am \
+ install-man install-man1 install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
- uninstall-dist_docDATA
+ uninstall-man uninstall-man1
# Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/configure b/configure
index 3f27d49..97d2c38 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for stressapptest 1.0.4_autoconf.
+# Generated by GNU Autoconf 2.65 for stressapptest 1.0.7_autoconf.
#
# Report bugs to <opensource@google.com>.
#
@@ -552,8 +552,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='stressapptest'
PACKAGE_TARNAME='stressapptest'
-PACKAGE_VERSION='1.0.4_autoconf'
-PACKAGE_STRING='stressapptest 1.0.4_autoconf'
+PACKAGE_VERSION='1.0.7_autoconf'
+PACKAGE_STRING='stressapptest 1.0.7_autoconf'
PACKAGE_BUGREPORT='opensource@google.com'
PACKAGE_URL=''
@@ -646,10 +646,6 @@ am__isrc
INSTALL_DATA
INSTALL_SCRIPT
INSTALL_PROGRAM
-target_os
-target_vendor
-target_cpu
-target
host_os
host_vendor
host_cpu
@@ -701,6 +697,7 @@ ac_user_opts='
enable_option_checking
with_static
enable_dependency_tracking
+enable_default_optimizations
'
ac_precious_vars='build_alias
host_alias
@@ -1255,7 +1252,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures stressapptest 1.0.4_autoconf to adapt to many kinds of systems.
+\`configure' configures stressapptest 1.0.7_autoconf to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1320,13 +1317,12 @@ Program names:
System types:
--build=BUILD configure for building on BUILD [guessed]
--host=HOST cross-compile to build programs to run on HOST [BUILD]
- --target=TARGET configure for building compilers for TARGET [HOST]
_ACEOF
fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of stressapptest 1.0.4_autoconf:";;
+ short | recursive ) echo "Configuration of stressapptest 1.0.7_autoconf:";;
esac
cat <<\_ACEOF
@@ -1336,6 +1332,8 @@ Optional Features:
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--disable-dependency-tracking speeds up one-time build
--enable-dependency-tracking do not reject slow dependency extractors
+ --disable-default-optimizations
+ Disable default optimization flag overrides
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -1420,7 +1418,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-stressapptest configure 1.0.4_autoconf
+stressapptest configure 1.0.7_autoconf
generated by GNU Autoconf 2.65
Copyright (C) 2009 Free Software Foundation, Inc.
@@ -1976,7 +1974,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by stressapptest $as_me 1.0.4_autoconf, which was
+It was created by stressapptest $as_me 1.0.7_autoconf, which was
generated by GNU Autoconf 2.65. Invocation command line was
$ $0 $@
@@ -2331,13 +2329,13 @@ if test "${with_static+set}" = set; then :
fi
-if test "$with_static" == "yes"
+if test "$with_static" = "yes"
then
- { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with staticaly linked libraries." >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with staticaly linked libraries." >&5
$as_echo "$as_me: Compiling with staticaly linked libraries." >&6;}
- LIBS="$LIBS -static"
+ LIBS="$LIBS -static"
else
- { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with dynamically linked libraries." >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with dynamically linked libraries." >&5
$as_echo "$as_me: Compiling with dynamically linked libraries." >&6;}
fi
@@ -2435,105 +2433,74 @@ IFS=$ac_save_IFS
case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
-
# Checking for target cpu and setting custom configuration
# for the different platforms
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5
-$as_echo_n "checking target system type... " >&6; }
-if test "${ac_cv_target+set}" = set; then :
- $as_echo_n "(cached) " >&6
-else
- if test "x$target_alias" = x; then
- ac_cv_target=$ac_cv_host
-else
- ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` ||
- as_fn_error "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5
-fi
+case "$host_cpu" in #(
+ *x86_64*) :
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5
-$as_echo "$ac_cv_target" >&6; }
-case $ac_cv_target in
-*-*-*) ;;
-*) as_fn_error "invalid value of canonical target" "$LINENO" 5;;
-esac
-target=$ac_cv_target
-ac_save_IFS=$IFS; IFS='-'
-set x $ac_cv_target
-shift
-target_cpu=$1
-target_vendor=$2
-shift; shift
-# Remember, the first character of IFS is used to create $*,
-# except with old shells:
-target_os=$*
-IFS=$ac_save_IFS
-case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac
-
-
-# The aliases save the names the user supplied, while $host etc.
-# will get canonicalized.
-test -n "$target_alias" &&
- test "$program_prefix$program_suffix$program_transform_name" = \
- NONENONEs,x,x, &&
- program_prefix=${target_alias}-
-case x"$target_cpu" in
- "xx86_64")
$as_echo "#define STRESSAPPTEST_CPU_X86_64 /**/" >>confdefs.h
- ;;
- "xi686")
+ ;; #(
+ *i686*) :
+
$as_echo "#define STRESSAPPTEST_CPU_I686 /**/" >>confdefs.h
- ;;
- "xpowerpc")
+ ;; #(
+ *powerpc*) :
+
$as_echo "#define STRESSAPPTEST_CPU_PPC /**/" >>confdefs.h
- ;;
- "xarmv7a")
+ ;; #(
+ *armv7a*) :
+
$as_echo "#define STRESSAPPTEST_CPU_ARMV7A /**/" >>confdefs.h
- ;;
- *)
- as_fn_error "$target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a" "$LINENO" 5
- ;;
+ ;; #(
+ *) :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unsupported CPU: $host_cpu! Try x86_64, i686, powerpc, or armv7a" >&5
+$as_echo "$as_me: WARNING: Unsupported CPU: $host_cpu! Try x86_64, i686, powerpc, or armv7a" >&2;}
+ ;;
esac
-_os=`uname`
## The following allows like systems to share settings. This is not meant to
## imply that these OS are the same thing. From OpenOffice dmake configure.in
-case "$_os" in
- "Linux")
+case "$host_os" in #(
+ *linux*) :
+
OS_VERSION=linux
$as_echo "#define STRESSAPPTEST_OS_LINUX /**/" >>confdefs.h
- ;;
- "Darwin")
+ ;; #(
+ *darwin*) :
+
OS_VERSION=macosx
$as_echo "#define STRESSAPPTEST_OS_DARWIN /**/" >>confdefs.h
- ;;
- "FreeBSD")
+ ;; #(
+ *freebsd*) :
+
OS_VERSION=bsd
$as_echo "#define STRESSAPPTEST_OS_BSD /**/" >>confdefs.h
- ;;
- "NetBSD")
+ ;; #(
+ *netbsd*) :
+
OS_VERSION=bsd
$as_echo "#define STRESSAPPTEST_OS_BSD /**/" >>confdefs.h
- ;;
- *)
- as_fn_error "$_os operating system is not suitable to build dmake!" "$LINENO" 5
- ;;
+ ;; #(
+ *) :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unsupported system: $host_os" >&5
+$as_echo "$as_me: WARNING: unsupported system: $host_os" >&2;}
+ ;;
esac
am__api_version='1.11'
@@ -2974,7 +2941,7 @@ fi
# Define the identity of the package.
PACKAGE='stressapptest'
- VERSION='1.0.4_autoconf'
+ VERSION='1.0.7_autoconf'
cat >>confdefs.h <<_ACEOF
@@ -4412,10 +4379,19 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
-#Default cxxflags
-CXXFLAGS="$CXXFLAGS -DCHECKOPTS"
-CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall -Wno-psabi"
-CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG"
+# Check whether --enable-default-optimizations was given.
+if test "${enable_default_optimizations+set}" = set; then :
+ enableval=$enable_default_optimizations;
+fi
+
+if test x"$enable_default_optimizations" != xno; then :
+
+ #Default cxxflags
+ CXXFLAGS="$CXXFLAGS -DCHECKOPTS"
+ CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall"
+ CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG"
+
+fi
# Checks for header files.
@@ -5064,6 +5040,13 @@ if test "$ac_res" != no; then :
fi
+ac_fn_c_check_type "$LINENO" "pthread_barrier_t" "ac_cv_type_pthread_barrier_t" "$ac_includes_default"
+if test "x$ac_cv_type_pthread_barrier_t" = x""yes; then :
+
+$as_echo "#define HAVE_PTHREAD_BARRIERS 1" >>confdefs.h
+
+fi
+
for ac_header in libaio.h
do :
ac_fn_c_check_header_mongrel "$LINENO" "libaio.h" "ac_cv_header_libaio_h" "$ac_includes_default"
@@ -5201,6 +5184,7 @@ if test "$ac_res" != no; then :
fi
+
# Checks for typedefs, structures, and compiler characteristics.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5
$as_echo_n "checking for stdbool.h that conforms to C99... " >&6; }
@@ -6455,7 +6439,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by stressapptest $as_me 1.0.4_autoconf, which was
+This file was extended by stressapptest $as_me 1.0.7_autoconf, which was
generated by GNU Autoconf 2.65. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -6521,7 +6505,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-stressapptest config.status 1.0.4_autoconf
+stressapptest config.status 1.0.7_autoconf
configured by $0, generated by GNU Autoconf 2.65,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index e1e44fa..74e8687 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,71 +1,64 @@
AC_PREREQ(2.61)
-AC_INIT([stressapptest], [1.0.4_autoconf], [opensource@google.com])
+AC_INIT([stressapptest], [1.0.7_autoconf], [opensource@google.com])
AC_ARG_WITH(static, [ --with-static enable static linking])
-if test "$with_static" == "yes"
+if test "$with_static" = "yes"
then
- AC_MSG_NOTICE([Compiling with staticaly linked libraries.])
- LIBS="$LIBS -static"
+ AC_MSG_NOTICE([Compiling with staticaly linked libraries.])
+ LIBS="$LIBS -static"
else
- AC_MSG_NOTICE([Compiling with dynamically linked libraries.])
+ AC_MSG_NOTICE([Compiling with dynamically linked libraries.])
fi
AC_CANONICAL_HOST
-AC_CANONICAL_BUILD
# Checking for target cpu and setting custom configuration
# for the different platforms
-AC_CANONICAL_TARGET
-case x"$target_cpu" in
- "xx86_64")
+AS_CASE(["$host_cpu"],
+ [*x86_64*], [
AC_DEFINE([STRESSAPPTEST_CPU_X86_64],[],
[Defined if the target CPU is x86_64])
- ;;
- "xi686")
+ ],
+ [*i686*], [
AC_DEFINE([STRESSAPPTEST_CPU_I686],[],
[Defined if the target CPU is i686])
- ;;
- "xpowerpc")
+ ],
+ [*powerpc*], [
AC_DEFINE([STRESSAPPTEST_CPU_PPC],[],
[Defined if the target CPU is PowerPC])
- ;;
- "xarmv7a")
+ ],
+ [*armv7a*], [
AC_DEFINE([STRESSAPPTEST_CPU_ARMV7A],[],
[Defined if the target CPU is armv7a])
- ;;
- *)
- AC_MSG_ERROR([$target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a])
- ;;
-esac
+ ],
+ [AC_MSG_WARN([Unsupported CPU: $host_cpu! Try x86_64, i686, powerpc, or armv7a])]
+)
-_os=`uname`
## The following allows like systems to share settings. This is not meant to
## imply that these OS are the same thing. From OpenOffice dmake configure.in
-case "$_os" in
- "Linux")
+AS_CASE(["$host_os"],
+ [*linux*], [
OS_VERSION=linux
AC_DEFINE([STRESSAPPTEST_OS_LINUX],[],
[Defined if the target OS is Linux])
- ;;
- "Darwin")
+ ],
+ [*darwin*], [
OS_VERSION=macosx
AC_DEFINE([STRESSAPPTEST_OS_DARWIN],[],
[Defined if the target OS is OSX])
- ;;
- "FreeBSD")
+ ],
+ [*freebsd*], [
OS_VERSION=bsd
AC_DEFINE([STRESSAPPTEST_OS_BSD],[],
[Defined if the target OS is BSD based])
- ;;
- "NetBSD")
+ ],
+ [*netbsd*], [
OS_VERSION=bsd
AC_DEFINE([STRESSAPPTEST_OS_BSD],[],
[Defined if the target OS is BSD based])
- ;;
- *)
- AC_MSG_ERROR([$_os operating system is not suitable to build dmake!])
- ;;
-esac
+ ],
+ [AC_MSG_WARN([unsupported system: $host_os])]
+)
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
AC_CONFIG_SRCDIR([src/])
@@ -95,10 +88,14 @@ AC_DEFINE_UNQUOTED([STRESSAPPTEST_TIMESTAMP],
"$username @ $hostname on $timestamp",
[Timestamp when ./configure was executed])
-#Default cxxflags
-CXXFLAGS="$CXXFLAGS -DCHECKOPTS"
-CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall -Wno-psabi"
-CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG"
+AC_ARG_ENABLE([default-optimizations],
+ [AS_HELP_STRING([--disable-default-optimizations], [Disable default optimization flag overrides])])
+AS_IF([test x"$enable_default_optimizations" != xno], [
+ #Default cxxflags
+ CXXFLAGS="$CXXFLAGS -DCHECKOPTS"
+ CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall"
+ CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG"
+])
# Checks for header files.
AC_HEADER_DIRENT
@@ -107,11 +104,13 @@ AC_HEADER_STDC
AC_CHECK_HEADERS([arpa/inet.h fcntl.h netdb.h stdint.h stdlib.h string.h sys/ioctl.h sys/socket.h sys/time.h unistd.h], [], [AC_MSG_FAILURE([Missing some header files.])])
AC_CHECK_HEADERS([pthread.h])
AC_SEARCH_LIBS([pthread_create], [pthread])
+AC_CHECK_TYPE([pthread_barrier_t], AC_DEFINE(HAVE_PTHREAD_BARRIERS, [1], [Define to 1 if the system has `pthread_barrier'.]))
AC_CHECK_HEADERS([libaio.h])
AC_SEARCH_LIBS([io_setup], [aio])
AC_CHECK_HEADERS([sys/shm.h])
AC_SEARCH_LIBS([shm_open], [rt])
+
# Checks for typedefs, structures, and compiler characteristics.
AC_HEADER_STDBOOL
AC_C_CONST
diff --git a/src/Makefile.am b/src/Makefile.am
index e044974..16f539d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,4 +1,5 @@
bin_PROGRAMS = stressapptest
+noinst_PROGRAMS = findmask
AM_DEFAULT_SOURCE_EXT=.cc
@@ -27,5 +28,7 @@ HFILES += error_diag.h
HFILES += disk_blocks.h
HFILES += adler32memcpy.h
HFILES += logger.h
+HFILES += clock.h
stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES)
+findmask_SOURCES = findmask.c findmask.inc
diff --git a/src/Makefile.in b/src/Makefile.in
index f62d1ac..ff320f3 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -34,8 +34,8 @@ PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-target_triplet = @target@
bin_PROGRAMS = stressapptest$(EXEEXT)
+noinst_PROGRAMS = findmask$(EXEEXT)
subdir = src
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
$(srcdir)/stressapptest_config.h.in
@@ -48,7 +48,10 @@ CONFIG_HEADER = stressapptest_config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__installdirs = "$(DESTDIR)$(bindir)"
-PROGRAMS = $(bin_PROGRAMS)
+PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
+am_findmask_OBJECTS = findmask.$(OBJEXT)
+findmask_OBJECTS = $(am_findmask_OBJECTS)
+findmask_LDADD = $(LDADD)
am__objects_1 = main.$(OBJEXT)
am__objects_2 = os.$(OBJEXT) os_factory.$(OBJEXT) pattern.$(OBJEXT) \
queue.$(OBJEXT) sat.$(OBJEXT) sat_factory.$(OBJEXT) \
@@ -63,17 +66,17 @@ DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
CXXLD = $(CXX)
CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
-o $@
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = $(stressapptest_SOURCES)
-DIST_SOURCES = $(stressapptest_SOURCES)
+SOURCES = $(findmask_SOURCES) $(stressapptest_SOURCES)
+DIST_SOURCES = $(findmask_SOURCES) $(stressapptest_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -171,11 +174,7 @@ sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
-target = @target@
target_alias = @target_alias@
-target_cpu = @target_cpu@
-target_os = @target_os@
-target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
@@ -186,13 +185,14 @@ CFILES = os.cc os_factory.cc pattern.cc queue.cc sat.cc sat_factory.cc \
adler32memcpy.cc logger.cc
HFILES = os.h pattern.h queue.h sat.h worker.h sattypes.h \
finelock_queue.h error_diag.h disk_blocks.h adler32memcpy.h \
- logger.h
+ logger.h clock.h
stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES)
+findmask_SOURCES = findmask.c findmask.inc
all: stressapptest_config.h
$(MAKE) $(AM_MAKEFLAGS) all-am
.SUFFIXES:
-.SUFFIXES: .cc .o .obj
+.SUFFIXES: .c .cc .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
@@ -277,6 +277,12 @@ uninstall-binPROGRAMS:
clean-binPROGRAMS:
-test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+
+clean-noinstPROGRAMS:
+ -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS)
+findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES)
+ @rm -f findmask$(EXEEXT)
+ $(LINK) $(findmask_OBJECTS) $(findmask_LDADD) $(LIBS)
stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES)
@rm -f stressapptest$(EXEEXT)
$(CXXLINK) $(stressapptest_OBJECTS) $(stressapptest_LDADD) $(LIBS)
@@ -290,6 +296,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adler32memcpy.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/disk_blocks.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error_diag.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findmask.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/finelock_queue.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logger.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@
@@ -301,6 +308,20 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sat_factory.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/worker.Po@am__quote@
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
.cc.o:
@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@@ -431,7 +452,8 @@ maintainer-clean-generic:
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
-clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
+clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \
+ mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
@@ -501,17 +523,18 @@ uninstall-am: uninstall-binPROGRAMS
.MAKE: all install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
- clean-generic ctags distclean distclean-compile \
- distclean-generic distclean-hdr distclean-tags distdir dvi \
- dvi-am html html-am info info-am install install-am \
- install-binPROGRAMS install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-binPROGRAMS
+ clean-generic clean-noinstPROGRAMS ctags distclean \
+ distclean-compile distclean-generic distclean-hdr \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-binPROGRAMS install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \
+ uninstall-am uninstall-binPROGRAMS
# Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc
index 69324f7..47c6262 100644
--- a/src/adler32memcpy.cc
+++ b/src/adler32memcpy.cc
@@ -70,7 +70,7 @@ bool AdlerChecksum::Equals(const AdlerChecksum &other) const {
// Returns string representation of the Adler checksum.
string AdlerChecksum::ToHexString() const {
char buffer[128];
- snprintf(buffer, sizeof(buffer), "%llx%llx%llx%llx", a1_, a2_, b1_, b2_);
+ snprintf(buffer, sizeof(buffer), "%016llx %016llx %016llx %016llx", a1_, a2_, b1_, b2_);
return string(buffer);
}
@@ -399,7 +399,124 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64,
// that there is no problem with memory this just mean that data was copied
// from src to dst and checksum was calculated successfully).
return true;
+#elif defined(STRESSAPPTEST_CPU_ARMV7A) && defined(__ARM_NEON__)
+ // Elements 0 to 3 are used for holding checksum terms a1, a2,
+ // b1, b2 respectively. These elements are filled by asm code.
+ // Checksum is seeded with the null checksum.
+ volatile uint64 checksum_arr[] __attribute__ ((aligned(16))) =
+ {1, 1, 0, 0};
+
+ if ((size_in_bytes >> 19) > 0) {
+ // Size is too large. Must be less than 2^19 bytes = 512 KB.
+ return false;
+ }
+
+ // Since we are moving 64 bytes at a time number of iterations = total size/64
+ uint32 blocks = size_in_bytes / 64;
+
+ uint64 *dst = dstmem64;
+ uint64 *src = srcmem64;
+
+ #define src_r "r3"
+ #define dst_r "r4"
+ #define blocks_r "r5"
+ #define crc_r "r6"
+
+ asm volatile (
+ "mov "src_r", %[src]; \n"
+ "mov "dst_r", %[dst]; \n"
+ "mov "crc_r", %[crc]; \n"
+ "mov "blocks_r", %[blocks]; \n"
+
+ // Loop over block count.
+ "cmp "blocks_r", #0; \n" // Compare counter to zero.
+ "ble END; \n"
+
+
+ // Preload upcoming cacheline.
+ "pld ["src_r", #0x0]; \n"
+ "pld ["src_r", #0x20]; \n"
+
+ // Init checksum
+ "vldm "crc_r", {q0}; \n"
+ "vmov.i32 q1, #0; \n"
+
+ // Start of the loop which copies 64 bytes from source to dst each time.
+ "TOP: \n"
+
+ // Make 4 moves each of 16 bytes from srcmem to qX registers.
+ // We are using 2 words out of 4 words in each qX register,
+ // word index 0 and word index 2. We'll swizzle them in a bit.
+ // Copy it.
+ "vldm "src_r"!, {q8, q9, q10, q11}; \n"
+ "vstm "dst_r"!, {q8, q9, q10, q11}; \n"
+
+ // Arrange it.
+ "vmov.i64 q12, #0; \n"
+ "vmov.i64 q13, #0; \n"
+ "vmov.i64 q14, #0; \n"
+ "vmov.i64 q15, #0; \n"
+ // This exchanges words 1,3 in the filled registers with
+ // words 0,2 in the empty registers.
+ "vtrn.32 q8, q12; \n"
+ "vtrn.32 q9, q13; \n"
+ "vtrn.32 q10, q14; \n"
+ "vtrn.32 q11, q15; \n"
+
+ // Sum into q0, then into q1.
+ // Repeat this for q8 - q15.
+ // Overflow can occur only if there are more
+ // than 2^16 additions => more than 2^17 words => more than 2^19 bytes so
+ // if size_in_bytes > 2^19 then overflow can occur.
+ "vadd.i64 q0, q0, q8; \n"
+ "vadd.i64 q1, q1, q0; \n"
+ "vadd.i64 q0, q0, q12; \n"
+ "vadd.i64 q1, q1, q0; \n"
+ "vadd.i64 q0, q0, q9; \n"
+ "vadd.i64 q1, q1, q0; \n"
+ "vadd.i64 q0, q0, q13; \n"
+ "vadd.i64 q1, q1, q0; \n"
+
+ "vadd.i64 q0, q0, q10; \n"
+ "vadd.i64 q1, q1, q0; \n"
+ "vadd.i64 q0, q0, q14; \n"
+ "vadd.i64 q1, q1, q0; \n"
+ "vadd.i64 q0, q0, q11; \n"
+ "vadd.i64 q1, q1, q0; \n"
+ "vadd.i64 q0, q0, q15; \n"
+ "vadd.i64 q1, q1, q0; \n"
+
+ // Increment counter and loop.
+ "sub "blocks_r", "blocks_r", #1; \n"
+ "cmp "blocks_r", #0; \n" // Compare counter to zero.
+ "bgt TOP; \n"
+
+
+ "END:\n"
+ // Report checksum values A and B (both right now are two concatenated
+ // 64 bit numbers and have to be converted to 64 bit numbers)
+ // seems like Adler128 (since size of each part is 4 byte rather than
+ // 1 byte).
+ "vstm "crc_r", {q0, q1}; \n"
+
+ // Output registers.
+ :
+ // Input registers.
+ : [src] "r"(src), [dst] "r"(dst), [blocks] "r"(blocks) , [crc] "r"(checksum_arr)
+ : "memory", "cc", "r3", "r4", "r5", "r6", "q0", "q1", "q8","q9","q10", "q11", "q12","q13","q14","q15"
+ ); // asm.
+
+ if (checksum != NULL) {
+ checksum->Set(checksum_arr[0], checksum_arr[1],
+ checksum_arr[2], checksum_arr[3]);
+ }
+
+ // Everything went fine, so return true (this does not mean
+ // that there is no problem with memory; this just means that data was copied
+ // from src to dst and checksum was calculated successfully).
+ return true;
#else
+ #warning "No vector copy defined for this architecture."
// Fall back to C implementation for anything else.
return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum);
#endif
diff --git a/src/clock.h b/src/clock.h
new file mode 100644
index 0000000..4204188
--- /dev/null
+++ b/src/clock.h
@@ -0,0 +1,29 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+// Author: cferris
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef STRESSAPPTEST_CLOCK_H_ // NOLINT
+#define STRESSAPPTEST_CLOCK_H_
+
+#include <time.h>
+
+// This class implements a clock that can be overridden for unit tests.
+class Clock {
+ public:
+ virtual ~Clock() {}
+
+ virtual time_t Now() { return time(NULL); }
+};
+
+#endif // STRESSAPPTEST_CLOCK_H_ NOLINT
diff --git a/src/disk_blocks.cc b/src/disk_blocks.cc
index c7860b0..60018f9 100644
--- a/src/disk_blocks.cc
+++ b/src/disk_blocks.cc
@@ -14,38 +14,51 @@
// Thread-safe container of disk blocks
-#include <utility>
-
// This file must work with autoconf on its public version,
// so these includes are correct.
#include "disk_blocks.h"
-DiskBlockTable::DiskBlockTable() {
- nelems_ = 0;
+#include <utility>
+
+// BlockData
+BlockData::BlockData() : address_(0), size_(0),
+ references_(0), initialized_(false),
+ pattern_(NULL) {
+ pthread_mutex_init(&data_mutex_, NULL);
+}
+
+BlockData::~BlockData() {
+ pthread_mutex_destroy(&data_mutex_);
+}
+
+void BlockData::set_initialized() {
+ pthread_mutex_lock(&data_mutex_);
+ initialized_ = true;
+ pthread_mutex_unlock(&data_mutex_);
+}
+
+bool BlockData::initialized() const {
+ pthread_mutex_lock(&data_mutex_);
+ bool initialized = initialized_;
+ pthread_mutex_unlock(&data_mutex_);
+ return initialized;
+}
+
+// DiskBlockTable
+DiskBlockTable::DiskBlockTable() : sector_size_(0), write_block_size_(0),
+ device_name_(""), device_sectors_(0),
+ segment_size_(0), size_(0) {
pthread_mutex_init(&data_mutex_, NULL);
pthread_mutex_init(&parameter_mutex_, NULL);
pthread_cond_init(&data_condition_, NULL);
}
DiskBlockTable::~DiskBlockTable() {
- CleanTable();
pthread_mutex_destroy(&data_mutex_);
pthread_mutex_destroy(&parameter_mutex_);
pthread_cond_destroy(&data_condition_);
}
-void DiskBlockTable::CleanTable() {
- pthread_mutex_lock(&data_mutex_);
- for (map<int64, StorageData*>::iterator it =
- addr_to_block_.begin(); it != addr_to_block_.end(); ++it) {
- delete it->second;
- }
- addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end());
- nelems_ = 0;
- pthread_cond_broadcast(&data_condition_);
- pthread_mutex_unlock(&data_mutex_);
-}
-
// 64-bit non-negative random number generator. Stolen from
// depot/google3/base/tracecontext_unittest.cc.
int64 DiskBlockTable::Random64() {
@@ -58,28 +71,27 @@ int64 DiskBlockTable::Random64() {
return -x;
}
-int64 DiskBlockTable::NumElems() {
- unsigned int nelems;
+uint64 DiskBlockTable::Size() {
pthread_mutex_lock(&data_mutex_);
- nelems = nelems_;
+ uint64 size = size_;
pthread_mutex_unlock(&data_mutex_);
- return nelems;
+ return size;
}
void DiskBlockTable::InsertOnStructure(BlockData *block) {
- int64 address = block->GetAddress();
+ int64 address = block->address();
StorageData *sd = new StorageData();
sd->block = block;
- sd->pos = nelems_;
+ sd->pos = size_;
// Creating new block ...
pthread_mutex_lock(&data_mutex_);
- if (pos_to_addr_.size() <= nelems_) {
+ if (pos_to_addr_.size() <= size_) {
pos_to_addr_.insert(pos_to_addr_.end(), address);
} else {
- pos_to_addr_[nelems_] = address;
+ pos_to_addr_[size_] = address;
}
- addr_to_block_.insert(std::make_pair(address, sd));
- nelems_++;
+ addr_to_block_[address] = sd;
+ size_++;
pthread_cond_broadcast(&data_condition_);
pthread_mutex_unlock(&data_mutex_);
}
@@ -87,26 +99,28 @@ void DiskBlockTable::InsertOnStructure(BlockData *block) {
int DiskBlockTable::RemoveBlock(BlockData *block) {
// For write threads, check the reference counter and remove
// it from the structure.
- int64 address = block->GetAddress();
+ int64 address = block->address();
AddrToBlockMap::iterator it = addr_to_block_.find(address);
int ret = 1;
if (it != addr_to_block_.end()) {
int curr_pos = it->second->pos;
- int last_pos = nelems_ - 1;
+ int last_pos = size_ - 1;
AddrToBlockMap::iterator last_it = addr_to_block_.find(
pos_to_addr_[last_pos]);
- sat_assert(nelems_ > 0);
+ sat_assert(size_ > 0);
sat_assert(last_it != addr_to_block_.end());
- // Everything is fine, updating ...
+ // Everything is fine, removing block from table.
pthread_mutex_lock(&data_mutex_);
pos_to_addr_[curr_pos] = pos_to_addr_[last_pos];
last_it->second->pos = curr_pos;
delete it->second;
addr_to_block_.erase(it);
- nelems_--;
+ size_--;
block->DecreaseReferenceCounter();
if (block->GetReferenceCounter() == 0)
delete block;
+ else if (block->GetReferenceCounter() < 0)
+ ret = 0;
pthread_cond_broadcast(&data_condition_);
pthread_mutex_unlock(&data_mutex_);
} else {
@@ -116,18 +130,16 @@ int DiskBlockTable::RemoveBlock(BlockData *block) {
}
int DiskBlockTable::ReleaseBlock(BlockData *block) {
- // If is a random thread, just check the reference counter.
+ // If caller is a random thread, just check the reference counter.
int ret = 1;
pthread_mutex_lock(&data_mutex_);
int references = block->GetReferenceCounter();
- if (references > 0) {
- if (references == 1)
- delete block;
- else
- block->DecreaseReferenceCounter();
- } else {
+ if (references == 1)
+ delete block;
+ else if (references > 0)
+ block->DecreaseReferenceCounter();
+ else
ret = 0;
- }
pthread_mutex_unlock(&data_mutex_);
return ret;
}
@@ -135,13 +147,13 @@ int DiskBlockTable::ReleaseBlock(BlockData *block) {
BlockData *DiskBlockTable::GetRandomBlock() {
struct timespec ts;
struct timeval tp;
- int result = 0;
gettimeofday(&tp, NULL);
ts.tv_sec = tp.tv_sec;
ts.tv_nsec = tp.tv_usec * 1000;
ts.tv_sec += 2; // Wait for 2 seconds.
+ int result = 0;
pthread_mutex_lock(&data_mutex_);
- while (!nelems_ && result != ETIMEDOUT) {
+ while (!size_ && result != ETIMEDOUT) {
result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts);
}
if (result == ETIMEDOUT) {
@@ -149,13 +161,13 @@ BlockData *DiskBlockTable::GetRandomBlock() {
return NULL;
} else {
int64 random_number = Random64();
- int64 random_pos = random_number % nelems_;
+ int64 random_pos = random_number % size_;
int64 address = pos_to_addr_[random_pos];
AddrToBlockMap::const_iterator it = addr_to_block_.find(address);
sat_assert(it != addr_to_block_.end());
BlockData *b = it->second->block;
// A block is returned only if its content is written on disk.
- if (b->BlockIsInitialized()) {
+ if (b->initialized()) {
b->IncreaseReferenceCounter();
} else {
b = NULL;
@@ -165,45 +177,38 @@ BlockData *DiskBlockTable::GetRandomBlock() {
}
}
-void DiskBlockTable::SetParameters(
- int sector_size, int write_block_size, int64 device_sectors,
- int64 segment_size, string device_name) {
+void DiskBlockTable::SetParameters(int sector_size,
+ int write_block_size,
+ int64 device_sectors,
+ int64 segment_size,
+ const string& device_name) {
+ sat_assert(size_ == 0);
pthread_mutex_lock(&parameter_mutex_);
sector_size_ = sector_size;
write_block_size_ = write_block_size;
device_sectors_ = device_sectors;
segment_size_ = segment_size;
device_name_ = device_name;
- CleanTable();
pthread_mutex_unlock(&parameter_mutex_);
}
BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
int64 sector = 0;
BlockData *block = new BlockData();
-
bool good_sequence = false;
- int num_sectors;
-
if (block == NULL) {
logprintf(0, "Process Error: Unable to allocate memory "
"for sector data for disk %s.\n", device_name_.c_str());
return NULL;
}
-
pthread_mutex_lock(&parameter_mutex_);
-
sat_assert(device_sectors_ != 0);
-
// Align the first sector with the beginning of a write block
- num_sectors = write_block_size_ / sector_size_;
-
+ int num_sectors = write_block_size_ / sector_size_;
for (int i = 0; i < kBlockRetry && !good_sequence; i++) {
good_sequence = true;
-
// Use the entire disk or a small segment of the disk to allocate the first
// sector in the block from.
-
if (segment_size_ == -1) {
sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % (
device_sectors_ / num_sectors);
@@ -213,7 +218,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
segment_size_ / num_sectors);
sector *= num_sectors;
sector += segment * segment_size_;
-
// Make sure the block is within the segment.
if (sector + num_sectors > (segment + 1) * segment_size_) {
good_sequence = false;
@@ -229,7 +233,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
// now aligned to the write_block_size, it is not necessary
// to check each sector, just the first block (a sector
// overlap will never occur).
-
pthread_mutex_lock(&data_mutex_);
if (addr_to_block_.find(sector) != addr_to_block_.end()) {
good_sequence = false;
@@ -238,7 +241,8 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
}
if (good_sequence) {
- block->SetParameters(sector, write_block_size_);
+ block->set_address(sector);
+ block->set_size(write_block_size_);
block->IncreaseReferenceCounter();
InsertOnStructure(block);
} else {
@@ -248,66 +252,5 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) {
block = NULL;
}
pthread_mutex_unlock(&parameter_mutex_);
-
return block;
}
-
-// BlockData
-
-BlockData::BlockData() {
- addr_ = 0;
- size_ = 0;
- references_ = 0;
- initialized_ = false;
- pthread_mutex_init(&data_mutex_, NULL);
-}
-
-BlockData::~BlockData() {
- pthread_mutex_destroy(&data_mutex_);
-}
-
-void BlockData::SetParameters(int64 address, int64 size) {
- addr_ = address;
- size_ = size;
-}
-
-void BlockData::IncreaseReferenceCounter() {
- references_++;
-}
-
-void BlockData::DecreaseReferenceCounter() {
- references_--;
-}
-
-int BlockData::GetReferenceCounter() {
- return references_;
-}
-
-void BlockData::SetBlockAsInitialized() {
- pthread_mutex_lock(&data_mutex_);
- initialized_ = true;
- pthread_mutex_unlock(&data_mutex_);
-}
-
-bool BlockData::BlockIsInitialized() {
- pthread_mutex_lock(&data_mutex_);
- bool initialized = initialized_;
- pthread_mutex_unlock(&data_mutex_);
- return initialized;
-}
-
-int64 BlockData::GetAddress() {
- return addr_;
-}
-
-int64 BlockData::GetSize() {
- return size_;
-}
-
-Pattern *BlockData::GetPattern() {
- return pattern_;
-}
-
-void BlockData::SetPattern(Pattern *p) {
- pattern_ = p;
-}
diff --git a/src/disk_blocks.h b/src/disk_blocks.h
index cb634c9..638ee9f 100644
--- a/src/disk_blocks.h
+++ b/src/disk_blocks.h
@@ -25,87 +25,146 @@
#include <map>
#include <vector>
#include <string>
-// This file must work with autoconf on its public version,
-// so these includes are correct.
-#include "pattern.h"
+
+#include "sattypes.h"
+
+class Pattern;
// Data about a block written to disk so that it can be verified later.
+// Thread-unsafe, must be used with locks on non-const methods,
+// except for the initialized accessor/mutator, which are thread-safe
+// (and in fact, are the only methods supposed to be called by
+// anything other than the thread-safe DiskBlockTable).
class BlockData {
public:
BlockData();
~BlockData();
- void SetParameters(int64 address, int64 size);
- void IncreaseReferenceCounter();
- void DecreaseReferenceCounter();
- int GetReferenceCounter();
- void SetBlockAsInitialized();
- bool BlockIsInitialized();
- int64 GetAddress();
- int64 GetSize();
- void SetPattern(Pattern *p);
- Pattern *GetPattern();
- protected:
- int64 addr_; // address of first sector in block
- int64 size_; // size of block
- int references_; // reference counter
- bool initialized_; // flag indicating the block was written on disk
+
+ // These are reference counters used to control how many
+ // threads currently have a copy of this particular block.
+ void IncreaseReferenceCounter() { references_++; }
+ void DecreaseReferenceCounter() { references_--; }
+ int GetReferenceCounter() const { return references_; }
+
+ // Controls whether the block was written on disk or not.
+ // Once written, a block cannot be "un-written" without destroying
+ // this object.
+ void set_initialized();
+ bool initialized() const;
+
+ // Accessor methods for some data related to blocks.
+ void set_address(uint64 address) { address_ = address; }
+ uint64 address() const { return address_; }
+ void set_size(uint64 size) { size_ = size; }
+ uint64 size() const { return size_; }
+ void set_pattern(Pattern *p) { pattern_ = p; }
+ Pattern *pattern() { return pattern_; }
+ private:
+ uint64 address_; // Address of first sector in block
+ uint64 size_; // Size of block
+ int references_; // Reference counter
+ bool initialized_; // Flag indicating the block was written on disk
Pattern *pattern_;
- pthread_mutex_t data_mutex_;
+ mutable pthread_mutex_t data_mutex_;
DISALLOW_COPY_AND_ASSIGN(BlockData);
};
-// Disk Block table - store data from blocks to be write / read by
-// a DiskThread
+// A thread-safe table used to store block data and control access
+// to these blocks, letting several threads read and write blocks on
+// disk.
class DiskBlockTable {
public:
DiskBlockTable();
virtual ~DiskBlockTable();
- // Get Number of elements stored on table
- int64 NumElems();
- // Clean all table data
- void CleanTable();
- // Get a random block from the list. Only returns if a element
- // is available (consider that other thread must have added them.
- BlockData *GetRandomBlock();
- // Set all initial parameters. Assumes all existent data is
+ // Returns number of elements stored on table.
+ uint64 Size();
+
+ // Sets all initial parameters. Assumes all existent data is
// invalid and, therefore, must be removed.
void SetParameters(int sector_size, int write_block_size,
int64 device_sectors,
int64 segment_size,
- string device_name);
- // Return a new block in a unused address.
+ const string& device_name);
+
+ // During the regular execution, there will be 2 types of threads:
+ // - Write thread: gets a large number of blocks using GetUnusedBlock,
+ // writes them on disk (if on destructive mode),
+ // reads block content ONCE from disk and then removes
+ // the block from queue with RemoveBlock. After a removal a
+ // block is not available for read threads, but it is
+ // only removed from memory if there is no reference for
+ // this block. Note that a write thread also counts as
+ // a reference.
+ // - Read threads: get one block at a time (if available) with
+ // GetRandomBlock, reads its content from disk,
+ // checking whether it is correct or not, and releases
+ // (Using ReleaseBlock) the block to be erased by the
+ // write threads. Since several read threads are allowed
+ // to read the same block, a reference counter is used to
+ // control when the block can be REALLY erased from
+ // memory, and all memory management is made by a
+ // DiskBlockTable instance.
+
+ // Returns a new block in an unused address. Does not
+ // grant ownership of the pointer to the caller
+ // (use RemoveBlock to delete the block from memory instead).
BlockData *GetUnusedBlock(int64 segment);
- // Remove block from structure (called by write threads)
+
+ // Removes block from structure (called by write threads). Returns
+ // 1 if successful, 0 otherwise.
int RemoveBlock(BlockData *block);
- // Release block to be erased (called by random threads)
- int ReleaseBlock(BlockData *block);
- protected:
+ // Gets a random block from the list. Only returns if an element
+ // is available (a write thread has got this block, written it on disk,
+ // and set this block as initialized). Does not grant ownership of the
+ // pointer to the caller (use RemoveBlock to delete the block from
+ // memory instead).
+ BlockData *GetRandomBlock();
- void InsertOnStructure(BlockData *block);
- // Generate a random 64-bit integer (virtual so it could be
- // override by the tests)
- virtual int64 Random64();
+ // Releases block to be erased (called by random threads). Returns
+ // 1 if successful, 0 otherwise.
+ int ReleaseBlock(BlockData *block);
+ protected:
struct StorageData {
BlockData *block;
int pos;
};
-
- static const int kBlockRetry = 100; // Number of retries to allocate
- // sectors.
-
typedef map<int64, StorageData*> AddrToBlockMap;
typedef vector<int64> PosToAddrVector;
+
+ // Inserts block in structure, used in tests and by other methods.
+ void InsertOnStructure(BlockData *block);
+
+ // Generates a random 64-bit integer.
+ // Virtual method so it can be overridden by the tests.
+ virtual int64 Random64();
+
+ // Accessor methods for testing.
+ const PosToAddrVector& pos_to_addr() const { return pos_to_addr_; }
+ const AddrToBlockMap& addr_to_block() const { return addr_to_block_; }
+
+ int sector_size() const { return sector_size_; }
+ int write_block_size() const { return write_block_size_; }
+ const string& device_name() const { return device_name_; }
+ int64 device_sectors() const { return device_sectors_; }
+ int64 segment_size() const { return segment_size_; }
+
+ private:
+ // Number of retries to allocate sectors.
+ static const int kBlockRetry = 100;
+ // Actual tables.
PosToAddrVector pos_to_addr_;
AddrToBlockMap addr_to_block_;
- uint64 nelems_;
- int sector_size_; // Sector size, in bytes
- int write_block_size_; // Block size, in bytes
- string device_name_; // Device name
- int64 device_sectors_; // Number of sectors in device
- int64 segment_size_; // Segment size, in bytes
+
+ // Configuration parameters for block selection
+ int sector_size_; // Sector size, in bytes
+ int write_block_size_; // Block size, in bytes
+ string device_name_; // Device name
+ int64 device_sectors_; // Number of sectors in device
+ int64 segment_size_; // Segment size in bytes
+ uint64 size_; // Number of elements on table
pthread_mutex_t data_mutex_;
pthread_cond_t data_condition_;
pthread_mutex_t parameter_mutex_;
diff --git a/src/findmask.c b/src/findmask.c
new file mode 100644
index 0000000..1b10988
--- /dev/null
+++ b/src/findmask.c
@@ -0,0 +1,140 @@
+/* Copyright 2013 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ * This "tool" can be used to brute force the XOR bitmask that a memory
+ * controller uses to interleave addresses onto its two channels. To use it,
+ * you need to have a bunch of addresses that are known to go to only one
+ * of the memory channels... easiest way to get these is to run stressapptest on
+ * a machine while holding a soldering iron close to the chips of one channel.
+ * Generate about a thousand failures and extract their physical addresses
+ * from the output. Write them to findmask.inc in a way that forms a valid
+ * definition for the addrs array. Make and run on a big machine.
+ *
+ * The program iterates over all possible bitmasks within the first NUM_BITS,
+ * parallelizing execution over NUM_THREADS. Every integer is masked
+ * onto all supplied addresses, counting the number of times this results in
+ * an odd or even number of bits. If all but NOISE addresses fall on one side,
+ * it will print that mask to stdout. Note that the program will always "find"
+ * the mask 0x0, and may also report masks such as 0x100000000 depending on
+ * your test machine's memory size... you will need to use your own judgement to
+ * interpret the results.
+ *
+ * As the program might run for a long time, you can send SIGUSR1 to it to
+ * output the last mask that was processed and get a rough idea of the
+ * current progress.
+ */
+
+#include <inttypes.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NOISE 20
+#define NUM_BITS 32
+#define NUM_THREADS 128 // keep this a power of two
+
+static uint64_t addrs[] = {
+#include "findmask.inc"
+};
+static uint64_t lastmask;
+
+__attribute__((optimize(3, "unroll-loops")))
+void* thread_func(void* arg) {
+ register uint64_t mask;
+ register uintptr_t num = (uintptr_t)arg;
+
+ for (mask = num; mask < (1ULL << (NUM_BITS + 1)); mask += NUM_THREADS) {
+ register const uint64_t* cur;
+ register int a = 0;
+ register int b = 0;
+
+ for (cur = addrs; (char*)cur < (char*)addrs + sizeof(addrs); cur++) {
+#ifdef __x86_64__
+ register uint64_t addr asm("rdx") = *cur & mask;
+ register uint32_t tmp asm("ebx");
+
+ // Behold: the dark bit counting magic!
+ asm (
+ // Fold high and low 32 bits onto each other
+ "MOVl %%edx, %%ebx\n\t"
+ "SHRq $32, %%rdx\n\t"
+ "XORl %%ebx, %%edx\n\t"
+ // Fold high and low 16 bits onto each other
+ "MOVl %%edx, %%ebx\n\t"
+ "SHRl $16, %%edx\n\t"
+ "XORw %%bx, %%dx\n\t"
+ // Fold high and low 8 bits onto each other
+ "XORb %%dh, %%dl\n\t"
+ // Invoke ancient 8086 parity flag (only counts lowest byte)
+ "SETnp %%bl\n\t"
+ "SETp %%dl\n\t"
+ // Stupid SET instruction can only affect the lowest byte...
+ "ANDl $1, %%ebx\n\t"
+ "ANDl $1, %%edx\n\t"
+ // Increment either 'a' or 'b' without needing another branch
+ "ADDl %%ebx, %2\n\t"
+ "ADDl %%edx, %1\n\t"
+ : "=b" (tmp), "+r"(a), "+r"(b) : "d"(addr) : "cc");
+
+#else // generic processor
+ register uint64_t addr = *cur & mask;
+ register uint32_t low = (uint32_t)addr;
+ register uint32_t high = (uint32_t)(addr >> 32);
+
+ // Takes about twice as long as the version above... take that GCC!
+ __builtin_parity(low) ^ __builtin_parity(high) ? a++ : b++;
+#endif
+
+ // Early abort: probably still the most valuable optimization in here
+ if (a >= NOISE && b >= NOISE) break;
+ }
+
+ if (a < NOISE) b = a;
+ if (b < NOISE) {
+ printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask);
+ fflush(stdout);
+ }
+
+ // I'm a little paranoid about performance: don't write to memory too often
+ if (!(mask & 0x7ff)) lastmask = mask;
+ }
+
+ return 0;
+}
+
+void signal_handler(int signum) {
+ printf("Received signal... currently evaluating mask 0x%" PRIx64 "!\n",
+ lastmask);
+ fflush(stdout);
+}
+
+int main(int argc, char** argv) {
+ uintptr_t i;
+ pthread_t threads[NUM_THREADS];
+
+ signal(SIGUSR1, signal_handler);
+
+ for (i = 0; i < NUM_THREADS; i++)
+ pthread_create(&threads[i], 0, thread_func, (void*)i);
+
+ for (i = 0; i < NUM_THREADS; i++)
+ pthread_join(threads[i], 0);
+
+ return 0;
+}
diff --git a/src/findmask.inc b/src/findmask.inc
new file mode 100644
index 0000000..e76f72f
--- /dev/null
+++ b/src/findmask.inc
@@ -0,0 +1,4 @@
+// This is the body of a uint64_t array definition. Fill in your own addresses.
+0x116bb312c, // example values (can be >32 bit)
+0x38d3c5ad, // replace with your own
+0x77c1e96d // don't forget: no comma after the last one
diff --git a/src/logger.cc b/src/logger.cc
index e4ecb03..f13e003 100644
--- a/src/logger.cc
+++ b/src/logger.cc
@@ -17,6 +17,7 @@
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
+#include <time.h>
#include <unistd.h>
#include <string>
@@ -37,10 +38,20 @@ void Logger::VLogF(int priority, const char *format, va_list args) {
return;
}
char buffer[4096];
- int length = vsnprintf(buffer, sizeof buffer, format, args);
- if (static_cast<size_t>(length) >= sizeof buffer) {
- length = sizeof buffer;
- buffer[sizeof buffer - 1] = '\n';
+ size_t length = 0;
+ if (log_timestamps_) {
+ time_t raw_time;
+ time(&raw_time);
+ struct tm time_struct;
+ localtime_r(&raw_time, &time_struct);
+ length = strftime(buffer, sizeof(buffer), "%Y/%m/%d-%H:%M:%S(%Z) ",
+ &time_struct);
+ LOGGER_ASSERT(length); // Catch if the buffer is set too small.
+ }
+ length += vsnprintf(buffer + length, sizeof(buffer) - length, format, args);
+ if (length >= sizeof(buffer)) {
+ length = sizeof(buffer);
+ buffer[sizeof(buffer) - 1] = '\n';
}
QueueLogLine(new string(buffer, length));
}
@@ -52,19 +63,30 @@ void Logger::StartThread() {
}
void Logger::StopThread() {
- LOGGER_ASSERT(thread_running_);
+ // Allow this to be called before the thread has started.
+ if (!thread_running_) {
+ return;
+ }
thread_running_ = false;
- LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_));
+ int retval = pthread_mutex_lock(&queued_lines_mutex_);
+ LOGGER_ASSERT(0 == retval);
bool need_cond_signal = queued_lines_.empty();
queued_lines_.push_back(NULL);
- LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_));
+ retval = pthread_mutex_unlock(&queued_lines_mutex_);
+ LOGGER_ASSERT(0 == retval);
if (need_cond_signal) {
- LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_));
+ retval = pthread_cond_signal(&queued_lines_cond_);
+ LOGGER_ASSERT(0 == retval);
}
- LOGGER_ASSERT(0 == pthread_join(thread_, NULL));
+ retval = pthread_join(thread_, NULL);
+ LOGGER_ASSERT(0 == retval);
}
-Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) {
+Logger::Logger()
+ : verbosity_(20),
+ log_fd_(-1),
+ thread_running_(false),
+ log_timestamps_(true) {
LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL));
LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL));
LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL));
@@ -94,19 +116,15 @@ void Logger::QueueLogLine(string *line) {
LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_));
}
-namespace {
-void WriteToFile(const string& line, int fd) {
- LOGGER_ASSERT(write(fd, line.data(), line.size()) ==
- static_cast<ssize_t>(line.size()));
-}
-}
-
void Logger::WriteAndDeleteLogLine(string *line) {
LOGGER_ASSERT(line != NULL);
+ ssize_t bytes_written;
if (log_fd_ >= 0) {
- WriteToFile(*line, log_fd_);
+ bytes_written = write(log_fd_, line->data(), line->size());
+ LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size()));
}
- WriteToFile(*line, 1);
+ bytes_written = write(STDOUT_FILENO, line->data(), line->size());
+ LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size()));
delete line;
}
diff --git a/src/logger.h b/src/logger.h
index 1d70107..21b3c6b 100644
--- a/src/logger.h
+++ b/src/logger.h
@@ -62,7 +62,7 @@ class Logger {
// Lines with a priority numerically greater than this will not be logged.
// May not be called while multiple threads are running.
- void SetVerbosity(int verbosity) {
+ virtual void SetVerbosity(int verbosity) {
verbosity_ = verbosity;
}
@@ -72,17 +72,22 @@ class Logger {
// Args:
// log_fd: The file descriptor to write to. Will not be closed by this
// object.
- void SetLogFd(int log_fd) {
+ virtual void SetLogFd(int log_fd) {
LOGGER_ASSERT(log_fd >= 0);
log_fd_ = log_fd;
}
// Set output to be written to stdout only. This is the default mode. May
// not be called while multiple threads are running.
- void SetStdoutOnly() {
+ virtual void SetStdoutOnly() {
log_fd_ = -1;
}
+ // Enable or disable logging of timestamps.
+ void SetTimestampLogging(bool log_ts_enabled) {
+ log_timestamps_ = log_ts_enabled;
+ }
+
// Logs a line, with a vprintf(3)-like interface. This will block on writing
// the line to stdout/disk iff the dedicated logging thread is not running.
// This will block on adding the line to the queue if doing so would exceed
@@ -104,11 +109,12 @@ class Logger {
// before this returns. Waits for the thread to finish before returning.
void StopThread();
- private:
+ protected:
Logger();
- ~Logger();
+ virtual ~Logger();
+ private:
// Args:
// line: Must be non-NULL. This function takes ownership of it.
void QueueLogLine(string *line);
@@ -127,6 +133,7 @@ class Logger {
int verbosity_;
int log_fd_;
bool thread_running_;
+ bool log_timestamps_;
vector<string*> queued_lines_;
// This doubles as a mutex for log_fd_ when the logging thread is not running.
pthread_mutex_t queued_lines_mutex_;
diff --git a/src/os.cc b/src/os.cc
index 8032cfc..7c4e3d1 100644
--- a/src/os.cc
+++ b/src/os.cc
@@ -48,6 +48,7 @@
// so these includes are correct.
#include "sattypes.h"
#include "error_diag.h"
+#include "clock.h"
// OsLayer initialization.
OsLayer::OsLayer() {
@@ -55,10 +56,12 @@ OsLayer::OsLayer() {
testmemsize_ = 0;
totalmemsize_ = 0;
min_hugepages_bytes_ = 0;
+ reserve_mb_ = 0;
normal_mem_ = true;
use_hugepages_ = false;
use_posix_shm_ = false;
dynamic_mapped_shmem_ = false;
+ mmapped_allocation_ = false;
shmid_ = 0;
time_initialized_ = 0;
@@ -76,20 +79,28 @@ OsLayer::OsLayer() {
address_mode_ = sizeof(pvoid) * 8;
has_clflush_ = false;
- has_sse2_ = false;
+ has_vector_ = false;
use_flush_page_cache_ = false;
+
+ clock_ = NULL;
}
// OsLayer cleanup.
OsLayer::~OsLayer() {
if (error_diagnoser_)
delete error_diagnoser_;
+ if (clock_)
+ delete clock_;
}
// OsLayer initialization.
bool OsLayer::Initialize() {
- time_initialized_ = time(NULL);
+ if (!clock_) {
+ clock_ = new Clock();
+ }
+
+ time_initialized_ = clock_->Now();
// Detect asm support.
GetFeatures();
@@ -129,8 +140,28 @@ int OsLayer::AddressMode() {
// Translates user virtual to physical address.
uint64 OsLayer::VirtualToPhysical(void *vaddr) {
- // Needs platform specific implementation.
- return 0;
+ // Reads the 64-bit /proc/self/pagemap entry for the page containing vaddr.
+ // Returns the physical address, or 0 if the pagemap cannot be read, the
+ // page is not present in RAM (bit 63 clear) or it is swapped (bit 62 set).
+ const uint64 kPresent = 1ULL << 63;
+ const uint64 kSwapped = 1ULL << 62;
+ const uint64 kPfnMask = 0x007fffffffffffffULL; // Bits 0-54: frame number.
+ const uint64 pagesize = sysconf(_SC_PAGESIZE);
+ uint64 frame = 0;
+ // The pagemap holds one 8-byte entry per virtual page.
+ off64_t off = ((uintptr_t)vaddr) / pagesize * 8;
+ int fd = open(kPagemapPath, O_RDONLY);
+ // /proc/self/pagemap is available in kernel >= 2.6.25
+ if (fd < 0)
+ return 0;
+
+ if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
+ int err = errno;
+ string errtxt = ErrorString(err);
+ logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
+ kPagemapPath, err, errtxt.c_str());
+ close(fd);
+ return 0;
+ }
+ close(fd);
+ if (!(frame & kPresent) || (frame & kSwapped))
+ return 0;
+ // Bits 55-60 only carried the page shift on old kernels; newer kernels
+ // reuse them as flags (soft-dirty, exclusive mapping), so the in-page
+ // offset is derived from the system page size instead of those bits.
+ return (frame & kPfnMask) * pagesize + ((uintptr_t)vaddr % pagesize);
}
// Returns the HD device that contains this file.
@@ -149,21 +180,21 @@ list<string> OsLayer::FindFileDevices() {
// Get HW core features from cpuid instruction.
void OsLayer::GetFeatures() {
#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
- // CPUID features documented at:
- // http://www.sandpile.org/ia32/cpuid.htm
- int ax, bx, cx, dx;
- __asm__ __volatile__ (
- "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1));
- has_clflush_ = (dx >> 19) & 1;
- has_sse2_ = (dx >> 26) & 1;
+ unsigned int eax = 1, ebx, ecx, edx;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ has_clflush_ = (edx >> 19) & 1;
+ has_vector_ = (edx >> 26) & 1; // SSE2 caps bit.
logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
has_clflush_ ? "true" : "false",
- has_sse2_ ? "true" : "false");
+ has_vector_ ? "true" : "false");
#elif defined(STRESSAPPTEST_CPU_PPC)
// All PPC implementations have cache flush instructions.
has_clflush_ = true;
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv.
+ // For now assume neon and don't run -W if you don't have it.
+ has_vector_ = true; // NEON.
#warning "Unsupported CPU type ARMV7A: unable to determine feature set."
#else
#warning "Unsupported CPU type: unable to determine feature set."
@@ -215,8 +246,9 @@ bool OsLayer::FlushPageCache(void) {
void OsLayer::Flush(void *vaddr) {
// Use the generic flush. This function is just so we can override
// this if we are so inclined.
- if (has_clflush_)
- FastFlush(vaddr);
+ if (has_clflush_) {
+ OsLayer::FastFlush(vaddr);
+ }
}
@@ -224,7 +256,7 @@ void OsLayer::Flush(void *vaddr) {
bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
unsigned int size_in_bytes,
AdlerChecksum *checksum) {
- if (has_sse2_) {
+ if (has_vector_) {
return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
} else {
return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
@@ -232,12 +264,31 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
}
-// Translate user virtual to physical address.
+// Translate physical address to memory module/chip name.
+// Assumes interleaving between two memory channels based on the XOR of
+// all address bits in the 'channel_hash' mask, with repeated 'channel_width_'
+// blocks with bits distributed from each chip in that channel.
int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
- char tmpbuf[256];
- snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown");
- snprintf(buf, len, "%s", tmpbuf);
- return 0;
+ if (!channels_) {
+ snprintf(buf, len, "DIMM Unknown");
+ return -1;
+ }
+
+ // Find channel by XORing address bits in channel_hash mask.
+ uint32 low = static_cast<uint32>(addr & channel_hash_);
+ uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
+ vector<string>& channel = (*channels_)[
+ __builtin_parity(high) ^ __builtin_parity(low)];
+
+ // Find dram chip by finding which byte within the channel
+ // by address mod channel width, then divide the channel
+ // evenly among the listed dram chips. Note, this will not work
+ // with x4 dram.
+ int chip = (addr % (channel_width_ / 8)) /
+ ((channel_width_ / 8) / channel.size());
+ string name = channel[chip];
+ snprintf(buf, len, "%s", name.c_str());
+ return 1;
}
@@ -293,9 +344,17 @@ string OsLayer::FindCoreMaskFormat(int32 region) {
// Report an error in an easily parseable way.
bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
- time_t now = time(NULL);
+ time_t now = clock_->Now();
int ttf = now - time_initialized_;
- logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
+ if (strlen(symptom) && strlen(part)) {
+ logprintf(0, "Report Error: %s : %s : %d : %ds\n",
+ symptom, part, count, ttf);
+ } else {
+ // Log something so the error still shows up, but this won't break the
+ // parser.
+ logprintf(0, "Warning: Invalid Report Error: "
+ "%s : %s : %d : %ds\n", symptom, part, count, ttf);
+ }
return true;
}
@@ -359,12 +418,31 @@ int64 OsLayer::FindFreeMemSize() {
//
// TODO(nsanders): is there a more correct way to determine target
// memory size?
- if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
- minsize = min_hugepages_bytes_;
- } else if (physsize < 2048LL * kMegabyte) {
- minsize = ((pages * 85) / 100) * pagesize;
+ if (hugepagesize > 0) {
+ if (min_hugepages_bytes_ > 0) {
+ minsize = min_hugepages_bytes_;
+ } else {
+ minsize = hugepagesize;
+ }
} else {
- minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+ if (physsize < 2048LL * kMegabyte) {
+ minsize = ((pages * 85) / 100) * pagesize;
+ } else {
+ minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
+ }
+ // Make sure that at least reserve_mb_ is left for the system.
+ if (reserve_mb_ > 0) {
+ int64 totalsize = pages * pagesize;
+ int64 reserve_kb = reserve_mb_ * kMegabyte;
+ if (reserve_kb > totalsize) {
+ logprintf(0, "Procedural Error: %lld is bigger than the total memory "
+ "available %lld\n", reserve_kb, totalsize);
+ } else if (reserve_kb > totalsize - minsize) {
+ logprintf(5, "Warning: Overriding memory to use: original %lld, "
+ "current %lld\n", minsize, totalsize - reserve_kb);
+ minsize = totalsize - reserve_kb;
+ }
+ }
}
// Use hugepage sizing if available.
@@ -435,7 +513,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
"'sudo mount -o remount,size=100\% /dev/shm.'\n");
} else if (hugepagesize >= length) {
prefer_hugepages = true;
- logprintf(3, "Log: Prefer using hugepace allocation.\n");
+ logprintf(3, "Log: Prefer using hugepage allocation.\n");
} else {
logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
}
@@ -458,7 +536,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
break;
}
- shmaddr = shmat(shmid, NULL, NULL);
+ shmaddr = shmat(shmid, NULL, 0);
if (shmaddr == reinterpret_cast<void*>(-1)) {
int err = errno;
string errtxt = ErrorString(err);
@@ -515,7 +593,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
// Do a full mapping here otherwise.
shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
- shm_object, NULL);
+ shm_object, 0);
if (shmaddr == reinterpret_cast<void*>(-1)) {
int err = errno;
string errtxt = ErrorString(err);
@@ -540,18 +618,32 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
} while (0);
shm_unlink("/stressapptest");
}
-#endif // HAVE_SYS_SHM_H
+#endif // HAVE_SYS_SHM_H
if (!use_hugepages_ && !use_posix_shm_) {
- // Use memalign to ensure that blocks are aligned enough for disk direct IO.
- buf = static_cast<char*>(memalign(4096, length));
- if (buf) {
- logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
- } else {
- logprintf(0, "Process Error: memalign returned 0\n");
- if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
- logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
- "bit process. Please setup shared memory.\n");
+ // If the page size is what SAT is expecting explicitly perform mmap()
+ // allocation.
+ if (sysconf(_SC_PAGESIZE) >= 4096) {
+ void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (map_buf != MAP_FAILED) {
+ buf = map_buf;
+ mmapped_allocation_ = true;
+ logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
+ }
+ }
+ if (!mmapped_allocation_) {
+ // Use memalign to ensure that blocks are aligned enough for disk direct
+ // IO.
+ buf = static_cast<char*>(memalign(4096, length));
+ if (buf) {
+ logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
+ } else {
+ logprintf(0, "Process Error: memalign returned 0\n");
+ if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
+ logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
+ "bit process. Please setup shared memory.\n");
+ }
}
}
}
@@ -579,6 +671,8 @@ void OsLayer::FreeTestMem() {
munmap(testmem_, testmemsize_);
}
close(shmid_);
+ } else if (mmapped_allocation_) {
+ munmap(testmem_, testmemsize_);
} else {
free(testmem_);
}
@@ -800,7 +894,9 @@ uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
bool OsLayer::CpuStressWorkload() {
double float_arr[100];
double sum = 0;
+#ifdef HAVE_RAND_R
unsigned int seed = 12345;
+#endif
// Initialize array with random numbers.
for (int i = 0; i < 100; i++) {
@@ -809,8 +905,9 @@ bool OsLayer::CpuStressWorkload() {
if (rand_r(&seed) % 2)
float_arr[i] *= -1.0;
#else
- float_arr[i] = rand();
- if (rand() % 2)
+ // Seed only once per call: reseeding with the same second-resolution time
+ // on every iteration would restart the sequence and fill the whole array
+ // with one repeated value.
+ if (i == 0)
+ srand(time(NULL));
+ float_arr[i] = rand(); // NOLINT
+ if (rand() % 2) // NOLINT
float_arr[i] *= -1.0;
#endif
}
@@ -828,82 +925,3 @@ bool OsLayer::CpuStressWorkload() {
logprintf(12, "Log: I'm Feeling Lucky!\n");
return true;
}
-
-PCIDevices OsLayer::GetPCIDevices() {
- PCIDevices device_list;
- DIR *dir;
- struct dirent *buf = new struct dirent();
- struct dirent *entry;
- dir = opendir(kSysfsPath);
- if (!dir)
- logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
- while (readdir_r(dir, buf, &entry) == 0 && entry) {
- PCIDevice *device;
- unsigned int dev, func;
- // ".", ".." or a special non-device perhaps.
- if (entry->d_name[0] == '.')
- continue;
-
- device = new PCIDevice();
- if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
- &device->domain, &device->bus, &dev, &func) < 4) {
- logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
- free(device);
- continue;
- }
- device->dev = dev;
- device->func = func;
- device->vendor_id = PCIGetValue(entry->d_name, "vendor");
- device->device_id = PCIGetValue(entry->d_name, "device");
- PCIGetResources(entry->d_name, device);
- device_list.insert(device_list.end(), device);
- }
- closedir(dir);
- delete buf;
- return device_list;
-}
-
-int OsLayer::PCIGetValue(string name, string object) {
- int fd, len;
- char filename[256];
- char buf[256];
- snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
- name.c_str(), object.c_str());
- fd = open(filename, O_RDONLY);
- if (fd < 0)
- return 0;
- len = read(fd, buf, 256);
- close(fd);
- buf[len] = '\0';
- return strtol(buf, NULL, 0); // NOLINT
-}
-
-int OsLayer::PCIGetResources(string name, PCIDevice *device) {
- char filename[256];
- char buf[256];
- FILE *file;
- int64 start;
- int64 end;
- int64 size;
- int i;
- snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
- name.c_str(), "resource");
- file = fopen(filename, "r");
- if (!file) {
- logprintf(0, "Process Error: impossible to find resource file for %s",
- filename);
- return errno;
- }
- for (i = 0; i < 6; i++) {
- if (!fgets(buf, 256, file))
- break;
- sscanf(buf, "%llx %llx", &start, &end); // NOLINT
- size = 0;
- if (start)
- size = end - start + 1;
- device->base_addr[i] = start;
- device->size[i] = size;
- }
- fclose(file);
- return 0;
-}
diff --git a/src/os.h b/src/os.h
index b043b61..0812f1a 100644
--- a/src/os.h
+++ b/src/os.h
@@ -17,6 +17,9 @@
#define STRESSAPPTEST_OS_H_
#include <dirent.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
#include <string>
#include <list>
#include <map>
@@ -26,8 +29,9 @@
// so these includes are correct.
#include "adler32memcpy.h" // NOLINT
#include "sattypes.h" // NOLINT
+#include "clock.h" // NOLINT
-const char kSysfsPath[] = "/sys/bus/pci/devices";
+const char kPagemapPath[] = "/proc/self/pagemap";
struct PCIDevice {
int32 domain;
@@ -44,6 +48,8 @@ typedef vector<PCIDevice*> PCIDevices;
class ErrorDiag;
+class Clock;
+
// This class implements OS/Platform specific funtions.
class OsLayer {
public:
@@ -56,6 +62,21 @@ class OsLayer {
min_hugepages_bytes_ = min_bytes;
}
+ // Set the minimum amount of memory that should not be allocated. This only
+ // has any effect if hugepages are not used.
+ // Must be set before Initialize().
+ void SetReserveSize(int64 reserve_mb) {
+ reserve_mb_ = reserve_mb;
+ }
+
+ // Set parameters needed to translate physical address to memory module.
+ void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
+ vector< vector<string> > *channels) {
+ channel_hash_ = channel_hash;
+ channel_width_ = channel_width;
+ channels_ = channels;
+ }
+
// Initializes data strctures and open files.
// Returns false on error.
virtual bool Initialize();
@@ -68,13 +89,11 @@ class OsLayer {
// Prints failed dimm. This implementation is optional for
// subclasses to implement.
// Takes a bus address and string, and prints the DIMM name
- // into the string. Returns error status.
+ // into the string. Returns the DIMM number that corresponds to the
+ // address given, or -1 if unable to identify the DIMM number.
+ // Note that subclass implementations of FindDimm() MUST fill
+ // buf with at LEAST one non-whitespace character (provided len > 0).
virtual int FindDimm(uint64 addr, char *buf, int len);
- // Print dimm info, plus more available info.
- virtual int FindDimmExtended(uint64 addr, char *buf, int len) {
- return FindDimm(addr, buf, len);
- }
-
// Classifies addresses according to "regions"
// This may mean different things on different platforms.
@@ -132,10 +151,94 @@ class OsLayer {
// instruction. For example, software can use an MFENCE instruction to
// insure that previous stores are included in the write-back.
asm volatile("mfence");
- asm volatile("clflush (%0)" :: "r" (vaddr));
+ asm volatile("clflush (%0)" : : "r" (vaddr));
+ asm volatile("mfence");
+#elif defined(STRESSAPPTEST_CPU_ARMV7A) && !defined(__aarch64__)
+ // ARMv7a cachelines are 8 words (32 bytes).
+ syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
+#else
+ #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+ }
+
+ // Fast flush, for use in performance critical code.
+ // This is bound at compile time, and will not pick up
+ // any runtime machine configuration info. Takes a NULL-terminated
+ // array of addresses to flush.
+ inline static void FastFlushList(void **vaddrs) {
+#ifdef STRESSAPPTEST_CPU_PPC
+ while (*vaddrs) {
+ asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
+ }
+ asm volatile("sync");
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+ // Put mfence before and after clflush to make sure:
+ // 1. The write before the clflush is committed to memory bus;
+ // 2. The read after the clflush is hitting the memory bus.
+ //
+ // From Intel manual:
+ // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+ // to be ordered by any other fencing, serializing or other CLFLUSH
+ // instruction. For example, software can use an MFENCE instruction to
+ // insure that previous stores are included in the write-back.
+ asm volatile("mfence");
+ while (*vaddrs) {
+ asm volatile("clflush (%0)" : : "r" (*vaddrs++));
+ }
asm volatile("mfence");
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
- #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes."
+ while (*vaddrs) {
+ FastFlush(*vaddrs++);
+ }
+#else
+ #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+ }
+
+ // Fast flush hint, for use in performance critical code.
+ // This is bound at compile time, and will not pick up
+ // any runtime machine configuration info. Note that this
+ // will not guarantee that a flush happens, but will at least
+ // hint that it should. This is useful for speeding up
+ // parallel march algorithms.
+ inline static void FastFlushHint(void *vaddr) {
+#ifdef STRESSAPPTEST_CPU_PPC
+ asm volatile("dcbf 0,%0" : : "r" (vaddr));
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+ // From Intel manual:
+ // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+ // to be ordered by any other fencing, serializing or other CLFLUSH
+ // instruction. For example, software can use an MFENCE instruction to
+ // insure that previous stores are included in the write-back.
+ asm volatile("clflush (%0)" : : "r" (vaddr));
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ FastFlush(vaddr);
+#else
+ #warning "Unsupported CPU type: Unable to force cache flushes."
+#endif
+ }
+
+ // Fast flush, for use in performance critical code.
+ // This is bound at compile time, and will not pick up
+ // any runtime machine configuration info. Sync's any
+ // transactions for ordering FastFlushHints.
+ inline static void FastFlushSync() {
+#ifdef STRESSAPPTEST_CPU_PPC
+ asm volatile("sync");
+#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+ // Put mfence before and after clflush to make sure:
+ // 1. The write before the clflush is committed to memory bus;
+ // 2. The read after the clflush is hitting the memory bus.
+ //
+ // From Intel manual:
+ // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
+ // to be ordered by any other fencing, serializing or other CLFLUSH
+ // instruction. For example, software can use an MFENCE instruction to
+ // insure that previous stores are included in the write-back.
+ asm volatile("mfence");
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ // This is a NOP, FastFlushHint() always does a full flush, so there's
+ // nothing to do for FastFlushSync().
#else
#warning "Unsupported CPU type: Unable to force cache flushes."
#endif
@@ -164,10 +267,10 @@ class OsLayer {
__asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h));
tsc = data.l64;
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
- #warning "Unsupported CPU type ARMV7A: your build may not function correctly"
+ #warning "Unsupported CPU type ARMV7A: your timer may not function correctly"
tsc = 0;
#else
- #warning "Unsupported CPU type: your build may not function correctly"
+ #warning "Unsupported CPU type: your timer may not function correctly"
tsc = 0;
#endif
return (tsc);
@@ -230,9 +333,6 @@ class OsLayer {
// Handle to platform-specific error diagnoser.
ErrorDiag *error_diagnoser_;
- // Detect all PCI Devices.
- virtual PCIDevices GetPCIDevices();
-
// Disambiguate between different "warm" memcopies.
virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
unsigned int size_in_bytes,
@@ -249,17 +349,31 @@ class OsLayer {
}
ErrCallback get_err_log_callback() { return err_log_callback_; }
+ // Set a clock object that can be overridden for use with unit tests.
+ // The previously installed clock, if any, is deleted. When a non-NULL clock
+ // is installed, time_initialized_ is re-sampled from it.
+ void SetClock(Clock *clock) {
+ // Re-installing the current clock must not free it before it is used.
+ if (clock_ != clock) {
+ delete clock_;
+ clock_ = clock;
+ }
+ // With a NULL clock there is nothing to sample; Initialize() creates a
+ // default Clock when clock_ is unset.
+ if (clock_) {
+ time_initialized_ = clock_->Now();
+ }
+ }
+
protected:
void *testmem_; // Location of test memory.
uint64 testmemsize_; // Size of test memory.
int64 totalmemsize_; // Size of available memory.
int64 min_hugepages_bytes_; // Minimum hugepages size.
+ int64 reserve_mb_; // Minimum amount of memory to reserve in MB.
bool error_injection_; // Do error injection?
bool normal_mem_; // Memory DMA capable?
bool use_hugepages_; // Use hugepage shmem?
bool use_posix_shm_; // Use 4k page shmem?
bool dynamic_mapped_shmem_; // Conserve virtual address space.
+ bool mmapped_allocation_; // Was memory allocated using mmap()?
int shmid_; // Handle to shmem
+ vector< vector<string> > *channels_; // Memory module names per channel.
+ uint64 channel_hash_; // Mask of address bits XORed for channel.
+ int channel_width_; // Channel width in bits.
int64 regionsize_; // Size of memory "regions"
int regioncount_; // Number of memory "regions"
@@ -267,7 +381,7 @@ class OsLayer {
int num_nodes_; // Number of nodes in the system.
int num_cpus_per_node_; // Number of cpus per node in the system.
int address_mode_; // Are we running 32 or 64 bit?
- bool has_sse2_; // Do we have sse2 instructions?
+ bool has_vector_; // Do we have sse2/neon instructions?
bool has_clflush_; // Do we have clflush instructions?
bool use_flush_page_cache_; // Do we need to flush the page cache?
@@ -279,9 +393,6 @@ class OsLayer {
// Get file descriptor for dev msr.
virtual int OpenMSR(uint32 core, uint32 address);
- // Auxiliary methods for PCI device configuration
- int PCIGetValue(string name, string object);
- int PCIGetResources(string name, PCIDevice *device);
// Look up how many hugepages there are.
virtual int64 FindHugePages();
@@ -289,6 +400,9 @@ class OsLayer {
// Link to find last transaction at an error location.
ErrCallback err_log_callback_;
+ // Object to wrap the time function.
+ Clock *clock_;
+
private:
DISALLOW_COPY_AND_ASSIGN(OsLayer);
};
diff --git a/src/sat.cc b/src/sat.cc
index ede951d..927ba54 100644
--- a/src/sat.cc
+++ b/src/sat.cc
@@ -125,6 +125,26 @@ bool Sat::CheckEnvironment() {
#error Build system regression - COPTS disregarded.
#endif
+ // Check if the cpu frequency test is enabled and able to run.
+ if (cpu_freq_test_) {
+ if (!CpuFreqThread::CanRun()) {
+ logprintf(0, "Process Error: This platform does not support this "
+ "test.\n");
+ bad_status();
+ return false;
+ } else if (cpu_freq_threshold_ <= 0) {
+ logprintf(0, "Process Error: The cpu frequency test requires "
+ "--cpu_freq_threshold set to a value > 0\n");
+ bad_status();
+ return false;
+ } else if (cpu_freq_round_ < 0) {
+ logprintf(0, "Process Error: The --cpu_freq_round option must be greater"
+ " than or equal to zero. A value of zero means no rounding.\n");
+ bad_status();
+ return false;
+ }
+ }
+
// Use all CPUs if nothing is specified.
if (memory_threads_ == -1) {
memory_threads_ = os_->num_cpus();
@@ -488,15 +508,9 @@ bool Sat::InitializePages() {
for (int64 i = 0; i < pages_; i++) {
struct page_entry pe;
// Only get valid pages with uninitialized tags here.
- char buf[256];
if (GetValid(&pe, kInvalidTag)) {
int64 paddr = os_->VirtualToPhysical(pe.addr);
int32 region = os_->FindRegion(paddr);
-
- os_->FindDimm(paddr, buf, sizeof(buf));
- if (i < 256) {
- logprintf(12, "Log: address: %#llx, %s\n", paddr, buf);
- }
region_[region]++;
pe.paddr = paddr;
pe.tag = 1 << region;
@@ -554,6 +568,7 @@ bool Sat::Initialize() {
// Initializes sync'd log file to ensure output is saved.
if (!InitializeLogfile())
return false;
+ Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_);
Logger::GlobalLogger()->StartThread();
logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str());
@@ -573,6 +588,17 @@ bool Sat::Initialize() {
if (min_hugepages_mbytes_ > 0)
os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte);
+ if (reserve_mb_ > 0)
+ os_->SetReserveSize(reserve_mb_);
+
+ if (channels_.size() > 0) {
+ // vector::size() yields size_t and the channel hash is a 64-bit mask in
+ // OsLayer, so convert explicitly to what each format specifier expects.
+ logprintf(6, "Log: Decoding memory: %dx%d bit channels,"
+ "%d modules per channel (x%d), decoding hash 0x%llx\n",
+ static_cast<int>(channels_.size()), channel_width_,
+ static_cast<int>(channels_[0].size()),
+ static_cast<int>(channel_width_ / channels_[0].size()),
+ static_cast<unsigned long long>(channel_hash_)); // NOLINT
+ os_->SetDramMappingParams(channel_hash_, channel_width_, &channels_);
+ }
+
if (!os_->Initialize()) {
logprintf(0, "Process Error: Failed to initialize OS layer\n");
bad_status();
@@ -640,18 +666,23 @@ Sat::Sat() {
pages_ = 0;
size_mb_ = 0;
size_ = size_mb_ * kMegabyte;
+ reserve_mb_ = 0;
min_hugepages_mbytes_ = 0;
freepages_ = 0;
paddr_base_ = 0;
+ channel_hash_ = kCacheLineSize;
+ channel_width_ = 64;
user_break_ = false;
verbosity_ = 8;
Logger::GlobalLogger()->SetVerbosity(verbosity_);
+ print_delay_ = 10;
strict_ = 1;
warm_ = 0;
run_on_anything_ = 0;
use_logfile_ = 0;
logfile_ = 0;
+ log_timestamps_ = true;
// Detect 32/64 bit binary.
void *pvoid = 0;
address_mode_ = sizeof(pvoid) * 8;
@@ -669,9 +700,15 @@ Sat::Sat() {
// Cache coherency data initialization.
cc_test_ = false; // Flag to trigger cc threads.
cc_cacheline_count_ = 2; // Two datastructures of cache line size.
+ cc_cacheline_size_ = 0; // Size of a cacheline (0 for auto-detect).
cc_inc_count_ = 1000; // Number of times to increment the shared variable.
cc_cacheline_data_ = 0; // Cache Line size datastructure.
+ // Cpu frequency data initialization.
+ cpu_freq_test_ = false; // Flag to trigger cpu frequency thread.
+ cpu_freq_threshold_ = 0; // Threshold, in MHz, at which a cpu fails.
+ cpu_freq_round_ = 10; // Round the computed frequency to this value.
+
sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL));
file_threads_ = 0;
net_threads_ = 0;
@@ -765,6 +802,9 @@ bool Sat::ParseArgs(int argc, char **argv) {
// Set number of megabyte to use.
ARG_IVALUE("-M", size_mb_);
+ // Specify the amount of megabytes to be reserved for system.
+ ARG_IVALUE("--reserve_memory", reserve_mb_);
+
// Set minimum megabytes of hugepages to require.
ARG_IVALUE("-H", min_hugepages_mbytes_);
@@ -786,8 +826,21 @@ bool Sat::ParseArgs(int argc, char **argv) {
// Set number of cache line size datastructures
ARG_IVALUE("--cc_line_count", cc_cacheline_count_);
+ // Override the detected or assumed cache line size.
+ ARG_IVALUE("--cc_line_size", cc_cacheline_size_);
+
// Flag set when cache coherency tests need to be run
- ARG_KVALUE("--cc_test", cc_test_, 1);
+ ARG_KVALUE("--cc_test", cc_test_, true);
+
+ // Set when the cpu_frequency test needs to be run
+ ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true);
+
+ // Set the threshold in MHz at which the cpu frequency test will fail.
+ ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_);
+
+ // Set the rounding value for the cpu frequency test. The default is to
+ // round to the nearest 10s value.
+ ARG_IVALUE("--cpu_freq_round", cpu_freq_round_);
// Set number of CPU stress threads.
ARG_IVALUE("-C", cpu_stress_threads_);
@@ -798,6 +851,12 @@ bool Sat::ParseArgs(int argc, char **argv) {
// Verbosity level.
ARG_IVALUE("-v", verbosity_);
+ // Chatty printout level.
+ ARG_IVALUE("--printsec", print_delay_);
+
+ // Turn off timestamps logging.
+ ARG_KVALUE("--no_timestamps", log_timestamps_, false);
+
// Set maximum number of errors to collect. Stop running after this many.
ARG_IVALUE("--max_errors", max_errorcount_);
@@ -918,6 +977,23 @@ bool Sat::ParseArgs(int argc, char **argv) {
continue;
}
+ ARG_IVALUE("--channel_hash", channel_hash_);
+ ARG_IVALUE("--channel_width", channel_width_);
+
+ if (!strcmp(argv[i], "--memory_channel")) {
+ i++;
+ if (i < argc) {
+ char *channel = argv[i];
+ channels_.push_back(vector<string>());
+ while (char* next = strchr(channel, ',')) {
+ channels_.back().push_back(string(channel, next - channel));
+ channel = next + 1;
+ }
+ channels_.back().push_back(string(channel));
+ }
+ continue;
+ }
+
// Default:
PrintVersion();
PrintHelp();
@@ -963,6 +1039,47 @@ bool Sat::ParseArgs(int argc, char **argv) {
disk_pages_ = 1;
}
+ // Validate memory channel parameters if supplied
+ if (channels_.size()) {
+ if (channels_.size() == 1) {
+ channel_hash_ = 0;
+ logprintf(7, "Log: "
+ "Only one memory channel...deactivating interleave decoding.\n");
+ } else if (channels_.size() > 2) {
+ logprintf(6, "Process Error: "
+ "Triple-channel mode not yet supported... sorry.\n");
+ bad_status();
+ return false;
+ }
+ for (uint i = 0; i < channels_.size(); i++)
+ if (channels_[i].size() != channels_[0].size()) {
+ logprintf(6, "Process Error: "
+ "Channels 0 and %d have a different count of dram modules.\n", i);
+ bad_status();
+ return false;
+ }
+ if (channels_[0].size() & (channels_[0].size() - 1)) {
+ logprintf(6, "Process Error: "
+ "Amount of modules per memory channel is not a power of 2.\n");
+ bad_status();
+ return false;
+ }
+ if (channel_width_ < 16
+ || channel_width_ & (channel_width_ - 1)) {
+ logprintf(6, "Process Error: "
+ "Channel width %d is invalid.\n", channel_width_);
+ bad_status();
+ return false;
+ }
+ if (channel_width_ / channels_[0].size() < 8) {
+ // The quotient is a size_t; cast it so it matches the %d specifier.
+ logprintf(6, "Process Error: Chip width x%d must be x8 or greater.\n",
+ static_cast<int>(channel_width_ / channels_[0].size()));
+ bad_status();
+ return false;
+ }
+ }
+
+
// Print each argument.
for (int i = 0; i < argc; i++) {
if (i)
@@ -976,6 +1093,8 @@ bool Sat::ParseArgs(int argc, char **argv) {
void Sat::PrintHelp() {
printf("Usage: ./sat(32|64) [options]\n"
" -M mbytes megabytes of ram to test\n"
+ " --reserve_memory If not using hugepages, the amount of memory to "
+ " reserve for the system\n"
" -H mbytes minimum megabytes of hugepages to require\n"
" -s seconds number of seconds to run\n"
" -m threads number of memory copy threads to run\n"
@@ -987,8 +1106,10 @@ void Sat::PrintHelp() {
" -f filename add a disk thread with "
"tempfile 'filename'\n"
" -l logfile log output to file 'logfile'\n"
+ " --no_timestamps do not prefix timestamps to log messages\n"
" --max_errors n exit early after finding 'n' errors\n"
" -v level verbosity (0-20), default is 8\n"
+ " --printsec secs How often to print 'seconds remaining'\n"
" -W Use more CPU-stressful memory copy\n"
" -A run in degraded mode on incompatible systems\n"
" -p pagesize size in bytes of memory chunks\n"
@@ -1024,13 +1145,26 @@ void Sat::PrintHelp() {
"cacheline's member\n"
" --cc_line_count number of cache line sized datastructures "
"to allocate for the cache coherency threads to operate\n"
+ " --cc_line_size override the auto-detected cache line size\n"
+ " --cpu_freq_test enable the cpu frequency test (requires the "
+ "--cpu_freq_threshold argument to be set)\n"
+ " --cpu_freq_threshold fail the cpu frequency test if the frequency "
+ "goes below this value (specified in MHz)\n"
+ " --cpu_freq_round round the computed frequency to this value, if set"
+ " to zero, only round to the nearest MHz\n"
" --paddr_base allocate memory starting from this address\n"
" --pause_delay delay (in seconds) between power spikes\n"
" --pause_duration duration (in seconds) of each pause\n"
- " --local_numa : choose memory regions associated with "
+ " --local_numa choose memory regions associated with "
"each CPU to be tested by that CPU\n"
- " --remote_numa : choose memory regions not associated with "
- "each CPU to be tested by that CPU\n");
+ " --remote_numa choose memory regions not associated with "
+ "each CPU to be tested by that CPU\n"
+ " --channel_hash mask of address bits XORed to determine channel. "
+ "Mask 0x40 interleaves cachelines between channels\n"
+ " --channel_width bits width in bits of each memory channel\n"
+ " --memory_channel u1,u2 defines a comma-separated list of names "
+ "for dram packages in a memory channel. Use multiple times to "
+ "define multiple channels.\n");
}
bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) {
@@ -1275,32 +1409,45 @@ void Sat::InitializeThreads() {
sizeof(cc_cacheline_data) * cc_cacheline_count_);
int num_cpus = CpuCount();
+ char *num;
+ // Calculate the number of cache lines needed just to give each core
+ // its own counter.
+ int line_size = cc_cacheline_size_;
+ if (line_size <= 0) {
+ line_size = CacheLineSize();
+ if (line_size < kCacheLineSize)
+ line_size = kCacheLineSize;
+ logprintf(12, "Log: Using %d as cache line size\n", line_size);
+ }
+ // The number of cache lines needed to hold an array of num_cpus.
+ // "num" must be the same type as cc_cacheline_data[X].num or the memory
+ // size calculations will fail.
+ int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size;
// Allocate all the nums once so that we get a single chunk
// of contiguous memory.
- int *num;
#ifdef HAVE_POSIX_MEMALIGN
int err_result = posix_memalign(
reinterpret_cast<void**>(&num),
- kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_);
+ line_size, line_size * needed_lines * cc_cacheline_count_);
#else
- num = reinterpret_cast<int*>(memalign(kCacheLineSize,
- sizeof(*num) * num_cpus * cc_cacheline_count_));
+ num = reinterpret_cast<char*>(memalign(
+ line_size, line_size * needed_lines * cc_cacheline_count_));
int err_result = (num == 0);
#endif
sat_assert(err_result == 0);
int cline;
for (cline = 0; cline < cc_cacheline_count_; cline++) {
- memset(num, 0, sizeof(num_cpus) * num_cpus);
+ memset(num, 0, sizeof(*num) * num_cpus);
cc_cacheline_data_[cline].num = num;
- num += num_cpus;
+ num += (line_size * needed_lines) / sizeof(*num);
}
int tnum;
for (tnum = 0; tnum < num_cpus; tnum++) {
CpuCacheCoherencyThread *thread =
new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_,
- tnum, cc_inc_count_);
+ tnum, num_cpus, cc_inc_count_);
thread->InitThread(total_threads_++, this, os_, patternlist_,
&continuous_status_);
// Pin the thread to a particular core.
@@ -1311,6 +1458,22 @@ void Sat::InitializeThreads() {
}
workers_map_.insert(make_pair(kCCType, cc_vector));
}
+
+ if (cpu_freq_test_) {
+ // Create the frequency test thread.
+ logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n",
+ cpu_freq_threshold_);
+ CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_,
+ cpu_freq_round_);
+ // This thread should be paused when other threads are paused.
+ thread->InitThread(total_threads_++, this, os_, NULL,
+ &power_spike_status_);
+
+ WorkerVector *cpu_freq_vector = new WorkerVector();
+ cpu_freq_vector->insert(cpu_freq_vector->end(), thread);
+ workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector));
+ }
+
ReleaseWorkerLock();
}
@@ -1319,6 +1482,19 @@ int Sat::CpuCount() {
return sysconf(_SC_NPROCESSORS_CONF);
}
+// Return the worst case (largest) cache line size of the various levels of
+// cache actually present in the machine.
+//
+// Each cache level is queried through sysconf(). The _SC_LEVEL*_LINESIZE
+// names are C library extensions rather than POSIX, so every query is only
+// compiled in when the library defines its name. The result may be <= 0 when
+// no level reports a size; Sat::InitializeThreads() clamps the value to at
+// least kCacheLineSize before using it.
+int Sat::CacheLineSize() {
+  int max_linesize = -1;
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+  max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+#endif
+#ifdef _SC_LEVEL2_CACHE_LINESIZE
+  {
+    const int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
+    if (linesize > max_linesize) max_linesize = linesize;
+  }
+#endif
+#ifdef _SC_LEVEL3_CACHE_LINESIZE
+  {
+    const int linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE);
+    if (linesize > max_linesize) max_linesize = linesize;
+  }
+#endif
+#ifdef _SC_LEVEL4_CACHE_LINESIZE
+  {
+    const int linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE);
+    if (linesize > max_linesize) max_linesize = linesize;
+  }
+#endif
+  return max_linesize;
+}
+
// Notify and reap worker threads.
void Sat::JoinThreads() {
logprintf(12, "Log: Joining worker threads\n");
@@ -1443,7 +1619,7 @@ void Sat::AnalysisAllStats() {
map_it != workers_map_.end(); ++map_it) {
for (WorkerVector::const_iterator it = map_it->second->begin();
it != map_it->second->end(); ++it) {
- thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000;
+ thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000.;
total_data += (*it)->GetMemoryCopiedData();
total_data += (*it)->GetDeviceCopiedData();
if (thread_runtime_sec > max_runtime_sec) {
@@ -1714,12 +1890,12 @@ bool Sat::Run() {
// All of these are in seconds. You probably want them to be >=
// kSleepFrequency and multiples of kSleepFrequency, but neither is necessary.
static const time_t kInjectionFrequency = 10;
- static const time_t kPrintFrequency = 10;
+ // print_delay_ determines "seconds remaining" chatty update.
const time_t start = time(NULL);
const time_t end = start + runtime_seconds_;
time_t now = start;
- time_t next_print = start + kPrintFrequency;
+ time_t next_print = start + print_delay_;
time_t next_pause = start + pause_delay_;
time_t next_resume = 0;
time_t next_injection;
@@ -1755,7 +1931,7 @@ bool Sat::Run() {
if (now >= next_print) {
// Print a count down message.
logprintf(5, "Log: Seconds remaining: %d\n", seconds_remaining);
- next_print = NextOccurance(kPrintFrequency, start, now);
+ next_print = NextOccurance(print_delay_, start, now);
}
if (next_injection && now >= next_injection) {
@@ -1901,3 +2077,9 @@ void logprintf(int priority, const char *format, ...) {
Logger::GlobalLogger()->VLogF(priority, format, args);
va_end(args);
}
+
+// Stop the logging thread and verify any pending data is written to the log.
+// This is a thin wrapper that forwards to Logger::GlobalLogger()->StopThread().
+// The sat_assert() macro in sattypes.h calls it before reporting a failure.
+void logstop() {
+ Logger::GlobalLogger()->StopThread();
+}
+
diff --git a/src/sat.h b/src/sat.h
index b48f519..5cc3bec 100644
--- a/src/sat.h
+++ b/src/sat.h
@@ -134,6 +134,8 @@ class Sat {
// Return the number of cpus in the system.
int CpuCount();
+ // Return the worst-case (largest) cache line size of the system.
+ int CacheLineSize();
// Collect error counts from threads.
int64 GetTotalErrorCount();
@@ -147,17 +149,23 @@ class Sat {
int64 pages_; // Number of memory blocks.
int64 size_; // Size of memory tested, in bytes.
int64 size_mb_; // Size of memory tested, in MB.
+ int64 reserve_mb_; // Reserve at least this amount of memory
+ // for the system, in MB.
int64 min_hugepages_mbytes_; // Minimum hugepages size.
int64 freepages_; // How many invalid pages we need.
int disk_pages_; // Number of pages per temp file.
uint64 paddr_base_; // Physical address base.
+ uint64 channel_hash_; // Mask of address bits XORed for channel.
+ int channel_width_; // Channel width in bits.
+ vector< vector<string> > channels_; // Memory module names per channel.
// Control flags.
volatile sig_atomic_t user_break_; // User has signalled early exit. Used as
// a boolean.
int verbosity_; // How much to print.
+ int print_delay_; // Chatty update frequency.
int strict_; // Check results per transaction.
- int warm_; // FPU warms CPU while coying.
+ int warm_; // FPU warms CPU while copying.
int address_mode_; // 32 or 64 bit binary.
bool stop_on_error_; // Exit immediately on any error.
bool findfiles_; // Autodetect tempfile locations.
@@ -169,6 +177,7 @@ class Sat {
int use_logfile_; // Log to a file.
char logfilename_[255]; // Name of file to log to.
int logfile_; // File handle to log to.
+ bool log_timestamps_; // Whether to add timestamps to log lines.
// Disk thread options.
int read_block_size_; // Size of block to read from disk.
@@ -199,9 +208,18 @@ class Sat {
bool cc_test_; // Flag to decide whether to start the
// cache coherency threads.
int cc_cacheline_count_; // Number of cache line size structures.
+ int cc_cacheline_size_; // Size of a cache line.
int cc_inc_count_; // Number of times to increment the shared
// cache lines structure members.
+ // Cpu Frequency Options.
+ bool cpu_freq_test_; // Flag to decide whether to start the
+ // cpu frequency thread.
+ int cpu_freq_threshold_; // The MHz threshold which will cause
+ // the test to fail.
+ int cpu_freq_round_; // Round the computed frequency to this
+ // value.
+
// Thread control.
int file_threads_; // Threads of file IO.
int net_threads_; // Threads of network IO.
@@ -249,7 +267,8 @@ class Sat {
kRandomDiskType = 7,
kCPUType = 8,
kErrorType = 9,
- kCCType = 10
+ kCCType = 10,
+ kCPUFreqType = 11,
};
// Helper functions.
diff --git a/src/sattypes.h b/src/sattypes.h
index c9341d0..79bb47d 100644
--- a/src/sattypes.h
+++ b/src/sattypes.h
@@ -27,11 +27,11 @@
#ifdef HAVE_CONFIG_H // Built using autoconf
#ifdef __ANDROID__
-#include "stressapptest_config_android.h"
+#include "stressapptest_config_android.h" // NOLINT
#else
-#include "stressapptest_config.h"
-using namespace __gnu_cxx;
-#endif
+#include "stressapptest_config.h" // NOLINT
+using namespace __gnu_cxx; //NOLINT
+#endif // __ANDROID__
using namespace std;
typedef signed long long int64;
@@ -57,10 +57,10 @@ inline const char* BuildChangelist() {
}
static const bool kOpenSource = true;
-#else
+#else // !HAVE_CONFIG_H
static const bool kOpenSource = false;
- #include "googlesattypes.h"
-#endif
+ #include "googlesattypes.h" // NOLINT
+#endif // HAVE_CONFIG_H
// Workaround to allow 32/64 bit conversion
// without running into strict aliasing problems.
union datacast_t {
@@ -75,11 +75,15 @@ union datacast_t {
// File sync'd print to console and log
void logprintf(int priority, const char *format, ...);
+// Stop the log and dump any queued lines.
+void logstop();
+
// We print to stderr ourselves first in case we're in such a bad state that the
// logger can't work.
#define sat_assert(x) \
{\
if (!(x)) {\
+ logstop();\
fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\
exit(1);\
@@ -186,6 +190,48 @@ inline string ErrorString(int error_num) {
#endif
}
+// Execute the cpuid instruction and pass back the contents of the registers.
+// This only works on x86 based platforms.
+//
+// eax: in/out. The value it points to is loaded into eax before cpuid runs
+//      and is overwritten with the resulting eax.
+// ebx, ecx, edx: out only. They are zeroed first and then overwritten with
+//      the resulting register values on x86 builds.
+//
+// No input constraint is given for ecx, so its value on entry to cpuid is
+// not controlled by this function.
+// On the PPC and ARMV7A builds the function returns right after zeroing
+// *ebx, *ecx and *edx, leaving *eax untouched. Any other CPU type does the
+// same and additionally emits a compile-time warning.
+inline void cpuid(
+ unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
+ // Default the output-only registers so non-x86 builds hand back zeros.
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0;
+ // CPUID features documented at:
+ // http://www.sandpile.org/ia32/cpuid.htm
+#if defined(STRESSAPPTEST_CPU_I686) || defined(STRESSAPPTEST_CPU_X86_64)
+#if defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686)
+ // In PIC compilations using the i686 cpu type, ebx contains the address
+ // of the global offset table. The compiler can't properly handle constraints
+ // using the ebx register for this compile, so preserve the register
+ // ourselves.
+ // ebx is saved in edi, cpuid runs, then the xchg restores ebx and leaves
+ // cpuid's ebx result in edi, which is why *ebx uses the "=D" constraint.
+ asm(
+ "mov %%ebx, %%edi;"
+ "cpuid;"
+ "xchg %%edi, %%ebx;"
+ // Output registers.
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx)
+ // Input registers.
+ : "a" (*eax)
+ ); // Asm
+#else
+ // ebx can be named directly as an output here.
+ asm(
+ "cpuid;"
+ // Output registers.
+ : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+ // Input registers.
+ : "a" (*eax)
+ ); // Asm
+#endif // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686)
+#elif defined(STRESSAPPTEST_CPU_PPC)
+ return;
+#elif defined(STRESSAPPTEST_CPU_ARMV7A)
+ return;
+#else
+#warning "Unsupported CPU type."
+#endif
+}
+
// Define handy constants here
static const int kTicksPerSec = 100;
static const int kMegabyte = (1024LL*1024LL);
diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in
index 97f306e..5412df4 100644
--- a/src/stressapptest_config.h.in
+++ b/src/stressapptest_config.h.in
@@ -53,6 +53,9 @@
/* Define to 1 if you have the `posix_memalign' function. */
#undef HAVE_POSIX_MEMALIGN
+/* Define to 1 if the system has `pthread_barrier'. */
+#undef HAVE_PTHREAD_BARRIERS
+
/* Define to 1 if you have the <pthread.h> header file. */
#undef HAVE_PTHREAD_H
diff --git a/src/stressapptest_config_android.h b/src/stressapptest_config_android.h
index 3817bdf..14081e5 100644
--- a/src/stressapptest_config_android.h
+++ b/src/stressapptest_config_android.h
@@ -54,12 +54,12 @@
/* Define to 1 if you have the `posix_memalign' function. */
/* #undef HAVE_POSIX_MEMALIGN */
+/* Define to 1 if the system has `pthread_barrier'. */
+#undef HAVE_PTHREAD_BARRIERS
+
/* Define to 1 if you have the <pthread.h> header file. */
#define HAVE_PTHREAD_H 1
-/* Android, why do you define _POSIX_BARRIERS when you have no _POSIX_BARRIERS?! */
-#undef _POSIX_BARRIERS
-
/* Define to 1 if you have the `rand_r' function. */
/* #undef HAVE_RAND_R */
@@ -144,7 +144,7 @@
#define PACKAGE_NAME "stressapptest"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "stressapptest 1.0.4_autoconf"
+#define PACKAGE_STRING "stressapptest 1.0.7_autoconf"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "stressapptest"
@@ -153,7 +153,7 @@
#define PACKAGE_URL ""
/* Define to the version of this package. */
-#define PACKAGE_VERSION "1.0.4_autoconf"
+#define PACKAGE_VERSION "1.0.7_autoconf"
/* Define as the return type of signal handlers (`int' or `void'). */
#define RETSIGTYPE void
@@ -179,6 +179,9 @@
/* Defined if the target CPU is i686 */
/* #undef STRESSAPPTEST_CPU_I686 */
+/* Defined if the target CPU is mips */
+/* #undef STRESSAPPTEST_CPU_MIPS */
+
/* Defined if the target CPU is PowerPC */
/* #undef STRESSAPPTEST_CPU_PPC */
@@ -203,7 +206,7 @@
#define TIME_WITH_SYS_TIME 1
/* Version number of package */
-#define VERSION "1.0.4_autoconf"
+#define VERSION "1.0.7_autoconf"
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
diff --git a/src/worker.cc b/src/worker.cc
index 62b0ede..5b0fe59 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -78,21 +78,6 @@ _syscall3(int, sched_setaffinity, pid_t, pid,
#endif
namespace {
- // Get HW core ID from cpuid instruction.
- inline int apicid(void) {
- int cpu;
-#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
- __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx");
-#elif defined(STRESSAPPTEST_CPU_ARMV7A)
- #warning "Unsupported CPU type ARMV7A: unable to determine core ID."
- cpu = 0;
-#else
- #warning "Unsupported CPU type: unable to determine core ID."
- cpu = 0;
-#endif
- return (cpu >> 24);
- }
-
// Work around the sad fact that there are two (gnu, xsi) incompatible
// versions of strerror_r floating around google. Awesome.
bool sat_strerror(int err, char *buf, int len) {
@@ -114,7 +99,7 @@ namespace {
inline uint64 addr_to_tag(void *address) {
return reinterpret_cast<uint64>(address);
}
-}
+} // namespace
#if !defined(O_DIRECT)
// Sometimes this isn't available.
@@ -144,7 +129,7 @@ static void *ThreadSpawnerGeneric(void *ptr) {
void WorkerStatus::Initialize() {
sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL));
sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL));
-#ifdef _POSIX_BARRIERS
+#ifdef HAVE_PTHREAD_BARRIERS
sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL,
num_workers_ + 1));
#endif
@@ -153,7 +138,7 @@ void WorkerStatus::Initialize() {
void WorkerStatus::Destroy() {
sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_));
sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_));
-#ifdef _POSIX_BARRIERS
+#ifdef HAVE_PTHREAD_BARRIERS
sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
#endif
}
@@ -173,10 +158,13 @@ void WorkerStatus::StopWorkers() {
WaitOnPauseBarrier();
}
-bool WorkerStatus::ContinueRunning() {
+bool WorkerStatus::ContinueRunning(bool *paused) {
// This loop is an optimization. We use it to immediately re-check the status
// after resuming from a pause, instead of returning and waiting for the next
// call to this function.
+ if (paused) {
+ *paused = false;
+ }
for (;;) {
switch (GetStatus()) {
case RUN:
@@ -187,6 +175,10 @@ bool WorkerStatus::ContinueRunning() {
WaitOnPauseBarrier();
// Wait for ResumeWorkers() to be called.
WaitOnPauseBarrier();
+ // Indicate that a pause occurred.
+ if (paused) {
+ *paused = true;
+ }
break;
case STOP:
return false;
@@ -220,7 +212,7 @@ void WorkerStatus::RemoveSelf() {
AcquireNumWorkersLock();
// Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't
// in use because (status != PAUSE).
-#ifdef _POSIX_BARRIERS
+#ifdef HAVE_PTHREAD_BARRIERS
sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_));
#endif
@@ -315,8 +307,8 @@ bool WorkerThread::InitPriority() {
logprintf(11, "Log: Bind to %s failed.\n",
cpuset_format(&cpu_mask_).c_str());
- logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n",
- thread_num_, apicid(),
+ logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n",
+ thread_num_, sched_getcpu(),
CurrentCpusFormat().c_str(),
cpuset_format(&cpu_mask_).c_str());
#if 0
@@ -580,7 +572,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
const char *message) {
char dimm_string[256] = "";
- int apic_id = apicid();
+ int core_id = sched_getcpu();
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
@@ -615,7 +607,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error,
"%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
"read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
message,
- apic_id,
+ core_id,
CurrentCpusFormat().c_str(),
error->vaddr,
error->paddr,
@@ -815,6 +807,9 @@ int WorkerThread::CheckRegion(void *addr,
if ((state == kGoodAgain) || (state == kBad)) {
unsigned int blockerrors = badend - badstart + 1;
errormessage = "Block Error";
+ // It's okay for the 1st entry to be corrected multiple times,
+ // it will simply be reported twice. Once here and once below
+ // when processing the error queue.
ProcessError(&recorded[0], 0, errormessage.c_str());
logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
"%d bytes from offset 0x%x to 0x%x\n",
@@ -823,8 +818,6 @@ int WorkerThread::CheckRegion(void *addr,
blockerrors * wordsize_,
offset + badstart * wordsize_,
offset + badend * wordsize_);
- errorcount_ += blockerrors;
- return blockerrors;
}
}
}
@@ -840,7 +833,6 @@ int WorkerThread::CheckRegion(void *addr,
if (page_error) {
// For each word in the data region.
- int error_recount = 0;
for (int i = 0; i < length / wordsize_; i++) {
uint64 actual = memblock[i];
uint64 expected;
@@ -859,21 +851,16 @@ int WorkerThread::CheckRegion(void *addr,
// If the value is incorrect, save an error record for later printing.
if (actual != expected) {
- if (error_recount < kErrorLimit) {
- // We already reported these.
- error_recount++;
- } else {
- // If we have overflowed the error queue, print the errors now.
- struct ErrorRecord er;
- er.actual = actual;
- er.expected = expected;
- er.vaddr = &memblock[i];
-
- // Do the error printout. This will take a long time and
- // likely change the machine state.
- ProcessError(&er, 12, errormessage.c_str());
- overflowerrors++;
- }
+ // If we have overflowed the error queue, print the errors now.
+ struct ErrorRecord er;
+ er.actual = actual;
+ er.expected = expected;
+ er.vaddr = &memblock[i];
+
+ // Do the error printout. This will take a long time and
+ // likely change the machine state.
+ ProcessError(&er, 12, errormessage.c_str());
+ overflowerrors++;
}
}
}
@@ -948,7 +935,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
char tag_dimm_string[256] = "";
bool read_error = false;
- int apic_id = apicid();
+ int core_id = sched_getcpu();
// Determine if this is a write or read error.
os_->Flush(error->vaddr);
@@ -982,7 +969,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error,
error->tagvaddr, error->tagpaddr,
tag_dimm_string,
read_error ? "read error" : "write error",
- apic_id,
+ core_id,
CurrentCpusFormat().c_str(),
error->vaddr,
error->paddr,
@@ -1100,12 +1087,18 @@ bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64,
AdlerChecksum ignored_checksum;
os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);
- // Force cache flush.
- int length = size_in_bytes / sizeof(*dstmem64);
- for (int i = 0; i < length; i += sizeof(*dstmem64)) {
- os_->FastFlush(dstmem64 + i);
- os_->FastFlush(srcmem64 + i);
+ // Force cache flush of both the source and destination addresses.
+ // length - length of block to flush in cachelines.
+ // mem_increment - number of dstmem/srcmem values per cacheline.
+ int length = size_in_bytes / kCacheLineSize;
+ int mem_increment = kCacheLineSize / sizeof(*dstmem64);
+ OsLayer::FastFlushSync();
+ for (int i = 0; i < length; ++i) {
+ OsLayer::FastFlushHint(dstmem64 + (i * mem_increment));
+ OsLayer::FastFlushHint(srcmem64 + (i * mem_increment));
}
+ OsLayer::FastFlushSync();
+
// Check results.
AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
// Patch up address tags.
@@ -1236,11 +1229,11 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
blocksize,
currentblock * blocksize, 0);
if (errorcount == 0) {
- int apic_id = apicid();
+ int core_id = sched_getcpu();
logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
"CRC mismatch %s != %s, "
"but no miscompares found on second pass.\n",
- apic_id, CurrentCpusFormat().c_str(),
+ core_id, CurrentCpusFormat().c_str(),
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
struct ErrorRecord er;
@@ -1366,10 +1359,10 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
blocksize,
currentblock * blocksize, 0);
if (errorcount == 0) {
- logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, "
+ logprintf(0, "Log: CrcWarmCopyPage CRC mismatch expected: %s != actual: %s, "
"but no miscompares found. Retrying with fresh data.\n",
- crc.ToHexString().c_str(),
- expectedcrc->ToHexString().c_str());
+ expectedcrc->ToHexString().c_str(),
+ crc.ToHexString().c_str() );
if (!tag_mode_) {
// Copy the data originally read from this region back again.
// This data should have any corruption read originally while
@@ -1380,16 +1373,16 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
blocksize,
currentblock * blocksize, 0);
if (errorcount == 0) {
- int apic_id = apicid();
+ int core_id = sched_getcpu();
logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage "
"CRC mismatch %s != %s, "
"but no miscompares found on second pass.\n",
- apic_id, CurrentCpusFormat().c_str(),
+ core_id, CurrentCpusFormat().c_str(),
crc.ToHexString().c_str(),
expectedcrc->ToHexString().c_str());
struct ErrorRecord er;
er.actual = sourcemem[0];
- er.expected = 0x0;
+ er.expected = 0xbad;
er.vaddr = sourcemem;
ProcessError(&er, 0, "Hardware Error");
}
@@ -1600,12 +1593,11 @@ void FileThread::SetFile(const char *filename_init) {
// Open the file for access.
bool FileThread::OpenFile(int *pfile) {
- bool no_O_DIRECT = false;
int flags = O_RDWR | O_CREAT | O_SYNC;
int fd = open(filename_.c_str(), flags | O_DIRECT, 0644);
if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
- no_O_DIRECT = true;
- fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT
+ fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT
+ os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL
}
if (fd < 0) {
logprintf(0, "Process Error: Failed to create file %s!!\n",
@@ -1613,8 +1605,6 @@ bool FileThread::OpenFile(int *pfile) {
pages_copied_ = 0;
return false;
}
- if (no_O_DIRECT)
- os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL
*pfile = fd;
return true;
}
@@ -1685,7 +1675,7 @@ bool FileThread::WritePages(int fd) {
if (!result)
return false;
}
- return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP.
+ return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP.
}
// Copy data from file into memory block.
@@ -1964,7 +1954,7 @@ bool FileThread::Work() {
// Load patterns into page records.
page_recs_ = new struct PageRec[sat_->disk_pages()];
for (int i = 0; i < sat_->disk_pages(); i++) {
- page_recs_[i].pattern = new struct Pattern();
+ page_recs_[i].pattern = new class Pattern();
}
// Loop until done.
@@ -2465,13 +2455,22 @@ bool CpuStressThread::Work() {
CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
int cacheline_count,
int thread_num,
+ int thread_count,
int inc_count) {
cc_cacheline_data_ = data;
cc_cacheline_count_ = cacheline_count;
cc_thread_num_ = thread_num;
+ cc_thread_count_ = thread_count;
cc_inc_count_ = inc_count;
}
+// A very simple pseudorandom generator: shift the seed right by one bit and,
+// when the bit that was shifted out is set, XOR in kRandomPolynomial. It uses
+// only a few simple logic operations, so it can be done quickly in registers
+// and the compiler can inline it.
+uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) {
+  uint64 next = seed >> 1;
+  // Fold in the feedback polynomial only when the low bit was set.
+  if (seed & 1)
+    next ^= kRandomPolynomial;
+  return next;
+}
+
// Worker thread to test the cache coherency of the CPUs
// Return false on fatal sw error.
bool CpuCacheCoherencyThread::Work() {
@@ -2480,7 +2479,19 @@ bool CpuCacheCoherencyThread::Work() {
uint64 time_start, time_end;
struct timeval tv;
+ // Use a slightly more robust random number for the initial
+ // value, so the random sequences from the simple generator will
+ // be more divergent.
+#ifdef HAVE_RAND_R
unsigned int seed = static_cast<unsigned int>(gettid());
+ uint64 r = static_cast<uint64>(rand_r(&seed));
+ r |= static_cast<uint64>(rand_r(&seed)) << 32;
+#else
+ srand(time(NULL));
+ uint64 r = static_cast<uint64>(rand()); // NOLINT
+ r |= static_cast<uint64>(rand()) << 32; // NOLINT
+#endif
+
gettimeofday(&tv, NULL); // Get the timestamp before increments.
time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;
@@ -2490,14 +2501,19 @@ bool CpuCacheCoherencyThread::Work() {
// Choose a datastructure at random and increment the appropriate
// member in that according to the offset (which is the same as the
// thread number).
-#ifdef HAVE_RAND_R
- int r = rand_r(&seed);
-#else
- int r = rand();
-#endif
- r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0));
+ r = SimpleRandom(r);
+ int cline_num = r % cc_cacheline_count_;
+ int offset;
+ // Reverse the order for odd numbered threads in odd numbered cache
+ // lines. This is designed for massively multi-core systems where the
+ // number of cores exceeds the bytes in a cache line, so "distant" cores
+ // get a chance to exercise cache coherency between them.
+ if (cline_num & cc_thread_num_ & 1)
+ offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+ else
+ offset = cc_thread_num_;
// Increment the member of the randomly selected structure.
- (cc_cacheline_data_[r].num[cc_thread_num_])++;
+ (cc_cacheline_data_[cline_num].num[offset])++;
}
total_inc += cc_inc_count_;
@@ -2506,14 +2522,26 @@ bool CpuCacheCoherencyThread::Work() {
// in all the cache line structures for this particular thread.
int cc_global_num = 0;
for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
- cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_];
+ int offset;
+ // Perform the same offset calculation from above.
+ if (cline_num & cc_thread_num_ & 1)
+ offset = (cc_thread_count_ & ~1) - cc_thread_num_;
+ else
+ offset = cc_thread_num_;
+ cc_global_num += cc_cacheline_data_[cline_num].num[offset];
// Reset the cacheline member's value for the next run.
- cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0;
+ cc_cacheline_data_[cline_num].num[offset] = 0;
}
if (sat_->error_injection())
cc_global_num = -1;
- if (cc_global_num != cc_inc_count_) {
+ // Since the count is only stored in a byte, to squeeze more into a
+ // single cache line, only compare it as a byte. In the event that there
+ // is something detected, the chance that it would be missed by a single
+ // thread is 1 in 256. If it affects all cores, that makes the chance
+ // of it being missed terribly minute. It seems unlikely any failure
+ // case would be off by more than a small number.
+ if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) {
errorcount_++;
logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
cc_global_num, cc_inc_count_);
@@ -2697,20 +2725,17 @@ bool DiskThread::SetParameters(int read_block_size,
// Open a device, return false on failure.
bool DiskThread::OpenDevice(int *pfile) {
- bool no_O_DIRECT = false;
int flags = O_RDWR | O_SYNC | O_LARGEFILE;
int fd = open(device_name_.c_str(), flags | O_DIRECT, 0);
if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
- no_O_DIRECT = true;
- fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT
+ fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT
+ os_->ActivateFlushPageCache();
}
if (fd < 0) {
logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
device_name_.c_str(), thread_num_);
return false;
}
- if (no_O_DIRECT)
- os_->ActivateFlushPageCache();
*pfile = fd;
return GetDiskSize(fd);
@@ -2866,11 +2891,11 @@ bool DiskThread::DoWork(int fd) {
// Block is either initialized by writing, or in nondestructive case,
// initialized by being added into the datastructure for later reading.
- block->SetBlockAsInitialized();
+ block->initialized();
in_flight_sectors_.push(block);
}
- if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP.
+ if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP.
return false;
// Verify blocks on disk.
@@ -2979,8 +3004,9 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
errorcount_++;
os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
- if (event.res < 0) {
- switch (event.res) {
+ int64 result = static_cast<int64>(event.res);
+ if (result < 0) {
+ switch (result) {
case -EIO:
logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
"sectors starting at %lld on disk %s (thread %d).\n",
@@ -3003,7 +3029,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
}
return true;
-#else // !HAVE_LIBAIO_H
+#else // !HAVE_LIBAIO_H
return false;
#endif
}
@@ -3011,7 +3037,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
// Write a block to disk.
// Return false if the block is not written.
bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
- memset(block_buffer_, 0, block->GetSize());
+ memset(block_buffer_, 0, block->size());
// Fill block buffer with a pattern
struct page_entry pe;
@@ -3019,30 +3045,30 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
// Even though a valid page could not be obtained, it is not an error
// since we can always fill in a pattern directly, albeit slower.
unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
- block->SetPattern(patternlist_->GetRandomPattern());
+ block->set_pattern(patternlist_->GetRandomPattern());
logprintf(11, "Log: Warning, using pattern fill fallback in "
"DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
device_name_.c_str(), thread_num_);
- for (int i = 0; i < block->GetSize()/wordsize_; i++) {
- memblock[i] = block->GetPattern()->pattern(i);
+ for (unsigned int i = 0; i < block->size()/wordsize_; i++) {
+ memblock[i] = block->pattern()->pattern(i);
}
} else {
- memcpy(block_buffer_, pe.addr, block->GetSize());
- block->SetPattern(pe.pattern);
+ memcpy(block_buffer_, pe.addr, block->size());
+ block->set_pattern(pe.pattern);
sat_->PutValid(&pe);
}
logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
" (thread %d).\n",
- block->GetSize()/kSectorSize, block->GetAddress(),
+ block->size()/kSectorSize, block->address(),
device_name_.c_str(), thread_num_);
int64 start_time = GetTime();
- if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(),
- block->GetAddress() * kSectorSize, write_timeout_)) {
+ if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(),
+ block->address() * kSectorSize, write_timeout_)) {
return false;
}
@@ -3063,11 +3089,11 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
// Return true if the block was read, also increment errorcount
// if the block had data errors or performance problems.
bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
- int64 blocks = block->GetSize() / read_block_size_;
+ int64 blocks = block->size() / read_block_size_;
int64 bytes_read = 0;
int64 current_blocks;
int64 current_bytes;
- uint64 address = block->GetAddress();
+ uint64 address = block->address();
logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
"(thread %d).\n",
@@ -3119,7 +3145,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
// In non-destructive mode, don't compare the block to the pattern since
// the block was never written to disk in the first place.
if (!non_destructive_) {
- if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes,
+ if (CheckRegion(block_buffer_, block->pattern(), current_bytes,
0, bytes_read)) {
os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
errorcount_ += 1;
@@ -3156,7 +3182,7 @@ bool DiskThread::Work() {
// when using direct IO.
#ifdef HAVE_POSIX_MEMALIGN
int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
- sat_->page_length());
+ sat_->page_length());
#else
block_buffer_ = memalign(kBufferAlignment, sat_->page_length());
int memalign_result = (block_buffer_ == 0);
@@ -3400,3 +3426,224 @@ bool MemoryRegionThread::Work() {
"pages checked\n", thread_num_, status_, pages_copied_);
return result;
}
+
+// The list of MSRs to read from each cpu.
+// Each entry pairs an MSR address with a printable name. Entries are looked
+// up by the MsrValues enum (kMsrTsc = 0, kMsrAperf = 1, kMsrMperf = 2), so
+// the order here must stay in sync with that enum.
+const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = {
+ { kMsrTscAddr, "TSC" },
+ { kMsrAperfAddr, "APERF" },
+ { kMsrMperfAddr, "MPERF" },
+};
+
+// Builds a frequency-checking worker.
+//   num_cpus:       number of cpus to track.
+//   freq_threshold: lowest acceptable frequency, in MHz.
+//   round:          granularity the computed frequency is rounded to; must
+//                   not be negative. A value of 0 turns rounding "off",
+//                   which is implemented as rounding to the nearest 1 MHz.
+// round_value_ holds half of the effective granularity so that it can be
+// added to a frequency before truncation to get round-to-nearest.
+CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round)
+    : num_cpus_(num_cpus),
+      freq_threshold_(freq_threshold),
+      round_(round == 0 ? 1 : round),
+      round_value_(round == 0 ? 0.5 : round / 2.0) {
+  sat_assert(round >= 0);
+}
+
+// Destructor. The body is empty: no explicit cleanup is performed here.
+CpuFreqThread::~CpuFreqThread() {
+}
+
+// Compute the difference between the currently read MSR values and the
+// previously read values and store the results in delta (both the per-MSR
+// differences and the elapsed time). If any of the values went backwards,
+// or the TSC advanced by less than one million cycles, logs the problem and
+// returns false; in that case delta is only partially filled in and must
+// not be used. Otherwise, returns true.
+bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous,
+                                 CpuDataType *delta) {
+  // Loop through the msrs.
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (previous->msrs[msr] > current->msrs[msr]) {
+      // Pass the register's name string: handing the whole CpuRegisterType
+      // struct to a %s conversion is undefined behavior.
+      logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx "
+                "skipping interval\n", kCpuRegisters[msr].name,
+                previous->msrs[msr], current->msrs[msr]);
+      return false;
+    } else {
+      delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr];
+    }
+  }
+
+  // Check for TSC < 1 Mcycles over interval.
+  if (delta->msrs[kMsrTsc] < (1000 * 1000)) {
+    logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n");
+    return false;
+  }
+  timersub(&current->tv, &previous->tv, &delta->tv);
+
+  return true;
+}
+
+// Compute the change in values of the MSRs between current and previous,
+// and set *freq to the frequency in MHz of the cpu, rounded to a multiple
+// of round_. If there is an error computing the delta, or the delta cannot
+// be turned into a meaningful frequency (no elapsed time, or MPERF did not
+// advance), return false and leave *freq untouched. Otherwise, return true.
+bool CpuFreqThread::ComputeFrequency(CpuDataType *current,
+                                     CpuDataType *previous, int *freq) {
+  CpuDataType delta;
+  if (!ComputeDelta(current, previous, &delta)) {
+    return false;
+  }
+
+  double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0;
+  // The timestamps come from gettimeofday(), which is not monotonic. A
+  // zero or negative interval would yield an infinite or negative
+  // frequency, so treat it as an unusable sample.
+  if (interval <= 0.0) {
+    logprintf(0, "Log: Non-positive sample interval, skipping interval\n");
+    return false;
+  }
+  // MPERF is the divisor below; a zero delta would produce inf/NaN, and
+  // converting that to int is undefined.
+  if (delta.msrs[kMsrMperf] == 0) {
+    logprintf(0, "Log: Register %s did not advance, skipping interval\n",
+              kCpuRegisters[kMsrMperf].name);
+    return false;
+  }
+
+  // TSC ticks per microsecond of elapsed time, scaled by APERF / MPERF.
+  double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000
+                     * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval;
+
+  // Use the rounding value to round up properly: round_value_ is half of
+  // round_, so adding it before dropping the remainder rounds to the
+  // nearest multiple of round_.
+  int computed = static_cast<int>(frequency + round_value_);
+  *freq = computed - (computed % round_);
+  return true;
+}
+
+// This is the task function that the thread executes.
+// Repeatedly samples the MSRs of every available cpu, computes each cpu's
+// frequency from consecutive samples, and records an error whenever a
+// frequency is below freq_threshold_. Returns false if the set of available
+// cpus cannot be obtained or if any cpu was ever below the threshold;
+// otherwise returns true once IsReadyToRun() reports the run is over.
+bool CpuFreqThread::Work() {
+ cpu_set_t cpuset;
+ if (!AvailableCpus(&cpuset)) {
+ logprintf(0, "Process Error: Cannot get information about the cpus.\n");
+ return false;
+ }
+
+ // Start off indicating the test is passing.
+ // NOTE(review): status_ is not updated when pass later becomes false;
+ // confirm whether callers consult status_ or the return value.
+ status_ = true;
+
+ // curr/prev index the two sample buffers in data[]; they are swapped at
+ // the end of every completed iteration.
+ int curr = 0;
+ int prev = 1;
+ // Number of successful samples taken since the last reset.
+ uint32 num_intervals = 0;
+ bool paused = false;
+ bool valid;
+ bool pass = true;
+
+ // Two buffers of per-cpu samples: the one being filled and the previous one.
+ vector<CpuDataType> data[2];
+ data[0].resize(num_cpus_);
+ data[1].resize(num_cpus_);
+ while (IsReadyToRun(&paused)) {
+ if (paused) {
+ // Reset the intervals and restart logic after the pause.
+ num_intervals = 0;
+ }
+ if (num_intervals == 0) {
+ // If this is the first interval, then always wait a bit before
+ // starting to collect data.
+ sat_sleep(kStartupDelay);
+ }
+
+ // Get the per cpu counters.
+ valid = true;
+ for (int cpu = 0; cpu < num_cpus_; cpu++) {
+ if (CPU_ISSET(cpu, &cpuset)) {
+ if (!GetMsrs(cpu, &data[curr][cpu])) {
+ logprintf(0, "Failed to get msrs on cpu %d.\n", cpu);
+ valid = false;
+ break;
+ }
+ }
+ }
+ if (!valid) {
+ // Reset the number of collected intervals since something bad happened.
+ // The continue skips the interval pause and the buffer swap below; the
+ // next iteration waits kStartupDelay instead because num_intervals is 0.
+ num_intervals = 0;
+ continue;
+ }
+
+ num_intervals++;
+
+ // Only compute a delta when we have at least two intervals worth of data.
+ // NOTE(review): two consecutive samples already exist when
+ // num_intervals == 2, but this condition waits for a third; confirm
+ // whether discarding the first sample after a reset is intentional.
+ if (num_intervals > 2) {
+ for (int cpu = 0; cpu < num_cpus_; cpu++) {
+ if (CPU_ISSET(cpu, &cpuset)) {
+ int freq;
+ if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu],
+ &freq)) {
+ // Reset the number of collected intervals since an unknown
+ // error occurred.
+ // The remaining cpus are not checked for this interval.
+ logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu);
+ num_intervals = 0;
+ break;
+ }
+ logprintf(15, "Cpu %d Freq %d\n", cpu, freq);
+ if (freq < freq_threshold_) {
+ // A low frequency is counted as an error and fails the run, but
+ // sampling continues.
+ errorcount_++;
+ pass = false;
+ logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz "
+ "threshold %d MHz.\n", cpu, freq, freq_threshold_);
+ }
+ }
+ }
+ }
+
+ sat_sleep(kIntervalPause);
+
+ // Swap the values in curr and prev (these values flip between 0 and 1).
+ curr ^= 1;
+ prev ^= 1;
+ }
+
+ return pass;
+}
+
+
+// Reads every register listed in kCpuRegisters on the given cpu and stores
+// the values in data->msrs, then stamps data->tv with the current time.
+// Returns false as soon as one register cannot be read (data is then only
+// partially updated and carries no new timestamp). Otherwise, returns true.
+bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) {
+  int index = 0;
+  while (index < kMsrLast) {
+    const uint32 address = kCpuRegisters[index].msr;
+    uint64 *value = &data->msrs[index];
+    if (!os_->ReadMSR(cpu, address, value)) {
+      return false;
+    }
+    ++index;
+  }
+
+  // Record when this set of values was acquired.
+  gettimeofday(&data->tv, NULL);
+  return true;
+}
+
+// Returns true if this test can run on the current machine. Otherwise,
+// logs the missing capability and returns false. On x86 builds the test
+// requires, as reported by CPUID: the TSC and MSR features, a non-stop
+// (invariant) TSC, and the APERF/MPERF registers. On any other build it
+// always returns false.
+bool CpuFreqThread::CanRun() {
+#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+  unsigned int eax, ebx, ecx, edx;
+
+  // Check that the TSC and MSR features are supported.
+  // CPUID.EAX=0x1: EDX.bit4 is TSC, EDX.bit5 is MSR (RDMSR/WRMSR).
+  // This check is valid for both Intel and AMD.
+  eax = 1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (!(edx & (1 << 4))) {
+    logprintf(0, "Process Error: No TSC support.\n");
+    return false;
+  }
+  if (!(edx & (1 << 5))) {
+    logprintf(0, "Process Error: No MSR support.\n");
+    return false;
+  }
+
+  // Check the highest extended function level supported.
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000000;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (eax < 0x80000007) {
+    logprintf(0, "Process Error: No invariant TSC support.\n");
+    return false;
+  }
+
+  // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000007;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((edx & (1 << 8)) == 0) {
+    logprintf(0, "Process Error: No non-stop TSC support.\n");
+    return false;
+  }
+
+  // Check the highest basic function level supported, so that leaf 0x6 is
+  // only interpreted when the processor actually implements it.
+  eax = 0;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (eax < 0x6) {
+    logprintf(0, "Process Error: No APERF MSR support.\n");
+    return false;
+  }
+
+  // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+  // This check is valid for both Intel and AMD.
+  eax = 0x6;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((ecx & 1) == 0) {
+    logprintf(0, "Process Error: No APERF MSR support.\n");
+    return false;
+  }
+  return true;
+#else
+  logprintf(0, "Process Error: "
+            "cpu_freq_test is only supported on X86 processors.\n");
+  return false;
+#endif
+}
diff --git a/src/worker.h b/src/worker.h
index 0ec4c1d..091d96b 100644
--- a/src/worker.h
+++ b/src/worker.h
@@ -44,7 +44,7 @@
// Global Datastruture shared by the Cache Coherency Worker Threads.
struct cc_cacheline_data {
- int *num;
+ char *num;
};
// Typical usage:
@@ -127,10 +127,8 @@ class WorkerStatus {
// ResumeWorkers() or StopWorkers() has been called. Number of distinct
// calling threads must match the worker count (see AddWorkers() and
// RemoveSelf()).
- bool ContinueRunning();
+ bool ContinueRunning(bool *paused);
- // TODO(matthewb): Is this functionality really necessary? Remove it if not.
- //
// This is a hack! It's like ContinueRunning(), except it won't pause. If
// any worker threads use this exclusively in place of ContinueRunning() then
// PauseWorkers() should never be used!
@@ -140,7 +138,7 @@ class WorkerStatus {
enum Status { RUN, PAUSE, STOP };
void WaitOnPauseBarrier() {
-#ifdef _POSIX_BARRIERS
+#ifdef HAVE_PTHREAD_BARRIERS
int error = pthread_barrier_wait(&pause_barrier_);
if (error != PTHREAD_BARRIER_SERIAL_THREAD)
sat_assert(error == 0);
@@ -189,7 +187,7 @@ class WorkerStatus {
pthread_rwlock_t status_rwlock_;
Status status_;
-#ifdef _POSIX_BARRIERS
+#ifdef HAVE_PTHREAD_BARRIERS
// Guaranteed to not be in use when (status_ != PAUSE).
pthread_barrier_t pause_barrier_;
#endif
@@ -242,7 +240,7 @@ class WorkerThread {
int64 ReadThreadTimer() {
struct timeval end_time_;
gettimeofday(&end_time_, NULL);
- return (end_time_.tv_sec - start_time_.tv_sec)*1000000 +
+ return (end_time_.tv_sec - start_time_.tv_sec)*1000000ULL +
(end_time_.tv_usec - start_time_.tv_usec);
}
// Stops per-WorkerThread timer and records thread run duration.
@@ -266,10 +264,10 @@ class WorkerThread {
// Calculate worker thread specific bandwidth.
virtual float GetMemoryBandwidth()
{return GetMemoryCopiedData() / (
- runduration_usec_ * 1.0 / 1000000);}
+ runduration_usec_ * 1.0 / 1000000.);}
virtual float GetDeviceBandwidth()
{return GetDeviceCopiedData() / (
- runduration_usec_ * 1.0 / 1000000);}
+ runduration_usec_ * 1.0 / 1000000.);}
void set_cpu_mask(cpu_set_t *mask) {
memcpy(&cpu_mask_, mask, sizeof(*mask));
@@ -304,9 +302,10 @@ class WorkerThread {
// do {
// // work.
// } while (IsReadyToRun());
- virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); }
- // TODO(matthewb): Is this function really necessary? Remove it if not.
- //
+ virtual bool IsReadyToRun(bool *paused = NULL) {
+ return worker_status_->ContinueRunning(paused);
+ }
+
// Like IsReadyToRun(), except it won't pause.
virtual bool IsReadyToRunNoPause() {
return worker_status_->ContinueRunningNoPause();
@@ -422,7 +421,7 @@ class FileThread : public WorkerThread {
// Record of where these pages were sourced from, and what
// potentially broken components they passed through.
struct PageRec {
- struct Pattern *pattern; // This is the data it should contain.
+ class Pattern *pattern; // This is the data it should contain.
void *src; // This is the memory location the data was sourced from.
void *dst; // This is where it ended up.
};
@@ -641,16 +640,27 @@ class CpuCacheCoherencyThread : public WorkerThread {
CpuCacheCoherencyThread(cc_cacheline_data *cc_data,
int cc_cacheline_count_,
int cc_thread_num_,
+ int cc_thread_count_,
int cc_inc_count_);
virtual bool Work();
protected:
+ // Used by the simple random number generator as a shift feedback;
+ // this polynomial (x^64 + x^63 + x^61 + x^60 + 1) will produce a
+ // pseudorandom cycle of period 2^64-1.
+ static const uint64 kRandomPolynomial = 0xD800000000000000ULL;
+ // A very simple pseudorandom generator that can be inlined and use
+ // registers, to keep the CC test loop tight and focused.
+ static uint64 SimpleRandom(uint64 seed);
+
cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline.
int cc_local_num_; // Local counter for each thread.
int cc_cacheline_count_; // Number of cache lines to operate on.
int cc_thread_num_; // The integer id of the thread which is
// used as an index into the integer array
// of the cacheline datastructure.
+ int cc_thread_count_; // Total number of threads being run, for
+ // calculations mixing up cache line access.
int cc_inc_count_; // Number of times to increment the counter.
private:
@@ -809,4 +819,80 @@ class MemoryRegionThread : public WorkerThread {
DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread);
};
+// Worker thread to check that the frequency of every cpu does not go below a
+// certain threshold.
+// The frequency is derived from periodic readings of the TSC, APERF and
+// MPERF MSRs of each cpu.
+class CpuFreqThread : public WorkerThread {
+ public:
+ // num_cpus: the total number of cpus on the system.
+ // freq_threshold: the minimum acceptable frequency, in MHz.
+ // round: the value to round the computed frequency to.
+ CpuFreqThread(int num_cpus, int freq_threshold, int round);
+ ~CpuFreqThread();
+
+ // This is the task function that the thread executes.
+ virtual bool Work();
+
+ // Returns true if this test can run on the current machine. Otherwise,
+ // returns false.
+ static bool CanRun();
+
+ private:
+ static const int kIntervalPause = 10; // The number of seconds to pause
+ // between acquiring the MSR data.
+ static const int kStartupDelay = 5; // The number of seconds to wait
+ // before acquiring MSR data.
+ static const int kMsrTscAddr = 0x10; // The address of the TSC MSR.
+ static const int kMsrAperfAddr = 0xE8; // The address of the APERF MSR.
+ static const int kMsrMperfAddr = 0xE7; // The address of the MPERF MSR.
+
+ // The index values into the CpuDataType.msrs[] array.
+ enum MsrValues {
+ kMsrTsc = 0, // MSR index 0 = TSC.
+ kMsrAperf = 1, // MSR index 1 = APERF.
+ kMsrMperf = 2, // MSR index 2 = MPERF.
+ kMsrLast, // Last MSR index.
+ };
+
+ // Describes one MSR to read: its address and a printable name.
+ typedef struct {
+ uint32 msr; // The address of the MSR.
+ const char *name; // A human readable string for the MSR.
+ } CpuRegisterType;
+
+ // One sample of a single cpu: the MSR values and when they were read.
+ typedef struct {
+ uint64 msrs[kMsrLast]; // The values of the MSRs.
+ struct timeval tv; // The time at which the MSRs were read.
+ } CpuDataType;
+
+ // The set of MSR addresses and register names.
+ // Has one entry per MsrValues index (kMsrLast entries in total).
+ static const CpuRegisterType kCpuRegisters[kMsrLast];
+
+ // Compute the change in values of the MSRs between current and previous,
+ // set the frequency in MHz of the cpu. If there is an error computing
+ // the delta, return false. Otherwise, return true.
+ bool ComputeFrequency(CpuDataType *current, CpuDataType *previous,
+ int *frequency);
+
+ // Get the MSR values for this particular cpu and save them in data. If
+ // any error is encountered, returns false. Otherwise, returns true.
+ bool GetMsrs(int cpu, CpuDataType *data);
+
+ // Compute the difference between the currently read MSR values and the
+ // previously read values and store the results in delta. If any of the
+ // values did not increase, or the TSC value is too small, returns false.
+ // Otherwise, returns true.
+ bool ComputeDelta(CpuDataType *current, CpuDataType *previous,
+ CpuDataType *delta);
+
+ // The total number of cpus on the system.
+ int num_cpus_;
+
+ // The minimum frequency that each cpu must operate at (in MHz).
+ int freq_threshold_;
+
+ // The value to round the computed frequency to.
+ int round_;
+
+ // Precomputed value to add to the frequency to do the rounding.
+ double round_value_;
+
+ DISALLOW_COPY_AND_ASSIGN(CpuFreqThread);
+};
+
#endif // STRESSAPPTEST_WORKER_H_
diff --git a/stressapptest.1 b/stressapptest.1
index 695f9ee..2c91478 100644
--- a/stressapptest.1
+++ b/stressapptest.1
@@ -86,10 +86,15 @@ Number of times to increment the cacheline's member.
.TP
.B \-\-cc_line_count <number>
-Mumber of cache line sized datastructures to allocate for the cache coherency
+Number of cache line sized data structures to allocate for the cache coherency
threads to operate.
.TP
+.B \-\-cc_line_size <number>
+Size of cache line to use as the basis for cache coherency test data
+structures.
+
+.TP
.B \-\-cc_test
Do the cache coherency testing.