diff options
author | Nick Sanders <nsanders@google.com> | 2015-09-15 12:41:37 -0700 |
---|---|---|
committer | Nick Sanders <nsanders@google.com> | 2015-09-15 12:41:37 -0700 |
commit | 241f33a3e958842e3db803c03300764bd2ee9c19 (patch) | |
tree | d38035fadbfff8dbe02121f11658dd3fe1540df8 | |
parent | 279816931fafe7dbffb0169185e9ac360144aad7 (diff) | |
download | stressapptest-241f33a3e958842e3db803c03300764bd2ee9c19.tar.gz |
Update to stressapptest 1.0.7 from upstream
https://github.com/stressapptest/stressapptest
Change-Id: I6307bcfad2e67392b4e0308680c708546e9a15a3
Signed-off-by: Nick Sanders <nsanders@google.com>
-rw-r--r-- | Android.mk | 8 | ||||
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | Makefile.in | 103 | ||||
-rwxr-xr-x | configure | 158 | ||||
-rw-r--r-- | configure.ac | 75 | ||||
-rw-r--r-- | src/Makefile.am | 3 | ||||
-rw-r--r-- | src/Makefile.in | 75 | ||||
-rw-r--r-- | src/adler32memcpy.cc | 119 | ||||
-rw-r--r-- | src/clock.h | 29 | ||||
-rw-r--r-- | src/disk_blocks.cc | 187 | ||||
-rw-r--r-- | src/disk_blocks.h | 157 | ||||
-rw-r--r-- | src/findmask.c | 140 | ||||
-rw-r--r-- | src/findmask.inc | 4 | ||||
-rw-r--r-- | src/logger.cc | 56 | ||||
-rw-r--r-- | src/logger.h | 17 | ||||
-rw-r--r-- | src/os.cc | 260 | ||||
-rw-r--r-- | src/os.h | 150 | ||||
-rw-r--r-- | src/sat.cc | 224 | ||||
-rw-r--r-- | src/sat.h | 23 | ||||
-rw-r--r-- | src/sattypes.h | 60 | ||||
-rw-r--r-- | src/stressapptest_config.h.in | 3 | ||||
-rw-r--r-- | src/stressapptest_config_android.h | 15 | ||||
-rw-r--r-- | src/worker.cc | 441 | ||||
-rw-r--r-- | src/worker.h | 112 | ||||
-rw-r--r-- | stressapptest.1 | 7 |
25 files changed, 1757 insertions, 672 deletions
@@ -16,11 +16,17 @@ LOCAL_SRC_FILES := \ src/queue.cc \ src/sat.cc \ src/sat_factory.cc \ - src/worker.cc \ + src/worker.cc LOCAL_MODULE:= stressapptest LOCAL_MODULE_TAGS := optional + LOCAL_CFLAGS := -DHAVE_CONFIG_H -DANDROID -DNDEBUG -UDEBUG -DCHECKOPTS + +LOCAL_C_INCLUDES := \ + bionic \ + libc++ + LOCAL_CPP_EXTENSION := .cc LOCAL_CXX_STL := libc++ diff --git a/Makefile.am b/Makefile.am index c476e5f..5b1998f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,2 +1,3 @@ SUBDIRS = src -dist_doc_DATA = COPYING stressapptest.1
\ No newline at end of file +dist_man_MANS = stressapptest.1 + diff --git a/Makefile.in b/Makefile.in index 718866a..e0386c7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -14,7 +14,6 @@ # PARTICULAR PURPOSE. @SET_MAKE@ - VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ @@ -34,9 +33,8 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ -target_triplet = @target@ subdir = . -DIST_COMMON = $(am__configure_deps) $(dist_doc_DATA) \ +DIST_COMMON = $(am__configure_deps) $(dist_man_MANS) \ $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(top_srcdir)/configure COPYING config.guess config.sub \ depcomp install-sh missing @@ -80,8 +78,10 @@ am__nobase_list = $(am__nobase_strip_setup); \ am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' -am__installdirs = "$(DESTDIR)$(docdir)" -DATA = $(dist_doc_DATA) +man1dir = $(mandir)/man1 +am__installdirs = "$(DESTDIR)$(man1dir)" +NROFF = nroff +MANS = $(dist_man_MANS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ @@ -220,16 +220,12 @@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ -target = @target@ target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src -dist_doc_DATA = COPYING stressapptest.1 +dist_man_MANS = stressapptest.1 all: all-recursive .SUFFIXES: @@ -267,26 +263,44 @@ $(top_srcdir)/configure: $(am__configure_deps) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): -install-dist_docDATA: $(dist_doc_DATA) +install-man1: $(dist_man_MANS) @$(NORMAL_INSTALL) - test -z "$(docdir)" || $(MKDIR_P) 
"$(DESTDIR)$(docdir)" - @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ - for p in $$list; do \ - if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ - echo "$$d$$p"; \ - done | $(am__base_list) | \ + test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)" + @list=''; test -n "$(man1dir)" || exit 0; \ + { for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ while read files; do \ - echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(docdir)'"; \ - $(INSTALL_DATA) $$files "$(DESTDIR)$(docdir)" || exit $$?; \ - done + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } -uninstall-dist_docDATA: +uninstall-man1: @$(NORMAL_UNINSTALL) - @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ - files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ - test -n "$$files" || exit 0; \ - echo " ( cd '$(DESTDIR)$(docdir)' && rm -f" $$files ")"; \ - cd "$(DESTDIR)$(docdir)" && rm -f $$files + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + test -z 
"$$files" || { \ + echo " ( cd '$(DESTDIR)$(man1dir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(man1dir)" && rm -f $$files; } # This directory's subdirectories are mostly independent; you can cd # into them and run `make' without going through this Makefile. @@ -424,6 +438,19 @@ distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) + @list='$(MANS)'; if test -n "$$list"; then \ + list=`for p in $$list; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \ + if test -n "$$list" && \ + grep 'ab help2man is required to generate this page' $$list >/dev/null; then \ + echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \ + grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \ + echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \ + echo " typically \`make maintainer-clean' will remove them" >&2; \ + exit 1; \ + else :; fi; \ + else :; fi $(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ @@ -600,10 +627,10 @@ distcleancheck: distclean exit 1; } >&2 check-am: all-am check: check-recursive -all-am: Makefile $(DATA) +all-am: Makefile $(MANS) installdirs: installdirs-recursive installdirs-am: - for dir in "$(DESTDIR)$(docdir)"; do \ + for dir in "$(DESTDIR)$(man1dir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive @@ -652,7 +679,7 @@ info: info-recursive info-am: -install-data-am: install-dist_docDATA +install-data-am: install-man install-dvi: install-dvi-recursive @@ -668,7 +695,7 @@ install-info: install-info-recursive install-info-am: -install-man: +install-man: install-man1 install-pdf: install-pdf-recursive @@ -698,7 +725,9 @@ ps: ps-recursive ps-am: -uninstall-am: uninstall-dist_docDATA +uninstall-am: uninstall-man + +uninstall-man: uninstall-man1 .MAKE: 
$(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ install-am install-strip tags-recursive @@ -710,14 +739,14 @@ uninstall-am: uninstall-dist_docDATA distclean distclean-generic distclean-tags distcleancheck \ distdir distuninstallcheck dvi dvi-am html html-am info \ info-am install install-am install-data install-data-am \ - install-dist_docDATA install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am install-info \ - install-info-am install-man install-pdf install-pdf-am \ - install-ps install-ps-am install-strip installcheck \ - installcheck-am installdirs installdirs-am maintainer-clean \ + install-dvi install-dvi-am install-exec install-exec-am \ + install-html install-html-am install-info install-info-am \ + install-man install-man1 install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \ - uninstall-dist_docDATA + uninstall-man uninstall-man1 # Tell versions [3.59,3.63) of GNU make to not export all variables. @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.65 for stressapptest 1.0.4_autoconf. +# Generated by GNU Autoconf 2.65 for stressapptest 1.0.7_autoconf. # # Report bugs to <opensource@google.com>. # @@ -552,8 +552,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='stressapptest' PACKAGE_TARNAME='stressapptest' -PACKAGE_VERSION='1.0.4_autoconf' -PACKAGE_STRING='stressapptest 1.0.4_autoconf' +PACKAGE_VERSION='1.0.7_autoconf' +PACKAGE_STRING='stressapptest 1.0.7_autoconf' PACKAGE_BUGREPORT='opensource@google.com' PACKAGE_URL='' @@ -646,10 +646,6 @@ am__isrc INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM -target_os -target_vendor -target_cpu -target host_os host_vendor host_cpu @@ -701,6 +697,7 @@ ac_user_opts=' enable_option_checking with_static enable_dependency_tracking +enable_default_optimizations ' ac_precious_vars='build_alias host_alias @@ -1255,7 +1252,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures stressapptest 1.0.4_autoconf to adapt to many kinds of systems. +\`configure' configures stressapptest 1.0.7_autoconf to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1320,13 +1317,12 @@ Program names: System types: --build=BUILD configure for building on BUILD [guessed] --host=HOST cross-compile to build programs to run on HOST [BUILD] - --target=TARGET configure for building compilers for TARGET [HOST] _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of stressapptest 1.0.4_autoconf:";; + short | recursive ) echo "Configuration of stressapptest 1.0.7_autoconf:";; esac cat <<\_ACEOF @@ -1336,6 +1332,8 @@ Optional Features: --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors + --disable-default-optimizations + Disable default optimization flag overrides Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1420,7 +1418,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -stressapptest configure 1.0.4_autoconf +stressapptest configure 1.0.7_autoconf generated by GNU Autoconf 2.65 Copyright (C) 2009 Free Software Foundation, Inc. @@ -1976,7 +1974,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by stressapptest $as_me 1.0.4_autoconf, which was +It was created by stressapptest $as_me 1.0.7_autoconf, which was generated by GNU Autoconf 2.65. Invocation command line was $ $0 $@ @@ -2331,13 +2329,13 @@ if test "${with_static+set}" = set; then : fi -if test "$with_static" == "yes" +if test "$with_static" = "yes" then - { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with staticaly linked libraries." >&5 + { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with staticaly linked libraries." >&5 $as_echo "$as_me: Compiling with staticaly linked libraries." 
>&6;} - LIBS="$LIBS -static" + LIBS="$LIBS -static" else - { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with dynamically linked libraries." >&5 + { $as_echo "$as_me:${as_lineno-$LINENO}: Compiling with dynamically linked libraries." >&5 $as_echo "$as_me: Compiling with dynamically linked libraries." >&6;} fi @@ -2435,105 +2433,74 @@ IFS=$ac_save_IFS case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac - # Checking for target cpu and setting custom configuration # for the different platforms -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking target system type" >&5 -$as_echo_n "checking target system type... " >&6; } -if test "${ac_cv_target+set}" = set; then : - $as_echo_n "(cached) " >&6 -else - if test "x$target_alias" = x; then - ac_cv_target=$ac_cv_host -else - ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || - as_fn_error "$SHELL $ac_aux_dir/config.sub $target_alias failed" "$LINENO" 5 -fi +case "$host_cpu" in #( + *x86_64*) : -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_target" >&5 -$as_echo "$ac_cv_target" >&6; } -case $ac_cv_target in -*-*-*) ;; -*) as_fn_error "invalid value of canonical target" "$LINENO" 5;; -esac -target=$ac_cv_target -ac_save_IFS=$IFS; IFS='-' -set x $ac_cv_target -shift -target_cpu=$1 -target_vendor=$2 -shift; shift -# Remember, the first character of IFS is used to create $*, -# except with old shells: -target_os=$* -IFS=$ac_save_IFS -case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac - - -# The aliases save the names the user supplied, while $host etc. -# will get canonicalized. 
-test -n "$target_alias" && - test "$program_prefix$program_suffix$program_transform_name" = \ - NONENONEs,x,x, && - program_prefix=${target_alias}- -case x"$target_cpu" in - "xx86_64") $as_echo "#define STRESSAPPTEST_CPU_X86_64 /**/" >>confdefs.h - ;; - "xi686") + ;; #( + *i686*) : + $as_echo "#define STRESSAPPTEST_CPU_I686 /**/" >>confdefs.h - ;; - "xpowerpc") + ;; #( + *powerpc*) : + $as_echo "#define STRESSAPPTEST_CPU_PPC /**/" >>confdefs.h - ;; - "xarmv7a") + ;; #( + *armv7a*) : + $as_echo "#define STRESSAPPTEST_CPU_ARMV7A /**/" >>confdefs.h - ;; - *) - as_fn_error "$target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a" "$LINENO" 5 - ;; + ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unsupported CPU: $host_cpu! Try x86_64, i686, powerpc, or armv7a" >&5 +$as_echo "$as_me: WARNING: Unsupported CPU: $host_cpu! Try x86_64, i686, powerpc, or armv7a" >&2;} + ;; esac -_os=`uname` ## The following allows like systems to share settings. This is not meant to ## imply that these OS are the same thing. From OpenOffice dmake configure.in -case "$_os" in - "Linux") +case "$host_os" in #( + *linux*) : + OS_VERSION=linux $as_echo "#define STRESSAPPTEST_OS_LINUX /**/" >>confdefs.h - ;; - "Darwin") + ;; #( + *darwin*) : + OS_VERSION=macosx $as_echo "#define STRESSAPPTEST_OS_DARWIN /**/" >>confdefs.h - ;; - "FreeBSD") + ;; #( + *freebsd*) : + OS_VERSION=bsd $as_echo "#define STRESSAPPTEST_OS_BSD /**/" >>confdefs.h - ;; - "NetBSD") + ;; #( + *netbsd*) : + OS_VERSION=bsd $as_echo "#define STRESSAPPTEST_OS_BSD /**/" >>confdefs.h - ;; - *) - as_fn_error "$_os operating system is not suitable to build dmake!" "$LINENO" 5 - ;; + ;; #( + *) : + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unsupported system: $host_os" >&5 +$as_echo "$as_me: WARNING: unsupported system: $host_os" >&2;} + ;; esac am__api_version='1.11' @@ -2974,7 +2941,7 @@ fi # Define the identity of the package. 
PACKAGE='stressapptest' - VERSION='1.0.4_autoconf' + VERSION='1.0.7_autoconf' cat >>confdefs.h <<_ACEOF @@ -4412,10 +4379,19 @@ cat >>confdefs.h <<_ACEOF _ACEOF -#Default cxxflags -CXXFLAGS="$CXXFLAGS -DCHECKOPTS" -CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall -Wno-psabi" -CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG" +# Check whether --enable-default-optimizations was given. +if test "${enable_default_optimizations+set}" = set; then : + enableval=$enable_default_optimizations; +fi + +if test x"$enable_default_optimizations" != xno; then : + + #Default cxxflags + CXXFLAGS="$CXXFLAGS -DCHECKOPTS" + CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall" + CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG" + +fi # Checks for header files. @@ -5064,6 +5040,13 @@ if test "$ac_res" != no; then : fi +ac_fn_c_check_type "$LINENO" "pthread_barrier_t" "ac_cv_type_pthread_barrier_t" "$ac_includes_default" +if test "x$ac_cv_type_pthread_barrier_t" = x""yes; then : + +$as_echo "#define HAVE_PTHREAD_BARRIERS 1" >>confdefs.h + +fi + for ac_header in libaio.h do : ac_fn_c_check_header_mongrel "$LINENO" "libaio.h" "ac_cv_header_libaio_h" "$ac_includes_default" @@ -5201,6 +5184,7 @@ if test "$ac_res" != no; then : fi + # Checks for typedefs, structures, and compiler characteristics. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5 $as_echo_n "checking for stdbool.h that conforms to C99... " >&6; } @@ -6455,7 +6439,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by stressapptest $as_me 1.0.4_autoconf, which was +This file was extended by stressapptest $as_me 1.0.7_autoconf, which was generated by GNU Autoconf 2.65. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6521,7 +6505,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -stressapptest config.status 1.0.4_autoconf +stressapptest config.status 1.0.7_autoconf configured by $0, generated by GNU Autoconf 2.65, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index e1e44fa..74e8687 100644 --- a/configure.ac +++ b/configure.ac @@ -1,71 +1,64 @@ AC_PREREQ(2.61) -AC_INIT([stressapptest], [1.0.4_autoconf], [opensource@google.com]) +AC_INIT([stressapptest], [1.0.7_autoconf], [opensource@google.com]) AC_ARG_WITH(static, [ --with-static enable static linking]) -if test "$with_static" == "yes" +if test "$with_static" = "yes" then - AC_MSG_NOTICE([Compiling with staticaly linked libraries.]) - LIBS="$LIBS -static" + AC_MSG_NOTICE([Compiling with staticaly linked libraries.]) + LIBS="$LIBS -static" else - AC_MSG_NOTICE([Compiling with dynamically linked libraries.]) + AC_MSG_NOTICE([Compiling with dynamically linked libraries.]) fi AC_CANONICAL_HOST -AC_CANONICAL_BUILD # Checking for target cpu and setting custom configuration # for the different platforms -AC_CANONICAL_TARGET -case x"$target_cpu" in - "xx86_64") +AS_CASE(["$host_cpu"], + [*x86_64*], [ AC_DEFINE([STRESSAPPTEST_CPU_X86_64],[], [Defined if the target CPU is x86_64]) - ;; - "xi686") + ], + [*i686*], [ AC_DEFINE([STRESSAPPTEST_CPU_I686],[], [Defined if the target CPU is i686]) - ;; - "xpowerpc") + ], + [*powerpc*], [ AC_DEFINE([STRESSAPPTEST_CPU_PPC],[], [Defined if the target CPU is PowerPC]) - ;; - "xarmv7a") + ], + [*armv7a*], [ AC_DEFINE([STRESSAPPTEST_CPU_ARMV7A],[], [Defined if the target CPU is armv7a]) - ;; - *) - AC_MSG_ERROR([$target_cpu is not supported! Try x86_64, i686, powerpc, or armv7a]) - ;; -esac + ], + [AC_MSG_WARN([Unsupported CPU: $host_cpu! 
Try x86_64, i686, powerpc, or armv7a])] +) -_os=`uname` ## The following allows like systems to share settings. This is not meant to ## imply that these OS are the same thing. From OpenOffice dmake configure.in -case "$_os" in - "Linux") +AS_CASE(["$host_os"], + [*linux*], [ OS_VERSION=linux AC_DEFINE([STRESSAPPTEST_OS_LINUX],[], [Defined if the target OS is Linux]) - ;; - "Darwin") + ], + [*darwin*], [ OS_VERSION=macosx AC_DEFINE([STRESSAPPTEST_OS_DARWIN],[], [Defined if the target OS is OSX]) - ;; - "FreeBSD") + ], + [*freebsd*], [ OS_VERSION=bsd AC_DEFINE([STRESSAPPTEST_OS_BSD],[], [Defined if the target OS is BSD based]) - ;; - "NetBSD") + ], + [*netbsd*], [ OS_VERSION=bsd AC_DEFINE([STRESSAPPTEST_OS_BSD],[], [Defined if the target OS is BSD based]) - ;; - *) - AC_MSG_ERROR([$_os operating system is not suitable to build dmake!]) - ;; -esac + ], + [AC_MSG_WARN([unsupported system: $host_os])] +) AM_INIT_AUTOMAKE([-Wall -Werror foreign]) AC_CONFIG_SRCDIR([src/]) @@ -95,10 +88,14 @@ AC_DEFINE_UNQUOTED([STRESSAPPTEST_TIMESTAMP], "$username @ $hostname on $timestamp", [Timestamp when ./configure was executed]) -#Default cxxflags -CXXFLAGS="$CXXFLAGS -DCHECKOPTS" -CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall -Wno-psabi" -CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG" +AC_ARG_ENABLE([default-optimizations], + [AS_HELP_STRING([--disable-default-optimizations], [Disable default optimization flag overrides])]) +AS_IF([test x"$enable_default_optimizations" != xno], [ + #Default cxxflags + CXXFLAGS="$CXXFLAGS -DCHECKOPTS" + CXXFLAGS="$CXXFLAGS -Wreturn-type -Wunused -Wuninitialized -Wall" + CXXFLAGS="$CXXFLAGS -O3 -funroll-all-loops -funroll-loops -DNDEBUG" +]) # Checks for header files. 
AC_HEADER_DIRENT @@ -107,11 +104,13 @@ AC_HEADER_STDC AC_CHECK_HEADERS([arpa/inet.h fcntl.h netdb.h stdint.h stdlib.h string.h sys/ioctl.h sys/socket.h sys/time.h unistd.h], [], [AC_MSG_FAILURE([Missing some header files.])]) AC_CHECK_HEADERS([pthread.h]) AC_SEARCH_LIBS([pthread_create], [pthread]) +AC_CHECK_TYPE([pthread_barrier_t], AC_DEFINE(HAVE_PTHREAD_BARRIERS, [1], [Define to 1 if the system has `pthread_barrier'.])) AC_CHECK_HEADERS([libaio.h]) AC_SEARCH_LIBS([io_setup], [aio]) AC_CHECK_HEADERS([sys/shm.h]) AC_SEARCH_LIBS([shm_open], [rt]) + # Checks for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL AC_C_CONST diff --git a/src/Makefile.am b/src/Makefile.am index e044974..16f539d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,5 @@ bin_PROGRAMS = stressapptest +noinst_PROGRAMS = findmask AM_DEFAULT_SOURCE_EXT=.cc @@ -27,5 +28,7 @@ HFILES += error_diag.h HFILES += disk_blocks.h HFILES += adler32memcpy.h HFILES += logger.h +HFILES += clock.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) +findmask_SOURCES = findmask.c findmask.inc diff --git a/src/Makefile.in b/src/Makefile.in index f62d1ac..ff320f3 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -34,8 +34,8 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ -target_triplet = @target@ bin_PROGRAMS = stressapptest$(EXEEXT) +noinst_PROGRAMS = findmask$(EXEEXT) subdir = src DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(srcdir)/stressapptest_config.h.in @@ -48,7 +48,10 @@ CONFIG_HEADER = stressapptest_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" -PROGRAMS = $(bin_PROGRAMS) +PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) +am_findmask_OBJECTS = findmask.$(OBJEXT) +findmask_OBJECTS = $(am_findmask_OBJECTS) +findmask_LDADD = $(LDADD) am__objects_1 = main.$(OBJEXT) am__objects_2 = os.$(OBJEXT) os_factory.$(OBJEXT) pattern.$(OBJEXT) \ queue.$(OBJEXT) sat.$(OBJEXT) 
sat_factory.$(OBJEXT) \ @@ -63,17 +66,17 @@ DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) CXXLD = $(CXX) CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ -o $@ -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -SOURCES = $(stressapptest_SOURCES) -DIST_SOURCES = $(stressapptest_SOURCES) +SOURCES = $(findmask_SOURCES) $(stressapptest_SOURCES) +DIST_SOURCES = $(findmask_SOURCES) $(stressapptest_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -171,11 +174,7 @@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ -target = @target@ target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ @@ -186,13 +185,14 @@ CFILES = os.cc os_factory.cc pattern.cc queue.cc sat.cc sat_factory.cc \ adler32memcpy.cc logger.cc HFILES = os.h pattern.h queue.h sat.h worker.h sattypes.h \ finelock_queue.h error_diag.h disk_blocks.h adler32memcpy.h \ - logger.h + logger.h clock.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) +findmask_SOURCES = findmask.c findmask.inc all: stressapptest_config.h $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: -.SUFFIXES: .cc .o .obj +.SUFFIXES: .c .cc .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case 
'$(am__configure_deps)' in \ @@ -277,6 +277,12 @@ uninstall-binPROGRAMS: clean-binPROGRAMS: -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) + +clean-noinstPROGRAMS: + -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) +findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES) + @rm -f findmask$(EXEEXT) + $(LINK) $(findmask_OBJECTS) $(findmask_LDADD) $(LIBS) stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) @rm -f stressapptest$(EXEEXT) $(CXXLINK) $(stressapptest_OBJECTS) $(stressapptest_LDADD) $(LIBS) @@ -290,6 +296,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adler32memcpy.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/disk_blocks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error_diag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findmask.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/finelock_queue.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logger.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@ @@ -301,6 +308,20 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sat_factory.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/worker.Po@am__quote@ +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + .cc.o: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @@ -431,7 +452,8 @@ maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." clean: clean-am -clean-am: clean-binPROGRAMS clean-generic mostlyclean-am +clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \ + mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) @@ -501,17 +523,18 @@ uninstall-am: uninstall-binPROGRAMS .MAKE: all install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ - clean-generic ctags distclean distclean-compile \ - distclean-generic distclean-hdr distclean-tags distdir dvi \ - dvi-am html html-am info info-am install install-am \ - install-binPROGRAMS install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-pdf install-pdf-am install-ps install-ps-am \ - install-strip installcheck installcheck-am installdirs \ - maintainer-clean maintainer-clean-generic mostlyclean \ - mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-binPROGRAMS + clean-generic clean-noinstPROGRAMS ctags distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-binPROGRAMS install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags 
uninstall \ + uninstall-am uninstall-binPROGRAMS # Tell versions [3.59,3.63) of GNU make to not export all variables. diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc index 69324f7..47c6262 100644 --- a/src/adler32memcpy.cc +++ b/src/adler32memcpy.cc @@ -70,7 +70,7 @@ bool AdlerChecksum::Equals(const AdlerChecksum &other) const { // Returns string representation of the Adler checksum. string AdlerChecksum::ToHexString() const { char buffer[128]; - snprintf(buffer, sizeof(buffer), "%llx%llx%llx%llx", a1_, a2_, b1_, b2_); + snprintf(buffer, sizeof(buffer), "%016llx %016llx %016llx %016llx", a1_, a2_, b1_, b2_); return string(buffer); } @@ -399,7 +399,124 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // that there is no problem with memory this just mean that data was copied // from src to dst and checksum was calculated successfully). return true; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) && defined(__ARM_NEON__) + // Elements 0 to 3 are used for holding checksum terms a1, a2, + // b1, b2 respectively. These elements are filled by asm code. + // Checksum is seeded with the null checksum. + volatile uint64 checksum_arr[] __attribute__ ((aligned(16))) = + {1, 1, 0, 0}; + + if ((size_in_bytes >> 19) > 0) { + // Size is too large. Must be less than 2^19 bytes = 512 KB. + return false; + } + + // Since we are moving 64 bytes at a time number of iterations = total size/64 + uint32 blocks = size_in_bytes / 64; + + uint64 *dst = dstmem64; + uint64 *src = srcmem64; + + #define src_r "r3" + #define dst_r "r4" + #define blocks_r "r5" + #define crc_r "r6" + + asm volatile ( + "mov "src_r", %[src]; \n" + "mov "dst_r", %[dst]; \n" + "mov "crc_r", %[crc]; \n" + "mov "blocks_r", %[blocks]; \n" + + // Loop over block count. + "cmp "blocks_r", #0; \n" // Compare counter to zero. + "ble END; \n" + + + // Preload upcoming cacheline. 
+ "pld ["src_r", #0x0]; \n" + "pld ["src_r", #0x20]; \n" + + // Init checksum + "vldm "crc_r", {q0}; \n" + "vmov.i32 q1, #0; \n" + + // Start of the loop which copies 48 bytes from source to dst each time. + "TOP: \n" + + // Make 3 moves each of 16 bytes from srcmem to qX registers. + // We are using 2 words out of 4 words in each qX register, + // word index 0 and word index 2. We'll swizzle them in a bit. + // Copy it. + "vldm "src_r"!, {q8, q9, q10, q11}; \n" + "vstm "dst_r"!, {q8, q9, q10, q11}; \n" + + // Arrange it. + "vmov.i64 q12, #0; \n" + "vmov.i64 q13, #0; \n" + "vmov.i64 q14, #0; \n" + "vmov.i64 q15, #0; \n" + // This exchenges words 1,3 in the filled registers with + // words 0,2 in the empty registers. + "vtrn.32 q8, q12; \n" + "vtrn.32 q9, q13; \n" + "vtrn.32 q10, q14; \n" + "vtrn.32 q11, q15; \n" + + // Sum into q0, then into q1. + // Repeat this for q8 - q13. + // Overflow can occur only if there are more + // than 2^16 additions => more than 2^17 words => more than 2^19 bytes so + // if size_in_bytes > 2^19 than overflow occurs. + "vadd.i64 q0, q0, q8; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q12; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q9; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q13; \n" + "vadd.i64 q1, q1, q0; \n" + + "vadd.i64 q0, q0, q10; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q14; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q11; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q15; \n" + "vadd.i64 q1, q1, q0; \n" + + // Increment counter and loop. + "sub "blocks_r", "blocks_r", #1; \n" + "cmp "blocks_r", #0; \n" // Compare counter to zero. + "bgt TOP; \n" + + + "END:\n" + // Report checksum values A and B (both right now are two concatenated + // 64 bit numbers and have to be converted to 64 bit numbers) + // seems like Adler128 (since size of each part is 4 byte rather than + // 1 byte). + "vstm "crc_r", {q0, q1}; \n" + + // Output registers. + : + // Input registers. 
+ : [src] "r"(src), [dst] "r"(dst), [blocks] "r"(blocks) , [crc] "r"(checksum_arr) + : "memory", "cc", "r3", "r4", "r5", "r6", "q0", "q1", "q8","q9","q10", "q11", "q12","q13","q14","q15" + ); // asm. + + if (checksum != NULL) { + checksum->Set(checksum_arr[0], checksum_arr[1], + checksum_arr[2], checksum_arr[3]); + } + + // Everything went fine, so return true (this does not mean + // that there is no problem with memory this just means that data was copied + // from src to dst and checksum was calculated successfully). + return true; #else + #warning "No vector copy defined for this architecture." // Fall back to C implementation for anything else. return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum); #endif diff --git a/src/clock.h b/src/clock.h new file mode 100644 index 0000000..4204188 --- /dev/null +++ b/src/clock.h @@ -0,0 +1,29 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: cferris + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_CLOCK_H_ // NOLINT +#define STRESSAPPTEST_CLOCK_H_ + +#include <time.h> + +// This class implements a clock that can be overridden for unit tests. 
+class Clock { + public: + virtual ~Clock() {} + + virtual time_t Now() { return time(NULL); } +}; + +#endif // STRESSAPPTEST_CLOCK_H_ NOLINT diff --git a/src/disk_blocks.cc b/src/disk_blocks.cc index c7860b0..60018f9 100644 --- a/src/disk_blocks.cc +++ b/src/disk_blocks.cc @@ -14,38 +14,51 @@ // Thread-safe container of disk blocks -#include <utility> - // This file must work with autoconf on its public version, // so these includes are correct. #include "disk_blocks.h" -DiskBlockTable::DiskBlockTable() { - nelems_ = 0; +#include <utility> + +// BlockData +BlockData::BlockData() : address_(0), size_(0), + references_(0), initialized_(false), + pattern_(NULL) { + pthread_mutex_init(&data_mutex_, NULL); +} + +BlockData::~BlockData() { + pthread_mutex_destroy(&data_mutex_); +} + +void BlockData::set_initialized() { + pthread_mutex_lock(&data_mutex_); + initialized_ = true; + pthread_mutex_unlock(&data_mutex_); +} + +bool BlockData::initialized() const { + pthread_mutex_lock(&data_mutex_); + bool initialized = initialized_; + pthread_mutex_unlock(&data_mutex_); + return initialized; +} + +// DiskBlockTable +DiskBlockTable::DiskBlockTable() : sector_size_(0), write_block_size_(0), + device_name_(""), device_sectors_(0), + segment_size_(0), size_(0) { pthread_mutex_init(&data_mutex_, NULL); pthread_mutex_init(&parameter_mutex_, NULL); pthread_cond_init(&data_condition_, NULL); } DiskBlockTable::~DiskBlockTable() { - CleanTable(); pthread_mutex_destroy(&data_mutex_); pthread_mutex_destroy(&parameter_mutex_); pthread_cond_destroy(&data_condition_); } -void DiskBlockTable::CleanTable() { - pthread_mutex_lock(&data_mutex_); - for (map<int64, StorageData*>::iterator it = - addr_to_block_.begin(); it != addr_to_block_.end(); ++it) { - delete it->second; - } - addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end()); - nelems_ = 0; - pthread_cond_broadcast(&data_condition_); - pthread_mutex_unlock(&data_mutex_); -} - // 64-bit non-negative random number generator. 
Stolen from // depot/google3/base/tracecontext_unittest.cc. int64 DiskBlockTable::Random64() { @@ -58,28 +71,27 @@ int64 DiskBlockTable::Random64() { return -x; } -int64 DiskBlockTable::NumElems() { - unsigned int nelems; +uint64 DiskBlockTable::Size() { pthread_mutex_lock(&data_mutex_); - nelems = nelems_; + uint64 size = size_; pthread_mutex_unlock(&data_mutex_); - return nelems; + return size; } void DiskBlockTable::InsertOnStructure(BlockData *block) { - int64 address = block->GetAddress(); + int64 address = block->address(); StorageData *sd = new StorageData(); sd->block = block; - sd->pos = nelems_; + sd->pos = size_; // Creating new block ... pthread_mutex_lock(&data_mutex_); - if (pos_to_addr_.size() <= nelems_) { + if (pos_to_addr_.size() <= size_) { pos_to_addr_.insert(pos_to_addr_.end(), address); } else { - pos_to_addr_[nelems_] = address; + pos_to_addr_[size_] = address; } - addr_to_block_.insert(std::make_pair(address, sd)); - nelems_++; + addr_to_block_[address] = sd; + size_++; pthread_cond_broadcast(&data_condition_); pthread_mutex_unlock(&data_mutex_); } @@ -87,26 +99,28 @@ void DiskBlockTable::InsertOnStructure(BlockData *block) { int DiskBlockTable::RemoveBlock(BlockData *block) { // For write threads, check the reference counter and remove // it from the structure. - int64 address = block->GetAddress(); + int64 address = block->address(); AddrToBlockMap::iterator it = addr_to_block_.find(address); int ret = 1; if (it != addr_to_block_.end()) { int curr_pos = it->second->pos; - int last_pos = nelems_ - 1; + int last_pos = size_ - 1; AddrToBlockMap::iterator last_it = addr_to_block_.find( pos_to_addr_[last_pos]); - sat_assert(nelems_ > 0); + sat_assert(size_ > 0); sat_assert(last_it != addr_to_block_.end()); - // Everything is fine, updating ... + // Everything is fine, removing block from table. 
pthread_mutex_lock(&data_mutex_); pos_to_addr_[curr_pos] = pos_to_addr_[last_pos]; last_it->second->pos = curr_pos; delete it->second; addr_to_block_.erase(it); - nelems_--; + size_--; block->DecreaseReferenceCounter(); if (block->GetReferenceCounter() == 0) delete block; + else if (block->GetReferenceCounter() < 0) + ret = 0; pthread_cond_broadcast(&data_condition_); pthread_mutex_unlock(&data_mutex_); } else { @@ -116,18 +130,16 @@ int DiskBlockTable::RemoveBlock(BlockData *block) { } int DiskBlockTable::ReleaseBlock(BlockData *block) { - // If is a random thread, just check the reference counter. + // If caller is a random thread, just check the reference counter. int ret = 1; pthread_mutex_lock(&data_mutex_); int references = block->GetReferenceCounter(); - if (references > 0) { - if (references == 1) - delete block; - else - block->DecreaseReferenceCounter(); - } else { + if (references == 1) + delete block; + else if (references > 0) + block->DecreaseReferenceCounter(); + else ret = 0; - } pthread_mutex_unlock(&data_mutex_); return ret; } @@ -135,13 +147,13 @@ int DiskBlockTable::ReleaseBlock(BlockData *block) { BlockData *DiskBlockTable::GetRandomBlock() { struct timespec ts; struct timeval tp; - int result = 0; gettimeofday(&tp, NULL); ts.tv_sec = tp.tv_sec; ts.tv_nsec = tp.tv_usec * 1000; ts.tv_sec += 2; // Wait for 2 seconds. 
+ int result = 0; pthread_mutex_lock(&data_mutex_); - while (!nelems_ && result != ETIMEDOUT) { + while (!size_ && result != ETIMEDOUT) { result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts); } if (result == ETIMEDOUT) { @@ -149,13 +161,13 @@ BlockData *DiskBlockTable::GetRandomBlock() { return NULL; } else { int64 random_number = Random64(); - int64 random_pos = random_number % nelems_; + int64 random_pos = random_number % size_; int64 address = pos_to_addr_[random_pos]; AddrToBlockMap::const_iterator it = addr_to_block_.find(address); sat_assert(it != addr_to_block_.end()); BlockData *b = it->second->block; // A block is returned only if its content is written on disk. - if (b->BlockIsInitialized()) { + if (b->initialized()) { b->IncreaseReferenceCounter(); } else { b = NULL; @@ -165,45 +177,38 @@ BlockData *DiskBlockTable::GetRandomBlock() { } } -void DiskBlockTable::SetParameters( - int sector_size, int write_block_size, int64 device_sectors, - int64 segment_size, string device_name) { +void DiskBlockTable::SetParameters(int sector_size, + int write_block_size, + int64 device_sectors, + int64 segment_size, + const string& device_name) { + sat_assert(size_ == 0); pthread_mutex_lock(&parameter_mutex_); sector_size_ = sector_size; write_block_size_ = write_block_size; device_sectors_ = device_sectors; segment_size_ = segment_size; device_name_ = device_name; - CleanTable(); pthread_mutex_unlock(&parameter_mutex_); } BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { int64 sector = 0; BlockData *block = new BlockData(); - bool good_sequence = false; - int num_sectors; - if (block == NULL) { logprintf(0, "Process Error: Unable to allocate memory " "for sector data for disk %s.\n", device_name_.c_str()); return NULL; } - pthread_mutex_lock(&parameter_mutex_); - sat_assert(device_sectors_ != 0); - // Align the first sector with the beginning of a write block - num_sectors = write_block_size_ / sector_size_; - + int num_sectors = write_block_size_ / 
sector_size_; for (int i = 0; i < kBlockRetry && !good_sequence; i++) { good_sequence = true; - // Use the entire disk or a small segment of the disk to allocate the first // sector in the block from. - if (segment_size_ == -1) { sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % ( device_sectors_ / num_sectors); @@ -213,7 +218,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { segment_size_ / num_sectors); sector *= num_sectors; sector += segment * segment_size_; - // Make sure the block is within the segment. if (sector + num_sectors > (segment + 1) * segment_size_) { good_sequence = false; @@ -229,7 +233,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { // now aligned to the write_block_size, it is not necessary // to check each sector, just the first block (a sector // overlap will never occur). - pthread_mutex_lock(&data_mutex_); if (addr_to_block_.find(sector) != addr_to_block_.end()) { good_sequence = false; @@ -238,7 +241,8 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { } if (good_sequence) { - block->SetParameters(sector, write_block_size_); + block->set_address(sector); + block->set_size(write_block_size_); block->IncreaseReferenceCounter(); InsertOnStructure(block); } else { @@ -248,66 +252,5 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { block = NULL; } pthread_mutex_unlock(&parameter_mutex_); - return block; } - -// BlockData - -BlockData::BlockData() { - addr_ = 0; - size_ = 0; - references_ = 0; - initialized_ = false; - pthread_mutex_init(&data_mutex_, NULL); -} - -BlockData::~BlockData() { - pthread_mutex_destroy(&data_mutex_); -} - -void BlockData::SetParameters(int64 address, int64 size) { - addr_ = address; - size_ = size; -} - -void BlockData::IncreaseReferenceCounter() { - references_++; -} - -void BlockData::DecreaseReferenceCounter() { - references_--; -} - -int BlockData::GetReferenceCounter() { - return references_; -} - -void BlockData::SetBlockAsInitialized() { - 
pthread_mutex_lock(&data_mutex_); - initialized_ = true; - pthread_mutex_unlock(&data_mutex_); -} - -bool BlockData::BlockIsInitialized() { - pthread_mutex_lock(&data_mutex_); - bool initialized = initialized_; - pthread_mutex_unlock(&data_mutex_); - return initialized; -} - -int64 BlockData::GetAddress() { - return addr_; -} - -int64 BlockData::GetSize() { - return size_; -} - -Pattern *BlockData::GetPattern() { - return pattern_; -} - -void BlockData::SetPattern(Pattern *p) { - pattern_ = p; -} diff --git a/src/disk_blocks.h b/src/disk_blocks.h index cb634c9..638ee9f 100644 --- a/src/disk_blocks.h +++ b/src/disk_blocks.h @@ -25,87 +25,146 @@ #include <map> #include <vector> #include <string> -// This file must work with autoconf on its public version, -// so these includes are correct. -#include "pattern.h" + +#include "sattypes.h" + +class Pattern; // Data about a block written to disk so that it can be verified later. +// Thread-unsafe, must be used with locks on non-const methods, +// except for initialized accessor/mutator, which are thread-safe +// (and in fact, is the only method supposed to be accessed from +// someone which is not the thread-safe DiskBlockTable). class BlockData { public: BlockData(); ~BlockData(); - void SetParameters(int64 address, int64 size); - void IncreaseReferenceCounter(); - void DecreaseReferenceCounter(); - int GetReferenceCounter(); - void SetBlockAsInitialized(); - bool BlockIsInitialized(); - int64 GetAddress(); - int64 GetSize(); - void SetPattern(Pattern *p); - Pattern *GetPattern(); - protected: - int64 addr_; // address of first sector in block - int64 size_; // size of block - int references_; // reference counter - bool initialized_; // flag indicating the block was written on disk + + // These are reference counters used to control how many + // threads currently have a copy of this particular block. 
+ void IncreaseReferenceCounter() { references_++; } + void DecreaseReferenceCounter() { references_--; } + int GetReferenceCounter() const { return references_; } + + // Controls whether the block was written on disk or not. + // Once written, you cannot "un-write" it without destroying + // this object. + void set_initialized(); + bool initialized() const; + + // Accessor methods for some data related to blocks. + void set_address(uint64 address) { address_ = address; } + uint64 address() const { return address_; } + void set_size(uint64 size) { size_ = size; } + uint64 size() const { return size_; } + void set_pattern(Pattern *p) { pattern_ = p; } + Pattern *pattern() { return pattern_; } + private: + uint64 address_; // Address of first sector in block + uint64 size_; // Size of block + int references_; // Reference counter + bool initialized_; // Flag indicating the block was written on disk Pattern *pattern_; - pthread_mutex_t data_mutex_; + mutable pthread_mutex_t data_mutex_; DISALLOW_COPY_AND_ASSIGN(BlockData); }; -// Disk Block table - store data from blocks to be write / read by -// a DiskThread +// A thread-safe table used to store block data and control access +// to these blocks, letting several threads read and write blocks on +// disk. class DiskBlockTable { public: DiskBlockTable(); virtual ~DiskBlockTable(); - // Get Number of elements stored on table - int64 NumElems(); - // Clean all table data - void CleanTable(); - // Get a random block from the list. Only returns if a element - // is available (consider that other thread must have added them. - BlockData *GetRandomBlock(); - // Set all initial parameters. Assumes all existent data is + // Returns number of elements stored on table. + uint64 Size(); + + // Sets all initial parameters. Assumes all existent data is // invalid and, therefore, must be removed. 
void SetParameters(int sector_size, int write_block_size, int64 device_sectors, int64 segment_size, - string device_name); - // Return a new block in a unused address. + const string& device_name); + + // During the regular execution, there will be 2 types of threads: + // - Write thread: gets a large number of blocks using GetUnusedBlock, + // writes them on disk (if on destructive mode), + // reads block content ONCE from disk and then removes + // the block from queue with RemoveBlock. After a removal a + // block is not available for read threads, but it is + // only removed from memory if there is no reference for + // this block. Note that a write thread also counts as + // a reference. + // - Read threads: get one block at a time (if available) with + // GetRandomBlock, reads its content from disk, + // checking whether it is correct or not, and releases + // (Using ReleaseBlock) the block to be erased by the + // write threads. Since several read threads are allowed + // to read the same block, a reference counter is used to + // control when the block can be REALLY erased from + // memory, and all memory management is made by a + // DiskBlockTable instance. + + // Returns a new block in an unused address. Does not + // grant ownership of the pointer to the caller + // (use RemoveBlock to delete the block from memory instead). BlockData *GetUnusedBlock(int64 segment); - // Remove block from structure (called by write threads) + + // Removes block from structure (called by write threads). Returns + // 1 if successful, 0 otherwise. int RemoveBlock(BlockData *block); - // Release block to be erased (called by random threads) - int ReleaseBlock(BlockData *block); - protected: + // Gets a random block from the list. Only returns if an element + // is available (a write thread has got this block, written it on disk, + // and set this block as initialized). 
Does not grant ownership of the + // pointer to the caller (use RemoveBlock to delete the block from + // memory instead). + BlockData *GetRandomBlock(); - void InsertOnStructure(BlockData *block); - // Generate a random 64-bit integer (virtual so it could be - // override by the tests) - virtual int64 Random64(); + // Releases block to be erased (called by random threads). Returns + // 1 if successful, 0 otherwise. + int ReleaseBlock(BlockData *block); + protected: struct StorageData { BlockData *block; int pos; }; - - static const int kBlockRetry = 100; // Number of retries to allocate - // sectors. - typedef map<int64, StorageData*> AddrToBlockMap; typedef vector<int64> PosToAddrVector; + + // Inserts block in structure, used in tests and by other methods. + void InsertOnStructure(BlockData *block); + + // Generates a random 64-bit integer. + // Virtual method so it can be overridden by the tests. + virtual int64 Random64(); + + // Accessor methods for testing. + const PosToAddrVector& pos_to_addr() const { return pos_to_addr_; } + const AddrToBlockMap& addr_to_block() const { return addr_to_block_; } + + int sector_size() const { return sector_size_; } + int write_block_size() const { return write_block_size_; } + const string& device_name() const { return device_name_; } + int64 device_sectors() const { return device_sectors_; } + int64 segment_size() const { return segment_size_; } + + private: + // Number of retries to allocate sectors. + static const int kBlockRetry = 100; + // Actual tables. 
PosToAddrVector pos_to_addr_; AddrToBlockMap addr_to_block_; - uint64 nelems_; - int sector_size_; // Sector size, in bytes - int write_block_size_; // Block size, in bytes - string device_name_; // Device name - int64 device_sectors_; // Number of sectors in device - int64 segment_size_; // Segment size, in bytes + + // Configuration parameters for block selection + int sector_size_; // Sector size, in bytes + int write_block_size_; // Block size, in bytes + string device_name_; // Device name + int64 device_sectors_; // Number of sectors in device + int64 segment_size_; // Segment size in bytes + uint64 size_; // Number of elements on table pthread_mutex_t data_mutex_; pthread_cond_t data_condition_; pthread_mutex_t parameter_mutex_; diff --git a/src/findmask.c b/src/findmask.c new file mode 100644 index 0000000..1b10988 --- /dev/null +++ b/src/findmask.c @@ -0,0 +1,140 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + * This "tool" can be used to brute force the XOR bitmask that a memory + * controller uses to interleave addresses onto its two channels. To use it, + * you need to have a bunch of addresses that are known to go to only one + * of the memory channels... easiest way to get these is to run stressapptest on + * a machine while holding a soldering iron close to the chips of one channel. 
+ * Generate about a thousand failures and extract their physical addresses + * from the output. Write them to findmask.inc in a way that forms a valid + * definition for the addrs array. Make and run on a big machine. + * + * The program iterates over all possible bitmasks within the first NUM_BITS, + * parallelizing execution over NUM_THREADS. Every integer is masked + * onto all supplied addresses, counting the amount of times this results in + * an odd or even amount of bits. If all but NOISE addresses fall on one side, + * it will print that mask to stdout. Note that the script will always "find" + * the mask 0x0, and may also report masks such as 0x100000000 depending on + * your test machines memory size... you will need to use your own judgement to + * interpret the results. + * + * As the program might run for a long time, you can send SIGUSR1 to it to + * output the last mask that was processed and get a rough idea of the + * current progress. + */ + +#include <inttypes.h> +#include <pthread.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#define NOISE 20 +#define NUM_BITS 32 +#define NUM_THREADS 128 // keep this a power of two + +static uint64_t addrs[] = { +#include "findmask.inc" +}; +static uint64_t lastmask; + +__attribute__((optimize(3, "unroll-loops"))) +void* thread_func(void* arg) { + register uint64_t mask; + register uintptr_t num = (uintptr_t)arg; + + for (mask = num; mask < (1ULL << (NUM_BITS + 1)); mask += NUM_THREADS) { + register const uint64_t* cur; + register int a = 0; + register int b = 0; + + for (cur = addrs; (char*)cur < (char*)addrs + sizeof(addrs); cur++) { +#ifdef __x86_64__ + register uint64_t addr asm("rdx") = *cur & mask; + register uint32_t tmp asm("ebx"); + + // Behold: the dark bit counting magic! 
+ asm ( + // Fold high and low 32 bits onto each other + "MOVl %%edx, %%ebx\n\t" + "SHRq $32, %%rdx\n\t" + "XORl %%ebx, %%edx\n\t" + // Fold high and low 16 bits onto each other + "MOVl %%edx, %%ebx\n\t" + "SHRl $16, %%edx\n\t" + "XORw %%bx, %%dx\n\t" + // Fold high and low 8 bits onto each other + "XORb %%dh, %%dl\n\t" + // Invoke ancient 8086 parity flag (only counts lowest byte) + "SETnp %%bl\n\t" + "SETp %%dl\n\t" + // Stupid SET instruction can only affect the lowest byte... + "ANDl $1, %%ebx\n\t" + "ANDl $1, %%edx\n\t" + // Increment either 'a' or 'b' without needing another branch + "ADDl %%ebx, %2\n\t" + "ADDl %%edx, %1\n\t" + : "=b" (tmp), "+r"(a), "+r"(b) : "d"(addr) : "cc"); + +#else // generic processor + register uint64_t addr = *cur & mask; + register uint32_t low = (uint32_t)addr; + register uint32_t high = (uint32_t)(addr >> 32); + + // Takes about twice as long as the version above... take that GCC! + __builtin_parity(low) ^ __builtin_parity(high) ? a++ : b++; +#endif + + // Early abort: probably still the most valuable optimization in here + if (a >= NOISE && b >= NOISE) break; + } + + if (a < NOISE) b = a; + if (b < NOISE) { + printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask); + fflush(stdout); + } + + // I'm a little paranoid about performance: don't write to memory too often + if (!(mask & 0x7ff)) lastmask = mask; + } + + return 0; +} + +void signal_handler(int signum) { + printf("Received signal... 
currently evaluating mask 0x%" PRIx64 "!\n", + lastmask); + fflush(stdout); +} + +int main(int argc, char** argv) { + uintptr_t i; + pthread_t threads[NUM_THREADS]; + + signal(SIGUSR1, signal_handler); + + for (i = 0; i < NUM_THREADS; i++) + pthread_create(&threads[i], 0, thread_func, (void*)i); + + for (i = 0; i < NUM_THREADS; i++) + pthread_join(threads[i], 0); + + return 0; +} diff --git a/src/findmask.inc b/src/findmask.inc new file mode 100644 index 0000000..e76f72f --- /dev/null +++ b/src/findmask.inc @@ -0,0 +1,4 @@ +// This is the body of a uintptr_t array definition. Fill in your own addresses. +0x116bb312c, // example values (can be >32 bit) +0x38d3c5ad, // replace with your own +0x77c1e96d // don't forget: no comma after the last one diff --git a/src/logger.cc b/src/logger.cc index e4ecb03..f13e003 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -17,6 +17,7 @@ #include <pthread.h> #include <stdarg.h> #include <stdio.h> +#include <time.h> #include <unistd.h> #include <string> @@ -37,10 +38,20 @@ void Logger::VLogF(int priority, const char *format, va_list args) { return; } char buffer[4096]; - int length = vsnprintf(buffer, sizeof buffer, format, args); - if (static_cast<size_t>(length) >= sizeof buffer) { - length = sizeof buffer; - buffer[sizeof buffer - 1] = '\n'; + size_t length = 0; + if (log_timestamps_) { + time_t raw_time; + time(&raw_time); + struct tm time_struct; + localtime_r(&raw_time, &time_struct); + length = strftime(buffer, sizeof(buffer), "%Y/%m/%d-%H:%M:%S(%Z) ", + &time_struct); + LOGGER_ASSERT(length); // Catch if the buffer is set too small. 
+ } + length += vsnprintf(buffer + length, sizeof(buffer) - length, format, args); + if (length >= sizeof(buffer)) { + length = sizeof(buffer); + buffer[sizeof(buffer) - 1] = '\n'; } QueueLogLine(new string(buffer, length)); } @@ -52,19 +63,30 @@ void Logger::StartThread() { } void Logger::StopThread() { - LOGGER_ASSERT(thread_running_); + // Allow this to be called before the thread has started. + if (!thread_running_) { + return; + } thread_running_ = false; - LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + int retval = pthread_mutex_lock(&queued_lines_mutex_); + LOGGER_ASSERT(0 == retval); bool need_cond_signal = queued_lines_.empty(); queued_lines_.push_back(NULL); - LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); + retval = pthread_mutex_unlock(&queued_lines_mutex_); + LOGGER_ASSERT(0 == retval); if (need_cond_signal) { - LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_)); + retval = pthread_cond_signal(&queued_lines_cond_); + LOGGER_ASSERT(0 == retval); } - LOGGER_ASSERT(0 == pthread_join(thread_, NULL)); + retval = pthread_join(thread_, NULL); + LOGGER_ASSERT(0 == retval); } -Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) { +Logger::Logger() + : verbosity_(20), + log_fd_(-1), + thread_running_(false), + log_timestamps_(true) { LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL)); LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL)); LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL)); @@ -94,19 +116,15 @@ void Logger::QueueLogLine(string *line) { LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); } -namespace { -void WriteToFile(const string& line, int fd) { - LOGGER_ASSERT(write(fd, line.data(), line.size()) == - static_cast<ssize_t>(line.size())); -} -} - void Logger::WriteAndDeleteLogLine(string *line) { LOGGER_ASSERT(line != NULL); + ssize_t bytes_written; if (log_fd_ >= 0) { - WriteToFile(*line, log_fd_); + bytes_written = write(log_fd_, 
line->data(), line->size()); + LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size())); } - WriteToFile(*line, 1); + bytes_written = write(STDOUT_FILENO, line->data(), line->size()); + LOGGER_ASSERT(bytes_written == static_cast<ssize_t>(line->size())); delete line; } diff --git a/src/logger.h b/src/logger.h index 1d70107..21b3c6b 100644 --- a/src/logger.h +++ b/src/logger.h @@ -62,7 +62,7 @@ class Logger { // Lines with a priority numerically greater than this will not be logged. // May not be called while multiple threads are running. - void SetVerbosity(int verbosity) { + virtual void SetVerbosity(int verbosity) { verbosity_ = verbosity; } @@ -72,17 +72,22 @@ class Logger { // Args: // log_fd: The file descriptor to write to. Will not be closed by this // object. - void SetLogFd(int log_fd) { + virtual void SetLogFd(int log_fd) { LOGGER_ASSERT(log_fd >= 0); log_fd_ = log_fd; } // Set output to be written to stdout only. This is the default mode. May // not be called while multiple threads are running. - void SetStdoutOnly() { + virtual void SetStdoutOnly() { log_fd_ = -1; } + // Enable or disable logging of timestamps. + void SetTimestampLogging(bool log_ts_enabled) { + log_timestamps_ = log_ts_enabled; + } + // Logs a line, with a vprintf(3)-like interface. This will block on writing // the line to stdout/disk iff the dedicated logging thread is not running. // This will block on adding the line to the queue if doing so would exceed @@ -104,11 +109,12 @@ class Logger { // before this returns. Waits for the thread to finish before returning. void StopThread(); - private: + protected: Logger(); - ~Logger(); + virtual ~Logger(); + private: // Args: // line: Must be non-NULL. This function takes ownership of it. 
void QueueLogLine(string *line); @@ -127,6 +133,7 @@ class Logger { int verbosity_; int log_fd_; bool thread_running_; + bool log_timestamps_; vector<string*> queued_lines_; // This doubles as a mutex for log_fd_ when the logging thread is not running. pthread_mutex_t queued_lines_mutex_; @@ -48,6 +48,7 @@ // so these includes are correct. #include "sattypes.h" #include "error_diag.h" +#include "clock.h" // OsLayer initialization. OsLayer::OsLayer() { @@ -55,10 +56,12 @@ OsLayer::OsLayer() { testmemsize_ = 0; totalmemsize_ = 0; min_hugepages_bytes_ = 0; + reserve_mb_ = 0; normal_mem_ = true; use_hugepages_ = false; use_posix_shm_ = false; dynamic_mapped_shmem_ = false; + mmapped_allocation_ = false; shmid_ = 0; time_initialized_ = 0; @@ -76,20 +79,28 @@ OsLayer::OsLayer() { address_mode_ = sizeof(pvoid) * 8; has_clflush_ = false; - has_sse2_ = false; + has_vector_ = false; use_flush_page_cache_ = false; + + clock_ = NULL; } // OsLayer cleanup. OsLayer::~OsLayer() { if (error_diagnoser_) delete error_diagnoser_; + if (clock_) + delete clock_; } // OsLayer initialization. bool OsLayer::Initialize() { - time_initialized_ = time(NULL); + if (!clock_) { + clock_ = new Clock(); + } + + time_initialized_ = clock_->Now(); // Detect asm support. GetFeatures(); @@ -129,8 +140,28 @@ int OsLayer::AddressMode() { // Translates user virtual to physical address. uint64 OsLayer::VirtualToPhysical(void *vaddr) { - // Needs platform specific implementation. 
- return 0; + uint64 frame, shift; + off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8; + int fd = open(kPagemapPath, O_RDONLY); + // /proc/self/pagemap is available in kernel >= 2.6.25 + if (fd < 0) + return 0; + + if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n", + kPagemapPath, err, errtxt.c_str()); + if (fd >= 0) + close(fd); + return 0; + } + close(fd); + if (!(frame & (1LL << 63)) || (frame & (1LL << 62))) + return 0; + shift = (frame >> 55) & 0x3f; + frame = (frame & 0x007fffffffffffffLL) << shift; + return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1)); } // Returns the HD device that contains this file. @@ -149,21 +180,21 @@ list<string> OsLayer::FindFileDevices() { // Get HW core features from cpuid instruction. void OsLayer::GetFeatures() { #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - // CPUID features documented at: - // http://www.sandpile.org/ia32/cpuid.htm - int ax, bx, cx, dx; - __asm__ __volatile__ ( - "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1)); - has_clflush_ = (dx >> 19) & 1; - has_sse2_ = (dx >> 26) & 1; + unsigned int eax = 1, ebx, ecx, edx; + cpuid(&eax, &ebx, &ecx, &edx); + has_clflush_ = (edx >> 19) & 1; + has_vector_ = (edx >> 26) & 1; // SSE2 caps bit. logprintf(9, "Log: has clflush: %s, has sse2: %s\n", has_clflush_ ? "true" : "false", - has_sse2_ ? "true" : "false"); + has_vector_ ? "true" : "false"); #elif defined(STRESSAPPTEST_CPU_PPC) // All PPC implementations have cache flush instructions. has_clflush_ = true; #elif defined(STRESSAPPTEST_CPU_ARMV7A) + // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv. + // For now assume neon and don't run -W if you don't have it. + has_vector_ = true; // NEON. #warning "Unsupported CPU type ARMV7A: unable to determine feature set." 
#else #warning "Unsupported CPU type: unable to determine feature set." @@ -215,8 +246,9 @@ bool OsLayer::FlushPageCache(void) { void OsLayer::Flush(void *vaddr) { // Use the generic flush. This function is just so we can override // this if we are so inclined. - if (has_clflush_) - FastFlush(vaddr); + if (has_clflush_) { + OsLayer::FastFlush(vaddr); + } } @@ -224,7 +256,7 @@ void OsLayer::Flush(void *vaddr) { bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, AdlerChecksum *checksum) { - if (has_sse2_) { + if (has_vector_) { return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum); } else { return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum); @@ -232,12 +264,31 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, } -// Translate user virtual to physical address. +// Translate physical address to memory module/chip name. +// Assumes interleaving between two memory channels based on the XOR of +// all address bits in the 'channel_hash' mask, with repeated 'channel_width_' +// blocks with bits distributed from each chip in that channel. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { - char tmpbuf[256]; - snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown"); - snprintf(buf, len, "%s", tmpbuf); - return 0; + if (!channels_) { + snprintf(buf, len, "DIMM Unknown"); + return -1; + } + + // Find channel by XORing address bits in channel_hash mask. + uint32 low = static_cast<uint32>(addr & channel_hash_); + uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32); + vector<string>& channel = (*channels_)[ + __builtin_parity(high) ^ __builtin_parity(low)]; + + // Find dram chip by finding which byte within the channel + // by address mod channel width, then divide the channel + // evenly among the listed dram chips. Note, this will not work + // with x4 dram. 
+ int chip = (addr % (channel_width_ / 8)) / + ((channel_width_ / 8) / channel.size()); + string name = channel[chip]; + snprintf(buf, len, "%s", name.c_str()); + return 1; } @@ -293,9 +344,17 @@ string OsLayer::FindCoreMaskFormat(int32 region) { // Report an error in an easily parseable way. bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) { - time_t now = time(NULL); + time_t now = clock_->Now(); int ttf = now - time_initialized_; - logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf); + if (strlen(symptom) && strlen(part)) { + logprintf(0, "Report Error: %s : %s : %d : %ds\n", + symptom, part, count, ttf); + } else { + // Log something so the error still shows up, but this won't break the + // parser. + logprintf(0, "Warning: Invalid Report Error: " + "%s : %s : %d : %ds\n", symptom, part, count, ttf); + } return true; } @@ -359,12 +418,31 @@ int64 OsLayer::FindFreeMemSize() { // // TODO(nsanders): is there a more correct way to determine target // memory size? - if (hugepagesize > 0 && min_hugepages_bytes_ > 0) { - minsize = min_hugepages_bytes_; - } else if (physsize < 2048LL * kMegabyte) { - minsize = ((pages * 85) / 100) * pagesize; + if (hugepagesize > 0) { + if (min_hugepages_bytes_ > 0) { + minsize = min_hugepages_bytes_; + } else { + minsize = hugepagesize; + } } else { - minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + if (physsize < 2048LL * kMegabyte) { + minsize = ((pages * 85) / 100) * pagesize; + } else { + minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + } + // Make sure that at least reserve_mb_ is left for the system. 
+ if (reserve_mb_ > 0) { + int64 totalsize = pages * pagesize; + int64 reserve_kb = reserve_mb_ * kMegabyte; + if (reserve_kb > totalsize) { + logprintf(0, "Procedural Error: %lld is bigger than the total memory " + "available %lld\n", reserve_kb, totalsize); + } else if (reserve_kb > totalsize - minsize) { + logprintf(5, "Warning: Overriding memory to use: original %lld, " + "current %lld\n", minsize, totalsize - reserve_kb); + minsize = totalsize - reserve_kb; + } + } } // Use hugepage sizing if available. @@ -435,7 +513,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { "'sudo mount -o remount,size=100\% /dev/shm.'\n"); } else if (hugepagesize >= length) { prefer_hugepages = true; - logprintf(3, "Log: Prefer using hugepace allocation.\n"); + logprintf(3, "Log: Prefer using hugepage allocation.\n"); } else { logprintf(3, "Log: Prefer plain malloc memory allocation.\n"); } @@ -458,7 +536,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { break; } - shmaddr = shmat(shmid, NULL, NULL); + shmaddr = shmat(shmid, NULL, 0); if (shmaddr == reinterpret_cast<void*>(-1)) { int err = errno; string errtxt = ErrorString(err); @@ -515,7 +593,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { // Do a full mapping here otherwise. shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, - shm_object, NULL); + shm_object, 0); if (shmaddr == reinterpret_cast<void*>(-1)) { int err = errno; string errtxt = ErrorString(err); @@ -540,18 +618,32 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { } while (0); shm_unlink("/stressapptest"); } -#endif // HAVE_SYS_SHM_H +#endif // HAVE_SYS_SHM_H if (!use_hugepages_ && !use_posix_shm_) { - // Use memalign to ensure that blocks are aligned enough for disk direct IO. 
- buf = static_cast<char*>(memalign(4096, length)); - if (buf) { - logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); - } else { - logprintf(0, "Process Error: memalign returned 0\n"); - if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { - logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " - "bit process. Please setup shared memory.\n"); + // If the page size is what SAT is expecting explicitly perform mmap() + // allocation. + if (sysconf(_SC_PAGESIZE) >= 4096) { + void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map_buf != MAP_FAILED) { + buf = map_buf; + mmapped_allocation_ = true; + logprintf(0, "Log: Using mmap() allocation at %p.\n", buf); + } + } + if (!mmapped_allocation_) { + // Use memalign to ensure that blocks are aligned enough for disk direct + // IO. + buf = static_cast<char*>(memalign(4096, length)); + if (buf) { + logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); + } else { + logprintf(0, "Process Error: memalign returned 0\n"); + if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { + logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " + "bit process. Please setup shared memory.\n"); + } } } } @@ -579,6 +671,8 @@ void OsLayer::FreeTestMem() { munmap(testmem_, testmemsize_); } close(shmid_); + } else if (mmapped_allocation_) { + munmap(testmem_, testmemsize_); } else { free(testmem_); } @@ -800,7 +894,9 @@ uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) { bool OsLayer::CpuStressWorkload() { double float_arr[100]; double sum = 0; +#ifdef HAVE_RAND_R unsigned int seed = 12345; +#endif // Initialize array with random numbers. 
for (int i = 0; i < 100; i++) { @@ -809,8 +905,9 @@ bool OsLayer::CpuStressWorkload() { if (rand_r(&seed) % 2) float_arr[i] *= -1.0; #else - float_arr[i] = rand(); - if (rand() % 2) + srand(time(NULL)); + float_arr[i] = rand(); // NOLINT + if (rand() % 2) // NOLINT float_arr[i] *= -1.0; #endif } @@ -828,82 +925,3 @@ bool OsLayer::CpuStressWorkload() { logprintf(12, "Log: I'm Feeling Lucky!\n"); return true; } - -PCIDevices OsLayer::GetPCIDevices() { - PCIDevices device_list; - DIR *dir; - struct dirent *buf = new struct dirent(); - struct dirent *entry; - dir = opendir(kSysfsPath); - if (!dir) - logprintf(0, "Process Error: Cannot open %s", kSysfsPath); - while (readdir_r(dir, buf, &entry) == 0 && entry) { - PCIDevice *device; - unsigned int dev, func; - // ".", ".." or a special non-device perhaps. - if (entry->d_name[0] == '.') - continue; - - device = new PCIDevice(); - if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d", - &device->domain, &device->bus, &dev, &func) < 4) { - logprintf(0, "Process Error: Couldn't parse %s", entry->d_name); - free(device); - continue; - } - device->dev = dev; - device->func = func; - device->vendor_id = PCIGetValue(entry->d_name, "vendor"); - device->device_id = PCIGetValue(entry->d_name, "device"); - PCIGetResources(entry->d_name, device); - device_list.insert(device_list.end(), device); - } - closedir(dir); - delete buf; - return device_list; -} - -int OsLayer::PCIGetValue(string name, string object) { - int fd, len; - char filename[256]; - char buf[256]; - snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, - name.c_str(), object.c_str()); - fd = open(filename, O_RDONLY); - if (fd < 0) - return 0; - len = read(fd, buf, 256); - close(fd); - buf[len] = '\0'; - return strtol(buf, NULL, 0); // NOLINT -} - -int OsLayer::PCIGetResources(string name, PCIDevice *device) { - char filename[256]; - char buf[256]; - FILE *file; - int64 start; - int64 end; - int64 size; - int i; - snprintf(filename, sizeof(filename), "%s/%s/%s", 
kSysfsPath, - name.c_str(), "resource"); - file = fopen(filename, "r"); - if (!file) { - logprintf(0, "Process Error: impossible to find resource file for %s", - filename); - return errno; - } - for (i = 0; i < 6; i++) { - if (!fgets(buf, 256, file)) - break; - sscanf(buf, "%llx %llx", &start, &end); // NOLINT - size = 0; - if (start) - size = end - start + 1; - device->base_addr[i] = start; - device->size[i] = size; - } - fclose(file); - return 0; -} @@ -17,6 +17,9 @@ #define STRESSAPPTEST_OS_H_ #include <dirent.h> +#include <unistd.h> +#include <sys/syscall.h> + #include <string> #include <list> #include <map> @@ -26,8 +29,9 @@ // so these includes are correct. #include "adler32memcpy.h" // NOLINT #include "sattypes.h" // NOLINT +#include "clock.h" // NOLINT -const char kSysfsPath[] = "/sys/bus/pci/devices"; +const char kPagemapPath[] = "/proc/self/pagemap"; struct PCIDevice { int32 domain; @@ -44,6 +48,8 @@ typedef vector<PCIDevice*> PCIDevices; class ErrorDiag; +class Clock; + // This class implements OS/Platform specific funtions. class OsLayer { public: @@ -56,6 +62,21 @@ class OsLayer { min_hugepages_bytes_ = min_bytes; } + // Set the minium amount of memory that should not be allocated. This only + // has any affect if hugepages are not used. + // Must be set before Initialize(). + void SetReserveSize(int64 reserve_mb) { + reserve_mb_ = reserve_mb; + } + + // Set parameters needed to translate physical address to memory module. + void SetDramMappingParams(uintptr_t channel_hash, int channel_width, + vector< vector<string> > *channels) { + channel_hash_ = channel_hash; + channel_width_ = channel_width; + channels_ = channels; + } + // Initializes data strctures and open files. // Returns false on error. virtual bool Initialize(); @@ -68,13 +89,11 @@ class OsLayer { // Prints failed dimm. This implementation is optional for // subclasses to implement. // Takes a bus address and string, and prints the DIMM name - // into the string. Returns error status. 
+ // into the string. Returns the DIMM number that corresponds to the + // address given, or -1 if unable to identify the DIMM number. + // Note that subclass implementations of FindDimm() MUST fill + // buf with at LEAST one non-whitespace character (provided len > 0). virtual int FindDimm(uint64 addr, char *buf, int len); - // Print dimm info, plus more available info. - virtual int FindDimmExtended(uint64 addr, char *buf, int len) { - return FindDimm(addr, buf, len); - } - // Classifies addresses according to "regions" // This may mean different things on different platforms. @@ -132,10 +151,94 @@ class OsLayer { // instruction. For example, software can use an MFENCE instruction to // insure that previous stores are included in the write-back. asm volatile("mfence"); - asm volatile("clflush (%0)" :: "r" (vaddr)); + asm volatile("clflush (%0)" : : "r" (vaddr)); + asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) && !defined(__aarch64__) + // ARMv7a cachelines are 8 words (32 bytes). + syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Takes a NULL-terminated + // array of addresses to flush. + inline static void FastFlushList(void **vaddrs) { +#ifdef STRESSAPPTEST_CPU_PPC + while (*vaddrs) { + asm volatile("dcbf 0,%0" : : "r" (*vaddrs++)); + } + asm volatile("sync"); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. 
It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("mfence"); + while (*vaddrs) { + asm volatile("clflush (%0)" : : "r" (*vaddrs++)); + } asm volatile("mfence"); #elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes." + while (*vaddrs) { + FastFlush(*vaddrs++); + } +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush hint, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Note that this + // will not guarantee that a flush happens, but will at least + // hint that it should. This is useful for speeding up + // parallel march algorithms. + inline static void FastFlushHint(void *vaddr) { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("dcbf 0,%0" : : "r" (vaddr)); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("clflush (%0)" : : "r" (vaddr)); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + FastFlush(vaddr); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Sync's any + // transactions for ordering FastFlushHints. 
+ inline static void FastFlushSync() { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("sync"); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + // This is a NOP, FastFlushHint() always does a full flush, so there's + // nothing to do for FastFlushSync(). #else #warning "Unsupported CPU type: Unable to force cache flushes." #endif @@ -164,10 +267,10 @@ class OsLayer { __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); tsc = data.l64; #elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: your build may not function correctly" + #warning "Unsupported CPU type ARMV7A: your timer may not function correctly" tsc = 0; #else - #warning "Unsupported CPU type: your build may not function correctly" + #warning "Unsupported CPU type: your timer may not function correctly" tsc = 0; #endif return (tsc); @@ -230,9 +333,6 @@ class OsLayer { // Handle to platform-specific error diagnoser. ErrorDiag *error_diagnoser_; - // Detect all PCI Devices. - virtual PCIDevices GetPCIDevices(); - // Disambiguate between different "warm" memcopies. virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, @@ -249,17 +349,31 @@ class OsLayer { } ErrCallback get_err_log_callback() { return err_log_callback_; } + // Set a clock object that can be overridden for use with unit tests. 
+ void SetClock(Clock *clock) { + if (clock_) { + delete clock_; + } + clock_ = clock; + time_initialized_ = clock_->Now(); + } + protected: void *testmem_; // Location of test memory. uint64 testmemsize_; // Size of test memory. int64 totalmemsize_; // Size of available memory. int64 min_hugepages_bytes_; // Minimum hugepages size. + int64 reserve_mb_; // Minimum amount of memory to reserve in MB. bool error_injection_; // Do error injection? bool normal_mem_; // Memory DMA capable? bool use_hugepages_; // Use hugepage shmem? bool use_posix_shm_; // Use 4k page shmem? bool dynamic_mapped_shmem_; // Conserve virtual address space. + bool mmapped_allocation_; // Was memory allocated using mmap()? int shmid_; // Handle to shmem + vector< vector<string> > *channels_; // Memory module names per channel. + uint64 channel_hash_; // Mask of address bits XORed for channel. + int channel_width_; // Channel width in bits. int64 regionsize_; // Size of memory "regions" int regioncount_; // Number of memory "regions" @@ -267,7 +381,7 @@ class OsLayer { int num_nodes_; // Number of nodes in the system. int num_cpus_per_node_; // Number of cpus per node in the system. int address_mode_; // Are we running 32 or 64 bit? - bool has_sse2_; // Do we have sse2 instructions? + bool has_vector_; // Do we have sse2/neon instructions? bool has_clflush_; // Do we have clflush instructions? bool use_flush_page_cache_; // Do we need to flush the page cache? @@ -279,9 +393,6 @@ class OsLayer { // Get file descriptor for dev msr. virtual int OpenMSR(uint32 core, uint32 address); - // Auxiliary methods for PCI device configuration - int PCIGetValue(string name, string object); - int PCIGetResources(string name, PCIDevice *device); // Look up how many hugepages there are. virtual int64 FindHugePages(); @@ -289,6 +400,9 @@ class OsLayer { // Link to find last transaction at an error location. ErrCallback err_log_callback_; + // Object to wrap the time function. 
+ Clock *clock_; + private: DISALLOW_COPY_AND_ASSIGN(OsLayer); }; @@ -125,6 +125,26 @@ bool Sat::CheckEnvironment() { #error Build system regression - COPTS disregarded. #endif + // Check if the cpu frequency test is enabled and able to run. + if (cpu_freq_test_) { + if (!CpuFreqThread::CanRun()) { + logprintf(0, "Process Error: This platform does not support this " + "test.\n"); + bad_status(); + return false; + } else if (cpu_freq_threshold_ <= 0) { + logprintf(0, "Process Error: The cpu frequency test requires " + "--cpu_freq_threshold set to a value > 0\n"); + bad_status(); + return false; + } else if (cpu_freq_round_ < 0) { + logprintf(0, "Process Error: The --cpu_freq_round option must be greater" + " than or equal to zero. A value of zero means no rounding.\n"); + bad_status(); + return false; + } + } + // Use all CPUs if nothing is specified. if (memory_threads_ == -1) { memory_threads_ = os_->num_cpus(); @@ -488,15 +508,9 @@ bool Sat::InitializePages() { for (int64 i = 0; i < pages_; i++) { struct page_entry pe; // Only get valid pages with uninitialized tags here. - char buf[256]; if (GetValid(&pe, kInvalidTag)) { int64 paddr = os_->VirtualToPhysical(pe.addr); int32 region = os_->FindRegion(paddr); - - os_->FindDimm(paddr, buf, sizeof(buf)); - if (i < 256) { - logprintf(12, "Log: address: %#llx, %s\n", paddr, buf); - } region_[region]++; pe.paddr = paddr; pe.tag = 1 << region; @@ -554,6 +568,7 @@ bool Sat::Initialize() { // Initializes sync'd log file to ensure output is saved. 
if (!InitializeLogfile()) return false; + Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_); Logger::GlobalLogger()->StartThread(); logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str()); @@ -573,6 +588,17 @@ bool Sat::Initialize() { if (min_hugepages_mbytes_ > 0) os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte); + if (reserve_mb_ > 0) + os_->SetReserveSize(reserve_mb_); + + if (channels_.size() > 0) { + logprintf(6, "Log: Decoding memory: %dx%d bit channels," + "%d modules per channel (x%d), decoding hash 0x%x\n", + channels_.size(), channel_width_, channels_[0].size(), + channel_width_/channels_[0].size(), channel_hash_); + os_->SetDramMappingParams(channel_hash_, channel_width_, &channels_); + } + if (!os_->Initialize()) { logprintf(0, "Process Error: Failed to initialize OS layer\n"); bad_status(); @@ -640,18 +666,23 @@ Sat::Sat() { pages_ = 0; size_mb_ = 0; size_ = size_mb_ * kMegabyte; + reserve_mb_ = 0; min_hugepages_mbytes_ = 0; freepages_ = 0; paddr_base_ = 0; + channel_hash_ = kCacheLineSize; + channel_width_ = 64; user_break_ = false; verbosity_ = 8; Logger::GlobalLogger()->SetVerbosity(verbosity_); + print_delay_ = 10; strict_ = 1; warm_ = 0; run_on_anything_ = 0; use_logfile_ = 0; logfile_ = 0; + log_timestamps_ = true; // Detect 32/64 bit binary. void *pvoid = 0; address_mode_ = sizeof(pvoid) * 8; @@ -669,9 +700,15 @@ Sat::Sat() { // Cache coherency data initialization. cc_test_ = false; // Flag to trigger cc threads. cc_cacheline_count_ = 2; // Two datastructures of cache line size. + cc_cacheline_size_ = 0; // Size of a cacheline (0 for auto-detect). cc_inc_count_ = 1000; // Number of times to increment the shared variable. cc_cacheline_data_ = 0; // Cache Line size datastructure. + // Cpu frequency data initialization. + cpu_freq_test_ = false; // Flag to trigger cpu frequency thread. + cpu_freq_threshold_ = 0; // Threshold, in MHz, at which a cpu fails. 
+ cpu_freq_round_ = 10; // Round the computed frequency to this value. + sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL)); file_threads_ = 0; net_threads_ = 0; @@ -765,6 +802,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of megabyte to use. ARG_IVALUE("-M", size_mb_); + // Specify the amount of megabytes to be reserved for system. + ARG_IVALUE("--reserve_memory", reserve_mb_); + // Set minimum megabytes of hugepages to require. ARG_IVALUE("-H", min_hugepages_mbytes_); @@ -786,8 +826,21 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of cache line size datastructures ARG_IVALUE("--cc_line_count", cc_cacheline_count_); + // Override the detected or assumed cache line size. + ARG_IVALUE("--cc_line_size", cc_cacheline_size_); + // Flag set when cache coherency tests need to be run - ARG_KVALUE("--cc_test", cc_test_, 1); + ARG_KVALUE("--cc_test", cc_test_, true); + + // Set when the cpu_frequency test needs to be run + ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true); + + // Set the threshold in MHz at which the cpu frequency test will fail. + ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_); + + // Set the rounding value for the cpu frequency test. The default is to + // round to the nearest 10s value. + ARG_IVALUE("--cpu_freq_round", cpu_freq_round_); // Set number of CPU stress threads. ARG_IVALUE("-C", cpu_stress_threads_); @@ -798,6 +851,12 @@ bool Sat::ParseArgs(int argc, char **argv) { // Verbosity level. ARG_IVALUE("-v", verbosity_); + // Chatty printout level. + ARG_IVALUE("--printsec", print_delay_); + + // Turn off timestamps logging. + ARG_KVALUE("--no_timestamps", log_timestamps_, false); + // Set maximum number of errors to collect. Stop running after this many. 
ARG_IVALUE("--max_errors", max_errorcount_); @@ -918,6 +977,23 @@ bool Sat::ParseArgs(int argc, char **argv) { continue; } + ARG_IVALUE("--channel_hash", channel_hash_); + ARG_IVALUE("--channel_width", channel_width_); + + if (!strcmp(argv[i], "--memory_channel")) { + i++; + if (i < argc) { + char *channel = argv[i]; + channels_.push_back(vector<string>()); + while (char* next = strchr(channel, ',')) { + channels_.back().push_back(string(channel, next - channel)); + channel = next + 1; + } + channels_.back().push_back(string(channel)); + } + continue; + } + // Default: PrintVersion(); PrintHelp(); @@ -963,6 +1039,47 @@ bool Sat::ParseArgs(int argc, char **argv) { disk_pages_ = 1; } + // Validate memory channel parameters if supplied + if (channels_.size()) { + if (channels_.size() == 1) { + channel_hash_ = 0; + logprintf(7, "Log: " + "Only one memory channel...deactivating interleave decoding.\n"); + } else if (channels_.size() > 2) { + logprintf(6, "Process Error: " + "Triple-channel mode not yet supported... sorry.\n"); + bad_status(); + return false; + } + for (uint i = 0; i < channels_.size(); i++) + if (channels_[i].size() != channels_[0].size()) { + logprintf(6, "Process Error: " + "Channels 0 and %d have a different count of dram modules.\n", i); + bad_status(); + return false; + } + if (channels_[0].size() & (channels_[0].size() - 1)) { + logprintf(6, "Process Error: " + "Amount of modules per memory channel is not a power of 2.\n"); + bad_status(); + return false; + } + if (channel_width_ < 16 + || channel_width_ & (channel_width_ - 1)) { + logprintf(6, "Process Error: " + "Channel width %d is invalid.\n", channel_width_); + bad_status(); + return false; + } + if (channel_width_ / channels_[0].size() < 8) { + logprintf(6, "Process Error: Chip width x%d must be x8 or greater.\n", + channel_width_ / channels_[0].size()); + bad_status(); + return false; + } + } + + // Print each argument. 
for (int i = 0; i < argc; i++) { if (i) @@ -976,6 +1093,8 @@ bool Sat::ParseArgs(int argc, char **argv) { void Sat::PrintHelp() { printf("Usage: ./sat(32|64) [options]\n" " -M mbytes megabytes of ram to test\n" + " --reserve-memory If not using hugepages, the amount of memory to " + " reserve for the system\n" " -H mbytes minimum megabytes of hugepages to require\n" " -s seconds number of seconds to run\n" " -m threads number of memory copy threads to run\n" @@ -987,8 +1106,10 @@ void Sat::PrintHelp() { " -f filename add a disk thread with " "tempfile 'filename'\n" " -l logfile log output to file 'logfile'\n" + " --no_timestamps do not prefix timestamps to log messages\n" " --max_errors n exit early after finding 'n' errors\n" " -v level verbosity (0-20), default is 8\n" + " --printsec secs How often to print 'seconds remaining'\n" " -W Use more CPU-stressful memory copy\n" " -A run in degraded mode on incompatible systems\n" " -p pagesize size in bytes of memory chunks\n" @@ -1024,13 +1145,26 @@ void Sat::PrintHelp() { "cacheline's member\n" " --cc_line_count number of cache line sized datastructures " "to allocate for the cache coherency threads to operate\n" + " --cc_line_size override the auto-detected cache line size\n" + " --cpu_freq_test enable the cpu frequency test (requires the " + "--cpu_freq_threshold argument to be set)\n" + " --cpu_freq_threshold fail the cpu frequency test if the frequency " + "goes below this value (specified in MHz)\n" + " --cpu_freq_round round the computed frequency to this value, if set" + " to zero, only round to the nearest MHz\n" " --paddr_base allocate memory starting from this address\n" " --pause_delay delay (in seconds) between power spikes\n" " --pause_duration duration (in seconds) of each pause\n" - " --local_numa : choose memory regions associated with " + " --local_numa choose memory regions associated with " "each CPU to be tested by that CPU\n" - " --remote_numa : choose memory regions not associated with " - "each 
CPU to be tested by that CPU\n"); + " --remote_numa choose memory regions not associated with " + "each CPU to be tested by that CPU\n" + " --channel_hash mask of address bits XORed to determine channel. " + "Mask 0x40 interleaves cachelines between channels\n" + " --channel_width bits width in bits of each memory channel\n" + " --memory_channel u1,u2 defines a comma-separated list of names " + "for dram packages in a memory channel. Use multiple times to " + "define multiple channels.\n"); } bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) { @@ -1275,32 +1409,45 @@ void Sat::InitializeThreads() { sizeof(cc_cacheline_data) * cc_cacheline_count_); int num_cpus = CpuCount(); + char *num; + // Calculate the number of cache lines needed just to give each core + // its own counter. + int line_size = cc_cacheline_size_; + if (line_size <= 0) { + line_size = CacheLineSize(); + if (line_size < kCacheLineSize) + line_size = kCacheLineSize; + logprintf(12, "Log: Using %d as cache line size\n", line_size); + } + // The number of cache lines needed to hold an array of num_cpus. + // "num" must be the same type as cc_cacheline_data[X].num or the memory + // size calculations will fail. + int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size; // Allocate all the nums once so that we get a single chunk // of contiguous memory. 
- int *num; #ifdef HAVE_POSIX_MEMALIGN int err_result = posix_memalign( reinterpret_cast<void**>(&num), - kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_); + line_size, line_size * needed_lines * cc_cacheline_count_); #else - num = reinterpret_cast<int*>(memalign(kCacheLineSize, - sizeof(*num) * num_cpus * cc_cacheline_count_)); + num = reinterpret_cast<char*>(memalign( + line_size, line_size * needed_lines * cc_cacheline_count_)); int err_result = (num == 0); #endif sat_assert(err_result == 0); int cline; for (cline = 0; cline < cc_cacheline_count_; cline++) { - memset(num, 0, sizeof(num_cpus) * num_cpus); + memset(num, 0, sizeof(*num) * num_cpus); cc_cacheline_data_[cline].num = num; - num += num_cpus; + num += (line_size * needed_lines) / sizeof(*num); } int tnum; for (tnum = 0; tnum < num_cpus; tnum++) { CpuCacheCoherencyThread *thread = new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_, - tnum, cc_inc_count_); + tnum, num_cpus, cc_inc_count_); thread->InitThread(total_threads_++, this, os_, patternlist_, &continuous_status_); // Pin the thread to a particular core. @@ -1311,6 +1458,22 @@ void Sat::InitializeThreads() { } workers_map_.insert(make_pair(kCCType, cc_vector)); } + + if (cpu_freq_test_) { + // Create the frequency test thread. + logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n", + cpu_freq_threshold_); + CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_, + cpu_freq_round_); + // This thread should be paused when other threads are paused. 
+ thread->InitThread(total_threads_++, this, os_, NULL, + &power_spike_status_); + + WorkerVector *cpu_freq_vector = new WorkerVector(); + cpu_freq_vector->insert(cpu_freq_vector->end(), thread); + workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector)); + } + ReleaseWorkerLock(); } @@ -1319,6 +1482,19 @@ int Sat::CpuCount() { return sysconf(_SC_NPROCESSORS_CONF); } +// Return the worst case (largest) cache line size of the various levels of +// cache actually prsent in the machine. +int Sat::CacheLineSize() { + int max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + return max_linesize; +} + // Notify and reap worker threads. void Sat::JoinThreads() { logprintf(12, "Log: Joining worker threads\n"); @@ -1443,7 +1619,7 @@ void Sat::AnalysisAllStats() { map_it != workers_map_.end(); ++map_it) { for (WorkerVector::const_iterator it = map_it->second->begin(); it != map_it->second->end(); ++it) { - thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000; + thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000.; total_data += (*it)->GetMemoryCopiedData(); total_data += (*it)->GetDeviceCopiedData(); if (thread_runtime_sec > max_runtime_sec) { @@ -1714,12 +1890,12 @@ bool Sat::Run() { // All of these are in seconds. You probably want them to be >= // kSleepFrequency and multiples of kSleepFrequency, but neither is necessary. static const time_t kInjectionFrequency = 10; - static const time_t kPrintFrequency = 10; + // print_delay_ determines "seconds remaining" chatty update. 
const time_t start = time(NULL); const time_t end = start + runtime_seconds_; time_t now = start; - time_t next_print = start + kPrintFrequency; + time_t next_print = start + print_delay_; time_t next_pause = start + pause_delay_; time_t next_resume = 0; time_t next_injection; @@ -1755,7 +1931,7 @@ bool Sat::Run() { if (now >= next_print) { // Print a count down message. logprintf(5, "Log: Seconds remaining: %d\n", seconds_remaining); - next_print = NextOccurance(kPrintFrequency, start, now); + next_print = NextOccurance(print_delay_, start, now); } if (next_injection && now >= next_injection) { @@ -1901,3 +2077,9 @@ void logprintf(int priority, const char *format, ...) { Logger::GlobalLogger()->VLogF(priority, format, args); va_end(args); } + +// Stop the logging thread and verify any pending data is written to the log. +void logstop() { + Logger::GlobalLogger()->StopThread(); +} + @@ -134,6 +134,8 @@ class Sat { // Return the number of cpus in the system. int CpuCount(); + // Return the worst-case (largest) cache line size of the system. + int CacheLineSize(); // Collect error counts from threads. int64 GetTotalErrorCount(); @@ -147,17 +149,23 @@ class Sat { int64 pages_; // Number of memory blocks. int64 size_; // Size of memory tested, in bytes. int64 size_mb_; // Size of memory tested, in MB. + int64 reserve_mb_; // Reserve at least this amount of memory + // for the system, in MB. int64 min_hugepages_mbytes_; // Minimum hugepages size. int64 freepages_; // How many invalid pages we need. int disk_pages_; // Number of pages per temp file. uint64 paddr_base_; // Physical address base. + uint64 channel_hash_; // Mask of address bits XORed for channel. + int channel_width_; // Channel width in bits. + vector< vector<string> > channels_; // Memory module names per channel. // Control flags. volatile sig_atomic_t user_break_; // User has signalled early exit. Used as // a boolean. int verbosity_; // How much to print. + int print_delay_; // Chatty update frequency. 
int strict_; // Check results per transaction. - int warm_; // FPU warms CPU while coying. + int warm_; // FPU warms CPU while copying. int address_mode_; // 32 or 64 bit binary. bool stop_on_error_; // Exit immendiately on any error. bool findfiles_; // Autodetect tempfile locations. @@ -169,6 +177,7 @@ class Sat { int use_logfile_; // Log to a file. char logfilename_[255]; // Name of file to log to. int logfile_; // File handle to log to. + bool log_timestamps_; // Whether to add timestamps to log lines. // Disk thread options. int read_block_size_; // Size of block to read from disk. @@ -199,9 +208,18 @@ class Sat { bool cc_test_; // Flag to decide whether to start the // cache coherency threads. int cc_cacheline_count_; // Number of cache line size structures. + int cc_cacheline_size_; // Size of a cache line. int cc_inc_count_; // Number of times to increment the shared // cache lines structure members. + // Cpu Frequency Options. + bool cpu_freq_test_; // Flag to decide whether to start the + // cpu frequency thread. + int cpu_freq_threshold_; // The MHz threshold which will cause + // the test to fail. + int cpu_freq_round_; // Round the computed frequency to this + // value. + // Thread control. int file_threads_; // Threads of file IO. int net_threads_; // Threads of network IO. @@ -249,7 +267,8 @@ class Sat { kRandomDiskType = 7, kCPUType = 8, kErrorType = 9, - kCCType = 10 + kCCType = 10, + kCPUFreqType = 11, }; // Helper functions. 
diff --git a/src/sattypes.h b/src/sattypes.h index c9341d0..79bb47d 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -27,11 +27,11 @@ #ifdef HAVE_CONFIG_H // Built using autoconf #ifdef __ANDROID__ -#include "stressapptest_config_android.h" +#include "stressapptest_config_android.h" // NOLINT #else -#include "stressapptest_config.h" -using namespace __gnu_cxx; -#endif +#include "stressapptest_config.h" // NOLINT +using namespace __gnu_cxx; //NOLINT +#endif // __ANDROID__ using namespace std; typedef signed long long int64; @@ -57,10 +57,10 @@ inline const char* BuildChangelist() { } static const bool kOpenSource = true; -#else +#else // !HAVE_CONFIG_H static const bool kOpenSource = false; - #include "googlesattypes.h" -#endif + #include "googlesattypes.h" // NOLINT +#endif // HAVE_CONFIG_H // Workaround to allow 32/64 bit conversion // without running into strict aliasing problems. union datacast_t { @@ -75,11 +75,15 @@ union datacast_t { // File sync'd print to console and log void logprintf(int priority, const char *format, ...); +// Stop the log and dump any queued lines. +void logstop(); + // We print to stderr ourselves first in case we're in such a bad state that the // logger can't work. #define sat_assert(x) \ {\ if (!(x)) {\ + logstop();\ fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ exit(1);\ @@ -186,6 +190,48 @@ inline string ErrorString(int error_num) { #endif } +// Execute the cpuid instruction and pass back the contents of the registers. +// This only works on x86 based platforms. 
+inline void cpuid( + unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { + *ebx = 0; + *ecx = 0; + *edx = 0; + // CPUID features documented at: + // http://www.sandpile.org/ia32/cpuid.htm +#if defined(STRESSAPPTEST_CPU_I686) || defined(STRESSAPPTEST_CPU_X86_64) +#if defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) + // In PIC compilations using the i686 cpu type, ebx contains the address + // of the global offset table. The compiler can't properly handle constraints + // using the ebx register for this compile, so preserve the register + // ourselves. + asm( + "mov %%ebx, %%edi;" + "cpuid;" + "xchg %%edi, %%ebx;" + // Output registers. + : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) + // Input registers. + : "a" (*eax) + ); // Asm +#else + asm( + "cpuid;" + // Output registers. + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + // Input registers. + : "a" (*eax) + ); // Asm +#endif // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) +#elif defined(STRESSAPPTEST_CPU_PPC) + return; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + return; +#else +#warning "Unsupported CPU type." +#endif +} + // Define handy constants here static const int kTicksPerSec = 100; static const int kMegabyte = (1024LL*1024LL); diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 97f306e..5412df4 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -53,6 +53,9 @@ /* Define to 1 if you have the `posix_memalign' function. */ #undef HAVE_POSIX_MEMALIGN +/* Define to 1 if the system has `pthread_barrier'. */ +#undef HAVE_PTHREAD_BARRIERS + /* Define to 1 if you have the <pthread.h> header file. */ #undef HAVE_PTHREAD_H diff --git a/src/stressapptest_config_android.h b/src/stressapptest_config_android.h index 3817bdf..14081e5 100644 --- a/src/stressapptest_config_android.h +++ b/src/stressapptest_config_android.h @@ -54,12 +54,12 @@ /* Define to 1 if you have the `posix_memalign' function. 
*/ /* #undef HAVE_POSIX_MEMALIGN */ +/* Define to 1 if the system has `pthread_barrier'. */ +#undef HAVE_PTHREAD_BARRIERS + /* Define to 1 if you have the <pthread.h> header file. */ #define HAVE_PTHREAD_H 1 -/* Android, why do you define _POSIX_BARRIERS when you have no _POSIX_BARRIERS?! */ -#undef _POSIX_BARRIERS - /* Define to 1 if you have the `rand_r' function. */ /* #undef HAVE_RAND_R */ @@ -144,7 +144,7 @@ #define PACKAGE_NAME "stressapptest" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "stressapptest 1.0.4_autoconf" +#define PACKAGE_STRING "stressapptest 1.0.7_autoconf" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "stressapptest" @@ -153,7 +153,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.0.4_autoconf" +#define PACKAGE_VERSION "1.0.7_autoconf" /* Define as the return type of signal handlers (`int' or `void'). */ #define RETSIGTYPE void @@ -179,6 +179,9 @@ /* Defined if the target CPU is i686 */ /* #undef STRESSAPPTEST_CPU_I686 */ +/* Defined if the target CPU is mips */ +/* #undef STRESSAPPTEST_CPU_MIPS */ + /* Defined if the target CPU is PowerPC */ /* #undef STRESSAPPTEST_CPU_PPC */ @@ -203,7 +206,7 @@ #define TIME_WITH_SYS_TIME 1 /* Version number of package */ -#define VERSION "1.0.4_autoconf" +#define VERSION "1.0.7_autoconf" /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/src/worker.cc b/src/worker.cc index 62b0ede..5b0fe59 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -78,21 +78,6 @@ _syscall3(int, sched_setaffinity, pid_t, pid, #endif namespace { - // Get HW core ID from cpuid instruction. 
- inline int apicid(void) { - int cpu; -#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx"); -#elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: unable to determine core ID." - cpu = 0; -#else - #warning "Unsupported CPU type: unable to determine core ID." - cpu = 0; -#endif - return (cpu >> 24); - } - // Work around the sad fact that there are two (gnu, xsi) incompatible // versions of strerror_r floating around google. Awesome. bool sat_strerror(int err, char *buf, int len) { @@ -114,7 +99,7 @@ namespace { inline uint64 addr_to_tag(void *address) { return reinterpret_cast<uint64>(address); } -} +} // namespace #if !defined(O_DIRECT) // Sometimes this isn't available. @@ -144,7 +129,7 @@ static void *ThreadSpawnerGeneric(void *ptr) { void WorkerStatus::Initialize() { sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL)); sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL)); -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_ + 1)); #endif @@ -153,7 +138,7 @@ void WorkerStatus::Initialize() { void WorkerStatus::Destroy() { sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_)); sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_)); -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); #endif } @@ -173,10 +158,13 @@ void WorkerStatus::StopWorkers() { WaitOnPauseBarrier(); } -bool WorkerStatus::ContinueRunning() { +bool WorkerStatus::ContinueRunning(bool *paused) { // This loop is an optimization. We use it to immediately re-check the status // after resuming from a pause, instead of returning and waiting for the next // call to this function. 
+ if (paused) { + *paused = false; + } for (;;) { switch (GetStatus()) { case RUN: @@ -187,6 +175,10 @@ bool WorkerStatus::ContinueRunning() { WaitOnPauseBarrier(); // Wait for ResumeWorkers() to be called. WaitOnPauseBarrier(); + // Indicate that a pause occurred. + if (paused) { + *paused = true; + } break; case STOP: return false; @@ -220,7 +212,7 @@ void WorkerStatus::RemoveSelf() { AcquireNumWorkersLock(); // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't // in use because (status != PAUSE). -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_)); #endif @@ -315,8 +307,8 @@ bool WorkerThread::InitPriority() { logprintf(11, "Log: Bind to %s failed.\n", cpuset_format(&cpu_mask_).c_str()); - logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n", - thread_num_, apicid(), + logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n", + thread_num_, sched_getcpu(), CurrentCpusFormat().c_str(), cpuset_format(&cpu_mask_).c_str()); #if 0 @@ -580,7 +572,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, const char *message) { char dimm_string[256] = ""; - int apic_id = apicid(); + int core_id = sched_getcpu(); // Determine if this is a write or read error. os_->Flush(error->vaddr); @@ -615,7 +607,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, - apic_id, + core_id, CurrentCpusFormat().c_str(), error->vaddr, error->paddr, @@ -815,6 +807,9 @@ int WorkerThread::CheckRegion(void *addr, if ((state == kGoodAgain) || (state == kBad)) { unsigned int blockerrors = badend - badstart + 1; errormessage = "Block Error"; + // It's okay for the 1st entry to be corrected multiple times, + // it will simply be reported twice. 
Once here and once below + // when processing the error queue. ProcessError(&recorded[0], 0, errormessage.c_str()); logprintf(0, "Block Error: (%p) pattern %s instead of %s, " "%d bytes from offset 0x%x to 0x%x\n", @@ -823,8 +818,6 @@ int WorkerThread::CheckRegion(void *addr, blockerrors * wordsize_, offset + badstart * wordsize_, offset + badend * wordsize_); - errorcount_ += blockerrors; - return blockerrors; } } } @@ -840,7 +833,6 @@ int WorkerThread::CheckRegion(void *addr, if (page_error) { // For each word in the data region. - int error_recount = 0; for (int i = 0; i < length / wordsize_; i++) { uint64 actual = memblock[i]; uint64 expected; @@ -859,21 +851,16 @@ int WorkerThread::CheckRegion(void *addr, // If the value is incorrect, save an error record for later printing. if (actual != expected) { - if (error_recount < kErrorLimit) { - // We already reported these. - error_recount++; - } else { - // If we have overflowed the error queue, print the errors now. - struct ErrorRecord er; - er.actual = actual; - er.expected = expected; - er.vaddr = &memblock[i]; - - // Do the error printout. This will take a long time and - // likely change the machine state. - ProcessError(&er, 12, errormessage.c_str()); - overflowerrors++; - } + // If we have overflowed the error queue, print the errors now. + struct ErrorRecord er; + er.actual = actual; + er.expected = expected; + er.vaddr = &memblock[i]; + + // Do the error printout. This will take a long time and + // likely change the machine state. + ProcessError(&er, 12, errormessage.c_str()); + overflowerrors++; } } } @@ -948,7 +935,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, char tag_dimm_string[256] = ""; bool read_error = false; - int apic_id = apicid(); + int core_id = sched_getcpu(); // Determine if this is a write or read error. 
os_->Flush(error->vaddr); @@ -982,7 +969,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, error->tagvaddr, error->tagpaddr, tag_dimm_string, read_error ? "read error" : "write error", - apic_id, + core_id, CurrentCpusFormat().c_str(), error->vaddr, error->paddr, @@ -1100,12 +1087,18 @@ bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64, AdlerChecksum ignored_checksum; os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum); - // Force cache flush. - int length = size_in_bytes / sizeof(*dstmem64); - for (int i = 0; i < length; i += sizeof(*dstmem64)) { - os_->FastFlush(dstmem64 + i); - os_->FastFlush(srcmem64 + i); + // Force cache flush of both the source and destination addresses. + // length - length of block to flush in cachelines. + // mem_increment - number of dstmem/srcmem values per cacheline. + int length = size_in_bytes / kCacheLineSize; + int mem_increment = kCacheLineSize / sizeof(*dstmem64); + OsLayer::FastFlushSync(); + for (int i = 0; i < length; ++i) { + OsLayer::FastFlushHint(dstmem64 + (i * mem_increment)); + OsLayer::FastFlushHint(srcmem64 + (i * mem_increment)); } + OsLayer::FastFlushSync(); + // Check results. AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe); // Patch up address tags. 
@@ -1236,11 +1229,11 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - int apic_id = apicid(); + int core_id = sched_getcpu(); logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, CurrentCpusFormat().c_str(), + core_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; @@ -1366,10 +1359,10 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, " + logprintf(0, "Log: CrcWarmCopyPage CRC mismatch expected: %s != actual: %s, " "but no miscompares found. Retrying with fresh data.\n", - crc.ToHexString().c_str(), - expectedcrc->ToHexString().c_str()); + expectedcrc->ToHexString().c_str(), + crc.ToHexString().c_str() ); if (!tag_mode_) { // Copy the data originally read from this region back again. // This data should have any corruption read originally while @@ -1380,16 +1373,16 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - int apic_id = apicid(); + int core_id = sched_getcpu(); logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, CurrentCpusFormat().c_str(), + core_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; er.actual = sourcemem[0]; - er.expected = 0x0; + er.expected = 0xbad; er.vaddr = sourcemem; ProcessError(&er, 0, "Hardware Error"); } @@ -1600,12 +1593,11 @@ void FileThread::SetFile(const char *filename_init) { // Open the file for access. 
bool FileThread::OpenFile(int *pfile) { - bool no_O_DIRECT = false; int flags = O_RDWR | O_CREAT | O_SYNC; int fd = open(filename_.c_str(), flags | O_DIRECT, 0644); if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { - no_O_DIRECT = true; - fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL } if (fd < 0) { logprintf(0, "Process Error: Failed to create file %s!!\n", @@ -1613,8 +1605,6 @@ bool FileThread::OpenFile(int *pfile) { pages_copied_ = 0; return false; } - if (no_O_DIRECT) - os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL *pfile = fd; return true; } @@ -1685,7 +1675,7 @@ bool FileThread::WritePages(int fd) { if (!result) return false; } - return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. + return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. } // Copy data from file into memory block. @@ -1964,7 +1954,7 @@ bool FileThread::Work() { // Load patterns into page records. page_recs_ = new struct PageRec[sat_->disk_pages()]; for (int i = 0; i < sat_->disk_pages(); i++) { - page_recs_[i].pattern = new struct Pattern(); + page_recs_[i].pattern = new class Pattern(); } // Loop until done. @@ -2465,13 +2455,22 @@ bool CpuStressThread::Work() { CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, int cacheline_count, int thread_num, + int thread_count, int inc_count) { cc_cacheline_data_ = data; cc_cacheline_count_ = cacheline_count; cc_thread_num_ = thread_num; + cc_thread_count_ = thread_count; cc_inc_count_ = inc_count; } +// A very simple psuedorandom generator. Since the random number is based +// on only a few simple logic operations, it can be done quickly in registers +// and the compiler can inline it. 
+uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) { + return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial); +} + // Worked thread to test the cache coherency of the CPUs // Return false on fatal sw error. bool CpuCacheCoherencyThread::Work() { @@ -2480,7 +2479,19 @@ bool CpuCacheCoherencyThread::Work() { uint64 time_start, time_end; struct timeval tv; + // Use a slightly more robust random number for the initial + // value, so the random sequences from the simple generator will + // be more divergent. +#ifdef HAVE_RAND_R unsigned int seed = static_cast<unsigned int>(gettid()); + uint64 r = static_cast<uint64>(rand_r(&seed)); + r |= static_cast<uint64>(rand_r(&seed)) << 32; +#else + srand(time(NULL)); + uint64 r = static_cast<uint64>(rand()); // NOLINT + r |= static_cast<uint64>(rand()) << 32; // NOLINT +#endif + gettimeofday(&tv, NULL); // Get the timestamp before increments. time_start = tv.tv_sec * 1000000ULL + tv.tv_usec; @@ -2490,14 +2501,19 @@ bool CpuCacheCoherencyThread::Work() { // Choose a datastructure in random and increment the appropriate // member in that according to the offset (which is the same as the // thread number. -#ifdef HAVE_RAND_R - int r = rand_r(&seed); -#else - int r = rand(); -#endif - r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0)); + r = SimpleRandom(r); + int cline_num = r % cc_cacheline_count_; + int offset; + // Reverse the order for odd numbered threads in odd numbered cache + // lines. This is designed for massively multi-core systems where the + // number of cores exceeds the bytes in a cache line, so "distant" cores + // get a chance to exercize cache coherency between them. + if (cline_num & cc_thread_num_ & 1) + offset = (cc_thread_count_ & ~1) - cc_thread_num_; + else + offset = cc_thread_num_; // Increment the member of the randomely selected structure. 
- (cc_cacheline_data_[r].num[cc_thread_num_])++; + (cc_cacheline_data_[cline_num].num[offset])++; } total_inc += cc_inc_count_; @@ -2506,14 +2522,26 @@ bool CpuCacheCoherencyThread::Work() { // in all the cache line structures for this particular thread. int cc_global_num = 0; for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) { - cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_]; + int offset; + // Perform the same offset calculation from above. + if (cline_num & cc_thread_num_ & 1) + offset = (cc_thread_count_ & ~1) - cc_thread_num_; + else + offset = cc_thread_num_; + cc_global_num += cc_cacheline_data_[cline_num].num[offset]; // Reset the cachline member's value for the next run. - cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0; + cc_cacheline_data_[cline_num].num[offset] = 0; } if (sat_->error_injection()) cc_global_num = -1; - if (cc_global_num != cc_inc_count_) { + // Since the count is only stored in a byte, to squeeze more into a + // single cache line, only compare it as a byte. In the event that there + // is something detected, the chance that it would be missed by a single + // thread is 1 in 256. If it affects all cores, that makes the chance + // of it being missed terribly minute. It seems unlikely any failure + // case would be off by more than a small number. + if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) { errorcount_++; logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n", cc_global_num, cc_inc_count_); @@ -2697,20 +2725,17 @@ bool DiskThread::SetParameters(int read_block_size, // Open a device, return false on failure. 
bool DiskThread::OpenDevice(int *pfile) { - bool no_O_DIRECT = false; int flags = O_RDWR | O_SYNC | O_LARGEFILE; int fd = open(device_name_.c_str(), flags | O_DIRECT, 0); if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { - no_O_DIRECT = true; - fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + os_->ActivateFlushPageCache(); } if (fd < 0) { logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n", device_name_.c_str(), thread_num_); return false; } - if (no_O_DIRECT) - os_->ActivateFlushPageCache(); *pfile = fd; return GetDiskSize(fd); @@ -2866,11 +2891,11 @@ bool DiskThread::DoWork(int fd) { // Block is either initialized by writing, or in nondestructive case, // initialized by being added into the datastructure for later reading. - block->SetBlockAsInitialized(); + block->initialized(); in_flight_sectors_.push(block); } - if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. + if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. return false; // Verify blocks on disk. @@ -2979,8 +3004,9 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, errorcount_++; os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); - if (event.res < 0) { - switch (event.res) { + int64 result = static_cast<int64>(event.res); + if (result < 0) { + switch (result) { case -EIO: logprintf(0, "Hardware Error: Low-level I/O error while doing %s to " "sectors starting at %lld on disk %s (thread %d).\n", @@ -3003,7 +3029,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, } return true; -#else // !HAVE_LIBAIO_H +#else // !HAVE_LIBAIO_H return false; #endif } @@ -3011,7 +3037,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, // Write a block to disk. // Return false if the block is not written. 
bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { - memset(block_buffer_, 0, block->GetSize()); + memset(block_buffer_, 0, block->size()); // Fill block buffer with a pattern struct page_entry pe; @@ -3019,30 +3045,30 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { // Even though a valid page could not be obatined, it is not an error // since we can always fill in a pattern directly, albeit slower. unsigned int *memblock = static_cast<unsigned int *>(block_buffer_); - block->SetPattern(patternlist_->GetRandomPattern()); + block->set_pattern(patternlist_->GetRandomPattern()); logprintf(11, "Log: Warning, using pattern fill fallback in " "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n", device_name_.c_str(), thread_num_); - for (int i = 0; i < block->GetSize()/wordsize_; i++) { - memblock[i] = block->GetPattern()->pattern(i); + for (unsigned int i = 0; i < block->size()/wordsize_; i++) { + memblock[i] = block->pattern()->pattern(i); } } else { - memcpy(block_buffer_, pe.addr, block->GetSize()); - block->SetPattern(pe.pattern); + memcpy(block_buffer_, pe.addr, block->size()); + block->set_pattern(pe.pattern); sat_->PutValid(&pe); } logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s" " (thread %d).\n", - block->GetSize()/kSectorSize, block->GetAddress(), + block->size()/kSectorSize, block->address(), device_name_.c_str(), thread_num_); int64 start_time = GetTime(); - if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(), - block->GetAddress() * kSectorSize, write_timeout_)) { + if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(), + block->address() * kSectorSize, write_timeout_)) { return false; } @@ -3063,11 +3089,11 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { // Return true if the block was read, also increment errorcount // if the block had data errors or performance problems. 
bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { - int64 blocks = block->GetSize() / read_block_size_; + int64 blocks = block->size() / read_block_size_; int64 bytes_read = 0; int64 current_blocks; int64 current_bytes; - uint64 address = block->GetAddress(); + uint64 address = block->address(); logprintf(20, "Log: Reading sectors starting at %lld on disk %s " "(thread %d).\n", @@ -3119,7 +3145,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { // In non-destructive mode, don't compare the block to the pattern since // the block was never written to disk in the first place. if (!non_destructive_) { - if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes, + if (CheckRegion(block_buffer_, block->pattern(), current_bytes, 0, bytes_read)) { os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1); errorcount_ += 1; @@ -3156,7 +3182,7 @@ bool DiskThread::Work() { // when using direct IO. #ifdef HAVE_POSIX_MEMALIGN int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment, - sat_->page_length()); + sat_->page_length()); #else block_buffer_ = memalign(kBufferAlignment, sat_->page_length()); int memalign_result = (block_buffer_ == 0); @@ -3400,3 +3426,224 @@ bool MemoryRegionThread::Work() { "pages checked\n", thread_num_, status_, pages_copied_); return result; } + +// The list of MSRs to read from each cpu. +const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = { + { kMsrTscAddr, "TSC" }, + { kMsrAperfAddr, "APERF" }, + { kMsrMperfAddr, "MPERF" }, +}; + +CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round) + : num_cpus_(num_cpus), + freq_threshold_(freq_threshold), + round_(round) { + sat_assert(round >= 0); + if (round == 0) { + // If rounding is off, force rounding to the nearest MHz. 
+ round_ = 1; + round_value_ = 0.5; + } else { + round_value_ = round/2.0; + } +} + +CpuFreqThread::~CpuFreqThread() { +} + +// Compute the difference between the currently read MSR values and the +// previously read values and store the results in delta. If any of the +// values did not increase, or the TSC value is too small, returns false. +// Otherwise, returns true. +bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous, + CpuDataType *delta) { + // Loop through the msrs. + for (int msr = 0; msr < kMsrLast; msr++) { + if (previous->msrs[msr] > current->msrs[msr]) { + logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx " + "skipping interval\n", kCpuRegisters[msr], previous->msrs[msr], + current->msrs[msr]); + return false; + } else { + delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr]; + } + } + + // Check for TSC < 1 Mcycles over interval. + if (delta->msrs[kMsrTsc] < (1000 * 1000)) { + logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n"); + return false; + } + timersub(&current->tv, &previous->tv, &delta->tv); + + return true; +} + +// Compute the change in values of the MSRs between current and previous, +// set the frequency in MHz of the cpu. If there is an error computing +// the delta, return false. Othewise, return true. +bool CpuFreqThread::ComputeFrequency(CpuDataType *current, + CpuDataType *previous, int *freq) { + CpuDataType delta; + if (!ComputeDelta(current, previous, &delta)) { + return false; + } + + double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0; + double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000 + * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval; + + // Use the rounding value to round up properly. + int computed = static_cast<int>(frequency + round_value_); + *freq = computed - (computed % round_); + return true; +} + +// This is the task function that the thread executes. 
+bool CpuFreqThread::Work() {
+  // Returns false if the set of available cpus cannot be obtained, or if
+  // any cpu was measured below freq_threshold_; otherwise returns true
+  // once IsReadyToRun() reports that the thread should stop.
+  cpu_set_t cpuset;
+  if (!AvailableCpus(&cpuset)) {
+    logprintf(0, "Process Error: Cannot get information about the cpus.\n");
+    return false;
+  }
+
+  // Start off indicating the test is passing.
+  status_ = true;
+
+  // data[curr] receives the newest sample of every cpu and data[prev]
+  // holds the sample taken one interval earlier.
+  int curr = 0;
+  int prev = 1;
+  uint32 num_intervals = 0;
+  bool paused = false;
+  bool valid;
+  bool pass = true;
+
+  vector<CpuDataType> data[2];
+  data[0].resize(num_cpus_);
+  data[1].resize(num_cpus_);
+  while (IsReadyToRun(&paused)) {
+    if (paused) {
+      // Reset the intervals and restart logic after the pause.
+      num_intervals = 0;
+    }
+    if (num_intervals == 0) {
+      // If this is the first interval, then always wait a bit before
+      // starting to collect data.
+      sat_sleep(kStartupDelay);
+    }
+
+    // Get the per cpu counters.
+    valid = true;
+    for (int cpu = 0; cpu < num_cpus_; cpu++) {
+      if (CPU_ISSET(cpu, &cpuset)) {
+        if (!GetMsrs(cpu, &data[curr][cpu])) {
+          logprintf(0, "Failed to get msrs on cpu %d.\n", cpu);
+          valid = false;
+          break;
+        }
+      }
+    }
+    if (!valid) {
+      // Reset the number of collected intervals since something bad happened.
+      // Note that curr/prev are not swapped on this path.
+      num_intervals = 0;
+      continue;
+    }
+
+    num_intervals++;
+
+    // Only compute a delta when we have at least two intervals worth of data.
+    if (num_intervals > 2) {
+      for (int cpu = 0; cpu < num_cpus_; cpu++) {
+        if (CPU_ISSET(cpu, &cpuset)) {
+          int freq;
+          if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu],
+                                &freq)) {
+            // Reset the number of collected intervals since an unknown
+            // error occurred.
+            logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu);
+            num_intervals = 0;
+            break;
+          }
+          logprintf(15, "Cpu %d Freq %d\n", cpu, freq);
+          if (freq < freq_threshold_) {
+            errorcount_++;
+            pass = false;
+            logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz "
+                      "threshold %d MHz.\n", cpu, freq, freq_threshold_);
+          }
+        }
+      }
+    }
+
+    sat_sleep(kIntervalPause);
+
+    // Swap the values in curr and prev (these values flip between 0 and 1).
+    curr ^= 1;
+    prev ^= 1;
+  }
+
+  return pass;
+}
+
+
+// Get the MSR values for this particular cpu and save them in data. If
+// any error is encountered, returns false. Otherwise, returns true.
+bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) {
+  // Read every register listed in kCpuRegisters[] into the matching slot.
+  for (int msr = 0; msr < kMsrLast; msr++) {
+    if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) {
+      return false;
+    }
+  }
+  // Save the time at which we acquired these values.
+  gettimeofday(&data->tv, NULL);
+
+  return true;
+}
+
+// Returns true if this test can run on the current machine. Otherwise,
+// returns false.
+bool CpuFreqThread::CanRun() {
+#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
+  unsigned int eax, ebx, ecx, edx;
+
+  // Check that the TSC and MSR features are supported; GetMsrs() reads
+  // its counters through ReadMSR(), so both are required.
+  // CPUID.EAX=0x1: EDX.bit4 is TSC, EDX.bit5 is MSR (RDMSR/WRMSR).
+  // This check is valid for both Intel and AMD.
+  eax = 1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (!(edx & (1 << 4))) {
+    logprintf(0, "Process Error: No TSC support.\n");
+    return false;
+  }
+  if (!(edx & (1 << 5))) {
+    logprintf(0, "Process Error: No MSR support.\n");
+    return false;
+  }
+
+  // Check the highest extended function level supported.
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000000;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (eax < 0x80000007) {
+    logprintf(0, "Process Error: No invariant TSC support.\n");
+    return false;
+  }
+
+  // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+  // This check is valid for both Intel and AMD.
+  eax = 0x80000007;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((edx & (1 << 8)) == 0) {
+    logprintf(0, "Process Error: No non-stop TSC support.\n");
+    return false;
+  }
+
+  // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+  // This check is valid for both Intel and AMD.
+  eax = 0x6;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if ((ecx & 1) == 0) {
+    logprintf(0, "Process Error: No APERF MSR support.\n");
+    return false;
+  }
+  return true;
+#else
+  logprintf(0, "Process Error: "
+            "cpu_freq_test is only supported on X86 processors.\n");
+  return false;
+#endif
+}
diff --git a/src/worker.h b/src/worker.h index 0ec4c1d..091d96b 100644 --- a/src/worker.h +++ b/src/worker.h @@ -44,7 +44,7 @@ // Global Datastruture shared by the Cache Coherency Worker Threads. struct cc_cacheline_data { - int *num; + char *num; }; // Typical usage: @@ -127,10 +127,8 @@ class WorkerStatus { // ResumeWorkers() or StopWorkers() has been called. Number of distinct // calling threads must match the worker count (see AddWorkers() and // RemoveSelf()). - bool ContinueRunning(); + bool ContinueRunning(bool *paused); - // TODO(matthewb): Is this functionality really necessary? Remove it if not. - // // This is a hack! It's like ContinueRunning(), except it won't pause. If // any worker threads use this exclusively in place of ContinueRunning() then // PauseWorkers() should never be used! @@ -140,7 +138,7 @@ class WorkerStatus { enum Status { RUN, PAUSE, STOP }; void WaitOnPauseBarrier() { -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS int error = pthread_barrier_wait(&pause_barrier_); if (error != PTHREAD_BARRIER_SERIAL_THREAD) sat_assert(error == 0); @@ -189,7 +187,7 @@ class WorkerStatus { pthread_rwlock_t status_rwlock_; Status status_; -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS // Guaranteed to not be in use when (status_ != PAUSE). pthread_barrier_t pause_barrier_; #endif @@ -242,7 +240,7 @@ class WorkerThread { int64 ReadThreadTimer() { struct timeval end_time_; gettimeofday(&end_time_, NULL); - return (end_time_.tv_sec - start_time_.tv_sec)*1000000 + + return (end_time_.tv_sec - start_time_.tv_sec)*1000000ULL + (end_time_.tv_usec - start_time_.tv_usec); } // Stops per-WorkerThread timer and records thread run duration.
@@ -266,10 +264,10 @@ class WorkerThread { // Calculate worker thread specific bandwidth. virtual float GetMemoryBandwidth() {return GetMemoryCopiedData() / ( - runduration_usec_ * 1.0 / 1000000);} + runduration_usec_ * 1.0 / 1000000.);} virtual float GetDeviceBandwidth() {return GetDeviceCopiedData() / ( - runduration_usec_ * 1.0 / 1000000);} + runduration_usec_ * 1.0 / 1000000.);} void set_cpu_mask(cpu_set_t *mask) { memcpy(&cpu_mask_, mask, sizeof(*mask)); @@ -304,9 +302,10 @@ class WorkerThread { // do { // // work. // } while (IsReadyToRun()); - virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); } - // TODO(matthewb): Is this function really necessary? Remove it if not. - // + virtual bool IsReadyToRun(bool *paused = NULL) { + return worker_status_->ContinueRunning(paused); + } + // Like IsReadyToRun(), except it won't pause. virtual bool IsReadyToRunNoPause() { return worker_status_->ContinueRunningNoPause(); @@ -422,7 +421,7 @@ class FileThread : public WorkerThread { // Record of where these pages were sourced from, and what // potentially broken components they passed through. struct PageRec { - struct Pattern *pattern; // This is the data it should contain. + class Pattern *pattern; // This is the data it should contain. void *src; // This is the memory location the data was sourced from. void *dst; // This is where it ended up. }; @@ -641,16 +640,27 @@ class CpuCacheCoherencyThread : public WorkerThread { CpuCacheCoherencyThread(cc_cacheline_data *cc_data, int cc_cacheline_count_, int cc_thread_num_, + int cc_thread_count_, int cc_inc_count_); virtual bool Work(); protected: + // Used by the simple random number generator as a shift feedback; + // this polynomial (x^64 + x^63 + x^61 + x^60 + 1) will produce a + // pseudorandom cycle of period 2^64-1.
+ static const uint64 kRandomPolynomial = 0xD800000000000000ULL; + // A very simple pseudorandom generator that can be inlined and use + // registers, to keep the CC test loop tight and focused. + static uint64 SimpleRandom(uint64 seed); + cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline. int cc_local_num_; // Local counter for each thread. int cc_cacheline_count_; // Number of cache lines to operate on. int cc_thread_num_; // The integer id of the thread which is // used as an index into the integer array // of the cacheline datastructure. + int cc_thread_count_; // Total number of threads being run, for + // calculations mixing up cache line access. int cc_inc_count_; // Number of times to increment the counter. private: @@ -809,4 +819,80 @@ class MemoryRegionThread : public WorkerThread { DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread); }; +// Worker thread to check that the frequency of every cpu does not go below a +// certain threshold. +class CpuFreqThread : public WorkerThread { + public: + CpuFreqThread(int num_cpus, int freq_threshold, int round); + ~CpuFreqThread(); + + // This is the task function that the thread executes. + virtual bool Work(); + + // Returns true if this test can run on the current machine. Otherwise, + // returns false. + static bool CanRun(); + + private: + static const int kIntervalPause = 10; // The number of seconds to pause + // between acquiring the MSR data. + static const int kStartupDelay = 5; // The number of seconds to wait + // before acquiring MSR data. + static const int kMsrTscAddr = 0x10; // The address of the TSC MSR. + static const int kMsrAperfAddr = 0xE8; // The address of the APERF MSR. + static const int kMsrMperfAddr = 0xE7; // The address of the MPERF MSR. + + // The index values into the CpuDataType.msr[] array. + enum MsrValues { + kMsrTsc = 0, // MSR index 0 = TSC. + kMsrAperf = 1, // MSR index 1 = APERF. + kMsrMperf = 2, // MSR index 2 = MPERF. + kMsrLast, // Last MSR index.
+ }; + + typedef struct { + uint32 msr; // The address of the MSR. + const char *name; // A human readable string for the MSR. + } CpuRegisterType; + + typedef struct { + uint64 msrs[kMsrLast]; // The values of the MSRs. + struct timeval tv; // The time at which the MSRs were read. + } CpuDataType; + + // The set of MSR addresses and register names. + static const CpuRegisterType kCpuRegisters[kMsrLast]; + + // Compute the change in values of the MSRs between current and previous, + // set the frequency in MHz of the cpu. If there is an error computing + // the delta, return false. Otherwise, return true. + bool ComputeFrequency(CpuDataType *current, CpuDataType *previous, + int *frequency); + + // Get the MSR values for this particular cpu and save them in data. If + // any error is encountered, returns false. Otherwise, returns true. + bool GetMsrs(int cpu, CpuDataType *data); + + // Compute the difference between the currently read MSR values and the + // previously read values and store the results in delta. If any of the + // values did not increase, or the TSC value is too small, returns false. + // Otherwise, returns true. + bool ComputeDelta(CpuDataType *current, CpuDataType *previous, + CpuDataType *delta); + + // The total number of cpus on the system. + int num_cpus_; + + // The minimum frequency that each cpu must operate at (in MHz). + int freq_threshold_; + + // The value to round the computed frequency to. + int round_; + + // Precomputed value to add to the frequency to do the rounding. + double round_value_; + + DISALLOW_COPY_AND_ASSIGN(CpuFreqThread); +}; + #endif // STRESSAPPTEST_WORKER_H_ diff --git a/stressapptest.1 b/stressapptest.1 index 695f9ee..2c91478 100644 --- a/stressapptest.1 +++ b/stressapptest.1 @@ -86,10 +86,15 @@ Number of times to increment the cacheline's member.
.TP .B \-\-cc_line_count <number> -Mumber of cache line sized datastructures to allocate for the cache coherency +Number of cache line sized datastructures to allocate for the cache coherency threads to operate. .TP +.B \-\-cc_line_size <number> +Size of cache line to use as the basis for cache coherency test data +structures. + +.TP .B \-\-cc_test Do the cache coherency testing. |