From 4861bb817645893f15313e0e832a0c75e1916d5a Mon Sep 17 00:00:00 2001 From: menderico Date: Tue, 29 Sep 2009 00:31:36 +0000 Subject: Initial commit --- src/Makefile.am | 31 + src/Makefile.in | 476 ++++++ src/adler32memcpy.cc | 380 +++++ src/adler32memcpy.h | 59 + src/disk_blocks.cc | 313 ++++ src/disk_blocks.h | 115 ++ src/error_diag.cc | 317 ++++ src/error_diag.h | 167 +++ src/finelock_queue.cc | 441 ++++++ src/finelock_queue.h | 116 ++ src/logger.cc | 151 ++ src/logger.h | 142 ++ src/main.cc | 56 + src/os.cc | 642 ++++++++ src/os.h | 265 ++++ src/os_factory.cc | 41 + src/pattern.cc | 421 ++++++ src/pattern.h | 124 ++ src/queue.cc | 118 ++ src/queue.h | 85 ++ src/sat.cc | 1897 ++++++++++++++++++++++++ src/sat.h | 309 ++++ src/sat_factory.cc | 21 + src/sattypes.h | 156 ++ src/stressapptest_config.h.in | 188 +++ src/worker.cc | 3258 +++++++++++++++++++++++++++++++++++++++++ src/worker.h | 782 ++++++++++ 27 files changed, 11071 insertions(+) create mode 100644 src/Makefile.am create mode 100644 src/Makefile.in create mode 100644 src/adler32memcpy.cc create mode 100644 src/adler32memcpy.h create mode 100644 src/disk_blocks.cc create mode 100644 src/disk_blocks.h create mode 100644 src/error_diag.cc create mode 100644 src/error_diag.h create mode 100644 src/finelock_queue.cc create mode 100644 src/finelock_queue.h create mode 100644 src/logger.cc create mode 100644 src/logger.h create mode 100644 src/main.cc create mode 100644 src/os.cc create mode 100644 src/os.h create mode 100644 src/os_factory.cc create mode 100644 src/pattern.cc create mode 100644 src/pattern.h create mode 100644 src/queue.cc create mode 100644 src/queue.h create mode 100644 src/sat.cc create mode 100644 src/sat.h create mode 100644 src/sat_factory.cc create mode 100644 src/sattypes.h create mode 100644 src/stressapptest_config.h.in create mode 100644 src/worker.cc create mode 100644 src/worker.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..e044974 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,31 @@ +bin_PROGRAMS = stressapptest + +AM_DEFAULT_SOURCE_EXT=.cc + +MAINFILES = main.cc +CFILES = os.cc +CFILES += os_factory.cc +CFILES += pattern.cc +CFILES += queue.cc +CFILES += sat.cc +CFILES += sat_factory.cc +CFILES += worker.cc +CFILES += finelock_queue.cc +CFILES += error_diag.cc +CFILES += disk_blocks.cc +CFILES += adler32memcpy.cc +CFILES += logger.cc + +HFILES = os.h +HFILES += pattern.h +HFILES += queue.h +HFILES += sat.h +HFILES += worker.h +HFILES += sattypes.h +HFILES += finelock_queue.h +HFILES += error_diag.h +HFILES += disk_blocks.h +HFILES += adler32memcpy.h +HFILES += logger.h + +stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..7cd6f2a --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,476 @@ +# Makefile.in generated by automake 1.10.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +bin_PROGRAMS = stressapptest$(EXEEXT) +subdir = src +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ + $(srcdir)/stressapptest_config.h.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = stressapptest_config.h +CONFIG_CLEAN_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +PROGRAMS = $(bin_PROGRAMS) +am__objects_1 = main.$(OBJEXT) +am__objects_2 = os.$(OBJEXT) os_factory.$(OBJEXT) pattern.$(OBJEXT) \ + queue.$(OBJEXT) sat.$(OBJEXT) sat_factory.$(OBJEXT) \ + worker.$(OBJEXT) finelock_queue.$(OBJEXT) error_diag.$(OBJEXT) \ + disk_blocks.$(OBJEXT) adler32memcpy.$(OBJEXT) logger.$(OBJEXT) +am__objects_3 = +am_stressapptest_OBJECTS = $(am__objects_1) $(am__objects_2) \ + $(am__objects_3) +stressapptest_OBJECTS = $(am_stressapptest_OBJECTS) +stressapptest_LDADD = $(LDADD) +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = $(stressapptest_SOURCES) +DIST_SOURCES = $(stressapptest_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +GREP = @GREP@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ 
+am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AM_DEFAULT_SOURCE_EXT = .cc +MAINFILES = main.cc +CFILES = os.cc os_factory.cc pattern.cc queue.cc sat.cc sat_factory.cc \ + worker.cc finelock_queue.cc error_diag.cc disk_blocks.cc \ + adler32memcpy.cc logger.cc +HFILES = os.h pattern.h queue.h sat.h worker.h sattypes.h \ + finelock_queue.h error_diag.h disk_blocks.h adler32memcpy.h \ + logger.h +stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) +all: stressapptest_config.h + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .cc .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +stressapptest_config.h: stamp-h1 + @if test ! 
-f $@; then \ + rm -f stamp-h1; \ + $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ + else :; fi + +stamp-h1: $(srcdir)/stressapptest_config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status src/stressapptest_config.h +$(srcdir)/stressapptest_config.h.in: $(am__configure_deps) + cd $(top_srcdir) && $(AUTOHEADER) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f stressapptest_config.h stamp-h1 +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +clean-binPROGRAMS: + -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) +stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) + @rm -f stressapptest$(EXEEXT) + $(CXXLINK) $(stressapptest_OBJECTS) $(stressapptest_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adler32memcpy.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/disk_blocks.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error_diag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/finelock_queue.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logger.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/os.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/os_factory.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pattern.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/queue.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sat.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sat_factory.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/worker.Po@am__quote@ + +.cc.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ 
files[$$0] = 1; nonemtpy = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) stressapptest_config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) stressapptest_config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) stressapptest_config.h.in $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + list='$(SOURCES) $(HEADERS) stressapptest_config.h.in $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) stressapptest_config.h +installdirs: + for dir in "$(DESTDIR)$(bindir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-info: install-info-am + +install-man: + +install-pdf: install-pdf-am + +install-ps: install-ps-am + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic ctags distclean distclean-compile \ + distclean-generic distclean-hdr distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-binPROGRAMS + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc new file mode 100644 index 0000000..529dcc4 --- /dev/null +++ b/src/adler32memcpy.cc @@ -0,0 +1,380 @@ +// Copyright 2008 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "adler32memcpy.h" + +// We are using (a modified form of) adler-32 checksum algorithm instead +// of CRC since adler-32 is faster than CRC. +// (Comparison: http://guru.multimedia.cx/crc32-vs-adler32/) +// This form of adler is bit modified, instead of treating the data in +// units of bytes, 32-bit data is taken as a unit and two 64-bit +// checksums are done (we could have one checksum but two checksums +// make the code run faster). + +// Adler-32 implementation: +// Data is treated as 1-byte numbers and, +// there are two 16-bit numbers a and b +// Initialize a with 1 and b with 0. +// for each data unit 'd' +// a += d +// b += a +// checksum = a<<16 + b +// This sum should never overflow. +// +// Adler-64+64 implementation: +// (applied in this code) +// Data is treated as 32-bit numbers and whole data is separated into two +// streams, and hence the two checksums a1, a2, b1 and b2. 
+// Initialize a1 and a2 with 1, b1 and b2 with 0 +// add first dataunit to a1 +// add a1 to b1 +// add second dataunit to a1 +// add a1 to b1 +// add third dataunit to a2 +// add a2 to b2 +// add fourth dataunit to a2 +// add a2 to b2 +// ... +// repeat the sequence back for next 4 dataunits +// +// variable A = XMM6 and variable B = XMM7. +// (a1 = lower 8 bytes of XMM6 and b1 = lower 8 bytes of XMM7) + +// Assumptions +// 1. size_in_bytes is a multiple of 16. +// 2. srcmem and dstmem are 16 byte aligned. +// 3. size_in_bytes is less than 2^19 bytes. + +// Assumption 3 ensures that there is no overflow when numbers are being +// added (we can remove this assumption by doing modulus with a prime +// number when it is just about to overflow but that would be a very costly +// exercise) + +// Returns true if the checksums are equal. +bool AdlerChecksum::Equals(const AdlerChecksum &other) const { + return ( (a1_ == other.a1_) && (a2_ == other.a2_) && + (b1_ == other.b1_) && (b2_ == other.b2_) ); +} + +// Returns string representation of the Adler checksum. +string AdlerChecksum::ToHexString() const { + char buffer[128]; + snprintf(buffer, sizeof(buffer), "%llx%llx%llx%llx", a1_, a2_, b1_, b2_); + return string(buffer); +} + +// Sets components of the Adler checksum. +void AdlerChecksum::Set(uint64 a1, uint64 a2, uint64 b1, uint64 b2) { + a1_ = a1; + a2_ = a2; + b1_ = b1; + b2_ = b2; +} + +// Calculates Adler checksum for supplied data. +bool CalculateAdlerChecksum(uint64 *data64, unsigned int size_in_bytes, + AdlerChecksum *checksum) { + // Use this data wrapper to access memory with 64bit read/write. + datacast_t data; + unsigned int count = size_in_bytes / sizeof(data); + + if (count > (1U) << 19) { + // Size is too large, must be strictly less than 512 KB. + return false; + } + + uint64 a1 = 1; + uint64 a2 = 1; + uint64 b1 = 0; + uint64 b2 = 0; + + unsigned int i = 0; + while (i < count) { + // Process 64 bits at a time. + data.l64 = data64[i]; + a1 = a1 + data.l32.l; + b1 = b1 + a1; + a1 = a1 + data.l32.h; + b1 = b1 + a1; + i++; + + data.l64 = data64[i]; + a2 = a2 + data.l32.l; + b2 = b2 + a2; + a2 = a2 + data.l32.h; + b2 = b2 + a2; + i++; + } + checksum->Set(a1, a2, b1, b2); + return true; +} + +// C implementation of Adler memory copy. +bool AdlerMemcpyC(uint64 *dstmem64, uint64 *srcmem64, + unsigned int size_in_bytes, AdlerChecksum *checksum) { + // Use this data wrapper to access memory with 64bit read/write. + datacast_t data; + unsigned int count = size_in_bytes / sizeof(data); + + if (count > ((1U) << 19)) { + // Size is too large, must be strictly less than 512 KB. + return false; + } + + uint64 a1 = 1; + uint64 a2 = 1; + uint64 b1 = 0; + uint64 b2 = 0; + + unsigned int i = 0; + while (i < count) { + // Process 64 bits at a time. + data.l64 = srcmem64[i]; + a1 = a1 + data.l32.l; + b1 = b1 + a1; + a1 = a1 + data.l32.h; + b1 = b1 + a1; + dstmem64[i] = data.l64; + i++; + + data.l64 = srcmem64[i]; + a2 = a2 + data.l32.l; + b2 = b2 + a2; + a2 = a2 + data.l32.h; + b2 = b2 + a2; + dstmem64[i] = data.l64; + i++; + } + checksum->Set(a1, a2, b1, b2); + return true; +} + +// C implementation of Adler memory copy with some float point ops, +// attempting to warm up the CPU. +bool AdlerMemcpyWarmC(uint64 *dstmem64, uint64 *srcmem64, + unsigned int size_in_bytes, AdlerChecksum *checksum) { + // Use this data wrapper to access memory with 64bit read/write. 
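The datacast_t wrapper used throughout these routines comes from sattypes.h, which is part of this commit but not shown in this hunk. Judging from the data.l64 / data.l32.l / data.l32.h accesses and from sizeof(data) acting as an 8-byte divisor, it is presumably a union along these lines (an editorial sketch, not the actual declaration):

    // Hypothetical reconstruction of the 64/32-bit access wrapper (sattypes.h).
    typedef union {
      uint64 l64;    // Whole 64-bit word, moved in a single access.
      struct {
        uint32 l;    // Low 32 bits, the first value folded into a1/a2.
        uint32 h;    // High 32 bits, the second value folded into a1/a2.
      } l32;
    } datacast_t;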
+  datacast_t data;
+  unsigned int count = size_in_bytes / sizeof(data);
+
+  if (count > ((1U) << 19)) {
+    // Size is too large, must be strictly less than 512 KB.
+    return false;
+  }
+
+  uint64 a1 = 1;
+  uint64 a2 = 1;
+  uint64 b1 = 0;
+  uint64 b2 = 0;
+
+  double a = 2.0 * static_cast<double>(srcmem64[0]);
+  double b = 5.0 * static_cast<double>(srcmem64[0]);
+  double c = 7.0 * static_cast<double>(srcmem64[0]);
+  double d = 9.0 * static_cast<double>(srcmem64[0]);
+
+  unsigned int i = 0;
+  while (i < count) {
+    // Process 64 bits at a time.
+    data.l64 = srcmem64[i];
+    a1 = a1 + data.l32.l;
+    b1 = b1 + a1;
+    a1 = a1 + data.l32.h;
+    b1 = b1 + a1;
+    dstmem64[i] = data.l64;
+    i++;
+
+    // Warm cpu up.
+    a = a * b;
+    b = b + c;
+
+    data.l64 = srcmem64[i];
+    a2 = a2 + data.l32.l;
+    b2 = b2 + a2;
+    a2 = a2 + data.l32.h;
+    b2 = b2 + a2;
+    dstmem64[i] = data.l64;
+    i++;
+
+    // Warm cpu up.
+    c = c * d;
+    d = d + d;
+  }
+
+  // Warm cpu up.
+  d = a + b + c + d;
+  if (d == 1.0) {
+    // Reference the result so that it can't be discarded by the compiler.
+    printf("Log: This will probably never happen.\n");
+  }
+
+  checksum->Set(a1, a2, b1, b2);
+  return true;
+}
+
+// x86_64 SSE2 assembly implementation of fast and stressful Adler memory copy.
+bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64,
+                    unsigned int size_in_bytes, AdlerChecksum *checksum) {
+// Use assembly implementation only with 64bit compilation.
+#ifndef STRESSAPPTEST_CPU_X86_64
+  // Fall back to C implementation for 32bit compilation.
+  return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum);
+#else
+  // Elements 0 to 3 are used for holding checksum terms a1, a2,
+  // b1, b2 respectively. These elements are filled by asm code.
+  // Elements 4 and 5 are used by asm code for ANDing XMM data and removing
+  // 2 words from each XMM register (an XMM reg has 4 words; by ANDing we are
+  // setting word index 1 and word index 3 to zero).
+  // Elements 6 and 7 are used for setting a1 and a2 to 1.
+  volatile uint64 checksum_arr[] = {0, 0, 0, 0,
+                                    0x00000000ffffffffUL, 0x00000000ffffffffUL, 1, 1};
+
+  if ((size_in_bytes >> 19) > 0) {
+    // Size is too large. Must be less than 2^19 bytes = 512 KB.
+    return false;
+  }
+
+  // Number of 32-bit words which are not added to a1/a2 in the main loop.
+  uint64 remaining_words = (size_in_bytes % 48) / 4;
+
+  // Since we are moving 48 bytes at a time, the number of iterations
+  // (= total size / 48) is the value of the counter.
+  uint64 num_of_48_byte_units = size_in_bytes / 48;
+
+  asm volatile(
+      // Source address is in ESI (extended source index),
+      // destination is in EDI (extended destination index)
+      // and counter is already in ECX (extended counter index).
+      "cmp  $0, %%ecx;"   // Compare counter to zero.
+      "jz   END;"
+
+      // XMM6 is initialized with 1 and XMM7 with 0.
+      "prefetchnta  0(%%rsi);"
+      "prefetchnta 64(%%rsi);"
+      "movdqu 48(%%rax), %%xmm6;"
+      "xorps  %%xmm7, %%xmm7;"
+
+      // Start of the loop which copies 48 bytes from source to dst each time.
+      "TOP:\n"
+
+      // Make 6 moves each of 16 bytes from srcmem to XMM registers.
+      // We are using 2 words out of 4 words in each XMM register
+      // (word index 0 and word index 2).
+      "movdqa  0(%%rsi), %%xmm0;"
+      "movdqu  4(%%rsi), %%xmm1;"  // Be careful to use unaligned move here.
+      "movdqa 16(%%rsi), %%xmm2;"
+      "movdqu 20(%%rsi), %%xmm3;"
+      "movdqa 32(%%rsi), %%xmm4;"
+      "movdqu 36(%%rsi), %%xmm5;"
+
+      // Move 3 * 16 bytes from XMM registers to dstmem.
+      // Note: this copy must be performed before pinsrw instructions since
+      // they will modify the XMM registers.
+ "movntdq %%xmm0, 0(%%rdi);" + "movntdq %%xmm2, 16(%%rdi);" + "movntdq %%xmm4, 32(%%rdi);" + + // Sets the word[1] and word[3] of XMM0 to XMM5 to zero. + "andps 32(%%rax), %%xmm0;" + "andps 32(%%rax), %%xmm1;" + "andps 32(%%rax), %%xmm2;" + "andps 32(%%rax), %%xmm3;" + "andps 32(%%rax), %%xmm4;" + "andps 32(%%rax), %%xmm5;" + + // Add XMM0 to XMM6 and then add XMM6 to XMM7. + // Repeat this for XMM1, ..., XMM5. + // Overflow(for XMM7) can occur only if there are more + // than 2^16 additions => more than 2^17 words => more than 2^19 bytes so + // if size_in_bytes > 2^19 than overflow occurs. + "paddq %%xmm0, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "paddq %%xmm1, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "paddq %%xmm2, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "paddq %%xmm3, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "paddq %%xmm4, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "paddq %%xmm5, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + + // Increment ESI and EDI by 48 bytes and decrement counter by 1. + "add $48, %%rsi;" + "add $48, %%rdi;" + "prefetchnta 0(%%rsi);" + "prefetchnta 64(%%rsi);" + "dec %%rcx;" + "jnz TOP;" + + // Now only remaining_words 32-bit words are left. + // make a loop, add first two words to a1 and next two to a2 (just like + // above loop, the only extra thing we are doing is rechecking + // %rdx (=remaining_words) everytime we add a number to a1/a2. + "REM_IS_STILL_NOT_ZERO:\n" + // Unless remaining_words becomes less than 4 words(16 bytes) + // there is not much issue and remaining_words will always + // be a multiple of four by assumption. + "cmp $4, %%rdx;" + // In case for some weird reasons if remaining_words becomes + // less than 4 but not zero then also break the code and go off to END. + "jl END;" + // Otherwise just go on and copy data in chunks of 4-words at a time till + // whole data (<48 bytes) is copied. + "movdqa 0(%%rsi), %%xmm0;" // Copy next 4-words to XMM0 and to XMM1. + + "movdqa 0(%%rsi), %%xmm5;" // Accomplish movdqu 4(%%rsi) without + "pshufd $0x39, %%xmm5, %%xmm1;" // indexing off memory boundary. + + "movntdq %%xmm0, 0(%%rdi);" // Copy 4-words to destination. + "andps 32(%%rax), %%xmm0;" + "andps 32(%%rax), %%xmm1;" + "paddq %%xmm0, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "paddq %%xmm1, %%xmm6;" + "paddq %%xmm6, %%xmm7;" + "add $16, %%rsi;" + "add $16, %%rdi;" + "sub $4, %%rdx;" + // Decrement %%rdx by 4 since %%rdx is number of 32-bit + // words left after considering all 48-byte units. + "jmp REM_IS_STILL_NOT_ZERO;" + + "END:\n" + // Report checksum values A and B (both right now are two concatenated + // 64 bit numbers and have to be converted to 64 bit numbers) + // seems like Adler128 (since size of each part is 4 byte rather than + // 1 byte). + "movdqa %%xmm6, 0(%%rax);" + "movdqa %%xmm7, 16(%%rax);" + "sfence;" + + // No output registers. + : + // Input registers. + : "S" (srcmem64), "D" (dstmem64), "a" (checksum_arr), + "c" (num_of_48_byte_units), "d" (remaining_words) + ); // asm. + + if (checksum != NULL) { + checksum->Set(checksum_arr[0], checksum_arr[1], + checksum_arr[2], checksum_arr[3]); + } + + // Everything went fine, so return true (this does not mean + // that there is no problem with memory this just mean that data was copied + // from src to dst and checksum was calculated successfully). + return true; +#endif +} diff --git a/src/adler32memcpy.h b/src/adler32memcpy.h new file mode 100644 index 0000000..d053340 --- /dev/null +++ b/src/adler32memcpy.h @@ -0,0 +1,59 @@ +// Copyright 2008 Google Inc. All Rights Reserved. 
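To make the 48-byte bookkeeping of the assembly path concrete, here is a small standalone example with a hypothetical buffer size (not part of the commit); it only restates the two divisions performed above:

    #include <cassert>
    #include <cstdio>

    // Worked example of the size bookkeeping used by AdlerMemcpyAsm.
    int main() {
      unsigned int size_in_bytes = 1024;                        // hypothetical
      unsigned int units_of_48 = size_in_bytes / 48;            // main-loop iterations
      unsigned int remaining_words = (size_in_bytes % 48) / 4;  // 32-bit words for the tail loop
      assert(units_of_48 == 21);       // 21 * 48 = 1008 bytes handled by TOP
      assert(remaining_words == 4);    // one final 16-byte (4-word) chunk
      printf("units=%u remaining_words=%u\n", units_of_48, remaining_words);
      return 0;
    }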
+ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_ADLER32MEMCPY_H_ +#define STRESSAPPTEST_ADLER32MEMCPY_H_ + +#include +#include "sattypes.h" + +// Encapsulation for Adler checksum. Please see adler32memcpy.cc for more +// detail on the adler checksum algorithm. +class AdlerChecksum { + public: + AdlerChecksum() {} + ~AdlerChecksum() {} + // Returns true if the checksums are equal. + bool Equals(const AdlerChecksum &other) const; + // Returns string representation of the Adler checksum + string ToHexString() const; + // Sets components of the Adler checksum. + void Set(uint64 a1, uint64 a2, uint64 b1, uint64 b2); + + private: + // Components of Adler checksum. + uint64 a1_, a2_, b1_, b2_; + + DISALLOW_COPY_AND_ASSIGN(AdlerChecksum); +}; + +// Calculates Adler checksum for supplied data. +bool CalculateAdlerChecksum(uint64 *data64, unsigned int size_in_bytes, + AdlerChecksum *checksum); + +// C implementation of Adler memory copy. +bool AdlerMemcpyC(uint64 *dstmem64, uint64 *srcmem64, + unsigned int size_in_bytes, AdlerChecksum *checksum); + +// C implementation of Adler memory copy with some float point ops, +// attempting to warm up the CPU. +bool AdlerMemcpyWarmC(uint64 *dstmem64, uint64 *srcmem64, + unsigned int size_in_bytes, AdlerChecksum *checksum); + +// x86_64 SSE2 assembly implementation of fast and stressful Adler memory copy. +bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, + unsigned int size_in_bytes, AdlerChecksum *checksum); + + +#endif // STRESSAPPTEST_ADLER32MEMCPY_H_ diff --git a/src/disk_blocks.cc b/src/disk_blocks.cc new file mode 100644 index 0000000..c7860b0 --- /dev/null +++ b/src/disk_blocks.cc @@ -0,0 +1,313 @@ +// Copyright 2008 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Thread-safe container of disk blocks + +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. 
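A minimal sketch of how a caller might use the API declared in adler32memcpy.h: copy a buffer with the plain C variant and verify the destination by recomputing its checksum. It assumes the caller already has 16-byte-aligned buffers whose size is a multiple of 16 and below 512 KB, per the comments above; the helper name is illustrative only:

    #include "adler32memcpy.h"

    // Hypothetical helper: copy 'bytes' bytes and confirm the data read back
    // from 'dst' matches what was checksummed while copying.
    bool CopyAndVerify(uint64 *dst, uint64 *src, unsigned int bytes) {
      AdlerChecksum copy_sum;    // filled while copying
      AdlerChecksum check_sum;   // recomputed from the destination
      if (!AdlerMemcpyC(dst, src, bytes, &copy_sum))
        return false;            // size out of range
      if (!CalculateAdlerChecksum(dst, bytes, &check_sum))
        return false;
      // A mismatch indicates the memory changed between the copy and the re-read.
      return copy_sum.Equals(check_sum);
    }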
+#include "disk_blocks.h" + +DiskBlockTable::DiskBlockTable() { + nelems_ = 0; + pthread_mutex_init(&data_mutex_, NULL); + pthread_mutex_init(¶meter_mutex_, NULL); + pthread_cond_init(&data_condition_, NULL); +} + +DiskBlockTable::~DiskBlockTable() { + CleanTable(); + pthread_mutex_destroy(&data_mutex_); + pthread_mutex_destroy(¶meter_mutex_); + pthread_cond_destroy(&data_condition_); +} + +void DiskBlockTable::CleanTable() { + pthread_mutex_lock(&data_mutex_); + for (map::iterator it = + addr_to_block_.begin(); it != addr_to_block_.end(); ++it) { + delete it->second; + } + addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end()); + nelems_ = 0; + pthread_cond_broadcast(&data_condition_); + pthread_mutex_unlock(&data_mutex_); +} + +// 64-bit non-negative random number generator. Stolen from +// depot/google3/base/tracecontext_unittest.cc. +int64 DiskBlockTable::Random64() { + int64 x = random(); + x = (x << 30) ^ random(); + x = (x << 30) ^ random(); + if (x >= 0) + return x; + else + return -x; +} + +int64 DiskBlockTable::NumElems() { + unsigned int nelems; + pthread_mutex_lock(&data_mutex_); + nelems = nelems_; + pthread_mutex_unlock(&data_mutex_); + return nelems; +} + +void DiskBlockTable::InsertOnStructure(BlockData *block) { + int64 address = block->GetAddress(); + StorageData *sd = new StorageData(); + sd->block = block; + sd->pos = nelems_; + // Creating new block ... + pthread_mutex_lock(&data_mutex_); + if (pos_to_addr_.size() <= nelems_) { + pos_to_addr_.insert(pos_to_addr_.end(), address); + } else { + pos_to_addr_[nelems_] = address; + } + addr_to_block_.insert(std::make_pair(address, sd)); + nelems_++; + pthread_cond_broadcast(&data_condition_); + pthread_mutex_unlock(&data_mutex_); +} + +int DiskBlockTable::RemoveBlock(BlockData *block) { + // For write threads, check the reference counter and remove + // it from the structure. + int64 address = block->GetAddress(); + AddrToBlockMap::iterator it = addr_to_block_.find(address); + int ret = 1; + if (it != addr_to_block_.end()) { + int curr_pos = it->second->pos; + int last_pos = nelems_ - 1; + AddrToBlockMap::iterator last_it = addr_to_block_.find( + pos_to_addr_[last_pos]); + sat_assert(nelems_ > 0); + sat_assert(last_it != addr_to_block_.end()); + // Everything is fine, updating ... + pthread_mutex_lock(&data_mutex_); + pos_to_addr_[curr_pos] = pos_to_addr_[last_pos]; + last_it->second->pos = curr_pos; + delete it->second; + addr_to_block_.erase(it); + nelems_--; + block->DecreaseReferenceCounter(); + if (block->GetReferenceCounter() == 0) + delete block; + pthread_cond_broadcast(&data_condition_); + pthread_mutex_unlock(&data_mutex_); + } else { + ret = 0; + } + return ret; +} + +int DiskBlockTable::ReleaseBlock(BlockData *block) { + // If is a random thread, just check the reference counter. + int ret = 1; + pthread_mutex_lock(&data_mutex_); + int references = block->GetReferenceCounter(); + if (references > 0) { + if (references == 1) + delete block; + else + block->DecreaseReferenceCounter(); + } else { + ret = 0; + } + pthread_mutex_unlock(&data_mutex_); + return ret; +} + +BlockData *DiskBlockTable::GetRandomBlock() { + struct timespec ts; + struct timeval tp; + int result = 0; + gettimeofday(&tp, NULL); + ts.tv_sec = tp.tv_sec; + ts.tv_nsec = tp.tv_usec * 1000; + ts.tv_sec += 2; // Wait for 2 seconds. 
+ pthread_mutex_lock(&data_mutex_); + while (!nelems_ && result != ETIMEDOUT) { + result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts); + } + if (result == ETIMEDOUT) { + pthread_mutex_unlock(&data_mutex_); + return NULL; + } else { + int64 random_number = Random64(); + int64 random_pos = random_number % nelems_; + int64 address = pos_to_addr_[random_pos]; + AddrToBlockMap::const_iterator it = addr_to_block_.find(address); + sat_assert(it != addr_to_block_.end()); + BlockData *b = it->second->block; + // A block is returned only if its content is written on disk. + if (b->BlockIsInitialized()) { + b->IncreaseReferenceCounter(); + } else { + b = NULL; + } + pthread_mutex_unlock(&data_mutex_); + return b; + } +} + +void DiskBlockTable::SetParameters( + int sector_size, int write_block_size, int64 device_sectors, + int64 segment_size, string device_name) { + pthread_mutex_lock(¶meter_mutex_); + sector_size_ = sector_size; + write_block_size_ = write_block_size; + device_sectors_ = device_sectors; + segment_size_ = segment_size; + device_name_ = device_name; + CleanTable(); + pthread_mutex_unlock(¶meter_mutex_); +} + +BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { + int64 sector = 0; + BlockData *block = new BlockData(); + + bool good_sequence = false; + int num_sectors; + + if (block == NULL) { + logprintf(0, "Process Error: Unable to allocate memory " + "for sector data for disk %s.\n", device_name_.c_str()); + return NULL; + } + + pthread_mutex_lock(¶meter_mutex_); + + sat_assert(device_sectors_ != 0); + + // Align the first sector with the beginning of a write block + num_sectors = write_block_size_ / sector_size_; + + for (int i = 0; i < kBlockRetry && !good_sequence; i++) { + good_sequence = true; + + // Use the entire disk or a small segment of the disk to allocate the first + // sector in the block from. + + if (segment_size_ == -1) { + sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % ( + device_sectors_ / num_sectors); + sector *= num_sectors; + } else { + sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % ( + segment_size_ / num_sectors); + sector *= num_sectors; + sector += segment * segment_size_; + + // Make sure the block is within the segment. + if (sector + num_sectors > (segment + 1) * segment_size_) { + good_sequence = false; + continue; + } + } + // Make sure the entire block is in range. + if (sector + num_sectors > device_sectors_) { + good_sequence = false; + continue; + } + // Check to see if the block is free. Since the blocks are + // now aligned to the write_block_size, it is not necessary + // to check each sector, just the first block (a sector + // overlap will never occur). + + pthread_mutex_lock(&data_mutex_); + if (addr_to_block_.find(sector) != addr_to_block_.end()) { + good_sequence = false; + } + pthread_mutex_unlock(&data_mutex_); + } + + if (good_sequence) { + block->SetParameters(sector, write_block_size_); + block->IncreaseReferenceCounter(); + InsertOnStructure(block); + } else { + // No contiguous sequence of num_sectors sectors was found within + // kBlockRetry iterations so return an error value. 
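GetUnusedBlock above only ever proposes blocks that start on a write_block_size boundary. A short numeric sketch of that alignment step, with hypothetical sizes (512-byte sectors, 4 KB write blocks; all values are illustrative only):

    #include <cassert>

    // Illustration of the alignment math in DiskBlockTable::GetUnusedBlock.
    int main() {
      int sector_size = 512;                             // hypothetical
      int write_block_size = 4096;                       // hypothetical
      long long device_sectors = 1000000;                // hypothetical
      int num_sectors = write_block_size / sector_size;  // 8 sectors per block
      // The candidate is drawn in units of whole write blocks and then scaled
      // back to a sector index, so it always lands on a write-block boundary.
      long long candidate = 12345 % (device_sectors / num_sectors);
      candidate *= num_sectors;
      assert(candidate % num_sectors == 0);
      return 0;
    }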
+ delete block; + block = NULL; + } + pthread_mutex_unlock(¶meter_mutex_); + + return block; +} + +// BlockData + +BlockData::BlockData() { + addr_ = 0; + size_ = 0; + references_ = 0; + initialized_ = false; + pthread_mutex_init(&data_mutex_, NULL); +} + +BlockData::~BlockData() { + pthread_mutex_destroy(&data_mutex_); +} + +void BlockData::SetParameters(int64 address, int64 size) { + addr_ = address; + size_ = size; +} + +void BlockData::IncreaseReferenceCounter() { + references_++; +} + +void BlockData::DecreaseReferenceCounter() { + references_--; +} + +int BlockData::GetReferenceCounter() { + return references_; +} + +void BlockData::SetBlockAsInitialized() { + pthread_mutex_lock(&data_mutex_); + initialized_ = true; + pthread_mutex_unlock(&data_mutex_); +} + +bool BlockData::BlockIsInitialized() { + pthread_mutex_lock(&data_mutex_); + bool initialized = initialized_; + pthread_mutex_unlock(&data_mutex_); + return initialized; +} + +int64 BlockData::GetAddress() { + return addr_; +} + +int64 BlockData::GetSize() { + return size_; +} + +Pattern *BlockData::GetPattern() { + return pattern_; +} + +void BlockData::SetPattern(Pattern *p) { + pattern_ = p; +} diff --git a/src/disk_blocks.h b/src/disk_blocks.h new file mode 100644 index 0000000..f4ca93f --- /dev/null +++ b/src/disk_blocks.h @@ -0,0 +1,115 @@ +// Copyright 2008 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Interface for a thread-safe container of disk blocks + +#ifndef STRESSAPPTEST_DISK_BLOCKS_H_ +#define STRESSAPPTEST_DISK_BLOCKS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "pattern.h" + +// Data about a block written to disk so that it can be verified later. +class BlockData { + public: + BlockData(); + ~BlockData(); + void SetParameters(int64 address, int64 size); + void IncreaseReferenceCounter(); + void DecreaseReferenceCounter(); + int GetReferenceCounter(); + void SetBlockAsInitialized(); + bool BlockIsInitialized(); + int64 GetAddress(); + int64 GetSize(); + void SetPattern(Pattern *p); + Pattern *GetPattern(); + protected: + int64 addr_; // address of first sector in block + int64 size_; // size of block + int references_; // reference counter + bool initialized_; // flag indicating the block was written on disk + Pattern *pattern_; + pthread_mutex_t data_mutex_; + DISALLOW_COPY_AND_ASSIGN(BlockData); +}; + +// Disk Block table - store data from blocks to be write / read by +// a DiskThread +class DiskBlockTable { + public: + DiskBlockTable(); + virtual ~DiskBlockTable(); + + // Get Number of elements stored on table + int64 NumElems(); + // Clean all table data + void CleanTable(); + // Get a random block from the list. Only returns if a element + // is available (consider that other thread must have added them. + BlockData *GetRandomBlock(); + // Set all initial parameters. 
Assumes all existent data is + // invalid and, therefore, must be removed. + void SetParameters(int sector_size, int write_block_size, + int64 device_sectors, + int64 segment_size, + string device_name); + // Return a new block in a unused address. + BlockData *GetUnusedBlock(int64 segment); + // Remove block from structure (called by write threads) + int RemoveBlock(BlockData *block); + // Release block to be erased (called by random threads) + int ReleaseBlock(BlockData *block); + + protected: + + void InsertOnStructure(BlockData *block); + // Generate a random 64-bit integer (virtual so it could be + // override by the tests) + virtual int64 Random64(); + + struct StorageData { + BlockData *block; + int pos; + }; + + static const int kBlockRetry = 100; // Number of retries to allocate + // sectors. + + typedef map AddrToBlockMap; + typedef vector PosToAddrVector; + PosToAddrVector pos_to_addr_; + AddrToBlockMap addr_to_block_; + int64 nelems_; + int sector_size_; // Sector size, in bytes + int write_block_size_; // Block size, in bytes + string device_name_; // Device name + int64 device_sectors_; // Number of sectors in device + int64 segment_size_; // Segment size, in bytes + pthread_mutex_t data_mutex_; + pthread_cond_t data_condition_; + pthread_mutex_t parameter_mutex_; + DISALLOW_COPY_AND_ASSIGN(DiskBlockTable); +}; + +#endif // STRESSAPPTEST_BLOCKS_H_ diff --git a/src/error_diag.cc b/src/error_diag.cc new file mode 100644 index 0000000..53f056f --- /dev/null +++ b/src/error_diag.cc @@ -0,0 +1,317 @@ +// Copyright 2008 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// error_diag.cc: Collects device errors for analysis to more accurately +// pin-point failed component. + +#include +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "error_diag.h" +#include "sattypes.h" + + +// DeviceTree constructor. +DeviceTree::DeviceTree(string name) + : parent_(0), name_(name) { + pthread_mutex_init(&device_tree_mutex_, NULL); +} + +// DeviceTree destructor. +DeviceTree::~DeviceTree() { + // Deallocate subtree devices. + for (std::map::iterator itr = subdevices_.begin(); + itr != subdevices_.end(); + ++itr) { + delete itr->second; + } + // Deallocate device errors. + for (std::list::iterator itr = errors_.begin(); + itr != errors_.end(); + ++itr) { + delete (*itr); + } + pthread_mutex_destroy(&device_tree_mutex_); +} + +// Atomically find named device in sub device tree. +// Returns 0 if not found +DeviceTree *DeviceTree::FindInSubTree(string name) { + DeviceTree *ret; + pthread_mutex_lock(&device_tree_mutex_); + ret = UnlockedFindInSubTree(name); + pthread_mutex_unlock(&device_tree_mutex_); + return ret; +} + +// Find named device in sub device tree (Non-atomic). 
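The class above is shared between two kinds of threads: writer threads that own a block until they remove it, and random reader threads that only borrow an already-written block. A hedged sketch of both flows, using only the methods declared above (the actual disk I/O and verification are elided):

    #include "disk_blocks.h"

    // Writer-thread flow (sketch): claim a fresh block, mark it written, retire it.
    void WriterPass(DiskBlockTable *table, int64 segment) {
      BlockData *block = table->GetUnusedBlock(segment);
      if (!block)
        return;                        // no free, aligned block was found
      // ... write the block's sectors to disk here ...
      block->SetBlockAsInitialized();  // now eligible for random readers
      // ... later, after re-reading and verifying the block ...
      table->RemoveBlock(block);       // drops the writer's reference
    }

    // Random-reader flow (sketch): borrow an initialized block, then release it.
    void ReaderPass(DiskBlockTable *table) {
      BlockData *block = table->GetRandomBlock();  // may wait up to ~2 seconds
      if (!block)
        return;
      // ... read the block back from disk and compare against its pattern ...
      table->ReleaseBlock(block);      // drops the reader's reference
    }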
+// Returns 0 if not found +DeviceTree *DeviceTree::UnlockedFindInSubTree(string name) { + std::map::iterator itr = subdevices_.find(name); + if (itr != subdevices_.end()) { + return itr->second; + } else { + // Search sub-tree. + for (std::map::iterator itr = subdevices_.begin(); + itr != subdevices_.end(); + ++itr) { + DeviceTree *result = itr->second->UnlockedFindInSubTree(name); + if (result != 0) + return result; + } + return 0; + } +} + +// Atomically add error instance to device. +void DeviceTree::AddErrorInstance(ErrorInstance *error_instance) { + pthread_mutex_lock(&device_tree_mutex_); + errors_.push_back(error_instance); + pthread_mutex_unlock(&device_tree_mutex_); +} + +// Find or add queried device as necessary. +DeviceTree *DeviceTree::FindOrAddDevice(string name) { + // Assume named device does not exist and try to insert the device anyway. + // No-op if named device already exists. + InsertSubDevice(name); + // Find and return sub device pointer. + return FindInSubTree(name); +} + +// Pretty prints device tree. +void DeviceTree::PrettyPrint(string spacer) { + for (std::map::iterator itr = subdevices_.begin(); + itr != subdevices_.end(); + ++itr) { + printf("%s%s\n", spacer.c_str(), itr->first.c_str()); + itr->second->PrettyPrint(spacer+spacer); + } +} + +// Atomically add sub device. +// No-op if named device already exists. +void DeviceTree::InsertSubDevice(string name) { + pthread_mutex_lock(&device_tree_mutex_); + if (UnlockedFindInSubTree(name) != 0) { + pthread_mutex_unlock(&device_tree_mutex_); + return; + } + subdevices_[name] = new DeviceTree(name); + subdevices_[name]->parent_ = this; + pthread_mutex_unlock(&device_tree_mutex_); +} + + +// Returns true of any error associated with this device is fatal. +bool DeviceTree::KnownBad() { + pthread_mutex_lock(&device_tree_mutex_); + for (std::list::iterator itr = errors_.begin(); + itr != errors_.end(); + ++itr) { + if ((*itr)->severity_ == SAT_ERROR_FATAL) { + pthread_mutex_unlock(&device_tree_mutex_); + return true; + } + } + pthread_mutex_unlock(&device_tree_mutex_); + return false; +} + + +// ErrorDiag constructor. +ErrorDiag::ErrorDiag() { + os_ = 0; + system_tree_root_ = 0; +} + +// ErrorDiag destructor. +ErrorDiag::~ErrorDiag() { + if (system_tree_root_) + delete system_tree_root_; +} + +// Set platform specific handle and initialize device tree. +// Returns false on error. true otherwise. +bool ErrorDiag::set_os(OsLayer *os) { + os_ = os; + return(InitializeDeviceTree()); +} + +// Create and initialize system device tree. +// Returns false on error. true otherwise. +bool ErrorDiag::InitializeDeviceTree() { + system_tree_root_ = new DeviceTree("system_root"); + if (!system_tree_root_) + return false; + return true; +} + +// Logs info about a CECC. +// Returns -1 on error, 1 if diagnoser reports error externally; 0 otherwise. +int ErrorDiag::AddCeccError(string dimm_string) { + DeviceTree *dimm_device = system_tree_root_->FindOrAddDevice(dimm_string); + ECCErrorInstance *error = new ECCErrorInstance; + if (!error) + return -1; + error->severity_ = SAT_ERROR_CORRECTABLE; + dimm_device->AddErrorInstance(error); + return 0; +} + +// Logs info about a UECC. +// Returns -1 on error, 1 if diagnoser reports error externally; 0 otherwise. 
+int ErrorDiag::AddUeccError(string dimm_string) {
+  DeviceTree *dimm_device = system_tree_root_->FindOrAddDevice(dimm_string);
+  ECCErrorInstance *error = new ECCErrorInstance;
+  if (!error)
+    return -1;
+  error->severity_ = SAT_ERROR_FATAL;
+  dimm_device->AddErrorInstance(error);
+  return 0;
+}
+
+// Logs info about a miscompare.
+// Returns -1 on error, 1 if diagnoser reports error externally; 0 otherwise.
+int ErrorDiag::AddMiscompareError(string dimm_string, uint64 addr, int count) {
+  DeviceTree *dimm_device = system_tree_root_->FindOrAddDevice(dimm_string);
+  MiscompareErrorInstance *error = new MiscompareErrorInstance;
+  if (!error)
+    return -1;
+  error->severity_ = SAT_ERROR_FATAL;
+  error->addr_ = addr;
+  dimm_device->AddErrorInstance(error);
+  os_->ErrorReport(dimm_string.c_str(), "miscompare", count);
+  return 1;
+}
+
+// Utility function to translate a virtual address to DIMM number.
+// Returns the DIMM name, or "DIMM Unknown" if the lookup fails.
+string ErrorDiag::AddressToDimmString(OsLayer *os, void *addr, int offset) {
+  char dimm_string[256] = "";
+  char *vbyteaddr = reinterpret_cast<char*>(addr) + offset;
+  uint64 paddr = os->VirtualToPhysical(vbyteaddr);
+  os->FindDimm(paddr, dimm_string, sizeof(dimm_string));
+  return string(dimm_string);
+}
+
+// Logs info about a miscompare from a drive.
+// Returns -1 on error, 1 if diagnoser reports error externally; 0 otherwise.
+int ErrorDiag::AddHDDMiscompareError(string devicename, int block, int offset,
+                                     void *src_addr, void *dst_addr) {
+  bool mask_hdd_error = false;
+
+  HDDMiscompareErrorInstance *error = new HDDMiscompareErrorInstance;
+  if (!error)
+    return -1;
+
+  error->addr_ = reinterpret_cast<uint64>(src_addr);
+  error->addr2_ = reinterpret_cast<uint64>(dst_addr);
+  error->offset_ = offset;
+  error->block_ = block;
+
+  string src_dimm = AddressToDimmString(os_, src_addr, offset);
+  string dst_dimm = AddressToDimmString(os_, dst_addr, offset);
+
+  // DIMM name lookup succeeded.
+  if (src_dimm.compare("DIMM Unknown")) {
+    // Add src DIMM as possible miscompare cause.
+    DeviceTree *src_dimm_dev = system_tree_root_->FindOrAddDevice(src_dimm);
+    error->causes_.insert(src_dimm_dev);
+    if (src_dimm_dev->KnownBad()) {
+      mask_hdd_error = true;
+      logprintf(5, "Log: suppressed %s miscompare report: "
+                "known bad source: %s\n", devicename.c_str(), src_dimm.c_str());
+    }
+  }
+  if (dst_dimm.compare("DIMM Unknown")) {
+    // Add dst DIMM as possible miscompare cause.
+    DeviceTree *dst_dimm_dev = system_tree_root_->FindOrAddDevice(dst_dimm);
+    error->causes_.insert(dst_dimm_dev);
+    if (dst_dimm_dev->KnownBad()) {
+      mask_hdd_error = true;
+      logprintf(5, "Log: suppressed %s miscompare report: "
+                "known bad destination: %s\n", devicename.c_str(),
+                dst_dimm.c_str());
+    }
+  }
+
+  DeviceTree *hdd_dev = system_tree_root_->FindOrAddDevice(devicename);
+  hdd_dev->AddErrorInstance(error);
+
+  // HDD error was not masked by bad DIMMs: report bad HDD.
+  if (!mask_hdd_error) {
+    os_->ErrorReport(devicename.c_str(), "miscompare", 1);
+    error->severity_ = SAT_ERROR_FATAL;
+    return 1;
+  }
+  return 0;
+}
+
+// Logs info about a sector tag miscompare from a drive.
+// Returns -1 on error, 1 if diagnoser reports error externally; 0 otherwise.
+int ErrorDiag::AddHDDSectorTagError(string devicename, int block, int offset, + int sector, void *src_addr, + void *dst_addr) { + bool mask_hdd_error = false; + + HDDSectorTagErrorInstance *error = new HDDSectorTagErrorInstance; + if (!error) + return -1; + + error->addr_ = reinterpret_cast(src_addr); + error->addr2_ = reinterpret_cast(dst_addr); + error->sector_ = sector; + error->block_ = block; + + string src_dimm = AddressToDimmString(os_, src_addr, offset); + string dst_dimm = AddressToDimmString(os_, dst_addr, offset); + + // DIMM name look up success + if (src_dimm.compare("DIMM Unknown")) { + // Add src DIMM as possible miscompare cause. + DeviceTree *src_dimm_dev = system_tree_root_->FindOrAddDevice(src_dimm); + error->causes_.insert(src_dimm_dev); + if (src_dimm_dev->KnownBad()) { + mask_hdd_error = true; + logprintf(5, "Log: supressed %s sector tag error report: " + "known bad source: %s\n", devicename.c_str(), src_dimm.c_str()); + } + } + if (dst_dimm.compare("DIMM Unknown")) { + // Add dst DIMM as possible miscompare cause. + DeviceTree *dst_dimm_dev = system_tree_root_->FindOrAddDevice(dst_dimm); + error->causes_.insert(dst_dimm_dev); + if (dst_dimm_dev->KnownBad()) { + mask_hdd_error = true; + logprintf(5, "Log: supressed %s sector tag error report: " + "known bad destination: %s\n", devicename.c_str(), + dst_dimm.c_str()); + } + } + + DeviceTree *hdd_dev = system_tree_root_->FindOrAddDevice(devicename); + hdd_dev->AddErrorInstance(error); + + // HDD error was not masked by bad DIMMs: report bad HDD. + if (!mask_hdd_error) { + os_->ErrorReport(devicename.c_str(), "sector", 1); + error->severity_ = SAT_ERROR_FATAL; + return 1; + } + return 0; +} diff --git a/src/error_diag.h b/src/error_diag.h new file mode 100644 index 0000000..7faedb8 --- /dev/null +++ b/src/error_diag.h @@ -0,0 +1,167 @@ +// Copyright 2008 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// error_diag.h: Ambiguous error diagnosis class + +#ifndef STRESSAPPTEST_ERROR_DIAG_H_ +#define STRESSAPPTEST_ERROR_DIAG_H_ + +#include +#include +#include +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "sattypes.h" +#include "os.h" + +class ErrorInstance; + +// This describes the components of the system. +class DeviceTree { + public: + explicit DeviceTree(string name); + ~DeviceTree(); + + // Atomically find arbitrary device in subtree. + DeviceTree *FindInSubTree(string name); + // Find or add named device. + DeviceTree *FindOrAddDevice(string name); + // Atomically add sub device. + void InsertSubDevice(string name); + // Returns parent device. + DeviceTree *GetParent() { return parent_; } + // Pretty prints device tree. + void PrettyPrint(string spacer = " "); + // Atomically add error instance to device. + void AddErrorInstance(ErrorInstance *error_instance); + // Returns true of device is known to be bad. + bool KnownBad(); + // Returns number of direct sub devices. 
+ int NumDirectSubDevices() { return subdevices_.size(); } + + private: + // Unlocked version of FindInSubTree. + DeviceTree *UnlockedFindInSubTree(string name); + + std::map subdevices_; // Map of sub-devices. + std::list errors_; // Log of errors. + DeviceTree *parent_; // Pointer to parent device. + string name_; // Device name. + pthread_mutex_t device_tree_mutex_; // Mutex protecting device tree. +}; + + +// enum type for collected errors. +enum SATErrorType { + SAT_ERROR_NONE = 0, + SAT_ERROR_ECC, + SAT_ERROR_MISCOMPARE, + SAT_ERROR_SECTOR_TAG, +}; + +// enum type for error severity. +enum SATErrorSeverity { + SAT_ERROR_CORRECTABLE = 0, + SAT_ERROR_FATAL, +}; + +// This describes an error and it's likely causes. +class ErrorInstance { + public: + ErrorInstance(): type_(SAT_ERROR_NONE), severity_(SAT_ERROR_CORRECTABLE) {} + + SATErrorType type_; // Type of error: ECC, miscompare, sector. + SATErrorSeverity severity_; // Correctable, or fatal. + std::set causes_; // Devices that can cause this type of error. +}; + +// This describes ECC errors. +class ECCErrorInstance: public ErrorInstance { + public: + ECCErrorInstance() { type_ = SAT_ERROR_ECC; } + + uint64 addr_; // Address where error occured. +}; + +// This describes miscompare errors. +class MiscompareErrorInstance: public ErrorInstance { + public: + MiscompareErrorInstance() { type_ = SAT_ERROR_MISCOMPARE; } + + uint64 addr_; // Address where miscompare occured. +}; + +// This describes HDD miscompare errors. +class HDDMiscompareErrorInstance: public MiscompareErrorInstance { + public: + uint64 addr2_; // addr_ and addr2_ are src and dst memory addr. + int offset_; // offset. + int block_; // error block. +}; + +// This describes HDD miscompare errors. +class HDDSectorTagErrorInstance: public ErrorInstance { + public: + HDDSectorTagErrorInstance() { type_ = SAT_ERROR_SECTOR_TAG; } + + uint64 addr_; + uint64 addr2_; // addr_ and addr2_ are src and dst memory addr. + int sector_; // error sector. + int block_; // error block. +}; + +// Generic error storage and sorting class. +class ErrorDiag { + public: + ErrorDiag(); + virtual ~ErrorDiag(); + + // Add info about a CECC. + virtual int AddCeccError(string dimm_string); + + // Add info about a UECC. + virtual int AddUeccError(string dimm_string); + + // Add info about a miscompare. + virtual int AddMiscompareError(string dimm_string, uint64 addr, int count); + + // Add info about a miscompare from a drive. + virtual int AddHDDMiscompareError(string devicename, int block, int offset, + void *src_addr, void *dst_addr); + + // Add info about a sector tag miscompare from a drive. + virtual int AddHDDSectorTagError(string devicename, int block, int offset, + int sector, void *src_addr, void *dst_addr); + + // Set platform specific handle and initialize device tree. + bool set_os(OsLayer *os); + + protected: + // Create and initialize system device tree. + virtual bool InitializeDeviceTree(); + + // Utility Function to translate a virtual address to DIMM number. + string AddressToDimmString(OsLayer *os, void *addr, int offset); + + DeviceTree *system_tree_root_; // System device tree. + OsLayer *os_; // Platform handle. + + private: + DISALLOW_COPY_AND_ASSIGN(ErrorDiag); +}; + +#endif // STRESSAPPTEST_ERROR_DIAG_H_ diff --git a/src/finelock_queue.cc b/src/finelock_queue.cc new file mode 100644 index 0000000..27cc37d --- /dev/null +++ b/src/finelock_queue.cc @@ -0,0 +1,441 @@ +// Copyright 2007 Google Inc. All Rights Reserved. 
+ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is an interface to a simple thread safe container with fine-grain locks, +// used to hold data blocks and patterns. + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "finelock_queue.h" +#include "os.h" + +// Page entry queue implementation follows. +// Push and Get functions are analogous to lock and unlock operations on a given +// page entry, while preserving queue semantics. +// +// The actual 'queue' implementation is actually just an array. The entries are +// never shuffled or re-ordered like that of a real queue. Instead, Get +// functions return a random page entry of a given type and lock that particular +// page entry until it is unlocked by corresponding Put functions. +// +// In this implementation, a free page is those page entries where pattern is +// null (pe->pattern == 0) + + +// Constructor: Allocates memory and initialize locks. +FineLockPEQueue::FineLockPEQueue( + uint64 queuesize, int64 pagesize) { + q_size_ = queuesize; + pages_ = new struct page_entry[q_size_]; + pagelocks_ = new pthread_mutex_t[q_size_]; + page_size_ = pagesize; + + // What metric should we measure this run. + queue_metric_ = kTouch; + + { // Init all the page locks. + for (int64 i = 0; i < q_size_; i++) + pthread_mutex_init(&(pagelocks_[i]), NULL); + } + + { // Init the random number generator. + for (int i = 0; i < 4; i++) { + rand_seed_[i] = i + 0xbeef; + pthread_mutex_init(&(randlocks_[i]), NULL); + } + } + + // Try to make a linear congruential generator with our queue size. + // We need this to deterministically search all the queue (being able to find + // a single available element is a design requirement), but we don't want to + // cause any page to be more likley chosen than another. The previous + // sequential retry heavily biased pages at the beginning of a bunch, or + // isolated pages surrounded by unqualified ones. + int64 length = queuesize; + int64 modlength = length; + int64 a; + int64 c; + + if (length < 3) { + a = 1; + c = 1; + } else { + // Search for a nontrivial generator. + a = getA(length) % length; + // If this queue size doesn't have a nontrivial generator (where the + // multiplier is greater then one) we'll check increasing queue sizes, + // and discard out of bounds results. + while (a == 1) { + modlength++; + a = getA(modlength) % modlength; + } + c = getC(modlength); + } + + // This is our final generator. + a_ = a; + c_ = c; + modlength_ = modlength; +} + +// Part of building a linear congruential generator n1 = (a * n0 + c) % m +// Get 'a', where a - 1 must be divisible by all prime +// factors of 'm', our queue size. +int64 FineLockPEQueue::getA(int64 m) { + int64 remaining = m; + int64 a = 1; + if ((((remaining / 4) * 4) == remaining)) { + a = 2; + } + // For each number, let's see if it's divisible, + // then divide it out. 
+ for (int64 i = 2; i <= m; i++) { + if (((remaining / i) * i) == remaining) { + remaining /= i; + // Keep dividing it out until there's no more. + while (((remaining / i) * i) == remaining) + remaining /= i; + a *= i; + } + } + + // Return 'a' as specified. + return (a + 1) % m; +} + + +// Part of building a linear congruential generator n1 = (a * n0 + c) % m +// Get a prime number approx 3/4 the size of our queue. +int64 FineLockPEQueue::getC(int64 m) { + // Start here at 3/4. + int64 start = (3 * m) / 4 + 1; + int64 possible_prime = start; + // Keep trying until we find a prime. + for (possible_prime = start; possible_prime > 1; possible_prime--) { + bool failed = false; + for (int64 i = 2; i < possible_prime; i++) { + if (((possible_prime / i) * i) == possible_prime) { + failed = true; + break; + } + } + if (!failed) { + return possible_prime; + } + } + // One is prime enough. + return 1; +} + +// Destructor: Clean-up allocated memory and destroy pthread locks. +FineLockPEQueue::~FineLockPEQueue() { + int64 i; + for (i = 0; i < q_size_; i++) + pthread_mutex_destroy(&(pagelocks_[i])); + delete[] pagelocks_; + delete[] pages_; + for (i = 0; i < 4; i++) { + pthread_mutex_destroy(&(randlocks_[i])); + } +} + + +bool FineLockPEQueue::QueueAnalysis() { + const char *measurement = "Error"; + uint64 buckets[32]; + + if (queue_metric_ == kTries) + measurement = "Failed retrievals"; + else if (queue_metric_ == kTouch) + measurement = "Reads per page"; + + // Buckets for each log2 access counts. + for (int b = 0; b < 32; b++) { + buckets[b] = 0; + } + + // Bucketize the page counts by highest bit set. + for (int64 i = 0; i < q_size_; i++) { + uint32 readcount = pages_[i].touch; + int b = 0; + for (b = 0; b < 31; b++) { + if (readcount < (1 << b)) + break; + } + + buckets[b]++; + } + + logprintf(12, "Log: %s histogram\n", measurement); + for (int b = 0; b < 32; b++) { + if (buckets[b]) + logprintf(12, "Log: %12d - %12d: %12d\n", + ((1 << b) >> 1), 1 << b, buckets[b]); + } + + return true; +} + +namespace { +// Callback mechanism for exporting last action. +OsLayer *g_os; +FineLockPEQueue *g_fpqueue = 0; + +// Global callback to hook into Os object. +bool err_log_callback(uint64 paddr, string *buf) { + if (g_fpqueue) { + return g_fpqueue->ErrorLogCallback(paddr, buf); + } + return false; +} +} + +// Setup global state for exporting callback. +void FineLockPEQueue::set_os(OsLayer *os) { + g_os = os; + g_fpqueue = this; +} + +OsLayer::ErrCallback FineLockPEQueue::get_err_log_callback() { + return err_log_callback; +} + +// This call is used to export last transaction info on a particular physical +// address. +bool FineLockPEQueue::ErrorLogCallback(uint64 paddr, string *message) { + struct page_entry pe; + OsLayer *os = g_os; + sat_assert(g_os); + char buf[256]; + + // Find the page of this paddr. + int gotpage = GetPageFromPhysical(paddr, &pe); + if (!gotpage) { + return false; + } + + // Find offset into the page. + uint64 addr_diff = paddr - pe.paddr; + + // Find vaddr of this paddr. Make sure it matches, + // as sometimes virtual memory is not contiguous. + char *vaddr = + reinterpret_cast(os->PrepareTestMem(pe.offset, page_size_)); + uint64 new_paddr = os->VirtualToPhysical(vaddr + addr_diff); + os->ReleaseTestMem(vaddr, pe.offset, page_size_); + + // Is the physical address at this page offset the same as + // the physical address we were given? + if (new_paddr != paddr) { + return false; + } + + // Print all the info associated with this page. 
+ message->assign(" (Last Transaction:"); + + if (pe.lastpattern) { + int offset = addr_diff / 8; + datacast_t data; + + data.l32.l = pe.lastpattern->pattern(offset << 1); + data.l32.h = pe.lastpattern->pattern((offset << 1) + 1); + + snprintf(buf, sizeof(buf), " %s data=%#016llx", + pe.lastpattern->name(), data.l64); + message->append(buf); + } + snprintf(buf, sizeof(buf), " tsc=%#llx)", pe.ts); + message->append(buf); + return true; +} + +bool FineLockPEQueue::GetPageFromPhysical(uint64 paddr, + struct page_entry *pe) { + // Traverse through array until finding a page + // that contains the address we want.. + for (int64 i = 0; i < q_size_; i++) { + uint64 page_addr = pages_[i].paddr; + // This assumes linear vaddr. + if ((page_addr <= paddr) && (page_addr + page_size_ > paddr)) { + *pe = pages_[i]; + return true; + } + } + return false; +} + + +// Get a random number from the slot we locked. +uint64 FineLockPEQueue::GetRandom64FromSlot(int slot) { + // 64 bit LCG numbers suggested on the internets by + // http://nuclear.llnl.gov/CNP/rng/rngman/node4.html and others. + uint64 result = 2862933555777941757ULL * rand_seed_[slot] + 3037000493ULL; + rand_seed_[slot] = result; + return result; +} + +// Get a random number, we have 4 generators to choose from so hopefully we +// won't be blocking on this. +uint64 FineLockPEQueue::GetRandom64() { + // Try each available slot. + for (int i = 0; i < 4; i++) { + if (pthread_mutex_trylock(&(randlocks_[i])) == 0) { + uint64 result = GetRandom64FromSlot(i); + pthread_mutex_unlock(&(randlocks_[i])); + return result; + } + } + // Forget it, just wait. + int i = 0; + if (pthread_mutex_lock(&(randlocks_[i])) == 0) { + uint64 result = GetRandom64FromSlot(i); + pthread_mutex_unlock(&(randlocks_[i])); + return result; + } + + logprintf(0, "Process Error: Could not acquire random lock.\n"); + sat_assert(0); + return 0; +} + + +// Helper function to get a random page entry with given predicate, +// ie, page_is_valid() or page_is_empty() as defined in finelock_queue.h. +// +// Setting tag to a value other than kDontCareTag (-1) +// indicates that we need a tag match, otherwise any tag will do. +// +// Returns true on success, false on failure. +bool FineLockPEQueue::GetRandomWithPredicateTag(struct page_entry *pe, + bool (*pred_func)(struct page_entry*), + int32 tag) { + if (!pe || !q_size_) + return false; + + // Randomly index into page entry array. + uint64 first_try = GetRandom64() % q_size_; + uint64 next_try = 1; + + // Traverse through array until finding a page meeting given predicate. + for (int64 i = 0; i < q_size_; i++) { + uint64 index = (next_try + first_try) % q_size_; + // Go through the loop linear conguentially. We are offsetting by + // 'first_try' so this path will be a different sequence for every + // initioal value chosen. + next_try = (a_ * next_try + c_) % (modlength_); + while (next_try >= q_size_) { + // If we have chosen a modlength greater than the queue size, + // discard out of bounds results. + next_try = (a_ * next_try + c_) % (modlength_); + } + + // If page does not meet predicate, don't trylock (expensive). + if (!(pred_func)(&pages_[index])) + continue; + + // If page does not meet tag predicate, don't trylock (expensive). + if ((tag != kDontCareTag) && !(pages_[index].tag & tag)) + continue; + + if (pthread_mutex_trylock(&(pagelocks_[index])) == 0) { + // If page property (valid/empty) changes before successfully locking, + // release page and move on. 
+ if (!(pred_func)(&pages_[index])) { + pthread_mutex_unlock(&(pagelocks_[index])); + continue; + } else { + // A page entry with given predicate is locked, returns success. + *pe = pages_[index]; + + // Add metrics as necessary. + if (pred_func == page_is_valid) { + // Measure time to fetch valid page. + if (queue_metric_ == kTries) + pe->touch = i; + // Measure number of times each page is read. + if (queue_metric_ == kTouch) + pe->touch++; + } + + return true; + } + } + } + + return false; +} + +// Without tag hint. +bool FineLockPEQueue::GetRandomWithPredicate(struct page_entry *pe, + bool (*pred_func)(struct page_entry*)) { + return GetRandomWithPredicateTag(pe, pred_func, kDontCareTag); +} + + +// GetValid() randomly finds a valid page, locks it and returns page entry by +// pointer. +// +// Returns true on success, false on failure. +bool FineLockPEQueue::GetValid(struct page_entry *pe) { + return GetRandomWithPredicate(pe, page_is_valid); +} + +bool FineLockPEQueue::GetValid(struct page_entry *pe, int32 mask) { + return GetRandomWithPredicateTag(pe, page_is_valid, mask); +} + +// GetEmpty() randomly finds an empty page, locks it and returns page entry by +// pointer. +// +// Returns true on success, false on failure. +bool FineLockPEQueue::GetEmpty(struct page_entry *pe, int32 mask) { + return GetRandomWithPredicateTag(pe, page_is_empty, mask); +} +bool FineLockPEQueue::GetEmpty(struct page_entry *pe) { + return GetRandomWithPredicate(pe, page_is_empty); +} + +// PutEmpty puts an empty page back into the queue, making it available by +// releasing the per-page-entry lock. +// +// Returns true on success, false on failure. +bool FineLockPEQueue::PutEmpty(struct page_entry *pe) { + if (!pe || !q_size_) + return false; + + int64 index = pe->offset / page_size_; + if (!valid_index(index)) + return false; + + pages_[index] = *pe; + // Enforce that page entry is indeed empty. + pages_[index].pattern = 0; + return (pthread_mutex_unlock(&(pagelocks_[index])) == 0); +} + +// PutValid puts a valid page back into the queue, making it available by +// releasing the per-page-entry lock. +// +// Returns true on success, false on failure. +bool FineLockPEQueue::PutValid(struct page_entry *pe) { + if (!pe || !page_is_valid(pe) || !q_size_) + return false; + + int64 index = pe->offset / page_size_; + if (!valid_index(index)) + return false; + + pages_[index] = *pe; + return (pthread_mutex_unlock(&(pagelocks_[index])) == 0); +} diff --git a/src/finelock_queue.h b/src/finelock_queue.h new file mode 100644 index 0000000..54b154e --- /dev/null +++ b/src/finelock_queue.h @@ -0,0 +1,116 @@ +// Copyright 2007 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This page entry queue implementation with fine grain locks aim to ease +// lock contention over previous queue implementation (with one lock protecting +// the entire queue). 
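The random-but-exhaustive traversal used by GetRandomWithPredicateTag() above hinges on the generator built in the constructor: for modulus m, the getA()/getC() construction picks an 'a' where a - 1 is divisible by every prime factor of m (and by 4 when 4 divides m) and a 'c' coprime to m, which by the Hull-Dobell theorem makes n -> (a*n + c) % m a full-period permutation, so every queue index is visited exactly once before any index repeats. A small standalone check of that property, re-deriving a and c the same way; the demo queue size of 240 is an arbitrary value chosen for illustration:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Same result as FineLockPEQueue::getA(): a - 1 collects every prime
    // factor of m (and stays divisible by 4 when 4 divides m).
    static int64_t GetA(int64_t m) {
      int64_t remaining = m, a = 1;
      if ((remaining / 4) * 4 == remaining) a = 2;
      for (int64_t i = 2; i <= m; i++) {
        if ((remaining / i) * i == remaining) {
          while ((remaining / i) * i == remaining) remaining /= i;
          a *= i;
        }
      }
      return (a + 1) % m;
    }

    // Same result as FineLockPEQueue::getC(): the largest prime at or below
    // roughly 3m/4, which is automatically coprime to m.
    static int64_t GetC(int64_t m) {
      for (int64_t p = (3 * m) / 4 + 1; p > 1; p--) {
        bool prime = true;
        for (int64_t i = 2; i * i <= p; i++)
          if (p % i == 0) { prime = false; break; }
        if (prime) return p;
      }
      return 1;
    }

    // Verify that n -> (a*n + c) % m really is a full-period permutation.
    int main() {
      const int64_t m = 240;  // demo queue size
      const int64_t a = GetA(m), c = GetC(m);
      std::vector<bool> seen(m, false);
      int64_t n = 0, count = 0;
      do {
        seen[n] = true;
        n = (a * n + c) % m;
        count++;
      } while (n != 0 && count <= m);
      bool full = (count == m);
      for (int64_t i = 0; i < m; i++) full = full && seen[i];
      printf("a=%lld c=%lld full period: %s\n",
             (long long)a, (long long)c, full ? "yes" : "no");
      return 0;
    }

Replacing this generator with plain sequential retry would bias selection toward pages sitting just after long runs of locked or non-matching entries, which is exactly the problem the constructor comment above calls out.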
+ +#ifndef STRESSAPPTEST_FINELOCK_QUEUE_H_ +#define STRESSAPPTEST_FINELOCK_QUEUE_H_ + +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "sattypes.h" +#include "pattern.h" +#include "queue.h" // Using page_entry struct. +#include "os.h" + +// This is a threadsafe randomized queue of pages with per-page entry lock +// for worker threads to use. +class FineLockPEQueue { + public: + FineLockPEQueue(uint64 queuesize, int64 pagesize); + ~FineLockPEQueue(); + + // Put and get functions for page entries. + bool GetEmpty(struct page_entry *pe); + bool GetValid(struct page_entry *pe); + bool PutEmpty(struct page_entry *pe); + bool PutValid(struct page_entry *pe); + + // Put and get functions for page entries, selecting on tags. + bool GetEmpty(struct page_entry *pe, int32 tag); + bool GetValid(struct page_entry *pe, int32 tag); + + bool QueueAnalysis(); + bool GetPageFromPhysical(uint64 paddr, struct page_entry *pe); + void set_os(OsLayer *os); + OsLayer::ErrCallback get_err_log_callback(); + bool ErrorLogCallback(uint64 paddr, string *buf); + + private: + // Not that much blocking random number generator. + uint64 GetRandom64(); + uint64 GetRandom64FromSlot(int slot); + + // Helper function to check index range, returns true if index is valid. + bool valid_index(int64 index) { return index >= 0 && index < q_size_; } + + // Returns true if page entry is valid, false otherwise. + static bool page_is_valid(struct page_entry *pe) { + return pe->pattern != NULL; + } + // Returns true if page entry is empty, false otherwise. + static bool page_is_empty(struct page_entry *pe) { + return pe->pattern == NULL; + } + + // Helper function to get a random page entry with given predicate, + // ie, page_is_valid() or page_is_empty() as defined above. + bool GetRandomWithPredicate(struct page_entry *pe, + bool (*pred_func)(struct page_entry*)); + + // Helper function to get a random page entry with given predicate, + // ie, page_is_valid() or page_is_empty() as defined above. + bool GetRandomWithPredicateTag(struct page_entry *pe, + bool (*pred_func)(struct page_entry*), + int32 tag); + + // Used to make a linear congruential path through the queue. + int64 getA(int64 m); + int64 getC(int64 m); + + pthread_mutex_t *pagelocks_; // Per-page-entry locks. + struct page_entry *pages_; // Where page entries are held. + int64 q_size_; // Size of the queue. + int64 page_size_; // For calculating array index from offset. + + enum { + kTries = 1, // Measure the number of attempts in the queue + // before getting a matching page. + kTouch = 2 } // Measure the number of touches on each page. + queue_metric_; // What to measure in the 'tries' field. + + // Progress pseudorandomly through the queue. It's required that we can find + // every value in the list, but progressing through the same order each time + // causes bunching of pages, leading to long seach times for the correct + // type of pages. + int64 a_; // 'a' multiplicative value for progressing + // linear congruentially through the list. + int64 c_; // 'c' additive value for prgressing randomly + // through the list. + int64 modlength_; // 'm' mod value for linear congruential + // generator. Used when q_size_ doesn't + // generate a good progression through the + // list. + + uint64 rand_seed_[4]; // Random number state for 4 generators. + pthread_mutex_t randlocks_[4]; // Per-random-generator locks. 
+ + DISALLOW_COPY_AND_ASSIGN(FineLockPEQueue); +}; + +#endif // STRESSAPPTEST_FINELOCK_QUEUE_H_ diff --git a/src/logger.cc b/src/logger.cc new file mode 100644 index 0000000..81f1e3e --- /dev/null +++ b/src/logger.cc @@ -0,0 +1,151 @@ +// Copyright 2009 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "logger.h" + +#include +#include +#include +#include + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "sattypes.h" + + +Logger *Logger::GlobalLogger() { + static Logger logger; + return &logger; +} + +void Logger::VLogF(int priority, const char *format, va_list args) { + if (priority > verbosity_) { + return; + } + char buffer[4096]; + int length = vsnprintf(buffer, sizeof buffer, format, args); + if (length >= sizeof buffer) { + length = sizeof buffer; + buffer[sizeof buffer - 1] = '\n'; + } + QueueLogLine(new string(buffer, length)); +} + +void Logger::StartThread() { + LOGGER_ASSERT(!thread_running_); + thread_running_ = true; + LOGGER_ASSERT(0 == pthread_create(&thread_, NULL, &StartRoutine, this)); +} + +void Logger::StopThread() { + LOGGER_ASSERT(thread_running_); + thread_running_ = false; + LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + bool need_cond_signal = queued_lines_.empty(); + queued_lines_.push_back(NULL); + LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); + if (need_cond_signal) { + LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_)); + } + LOGGER_ASSERT(0 == pthread_join(thread_, NULL)); +} + +Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) { + LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL)); + LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL)); + LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL)); +} + +Logger::~Logger() { + LOGGER_ASSERT(0 == pthread_mutex_destroy(&queued_lines_mutex_)); + LOGGER_ASSERT(0 == pthread_cond_destroy(&queued_lines_cond_)); + LOGGER_ASSERT(0 == pthread_cond_destroy(&full_queue_cond_)); +} + +void Logger::QueueLogLine(string *line) { + LOGGER_ASSERT(line != NULL); + LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + if (thread_running_) { + if (queued_lines_.size() >= kMaxQueueSize) { + LOGGER_ASSERT(0 == pthread_cond_wait(&full_queue_cond_, + &queued_lines_mutex_)); + } + if (queued_lines_.empty()) { + LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_)); + } + queued_lines_.push_back(line); + } else { + WriteAndDeleteLogLine(line); + } + LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); +} + +namespace { +void WriteToFile(const string& line, int fd) { + LOGGER_ASSERT(write(fd, line.data(), line.size()) == line.size()); +} +} + +void Logger::WriteAndDeleteLogLine(string *line) { + LOGGER_ASSERT(line != NULL); + if (log_fd_ >= 0) { + WriteToFile(*line, log_fd_); + } + WriteToFile(*line, 1); + delete line; +} + +void *Logger::StartRoutine(void *ptr) { + Logger *self = 
static_cast(ptr); + self->ThreadMain(); + return NULL; +} + +void Logger::ThreadMain() { + vector local_queue; + LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + + for (;;) { + if (queued_lines_.empty()) { + LOGGER_ASSERT(0 == pthread_cond_wait(&queued_lines_cond_, + &queued_lines_mutex_)); + continue; + } + + // We move the log lines into a local queue so we can release the lock + // while writing them to disk, preventing other threads from blocking on + // our writes. + local_queue.swap(queued_lines_); + if (local_queue.size() >= kMaxQueueSize) { + LOGGER_ASSERT(0 == pthread_cond_broadcast(&full_queue_cond_)); + } + + // Unlock while we process our local queue. + LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); + for (vector::const_iterator it = local_queue.begin(); + it != local_queue.end(); ++it) { + if (*it == NULL) { + // NULL is guaranteed to be at the end. + return; + } + WriteAndDeleteLogLine(*it); + } + local_queue.clear(); + // We must hold the lock at the start of each iteration of this for loop. + LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + } +} diff --git a/src/logger.h b/src/logger.h new file mode 100644 index 0000000..3eaea57 --- /dev/null +++ b/src/logger.h @@ -0,0 +1,142 @@ +// Copyright 2009 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_LOGGER_H_ +#define STRESSAPPTEST_LOGGER_H_ + +#include +#include + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "sattypes.h" + +// Attempts to log additional lines will block when the queue reaches this size. +// Due to how the logging thread works, up to twice this many log lines may be +// outstanding at any point. +static const int kMaxQueueSize = 250; + + +// This is only for use by the Logger class, do not use it elsewhere! +// +// All Logger assertions should use this macro instead of sat_assert(). +// +// This is like sat_assert() from sattypes.h, but whereas sat_assert() tries to +// log the assertion after printing it to stderr, this only prints it to stderr. +// Logging from within the wrong part of the logger would trigger a deadlock, +// and even in places where it wouldn't there's a very good chance that the +// logger is in no condition to handle new log lines. +#define LOGGER_ASSERT(x) \ +{\ + if (!(x)) {\ + fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ + exit(1);\ + }\ +} + + +// This class handles logging in SAT. It is a singleton accessed via +// GlobalLogger(). +// +// By default log lines are written in the calling thread. Call StartThread() +// to launch a dedicated thread for the writes. +class Logger { + public: + // Returns a pointer to the single global Logger instance. Will not return + // NULL. + static Logger *GlobalLogger(); + + // Lines with a priority numerically greater than this will not be logged. + // May not be called while multiple threads are running. 
+ void SetVerbosity(int verbosity) { + verbosity_ = verbosity; + } + + // Sets a file to log to, in addition to stdout. May not be called while + // multiple threads are running. + // + // Args: + // log_fd: The file descriptor to write to. Will not be closed by this + // object. + void SetLogFd(int log_fd) { + LOGGER_ASSERT(log_fd >= 0); + log_fd_ = log_fd; + } + + // Set output to be written to stdout only. This is the default mode. May + // not be called while multiple threads are running. + void SetStdoutOnly() { + log_fd_ = -1; + } + + // Logs a line, with a vprintf(3)-like interface. This will block on writing + // the line to stdout/disk iff the dedicated logging thread is not running. + // This will block on adding the line to the queue if doing so would exceed + // kMaxQueueSize. + // + // Args: + // priority: If this is numerically greater than the verbosity, the line + // will not be logged. + // format: see vprintf(3) + // args: see vprintf(3) + void VLogF(int priority, const char *format, va_list args); + + // Starts the dedicated logging thread. May not be called while multiple + // threads are already running. + void StartThread(); + + // Stops the dedicated logging thread. May only be called when the logging + // thread is the only other thread running. Any queued lines will be logged + // before this returns. Waits for the thread to finish before returning. + void StopThread(); + + private: + Logger(); + + ~Logger(); + + // Args: + // line: Must be non-NULL. This function takes ownership of it. + void QueueLogLine(string *line); + + // Args: + // line: Must be non-NULL. This function takes ownership of it. + void WriteAndDeleteLogLine(string *line); + + // Callback for pthread_create(3). + static void *StartRoutine(void *ptr); + + // Processes the log queue. + void ThreadMain(); + + pthread_t thread_; + int verbosity_; + int log_fd_; + bool thread_running_; + vector queued_lines_; + // This doubles as a mutex for log_fd_ when the logging thread is not running. + pthread_mutex_t queued_lines_mutex_; + // Lets the logging thread know that the queue is no longer empty. + pthread_cond_t queued_lines_cond_; + // Lets the threads blocked on the queue having reached kMaxQueueSize know + // that the queue has been emptied. + pthread_cond_t full_queue_cond_; + + DISALLOW_COPY_AND_ASSIGN(Logger); +}; + +#endif // STRESSAPPTEST_LOGGER_H_ diff --git a/src/main.cc b/src/main.cc new file mode 100644 index 0000000..04cd536 --- /dev/null +++ b/src/main.cc @@ -0,0 +1,56 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
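Before the main() driver, a short sketch of how the Logger described above is meant to be driven over a run. The call sites that actually do this live outside this hunk, so the sequence, file path, and verbosity value below are illustrative assumptions, not code from this patch.

    #include <fcntl.h>
    #include <stdarg.h>

    #include "logger.h"

    // Thin vararg wrapper over VLogF(), defined here only for the demo.
    static void LogLine(int priority, const char *format, ...) {
      va_list args;
      va_start(args, format);
      Logger::GlobalLogger()->VLogF(priority, format, args);
      va_end(args);
    }

    // Hypothetical driver: configure the logger while single-threaded, run
    // the noisy phase with the dedicated writer thread, then drain and stop.
    int RunWithLogging() {
      Logger *log = Logger::GlobalLogger();
      log->SetVerbosity(8);                 // drop anything chattier than 8
      int fd = open("/tmp/sat.log", O_WRONLY | O_CREAT | O_APPEND, 0644);
      if (fd >= 0)
        log->SetLogFd(fd);                  // mirror output to a file

      log->StartThread();                   // writes now happen off-thread
      LogLine(5, "Log: starting stress pass\n");
      // ... workers log here; if they outrun the writer they block once
      // kMaxQueueSize lines are queued ...
      log->StopThread();                    // drains the queue, joins thread
      return 0;
    }

Note that SetVerbosity(), SetLogFd(), StartThread() and StopThread() are all documented above as single-threaded operations, which is why this sketch does every configuration step before any worker threads would exist.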
+ +// sat.cc : a stress test for stressful testing + +#include "sattypes.h" +#include "sat.h" + +int main(int argc, char **argv) { + Sat *sat = SatFactory(); + if (sat == NULL) { + logprintf(0, "Process Error: failed to allocate Sat object\n"); + return 255; + } + + if (!sat->ParseArgs(argc, argv)) { + logprintf(0, "Process Error: Sat::ParseArgs() failed\n"); + sat->bad_status(); + } else if (!sat->Initialize()) { + logprintf(0, "Process Error: Sat::Initialize() failed\n"); + sat->bad_status(); + } else if (!sat->Run()) { + logprintf(0, "Process Error: Sat::Run() failed\n"); + sat->bad_status(); + } + sat->PrintResults(); + if (!sat->Cleanup()) { + logprintf(0, "Process Error: Sat::Cleanup() failed\n"); + sat->bad_status(); + } + + int retval; + if (sat->status() != 0) { + logprintf(0, "Process Error: Fatal issue encountered. See above logs for " + "details.\n"); + retval = 1; + } else if (sat->errors() != 0) { + retval = 1; + } else { + retval = 0; + } + + delete sat; + return retval; +} diff --git a/src/os.cc b/src/os.cc new file mode 100644 index 0000000..5c8c8e0 --- /dev/null +++ b/src/os.cc @@ -0,0 +1,642 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// Author: nsanders +// +// os.cc : os and machine specific implementation +// Copyright 2006 Google Inc. +// for open source release under GPL + +// This file includes an abstracted interface +// for linux-distro specific and HW specific +// interfaces. + +#include "os.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 // remove when glibc defines it +#endif + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "sattypes.h" +#include "error_diag.h" + +// OsLayer initialization. +OsLayer::OsLayer() { + testmem_ = 0; + testmemsize_ = 0; + totalmemsize_ = 0; + error_injection_ = false; + normal_mem_ = true; + time_initialized_ = 0; + + regionsize_ = 0; + regioncount_ = 1; + num_cpus_ = 0; + num_nodes_ = 0; + num_cpus_per_node_ = 0; + error_diagnoser_ = 0; + err_log_callback_ = 0; +} + +// OsLayer cleanup. +OsLayer::~OsLayer() { + if (error_diagnoser_) + delete error_diagnoser_; +} + +// OsLayer initialization. +bool OsLayer::Initialize() { + time_initialized_ = time(NULL); + use_hugepages_ = false; + shmid_ = 0; + if (num_cpus_ == 0) { + num_nodes_ = 1; + num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN); + num_cpus_per_node_ = num_cpus_ / num_nodes_; + } + logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_); + sat_assert(CPU_SETSIZE >= num_cpus_); + cpu_sets_.resize(num_nodes_); + cpu_sets_valid_.resize(num_nodes_); + // Create error diagnoser. + error_diagnoser_ = new ErrorDiag(); + if (!error_diagnoser_->set_os(this)) + return false; + return true; +} + +// Machine type detected. Can we implement all these functions correctly? +bool OsLayer::IsSupported() { + // This is the default empty implementation. + // SAT won't really run correctly. + return false; +} + +int OsLayer::AddressMode() { + // Detect 32/64 bit binary. + void *pvoid = 0; + return sizeof(pvoid) * 8; +} + +// Translates user virtual to physical address. +uint64 OsLayer::VirtualToPhysical(void *vaddr) { + // Needs platform specific implementation. + return 0; +} + +// Returns the HD device that contains this file. 
+string OsLayer::FindFileDevice(string filename) { + return "hdUnknown"; +} + +// Returns a list of locations corresponding to HD devices. +list OsLayer::FindFileDevices() { + // No autodetection on unknown systems. + list locations; + return locations; +} + +// We need to flush the cacheline here. +void OsLayer::Flush(void *vaddr) { + // Use the generic flush. This function is just so we can override + // this if we are so inclined. + FastFlush(vaddr); +} + +// Translate user virtual to physical address. +int OsLayer::FindDimm(uint64 addr, char *buf, int len) { + char tmpbuf[256]; + snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown"); + snprintf(buf, len, "%s", tmpbuf); + return 0; +} + + +// Classifies addresses according to "regions" +// This isn't really implemented meaningfully here.. +int32 OsLayer::FindRegion(uint64 addr) { + static bool warned = false; + + if (regionsize_ == 0) { + regionsize_ = totalmemsize_ / 8; + if (regionsize_ < 512 * kMegabyte) + regionsize_ = 512 * kMegabyte; + regioncount_ = totalmemsize_ / regionsize_; + if (regioncount_ < 1) regioncount_ = 1; + } + + int32 region_num = addr / regionsize_; + if (region_num >= regioncount_) { + if (!warned) { + logprintf(0, "Log: region number %d exceeds region count %d\n", + region_num, regioncount_); + warned = true; + } + region_num = region_num % regioncount_; + } + return region_num; +} + +// Report which cores are associated with a given region. +cpu_set_t *OsLayer::FindCoreMask(int32 region) { + sat_assert(region >= 0); + region %= num_nodes_; + if (!cpu_sets_valid_[region]) { + CPU_ZERO(&cpu_sets_[region]); + for (int i = 0; i < num_cpus_per_node_; ++i) { + CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]); + } + logprintf(5, "Log: Region %d mask 0x%08X\n", + region, cpuset_to_uint32(&cpu_sets_[region])); + cpu_sets_valid_[region] = true; + } + return &cpu_sets_[region]; +} + +// Report an error in an easily parseable way. +bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) { + time_t now = time(NULL); + int ttf = now - time_initialized_; + logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf); + return true; +} + +// Read the number of hugepages out of the kernel interface in proc. +int64 OsLayer::FindHugePages() { + char buf[65] = "0"; + + // This is a kernel interface to query the numebr of hugepages + // available in the system. + static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages"; + int hpfile = open(hugepages_info_file, O_RDONLY); + + ssize_t bytes_read = read(hpfile, buf, 64); + close(hpfile); + + if (bytes_read <= 0) { + logprintf(12, "Log: /proc/sys/vm/nr_hugepages " + "read did not provide data\n"); + return 0; + } + + if (bytes_read == 64) { + logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages " + "is surprisingly large\n"); + return 0; + } + + // Add a null termintation to be string safe. + buf[bytes_read] = '\0'; + // Read the page count. + int64 pages = strtoull(buf, NULL, 10); // NOLINT + + return pages; +} + +int64 OsLayer::FindFreeMemSize() { + int64 size = 0; + int64 minsize = 0; + if (totalmemsize_ > 0) + return totalmemsize_; + + int64 pages = sysconf(_SC_PHYS_PAGES); + int64 avpages = sysconf(_SC_AVPHYS_PAGES); + int64 pagesize = sysconf(_SC_PAGESIZE); + int64 physsize = pages * pagesize; + int64 avphyssize = avpages * pagesize; + + // Assume 2MB hugepages. 
+ int64 hugepagesize = FindHugePages() * 2 * kMegabyte; + + if ((pages == -1) || (pagesize == -1)) { + logprintf(0, "Process Error: sysconf could not determine memory size.\n"); + return 0; + } + + // We want to leave enough stuff for things to run. + // If more than 2GB is present, leave 192M + 5% for other stuff. + // If less than 2GB is present use 85% of what's available. + // These are fairly arbitrary numbers that seem to work OK. + // + // TODO(nsanders): is there a more correct way to determine target + // memory size? + if (physsize < 2048LL * kMegabyte) + minsize = ((pages * 85) / 100) * pagesize; + else + minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + + // Use hugepage sizing if available. + if (hugepagesize > 0) { + if (hugepagesize < minsize) { + logprintf(0, "Procedural Error: Not enough hugepages. " + "%lldMB available < %lldMB required.\n", + hugepagesize / kMegabyte, + minsize / kMegabyte); + // Require the calculated minimum amount of memory. + size = minsize; + } else { + // Require that we get all hugepages. + size = hugepagesize; + } + } else { + // Require the calculated minimum amount of memory. + size = minsize; + } + + logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. " + "Targeting %lld MB (%lld%%)\n", + physsize / kMegabyte, + avphyssize / kMegabyte, + hugepagesize / kMegabyte, + size / kMegabyte, + size * 100 / physsize); + + totalmemsize_ = size; + return size; +} + +// Allocates all memory available. +int64 OsLayer::AllocateAllMem() { + int64 length = FindFreeMemSize(); + bool retval = AllocateTestMem(length, 0); + if (retval) + return length; + else + return 0; +} + +// Allocate the target memory. This may be from malloc, hugepage pool +// or other platform specific sources. +bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { + // Try hugepages first. + void *buf = 0; + + if (paddr_base) + logprintf(0, "Process Error: non zero paddr_base %#llx is not supported," + " ignore.\n", paddr_base); + + { // Allocate hugepage mapped memory. + int shmid; + void *shmaddr; + + if ((shmid = shmget(2, length, + SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { + int err = errno; + char errtxt[256] = ""; + strerror_r(err, errtxt, sizeof(errtxt)); + logprintf(12, "Log: failed to allocate shared mem object - err %d (%s)\n", + err, errtxt); + goto hugepage_failover; + } + + shmaddr = shmat(shmid, NULL, NULL); + if (shmaddr == reinterpret_cast(-1)) { + int err = errno; + char errtxt[256] = ""; + shmctl(shmid, IPC_RMID, NULL); + strerror_r(err, errtxt, sizeof(errtxt)); + logprintf(0, "Log: failed to attach shared mem object - err %d (%s).\n", + err, errtxt); + goto hugepage_failover; + } + use_hugepages_ = true; + shmid_ = shmid; + buf = shmaddr; + logprintf(0, "Log: Using hugepages 0x%x at %p.\n", shmid, shmaddr); + } + hugepage_failover: + + + if (!use_hugepages_) { + // Use memalign to ensure that blocks are aligned enough for disk direct IO. + buf = static_cast(memalign(4096, length)); + if (buf) + logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); + else + logprintf(0, "Process Error: memalign returned 0\n"); + } + + testmem_ = buf; + if (buf) { + testmemsize_ = length; + } else { + testmemsize_ = 0; + } + + return (buf != 0); +} + +// Free the test memory. +void OsLayer::FreeTestMem() { + if (testmem_) { + if (use_hugepages_) { + shmdt(testmem_); + shmctl(shmid_, IPC_RMID, NULL); + } else { + free(testmem_); + } + testmem_ = 0; + testmemsize_ = 0; + } +} + + +// Prepare the target memory. 
It may requre mapping in, or this may be a noop. +void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) { + sat_assert((offset + length) <= testmemsize_); + return reinterpret_cast(reinterpret_cast(testmem_) + offset); +} + +// Release the test memory resources, if any. +void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) { +} + +// No error polling on unknown systems. +int OsLayer::ErrorPoll() { + return 0; +} + +// Generally, poll for errors once per second. +void OsLayer::ErrorWait() { + sat_sleep(1); + return; +} + +// Open a PCI bus-dev-func as a file and return its file descriptor. +// Error is indicated by return value less than zero. +int OsLayer::PciOpen(int bus, int device, int function) { + char dev_file[256]; + + snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x", + bus, device, function); + + int fd = open(dev_file, O_RDWR); + if (fd == -1) { + logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, " + "function %d (errno %d).\n", + bus, device, function, errno); + return -1; + } + + return fd; +} + + +// Read and write functions to access PCI config. +uint32 OsLayer::PciRead(int fd, uint32 offset, int width) { + // Strict aliasing rules lawyers will cause data corruption + // on cast pointers in some gccs. + union { + uint32 l32; + uint16 l16; + uint8 l8; + } datacast; + datacast.l32 = 0; + uint32 size = width / 8; + + sat_assert((width == 32) || (width == 16) || (width == 8)); + sat_assert(offset <= (256 - size)); + + if (lseek(fd, offset, SEEK_SET) < 0) { + logprintf(0, "Process Error: Can't seek %x\n", offset); + return 0; + } + if (read(fd, &datacast, size) != size) { + logprintf(0, "Process Error: Can't read %x\n", offset); + return 0; + } + + // Extract the data. + switch (width) { + case 8: + sat_assert(&(datacast.l8) == reinterpret_cast(&datacast)); + return datacast.l8; + case 16: + sat_assert(&(datacast.l16) == reinterpret_cast(&datacast)); + return datacast.l16; + case 32: + return datacast.l32; + } + return 0; +} + +void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) { + // Strict aliasing rules lawyers will cause data corruption + // on cast pointers in some gccs. + union { + uint32 l32; + uint16 l16; + uint8 l8; + } datacast; + datacast.l32 = 0; + uint32 size = width / 8; + + sat_assert((width == 32) || (width == 16) || (width == 8)); + sat_assert(offset <= (256 - size)); + + // Cram the data into the right alignment. + switch (width) { + case 8: + sat_assert(&(datacast.l8) == reinterpret_cast(&datacast)); + datacast.l8 = value; + case 16: + sat_assert(&(datacast.l16) == reinterpret_cast(&datacast)); + datacast.l16 = value; + case 32: + datacast.l32 = value; + } + + if (lseek(fd, offset, SEEK_SET) < 0) { + logprintf(0, "Process Error: Can't seek %x\n", offset); + return; + } + if (write(fd, &datacast, size) != size) { + logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset); + return; + } + + return; +} + + + +// Open dev msr. +int OsLayer::OpenMSR(uint32 core, uint32 address) { + char buf[256]; + snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core); + int fd = open(buf, O_RDWR); + if (fd < 0) + return fd; + + uint32 pos = lseek(fd, address, SEEK_SET); + if (pos != address) { + close(fd); + logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core); + return -1; + } + + return fd; +} + +bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) { + int fd = OpenMSR(core, address); + if (fd < 0) + return false; + + // Read from the msr. 
+ bool res = (sizeof(*data) == read(fd, data, sizeof(*data))); + + if (!res) + logprintf(5, "Log: Failed to read msr %x core %d\n", address, core); + + close(fd); + + return res; +} + +bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) { + int fd = OpenMSR(core, address); + if (fd < 0) + return false; + + // Write to the msr + bool res = (sizeof(*data) == write(fd, data, sizeof(*data))); + + if (!res) + logprintf(5, "Log: Failed to write msr %x core %d\n", address, core); + + close(fd); + + return res; +} + +// Extract bits [n+len-1, n] from a 32 bit word. +// so GetBitField(0x0f00, 8, 4) == 0xf. +uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) { + return (val >> n) & ((1<d_name[0] == '.') + continue; + + device = new PCIDevice(); + if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d", + &device->domain, &device->bus, &dev, &func) < 4) { + logprintf(0, "Process Error: Couldn't parse %s", entry->d_name); + free(device); + continue; + } + device->dev = dev; + device->func = func; + device->vendor_id = PCIGetValue(entry->d_name, "vendor"); + device->device_id = PCIGetValue(entry->d_name, "device"); + PCIGetResources(entry->d_name, device); + device_list.insert(device_list.end(), device); + } + closedir(dir); + delete buf; + return device_list; +} + +int OsLayer::PCIGetValue(string name, string object) { + int fd, len; + char filename[256]; + char buf[256]; + snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, + name.c_str(), object.c_str()); + fd = open(filename, O_RDONLY); + if (fd < 0) + return 0; + len = read(fd, buf, 256); + close(fd); + buf[len] = '\0'; + return strtol(buf, NULL, 0); // NOLINT +} + +int OsLayer::PCIGetResources(string name, PCIDevice *device) { + char filename[256]; + char buf[256]; + FILE *file; + int64 start; + int64 end; + int64 size; + int i; + snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, + name.c_str(), "resource"); + file = fopen(filename, "r"); + if (!file) { + logprintf(0, "Process Error: impossible to find resource file for %s", + filename); + return errno; + } + for (i = 0; i < 6; i++) { + if (!fgets(buf, 256, file)) + break; + sscanf(buf, "%llx %llx", &start, &end); // NOLINT + size = 0; + if (start) + size = end - start + 1; + device->base_addr[i] = start; + device->size[i] = size; + } + fclose(file); + return 0; +} diff --git a/src/os.h b/src/os.h new file mode 100644 index 0000000..6ace58c --- /dev/null +++ b/src/os.h @@ -0,0 +1,265 @@ +// Copyright 2006 Google Inc. All Rights Reserved. +// Author: nsanders, menderico + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_OS_H_ // NOLINT +#define STRESSAPPTEST_OS_H_ + +#include +#include +#include +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. 
+#include "adler32memcpy.h" // NOLINT +#include "sattypes.h" // NOLINT + +const char kSysfsPath[] = "/sys/bus/pci/devices"; + +struct PCIDevice { + int32 domain; + uint16 bus; + uint8 dev; + uint8 func; + uint16 vendor_id; + uint16 device_id; + uint64 base_addr[6]; + uint64 size[6]; +}; + +typedef vector PCIDevices; + +class ErrorDiag; + +// This class implements OS/Platform specific funtions. +class OsLayer { + public: + OsLayer(); + virtual ~OsLayer(); + + // Initializes data strctures and open files. + // Returns false on error. + virtual bool Initialize(); + + // Virtual to physical. This implementation is optional for + // subclasses to implement. + // Takes a pointer, and returns the corresponding bus address. + virtual uint64 VirtualToPhysical(void *vaddr); + + // Prints failed dimm. This implementation is optional for + // subclasses to implement. + // Takes a bus address and string, and prints the DIMM name + // into the string. Returns error status. + virtual int FindDimm(uint64 addr, char *buf, int len); + // Print dimm info, plus more available info. + virtual int FindDimmExtended(uint64 addr, char *buf, int len) { + return FindDimm(addr, buf, len); + } + + + // Classifies addresses according to "regions" + // This may mean different things on different platforms. + virtual int32 FindRegion(uint64 paddr); + // Find cpu cores associated with a region. Either NUMA or arbitrary. + virtual cpu_set_t *FindCoreMask(int32 region); + + // Returns the HD device that contains this file. + virtual string FindFileDevice(string filename); + + // Returns a list of paths coresponding to HD devices found on this machine. + virtual list FindFileDevices(); + + // Polls for errors. This implementation is optional. + // This will poll once for errors and return zero iff no errors were found. + virtual int ErrorPoll(); + + // Delay an appropriate amount of time between polling. + virtual void ErrorWait(); + + // Report errors. This implementation is mandatory. + // This will output a machine readable line regarding the error. + virtual bool ErrorReport(const char *part, const char *symptom, int count); + + // Flushes cacheline. Used to distinguish read or write errors. + // Subclasses may implement this in machine specific ways.. + // Takes a pointer, and flushed the cacheline containing that pointer. + virtual void Flush(void *vaddr); + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. + inline static void FastFlush(void *vaddr) { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("dcbf 0,%0; sync" : : "r" (vaddr)); +#else + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("mfence"); + asm volatile("clflush (%0)" :: "r" (vaddr)); + asm volatile("mfence"); +#endif + } + + // Get time in cpu timer ticks. Useful for matching MCEs with software + // actions. 
+ inline static uint64 GetTimestamp(void) { + uint64 tsc; +#ifdef STRESSAPPTEST_CPU_PPC + uint32 tbl, tbu, temp; + __asm __volatile( + "1:\n" + "mftbu %2\n" + "mftb %0\n" + "mftbu %1\n" + "cmpw %2,%1\n" + "bne 1b\n" + : "=r"(tbl), "=r"(tbu), "=r"(temp) + : + : "cc"); + + tsc = (static_cast(tbu) << 32) | static_cast(tbl); +#else + datacast_t data; + __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); + tsc = data.l64; + +#endif + return (tsc); + } + + // Find the free memory on the machine. + virtual int64 FindFreeMemSize(); + + // Allocates test memory of length bytes. + // Subclasses must implement this. + // Call PepareTestMem to get a pointer. + virtual int64 AllocateAllMem(); // Returns length. + // Returns success. + virtual bool AllocateTestMem(int64 length, uint64 paddr_base); + virtual void FreeTestMem(); + + // Prepares the memory for use. You must call this + // before using test memory, and after you are done. + virtual void *PrepareTestMem(uint64 offset, uint64 length); + virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length); + + // Machine type detected. Can we implement all these functions correctly? + // Returns true if machine type is detected and implemented. + virtual bool IsSupported(); + + // Returns 32 for 32-bit, 64 for 64-bit. + virtual int AddressMode(); + + // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file. + virtual int PciOpen(int bus, int device, int function); + virtual void PciWrite(int fd, uint32 offset, uint32 value, int width); + virtual uint32 PciRead(int fd, uint32 offset, int width); + + // Read MSRs + virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data); + virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data); + + // Extract bits [n+len-1, n] from a 32 bit word. + // so GetBitField(0x0f00, 8, 4) == 0xf. + virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len); + + // Platform and CPU specific CPU-stressing function. + // Returns true on success, false otherwise. + virtual bool CpuStressWorkload(); + + // Causes false errors for unittesting. + // Setting to "true" causes errors to be injected. + void set_error_injection(bool errors) { error_injection_ = errors; } + bool error_injection() const { return error_injection_; } + + // Is SAT using normal malloc'd memory, or exotic mmap'd memory. + bool normal_mem() const { return normal_mem_; } + + // Get numa config, if available.. + int num_nodes() const { return num_nodes_; } + int num_cpus() const { return num_cpus_; } + + // Handle to platform-specific error diagnoser. + ErrorDiag *error_diagnoser_; + + // Detect all PCI Devices. + virtual PCIDevices GetPCIDevices(); + + // Default platform dependent warm Adler memcpy to C implementation + // for compatibility. + virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, + unsigned int size_in_bytes, + AdlerChecksum *checksum) + {return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);} + + // Store a callback to use to print + // app-specific info about the last error location. + // This call back is called with a physical address, and the app can fill in + // the most recent transaction that occurred at that address. + typedef bool (*ErrCallback)(uint64 paddr, string *buf); + void set_err_log_callback( + ErrCallback err_log_callback) { + err_log_callback_ = err_log_callback; + } + ErrCallback get_err_log_callback() { return err_log_callback_; } + + protected: + void *testmem_; // Location of test memory. + int64 testmemsize_; // Size of test memory. 
+ int64 totalmemsize_; // Size of available memory. + bool error_injection_; // Do error injection? + bool normal_mem_; // Memory DMA capable? + bool use_hugepages_; // Use hugepage shmem? + int shmid_; // Handle to shmem + + int64 regionsize_; // Size of memory "regions" + int regioncount_; // Number of memory "regions" + int num_cpus_; // Number of cpus in the system. + int num_nodes_; // Number of nodes in the system. + int num_cpus_per_node_; // Number of cpus per node in the system. + + time_t time_initialized_; // Start time of test. + + vector cpu_sets_; // Cache for cpu masks. + vector cpu_sets_valid_; // If the cpu mask cache is valid. + + // Get file descriptor for dev msr. + virtual int OpenMSR(uint32 core, uint32 address); + // Auxiliary methods for PCI device configuration + int PCIGetValue(string name, string object); + int PCIGetResources(string name, PCIDevice *device); + + // Look up how many hugepages there are. + virtual int64 FindHugePages(); + + // Link to find last transaction at an error location. + ErrCallback err_log_callback_; + + private: + DISALLOW_COPY_AND_ASSIGN(OsLayer); +}; + +// Selects and returns the proper OS and hardware interface. +OsLayer *OsLayerFactory(const std::map &options); + +#endif // STRESSAPPTEST_OS_H_ NOLINT diff --git a/src/os_factory.cc b/src/os_factory.cc new file mode 100644 index 0000000..8acf573 --- /dev/null +++ b/src/os_factory.cc @@ -0,0 +1,41 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file generates an OS interface class consistant with the +// current machine type. No machine type detection is currently done. + +#include +#include +#include +#include + +#include +#include + +#include "os.h" + + +// Select the proper OS and hardware interface. +OsLayer *OsLayerFactory(const std::map &options) { + OsLayer *os = 0; + os = new OsLayer(); + + // Check for memory allocation failure. + if (!os) { + logprintf(0, "Process Error: Can't allocate memory\n"); + return 0; + } + os->Initialize(); + return os; +} diff --git a/src/pattern.cc b/src/pattern.cc new file mode 100644 index 0000000..2fb552a --- /dev/null +++ b/src/pattern.cc @@ -0,0 +1,421 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// pattern.cc : library of stressful data patterns + +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. 
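An aside on how two of the pieces above plug together before the pattern tables: OsLayer stores an ErrCallback via set_err_log_callback(), and FineLockPEQueue hands one out via get_err_log_callback(), so error reports can be annotated with the last transaction seen at a failing physical address. The code that performs this wiring is not part of this hunk, so the function below is an assumed sketch of the hookup, using only calls declared above:

    #include "finelock_queue.h"
    #include "os.h"

    // Hypothetical owner of both objects; in the real tree this wiring lives
    // in higher-level setup code that is not shown in this patch hunk.
    bool WireErrorAnnotation(OsLayer *os, FineLockPEQueue *queue) {
      // Give the queue a handle to the OS layer so its callback can map a
      // page offset back to virtual/physical addresses when asked.
      queue->set_os(os);
      // Register the queue's callback; OsLayer keeps it and can later ask
      // "what was last written at this physical address?" while reporting.
      os->set_err_log_callback(queue->get_err_log_callback());
      return os->get_err_log_callback() != NULL;
    }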
+#include "pattern.h" +#include "sattypes.h" + +// Static data patterns. + +static unsigned int walkingOnes_data[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080, + 0x00000100, 0x00000200, 0x00000400, 0x00000800, + 0x00001000, 0x00002000, 0x00004000, 0x00008000, + 0x00010000, 0x00020000, 0x00040000, 0x00080000, + 0x00100000, 0x00200000, 0x00400000, 0x00800000, + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x40000000, 0x20000000, 0x10000000, 0x08000000, + 0x04000000, 0x02000000, 0x01000000, 0x00800000, + 0x00400000, 0x00200000, 0x00100000, 0x00080000, + 0x00040000, 0x00020000, 0x00010000, 0x00008000, + 0x00004000, 0x00002000, 0x00001000, 0x00000800, + 0x00000400, 0x00000200, 0x00000100, 0x00000080, + 0x00000040, 0x00000020, 0x00000010, 0x00000008, + 0x00000004, 0x00000002, 0x00000001, 0x00000000 +}; +static const struct PatternData walkingOnes = { + "walkingOnes", + walkingOnes_data, + (sizeof walkingOnes_data / sizeof walkingOnes_data[0]) - 1, + {1, 1, 2, 1} // Weight for choosing 32/64/128/256 bit wide of this pattern +}; + +static unsigned int walkingInvOnes_data[] = { + 0x00000001, 0xfffffffe, 0x00000002, 0xfffffffd, + 0x00000004, 0xfffffffb, 0x00000008, 0xfffffff7, + 0x00000010, 0xffffffef, 0x00000020, 0xffffffdf, + 0x00000040, 0xffffffbf, 0x00000080, 0xffffff7f, + 0x00000100, 0xfffffeff, 0x00000200, 0xfffffdff, + 0x00000400, 0xfffffbff, 0x00000800, 0xfffff7ff, + 0x00001000, 0xffffefff, 0x00002000, 0xffffdfff, + 0x00004000, 0xffffbfff, 0x00008000, 0xffff7fff, + 0x00010000, 0xfffeffff, 0x00020000, 0xfffdffff, + 0x00040000, 0xfffbffff, 0x00080000, 0xfff7ffff, + 0x00100000, 0xffefffff, 0x00200000, 0xffdfffff, + 0x00400000, 0xffbfffff, 0x00800000, 0xff7fffff, + 0x01000000, 0xfeffffff, 0x02000000, 0xfdffffff, + 0x04000000, 0xfbffffff, 0x08000000, 0xf7ffffff, + 0x10000000, 0xefffffff, 0x20000000, 0xdfffffff, + 0x40000000, 0xbfffffff, 0x80000000, 0x7fffffff, + 0x40000000, 0xbfffffff, 0x20000000, 0xdfffffff, + 0x10000000, 0xefffffff, 0x08000000, 0xf7ffffff, + 0x04000000, 0xfbffffff, 0x02000000, 0xfdffffff, + 0x01000000, 0xfeffffff, 0x00800000, 0xff7fffff, + 0x00400000, 0xffbfffff, 0x00200000, 0xffdfffff, + 0x00100000, 0xffefffff, 0x00080000, 0xfff7ffff, + 0x00040000, 0xfffbffff, 0x00020000, 0xfffdffff, + 0x00010000, 0xfffeffff, 0x00008000, 0xffff7fff, + 0x00004000, 0xffffbfff, 0x00002000, 0xffffdfff, + 0x00001000, 0xffffefff, 0x00000800, 0xfffff7ff, + 0x00000400, 0xfffffbff, 0x00000200, 0xfffffdff, + 0x00000100, 0xfffffeff, 0x00000080, 0xffffff7f, + 0x00000040, 0xffffffbf, 0x00000020, 0xffffffdf, + 0x00000010, 0xffffffef, 0x00000008, 0xfffffff7, + 0x00000004, 0xfffffffb, 0x00000002, 0xfffffffd, + 0x00000001, 0xfffffffe, 0x00000000, 0xffffffff +}; +static const struct PatternData walkingInvOnes = { + "walkingInvOnes", + walkingInvOnes_data, + (sizeof walkingInvOnes_data / sizeof walkingInvOnes_data[0]) - 1, + {2, 2, 5, 5} +}; + +static unsigned int walkingZeros_data[] = { + 0xfffffffe, 0xfffffffd, 0xfffffffb, 0xfffffff7, + 0xffffffef, 0xffffffdf, 0xffffffbf, 0xffffff7f, + 0xfffffeff, 0xfffffdff, 0xfffffbff, 0xfffff7ff, + 0xffffefff, 0xffffdfff, 0xffffbfff, 0xffff7fff, + 0xfffeffff, 0xfffdffff, 0xfffbffff, 0xfff7ffff, + 0xffefffff, 0xffdfffff, 0xffbfffff, 0xff7fffff, + 0xfeffffff, 0xfdffffff, 0xfbffffff, 0xf7ffffff, + 0xefffffff, 0xdfffffff, 0xbfffffff, 0x7fffffff, + 0xbfffffff, 0xdfffffff, 0xefffffff, 0xf7ffffff, + 0xfbffffff, 0xfdffffff, 0xfeffffff, 0xff7fffff, + 0xffbfffff, 
0xffdfffff, 0xffefffff, 0xfff7ffff, + 0xfffbffff, 0xfffdffff, 0xfffeffff, 0xffff7fff, + 0xffffbfff, 0xffffdfff, 0xffffefff, 0xfffff7ff, + 0xfffffbff, 0xfffffdff, 0xfffffeff, 0xffffff7f, + 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, + 0xfffffffb, 0xfffffffd, 0xfffffffe, 0xffffffff +}; +static const struct PatternData walkingZeros = { + "walkingZeros", + walkingZeros_data, + (sizeof walkingZeros_data / sizeof walkingZeros_data[0]) - 1, + {1, 1, 2, 1} +}; + +static unsigned int OneZero_data[] = { 0x00000000, 0xffffffff}; +static const struct PatternData OneZero = { + "OneZero", + OneZero_data, + (sizeof OneZero_data / sizeof OneZero_data[0]) - 1, + {5, 5, 15, 5} +}; + +static unsigned int JustZero_data[] = { 0x00000000, 0x00000000}; +static const struct PatternData JustZero = { + "JustZero", + JustZero_data, + (sizeof JustZero_data / sizeof JustZero_data[0]) - 1, + {2, 0, 0, 0} +}; + +static unsigned int JustOne_data[] = { 0xffffffff, 0xffffffff}; +static const struct PatternData JustOne = { + "JustOne", + JustOne_data, + (sizeof JustOne_data / sizeof JustOne_data[0]) - 1, + {2, 0, 0, 0} +}; + +static unsigned int JustFive_data[] = { 0x55555555, 0x55555555}; +static const struct PatternData JustFive = { + "JustFive", + JustFive_data, + (sizeof JustFive_data / sizeof JustFive_data[0]) - 1, + {2, 0, 0, 0} +}; + +static unsigned int JustA_data[] = { 0xaaaaaaaa, 0xaaaaaaaa}; +static const struct PatternData JustA = { + "JustA", + JustA_data, + (sizeof JustA_data / sizeof JustA_data[0]) - 1, + {2, 0, 0, 0} +}; + +static unsigned int FiveA_data[] = { 0x55555555, 0xaaaaaaaa}; +static const struct PatternData FiveA = { + "FiveA", + FiveA_data, + (sizeof FiveA_data / sizeof FiveA_data[0]) - 1, + {1, 1, 1, 1} +}; + +static unsigned int FiveA8_data[] = { + 0x5aa5a55a, 0xa55a5aa5, 0xa55a5aa5, 0x5aa5a55a +}; +static const struct PatternData FiveA8 = { + "FiveA8", + FiveA8_data, + (sizeof FiveA8_data / sizeof FiveA8_data[0]) - 1, + {1, 1, 1, 1} +}; + +static unsigned int Long8b10b_data[] = { 0x16161616, 0x16161616 }; +static const struct PatternData Long8b10b = { + "Long8b10b", + Long8b10b_data, + (sizeof Long8b10b_data / sizeof Long8b10b_data[0]) - 1, + {2, 0, 0, 0} +}; + +static unsigned int Short8b10b_data[] = { 0xb5b5b5b5, 0xb5b5b5b5 }; +static const struct PatternData Short8b10b = { + "Short8b10b", + Short8b10b_data, + (sizeof Short8b10b_data / sizeof Short8b10b_data[0]) - 1, + {2, 0, 0, 0} +}; + +static unsigned int Checker8b10b_data[] = { 0xb5b5b5b5, 0x4a4a4a4a }; +static const struct PatternData Checker8b10b = { + "Checker8b10b", + Checker8b10b_data, + (sizeof Checker8b10b_data / sizeof Checker8b10b_data[0]) - 1, + {1, 0, 0, 1} +}; + +static unsigned int Five7_data[] = { 0x55555557, 0x55575555 }; +static const struct PatternData Five7 = { + "Five7", + Five7_data, + (sizeof Five7_data / sizeof Five7_data[0]) - 1, + {0, 2, 0, 0} +}; + +static unsigned int Zero2fd_data[] = { 0x00020002, 0xfffdfffd }; +static const struct PatternData Zero2fd = { + "Zero2fd", + Zero2fd_data, + (sizeof Zero2fd_data / sizeof Zero2fd_data[0]) - 1, + {0, 2, 0, 0} +}; + +// Extern array of useable patterns. 
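+// Each entry below is picked up by PatternList::Initialize(), which builds
+// 32/64/128/256-bit and bit-inverted variants of it, weighted for random
+// selection by PatternData::weight. As an illustrative sketch only (not part
+// of this change), a new pattern would follow the same shape:
+//   static unsigned int myPattern_data[] = { 0xdeadbeef, 0x21524110 };
+//   static const struct PatternData myPattern = {
+//     "myPattern",
+//     myPattern_data,
+//     (sizeof myPattern_data / sizeof myPattern_data[0]) - 1,  // mask
+//     {1, 1, 1, 1}  // weights for the 32/64/128/256 bit variants
+//   };
+// and would then be added to pattern_array[] below.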
+static const struct PatternData pattern_array[] = { + walkingOnes, + walkingInvOnes, + walkingZeros, + OneZero, + JustZero, + JustOne, + JustFive, + JustA, + FiveA, + FiveA8, + Long8b10b, + Short8b10b, + Checker8b10b, + Five7, + Zero2fd, +}; +static const int pattern_array_size = + sizeof pattern_array / sizeof pattern_array[0]; + +Pattern::Pattern() { + crc_ = NULL; +} + +Pattern::~Pattern() { + if (crc_ != NULL) { + delete crc_; + } +} + +// Calculate CRC for this pattern. This must match +// the CRC calculation in worker.cc. +int Pattern::CalculateCrc() { + // TODO(johnhuang): + // Consider refactoring to the form: + // while (i < count) AdlerInc(uint64, uint64, AdlerChecksum*) + uint64 a1 = 1; + uint64 a2 = 1; + uint64 b1 = 0; + uint64 b2 = 0; + + // checksum is calculated using only the first 4096 bytes of data. + int i = 0; + int blocksize = 4096; + int count = blocksize / sizeof i; + while (i < count) { + a1 += pattern(i); + b1 += a1; + i++; + a1 += pattern(i); + b1 += a1; + i++; + + a2 += pattern(i); + b2 += a2; + i++; + a2 += pattern(i); + b2 += a2; + i++; + } + if (crc_ != NULL) { + delete crc_; + } + crc_ = new AdlerChecksum(); + crc_->Set(a1, a2, b1, b2); + return 0; +} + +// Initialize pattern's CRC. +int Pattern::Initialize(const struct PatternData &pattern_init, + int buswidth, + bool invert, + int weight) { + int result = 1; + + pattern_ = &pattern_init; + busshift_ = 2; + inverse_ = invert; + weight_ = weight; + + name_.clear(); + name_.append(pattern_->name); + + if (invert) + name_.append("~"); + + if (buswidth == 32) { + name_.append("32"); + busshift_ = 0; + } else if (buswidth == 64) { + name_.append("64"); + busshift_ = 1; + } else if (buswidth == 128) { + name_.append("128"); + busshift_ = 2; + } else if (buswidth == 256) { + name_.append("256"); + busshift_ = 3; + } else { + logprintf(0, "Process Error: Confused by bus width %d\n", + buswidth); + name_.append("Broken"); + result = 0; + } + + CalculateCrc(); + + return result; +} + + +PatternList::PatternList() { + size_= 0; + initialized_ = 0; +} + +PatternList::~PatternList() { + if (initialized_) { + Destroy(); + } +} + +// Fill in the class with references to the static data patterns +int PatternList::Initialize() { + int patterncount = 0; + int weightcount = 0; + + patterns_.resize(pattern_array_size * 8); + for (int i = 0; i < pattern_array_size; i++) { + // Non inverted. + weightcount += pattern_array[i].weight[0]; + patterns_[patterncount++].Initialize(pattern_array[i], 32, false, + pattern_array[i].weight[0]); + weightcount += pattern_array[i].weight[1]; + patterns_[patterncount++].Initialize(pattern_array[i], 64, false, + pattern_array[i].weight[1]); + weightcount += pattern_array[i].weight[2]; + patterns_[patterncount++].Initialize(pattern_array[i], 128, false, + pattern_array[i].weight[2]); + weightcount += pattern_array[i].weight[3]; + patterns_[patterncount++].Initialize(pattern_array[i], 256, false, + pattern_array[i].weight[3]); + + // Inverted. 
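+    // Each base pattern therefore contributes eight Pattern objects in all:
+    // the four bus widths above in normal form, plus the same four widths
+    // below with every data word bitwise inverted (Pattern::pattern()
+    // applies ~ when inverse_ is set).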
+ weightcount += pattern_array[i].weight[0]; + patterns_[patterncount++].Initialize(pattern_array[i], 32, true, + pattern_array[i].weight[0]); + weightcount += pattern_array[i].weight[1]; + patterns_[patterncount++].Initialize(pattern_array[i], 64, true, + pattern_array[i].weight[1]); + weightcount += pattern_array[i].weight[2]; + patterns_[patterncount++].Initialize(pattern_array[i], 128, true, + pattern_array[i].weight[2]); + weightcount += pattern_array[i].weight[3]; + patterns_[patterncount++].Initialize(pattern_array[i], 256, true, + pattern_array[i].weight[3]); + } + size_ = patterncount; + weightcount_ = weightcount; + initialized_ = 1; + + logprintf(12, "Log: initialized %d data patterns\n", size_); + + return 1; +} + +// Free the stuff. +int PatternList::Destroy() { + if (!initialized_) + return 0; + + patterns_.clear(); + size_ = 0; + initialized_ = 0; + + return 1; +} + +// Return pattern numbered "i" +Pattern *PatternList::GetPattern(int i) { + if (i < size_) { + return &patterns_[i]; + } + + logprintf(0, "Process Error: Out of bounds pattern access\n"); + return 0; +} + +// Return a randomly selected pattern. +Pattern *PatternList::GetRandomPattern() { + unsigned int target = random(); + target = target % weightcount_; + + unsigned int i = 0; + unsigned int sum = 0; + while (target > sum) { + sum += patterns_[i].weight(); + i++; + } + if (i < size_) { + return &patterns_[i]; + } + + logprintf(0, "Process Error: Out of bounds pattern access\n"); + return 0; +} diff --git a/src/pattern.h b/src/pattern.h new file mode 100644 index 0000000..b1168aa --- /dev/null +++ b/src/pattern.h @@ -0,0 +1,124 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// pattern.h : global pattern references and initialization + +// This file implements easy access to statically declared +// data patterns. + +#ifndef STRESSAPPTEST_PATTERN_H_ +#define STRESSAPPTEST_PATTERN_H_ + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "adler32memcpy.h" +#include "sattypes.h" + +// 2 = 128 bit bus, 1 = 64 bit bus, 0 = 32 bit bus +const int kBusShift = 2; + +// Pattern and CRC data structure +struct PatternData { + const char *name; // Name of this pattern. + unsigned int *pat; // Data array. + unsigned int mask; // Size - 1. data[index & mask] is always valid. + unsigned char weight[4]; // Weighted frequency of this pattern. + // Each pattern has 32,64,128,256 width versions. + // All weights are added up, a random number is + // chosen between 0-sum(weights), and the + // appropriate pattern is chosen. Thus a weight of + // 1 is rare, a weight of 10 is 2x as likely to be + // chosen as a weight of 5. +}; + +// Data structure to access data patterns. +class Pattern { + public: + Pattern(); + ~Pattern(); + // Fill pattern data and calculate CRC. + int Initialize(const struct PatternData &pattern_init, + int buswidth, + bool invert, + int weight); + + // Access data members. 
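+  // A typical caller initializes a Pattern and then reads expected data
+  // words through pattern(); a hypothetical sketch (names illustrative):
+  //   Pattern p;
+  //   p.Initialize(chosen_pattern_data, 64, false, 1);  // 64-bit bus variant
+  //   unsigned int expected = p.pattern(word_offset);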
+ // "busshift_" allows for repeating each pattern word 1, 2, 4, etc. times. + // in order to create patterns of different width. + unsigned int pattern(unsigned int offset) { + unsigned int data = pattern_->pat[(offset >> busshift_) & pattern_->mask]; + if (inverse_) + data = ~data; + return data; + } + const AdlerChecksum *crc() {return crc_;} + unsigned int mask() {return pattern_->mask;} + unsigned int weight() {return weight_;} + const char *name() {return name_.c_str();} + + private: + int CalculateCrc(); + const struct PatternData *pattern_; + int busshift_; // Target data bus width. + bool inverse_; // Invert the data from the original pattern. + AdlerChecksum *crc_; // CRC of this pattern. + string name_; // The human readable pattern name. + int weight_; // This is the likelihood that this + // pattern will be chosen. + // We want to copy this! + // DISALLOW_COPY_AND_ASSIGN(Pattern); +}; + +// Object used to access global pattern list. +class PatternList { + public: + PatternList(); + ~PatternList(); + // Initialize pointers to global data patterns, and calculate CRC. + int Initialize(); + int Destroy(); + + // Return the pattern designated by index i. + Pattern *GetPattern(int i); + // Return a random pattern according to the specified weighted probability. + Pattern *GetRandomPattern(); + // Return the number of patterns available. + int Size() {return size_;} + + private: + vector patterns_; + int weightcount_; // Total count of pattern weights. + int size_; + int initialized_; + DISALLOW_COPY_AND_ASSIGN(PatternList); +}; + +// CrcIncrement allows an abstracted way to add a 32bit +// value into a running CRC. This function should be fast, and +// generate meaningful CRCs for the types of data patterns that +// we are using here. +// This CRC formula may not be optimal, but it does work. +// It may be improved in the future. +static inline uint32 CrcIncrement(uint32 crc, uint32 expected, int index) { + uint32 addition = (expected ^ index); + uint32 carry = (addition & crc) >> 31; + + return crc + addition + carry; +} + + +#endif // STRESSAPPTEST_PATTERN_H_ diff --git a/src/queue.cc b/src/queue.cc new file mode 100644 index 0000000..d735e68 --- /dev/null +++ b/src/queue.cc @@ -0,0 +1,118 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// queue.cc : simple thread safe queue implementation + +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "queue.h" +#include "sattypes.h" + +// Page entry queue implementation follows. +// Push inserts pages, pop returns a random entry. + + +PageEntryQueue::PageEntryQueue(uint64 queuesize) { + // There must always be one empty queue location, + // since in == out => empty. 
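+  // For example, PageEntryQueue(4) allocates q_size_ == 5 slots and holds at
+  // most 4 pages: the queue is empty when nextin_ == nextout_, and full when
+  // (nextin_ + 1) % q_size_ == nextout_ (see Push() below).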
+ q_size_ = queuesize + 1; + pages_ = new struct page_entry[q_size_]; + nextin_ = 0; + nextout_ = 0; + popped_ = 0; + pushed_ = 0; + pthread_mutex_init(&q_mutex_, NULL); +} +PageEntryQueue::~PageEntryQueue() { + delete[] pages_; + pthread_mutex_destroy(&q_mutex_); +} + +// Add a page into this queue. +int PageEntryQueue::Push(struct page_entry *pe) { + int result = 0; + int64 nextnextin; + + if (!pe) + return 0; + + pthread_mutex_lock(&q_mutex_); + nextnextin = (nextin_ + 1) % q_size_; + + if (nextnextin != nextout_) { + pages_[nextin_] = *pe; + + nextin_ = nextnextin; + result = 1; + + pushed_++; + } + + pthread_mutex_unlock(&q_mutex_); + + return result; +} + +// Retrieve a random page from this queue. +int PageEntryQueue::PopRandom(struct page_entry *pe) { + int result = 0; + int64 lastin; + int64 entries; + int64 newindex; + struct page_entry tmp; + + if (!pe) + return 0; + + // TODO(nsanders): we should improve random to get 64 bit randoms, and make + // it more thread friendly. + uint64 rand = random(); + + int retval = pthread_mutex_lock(&q_mutex_); + if (retval) + logprintf(0, "Process Error: pthreads mutex failure %d\n", retval); + + + if (nextin_ != nextout_) { + // Randomized fetch. + // Swap random entry with next out. + { + lastin = (nextin_ - 1 + q_size_) % q_size_; + entries = (lastin - nextout_ + q_size_) % q_size_; + + newindex = nextout_; + if (entries) + newindex = ((rand % entries) + nextout_) % q_size_; + + // Swap the pages. + tmp = pages_[nextout_]; + pages_[nextout_] = pages_[newindex]; + pages_[newindex] = tmp; + } + + // Return next out page. + *pe = pages_[nextout_]; + + nextout_ = (nextout_ + 1) % q_size_; + result = 1; + + popped_++; + } + + pthread_mutex_unlock(&q_mutex_); + + return result; +} diff --git a/src/queue.h b/src/queue.h new file mode 100644 index 0000000..a6296b1 --- /dev/null +++ b/src/queue.h @@ -0,0 +1,85 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// queue.h : simple queue api + +// This is an interface to a simple thread safe queue, +// used to hold data blocks and patterns. +// The order in which the blocks are returned is random. + +#ifndef STRESSAPPTEST_QUEUE_H_ // NOLINT +#define STRESSAPPTEST_QUEUE_H_ + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "sattypes.h" // NOLINT +#include "pattern.h" // NOLINT + +// Tag indicating no preference. +static const int kDontCareTag = -1; +// Tag indicating no preference. +static const int kInvalidTag = 0xf001; + + +// This describes a block of memory, and the expected fill pattern. +struct page_entry { + uint64 offset; + void *addr; + uint64 paddr; + class Pattern *pattern; + int32 tag; // These are tags for use in NUMA affinity or other uses. + uint32 touch; // Counter of the number of reads from this page. + uint64 ts; // Timestamp of the last read from this page. + class Pattern *lastpattern; // Expected Pattern at last read. 
+}; + +static inline void init_pe(struct page_entry *pe) { + pe->offset = 0; + pe->addr = NULL; + pe->pattern = NULL; + pe->tag = kInvalidTag; + pe->touch = 0; + pe->ts = 0; + pe->lastpattern = NULL; +} + +// This is a threadsafe randomized queue of pages for +// worker threads to use. +class PageEntryQueue { + public: + explicit PageEntryQueue(uint64 queuesize); + ~PageEntryQueue(); + + // Push a page onto the list. + int Push(struct page_entry *pe); + // Pop a random page off of the list. + int PopRandom(struct page_entry *pe); + + private: + struct page_entry *pages_; // Where the pages are held. + int64 nextin_; + int64 nextout_; + int64 q_size_; // Size of the queue. + int64 pushed_; // Number of pages pushed, total. + int64 popped_; // Number of pages popped, total. + pthread_mutex_t q_mutex_; + + DISALLOW_COPY_AND_ASSIGN(PageEntryQueue); +}; + + +#endif // MILES_TESTS_SAT_QUEUE_H_ NOLINT diff --git a/src/sat.cc b/src/sat.cc new file mode 100644 index 0000000..e714ba2 --- /dev/null +++ b/src/sat.cc @@ -0,0 +1,1897 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sat.cc : a stress test for stressful testing + +// stressapptest (or SAT, from Stressful Application Test) is a test +// designed to stress the system, as well as provide a comprehensive +// memory interface test. + +// stressapptest can be run using memory only, or using many system components. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// #define __USE_GNU +// #define __USE_LARGEFILE64 +#include + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "disk_blocks.h" +#include "logger.h" +#include "os.h" +#include "sat.h" +#include "sattypes.h" +#include "worker.h" + +// stressapptest versioning here. +#ifndef PACKAGE_VERSION +static const char* kVersion = "1.0.0"; +#else +static const char* kVersion = PACKAGE_VERSION; +#endif + +// Global stressapptest reference, for use by signal handler. +// This makes Sat objects not safe for multiple instances. +namespace { + Sat *g_sat = NULL; + + // Signal handler for catching break or kill. + // + // This must be installed after g_sat is assigned and while there is a single + // thread. + // + // This must be uninstalled while there is only a single thread, and of course + // before g_sat is cleared or deleted. + void SatHandleBreak(int signal) { + g_sat->Break(); + } +} + +// Opens the logfile for writing if necessary +bool Sat::InitializeLogfile() { + // Open logfile. + if (use_logfile_) { + logfile_ = open(logfilename_, + O_WRONLY | O_CREAT | O_DSYNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (logfile_ < 0) { + printf("Fatal Error: cannot open file %s for logging\n", + logfilename_); + bad_status(); + return false; + } + // We seek to the end once instead of opening in append mode because no + // other processes should be writing to it while this one exists. 
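+    // (The descriptor was opened with O_DSYNC above, so each log write
+    // reaches stable storage before returning; that keeps the log usable
+    // even if the machine under test hangs or resets mid-run.)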
+ if (lseek(logfile_, 0, SEEK_END) == -1) { + printf("Fatal Error: cannot seek to end of logfile (%s)\n", + logfilename_); + bad_status(); + return false; + } + Logger::GlobalLogger()->SetLogFd(logfile_); + } + return true; +} + +// Check that the environment is known and safe to run on. +// Return 1 if good, 0 if unsuppported. +bool Sat::CheckEnvironment() { + // Check that this is not a debug build. Debug builds lack + // enough performance to stress the system. +#if !defined NDEBUG + if (run_on_anything_) { + logprintf(1, "Log: Running DEBUG version of SAT, " + "with significantly reduced coverage.\n"); + } else { + logprintf(0, "Process Error: Running DEBUG version of SAT, " + "with significantly reduced coverage.\n"); + logprintf(0, "Log: Command line option '-A' bypasses this error.\n"); + bad_status(); + return false; + } +#elif !defined CHECKOPTS + #error Build system regression - COPTS disregarded. +#endif + + // Use all CPUs if nothing is specified. + if (memory_threads_ == -1) { + memory_threads_ = os_->num_cpus(); + logprintf(7, "Log: Defaulting to %d copy threads\n", memory_threads_); + } + + // Use all memory if no size is specified. + if (size_mb_ == 0) + size_mb_ = os_->FindFreeMemSize() / kMegabyte; + size_ = static_cast(size_mb_) * kMegabyte; + + // Autodetect file locations. + if (findfiles_ && (file_threads_ == 0)) { + // Get a space separated sting of disk locations. + list locations = os_->FindFileDevices(); + + // Extract each one. + while (!locations.empty()) { + // Copy and remove the disk name. + string disk = locations.back(); + locations.pop_back(); + + logprintf(12, "Log: disk at %s\n", disk.c_str()); + file_threads_++; + filename_.push_back(disk + "/sat_disk.a"); + file_threads_++; + filename_.push_back(disk + "/sat_disk.b"); + } + } + + // We'd better have some memory by this point. + if (size_ < 1) { + logprintf(0, "Process Error: No memory found to test.\n"); + bad_status(); + return false; + } + + if (tag_mode_ && ((file_threads_ > 0) || + (disk_threads_ > 0) || + (net_threads_ > 0))) { + logprintf(0, "Process Error: Memory tag mode incompatible " + "with disk/network DMA.\n"); + bad_status(); + return false; + } + + if ((address_mode_ == 32) && + (os_->normal_mem()) && + (size_ >= 1499 * kMegabyte)) { + if (run_on_anything_) { + int64 new_size_mb = 1499; + logprintf(1, "Log: 32 bit binary: reducing from %lldMB to %lldMB\n", + size_mb_, + new_size_mb); + size_mb_ = new_size_mb; + size_ = size_mb_ * kMegabyte; + } else { + logprintf(0, "Process Error: %dMB test memory too large " + "for 32 bit binary.\n", + static_cast(size_ / kMegabyte)); + logprintf(0, "Log: Command line option '-A' bypasses this error.\n"); + bad_status(); + return false; + } + } + + // If platform is 32 bit Xeon, floor memory size to multiple of 4. + if (address_mode_ == 32) { + size_mb_ = (size_mb_ / 4) * 4; + size_ = size_mb_ * kMegabyte; + logprintf(1, "Log: Flooring memory allocation to multiple of 4: %lldMB\n", + size_mb_); + } + + // Check if this system is on the whitelist for supported systems. + if (!os_->IsSupported()) { + if (run_on_anything_) { + logprintf(1, "Log: Unsupported system. Running with reduced coverage.\n"); + // This is ok, continue on. 
+ } else { + logprintf(0, "Process Error: Unsupported system, " + "no error reporting available\n"); + logprintf(0, "Log: Command line option '-A' bypasses this error.\n"); + bad_status(); + return false; + } + } + + return true; +} + +// Allocates memory to run the test on +bool Sat::AllocateMemory() { + // Allocate our test memory. + bool result = os_->AllocateTestMem(size_, paddr_base_); + if (!result) { + logprintf(0, "Process Error: failed to allocate memory\n"); + bad_status(); + return false; + } + return true; +} + +// Sets up access to data patterns +bool Sat::InitializePatterns() { + // Initialize pattern data. + patternlist_ = new PatternList(); + if (!patternlist_) { + logprintf(0, "Process Error: failed to allocate patterns\n"); + bad_status(); + return false; + } + if (!patternlist_->Initialize()) { + logprintf(0, "Process Error: failed to initialize patternlist\n"); + bad_status(); + return false; + } + return true; +} + +// Get any valid page, no tag specified. +bool Sat::GetValid(struct page_entry *pe) { + return GetValid(pe, kDontCareTag); +} + + +// Fetch and return empty and full pages into the empty and full pools. +bool Sat::GetValid(struct page_entry *pe, int32 tag) { + bool result = false; + // Get valid page depending on implementation. + if (pe_q_implementation_ == SAT_FINELOCK) + result = finelock_q_->GetValid(pe, tag); + else if (pe_q_implementation_ == SAT_ONELOCK) + result = valid_->PopRandom(pe); + + if (result) { + pe->addr = os_->PrepareTestMem(pe->offset, page_length_); // Map it. + + // Tag this access and current pattern. + pe->ts = os_->GetTimestamp(); + pe->lastpattern = pe->pattern; + + return (pe->addr != 0); // Return success or failure. + } + return false; +} + +bool Sat::PutValid(struct page_entry *pe) { + if (pe->addr != 0) + os_->ReleaseTestMem(pe->addr, pe->offset, page_length_); // Unmap the page. + pe->addr = 0; + + // Put valid page depending on implementation. + if (pe_q_implementation_ == SAT_FINELOCK) + return finelock_q_->PutValid(pe); + else if (pe_q_implementation_ == SAT_ONELOCK) + return valid_->Push(pe); + else + return false; +} + +// Get an empty page with any tag. +bool Sat::GetEmpty(struct page_entry *pe) { + return GetEmpty(pe, kDontCareTag); +} + +bool Sat::GetEmpty(struct page_entry *pe, int32 tag) { + bool result = false; + // Get empty page depending on implementation. + if (pe_q_implementation_ == SAT_FINELOCK) + result = finelock_q_->GetEmpty(pe, tag); + else if (pe_q_implementation_ == SAT_ONELOCK) + result = empty_->PopRandom(pe); + + if (result) { + pe->addr = os_->PrepareTestMem(pe->offset, page_length_); // Map it. + return (pe->addr != 0); // Return success or failure. + } + return false; +} + +bool Sat::PutEmpty(struct page_entry *pe) { + if (pe->addr != 0) + os_->ReleaseTestMem(pe->addr, pe->offset, page_length_); // Unmap the page. + pe->addr = 0; + + // Put empty page depending on implementation. + if (pe_q_implementation_ == SAT_FINELOCK) + return finelock_q_->PutEmpty(pe); + else if (pe_q_implementation_ == SAT_ONELOCK) + return empty_->Push(pe); + else + return false; +} + +// Set up the bitmap of physical pages in case we want to see which pages were +// accessed under this run of SAT. +void Sat::AddrMapInit() { + if (!do_page_map_) + return; + // Find about how much physical mem is in the system. + // TODO(nsanders): Find some way to get the max + // and min phys addr in the system. + uint64 maxsize = os_->FindFreeMemSize() * 4; + sat_assert(maxsize != 0); + + // Make a bitmask of this many pages. 
Assume that the memory is relatively + // zero based. This is true on x86, typically. + // This is one bit per page. + uint64 arraysize = maxsize / 4096 / 8; + unsigned char *bitmap = new unsigned char[arraysize]; + sat_assert(bitmap); + + // Mark every page as 0, not seen. + memset(bitmap, 0, arraysize); + + page_bitmap_size_ = maxsize; + page_bitmap_ = bitmap; +} + +// Add the 4k pages in this block to the array of pages SAT has seen. +void Sat::AddrMapUpdate(struct page_entry *pe) { + if (!do_page_map_) + return; + + // Go through 4k page blocks. + uint64 arraysize = page_bitmap_size_ / 4096 / 8; + + char *base = reinterpret_cast(pe->addr); + for (int i = 0; i < page_length_; i += 4096) { + uint64 paddr = os_->VirtualToPhysical(base + i); + + int offset = paddr / 4096 / 8; + unsigned char mask = 1 << ((paddr / 4096) % 8); + + if (offset >= arraysize) { + logprintf(0, "Process Error: Physical address %#llx is " + "greater than expected %#llx.\n", + paddr, page_bitmap_size_); + sat_assert(0); + } + page_bitmap_[offset] |= mask; + } +} + +// Print out the physical memory ranges that SAT has accessed. +void Sat::AddrMapPrint() { + if (!do_page_map_) + return; + + uint64 pages = page_bitmap_size_ / 4096; + + uint64 last_page = 0; + bool valid_range = false; + + logprintf(4, "Log: Printing tested physical ranges.\n"); + + for (uint64 i = 0; i < pages; i ++) { + int offset = i / 8; + unsigned char mask = 1 << (i % 8); + + bool touched = page_bitmap_[offset] & mask; + if (touched && !valid_range) { + valid_range = true; + last_page = i * 4096; + } else if (!touched && valid_range) { + valid_range = false; + logprintf(4, "Log: %#016llx - %#016llx\n", last_page, (i * 4096) - 1); + } + } + logprintf(4, "Log: Done printing physical ranges.\n"); +} + +// Initializes page lists and fills pages with data patterns. +bool Sat::InitializePages() { + int result = 1; + // Calculate needed page totals. + int64 neededpages = memory_threads_ + + invert_threads_ + + check_threads_ + + net_threads_ + + file_threads_; + + // Empty-valid page ratio is adjusted depending on queue implementation. + // since fine-grain-locked queue keeps both valid and empty entries in the + // same queue and randomly traverse to find pages, the empty-valid ratio + // should be more even. + if (pe_q_implementation_ == SAT_FINELOCK) + freepages_ = pages_ / 5 * 2; // Mark roughly 2/5 of all pages as Empty. + else + freepages_ = (pages_ / 100) + (2 * neededpages); + + if (freepages_ < neededpages) { + logprintf(0, "Process Error: freepages < neededpages.\n"); + logprintf(1, "Stats: Total: %lld, Needed: %lld, Marked free: %lld\n", + static_cast(pages_), + static_cast(neededpages), + static_cast(freepages_)); + bad_status(); + return false; + } + + if (freepages_ > pages_/2) { + logprintf(0, "Process Error: not enough pages for IO\n"); + logprintf(1, "Stats: Total: %lld, Needed: %lld, Available: %lld\n", + static_cast(pages_), + static_cast(freepages_), + static_cast(pages_/2)); + bad_status(); + return false; + } + logprintf(12, "Log: Allocating pages, Total: %lld Free: %lld\n", + pages_, + freepages_); + + // Initialize page locations. + for (int64 i = 0; i < pages_; i++) { + struct page_entry pe; + init_pe(&pe); + pe.offset = i * page_length_; + result &= PutEmpty(&pe); + } + + if (!result) { + logprintf(0, "Process Error: while initializing empty_ list\n"); + bad_status(); + return false; + } + + // Fill valid pages with test patterns. + // Use fill threads to do this. 
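+  // Pages are split evenly: each FillThread gets pages_ / fill_threads_
+  // pages and the last thread also absorbs the remainder. For example,
+  // 1003 pages across the default 8 fill threads gives seven threads 125
+  // pages each and the last thread 1003 - 7 * 125 = 128 pages.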
+ WorkerStatus fill_status; + WorkerVector fill_vector; + + logprintf(12, "Starting Fill threads: %d threads, %d pages\n", + fill_threads_, pages_); + // Initialize the fill threads. + for (int i = 0; i < fill_threads_; i++) { + FillThread *thread = new FillThread(); + thread->InitThread(i, this, os_, patternlist_, &fill_status); + if (i != fill_threads_ - 1) { + logprintf(12, "Starting Fill Threads %d: %d pages\n", + i, pages_ / fill_threads_); + thread->SetFillPages(pages_ / fill_threads_); + // The last thread finishes up all the leftover pages. + } else { + logprintf(12, "Starting Fill Threads %d: %d pages\n", + i, pages_ - pages_ / fill_threads_ * i); + thread->SetFillPages(pages_ - pages_ / fill_threads_ * i); + } + fill_vector.push_back(thread); + } + + // Spawn the fill threads. + fill_status.Initialize(); + for (WorkerVector::const_iterator it = fill_vector.begin(); + it != fill_vector.end(); ++it) + (*it)->SpawnThread(); + + // Reap the finished fill threads. + for (WorkerVector::const_iterator it = fill_vector.begin(); + it != fill_vector.end(); ++it) { + (*it)->JoinThread(); + if ((*it)->GetStatus() != 1) { + logprintf(0, "Thread %d failed with status %d at %.2f seconds\n", + (*it)->ThreadID(), (*it)->GetStatus(), + (*it)->GetRunDurationUSec() * 1.0/1000000); + bad_status(); + return false; + } + delete (*it); + } + fill_vector.clear(); + fill_status.Destroy(); + logprintf(12, "Log: Done filling pages.\n"); + logprintf(12, "Log: Allocating pages.\n"); + + AddrMapInit(); + + // Initialize page locations. + for (int64 i = 0; i < pages_; i++) { + struct page_entry pe; + // Only get valid pages with uninitialized tags here. + char buf[256]; + if (GetValid(&pe, kInvalidTag)) { + int64 paddr = os_->VirtualToPhysical(pe.addr); + int32 region = os_->FindRegion(paddr); + + os_->FindDimm(paddr, buf, sizeof(buf)); + if (i < 256) { + logprintf(12, "Log: address: %#llx, %s\n", paddr, buf); + } + region_[region]++; + pe.paddr = paddr; + pe.tag = 1 << region; + region_mask_ |= pe.tag; + + // Generate a physical region map + AddrMapUpdate(&pe); + + // Note: this does not allocate free pages among all regions + // fairly. However, with large enough (thousands) random number + // of pages being marked free in each region, the free pages + // count in each region end up pretty balanced. + if (i < freepages_) { + result &= PutEmpty(&pe); + } else { + result &= PutValid(&pe); + } + } else { + logprintf(0, "Log: didn't tag all pages. %d - %d = %d\n", + pages_, i, pages_ - i); + return false; + } + } + logprintf(12, "Log: Done allocating pages.\n"); + + AddrMapPrint(); + + for (int i = 0; i < 32; i++) { + if (region_mask_ & (1 << i)) { + region_count_++; + logprintf(12, "Log: Region %d: %d.\n", i, region_[i]); + } + } + logprintf(5, "Log: Region mask: 0x%x\n", region_mask_); + + return true; +} + +// Print SAT version info. +bool Sat::PrintVersion() { + logprintf(1, "Stats: SAT revision %s, %d bit binary\n", + kVersion, address_mode_); + logprintf(5, "Log: %s from %s\n", Timestamp(), BuildChangelist()); + + return true; +} + + +// Initializes the resources that SAT needs to run. +// This needs to be called before Run(), and after ParseArgs(). +// Returns true on success, false on error, and will exit() on help message. +bool Sat::Initialize() { + g_sat = this; + + // Initializes sync'd log file to ensure output is saved. 
+ if (!InitializeLogfile()) + return false; + Logger::GlobalLogger()->StartThread(); + + logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str()); + PrintVersion(); + + std::map options; + + GoogleOsOptions(&options); + + // Initialize OS/Hardware interface. + os_ = OsLayerFactory(options); + if (!os_) { + bad_status(); + return false; + } + if (error_injection_) os_->set_error_injection(true); + + // Checks that OS/Build/Platform is supported. + if (!CheckEnvironment()) + return false; + + // Run SAT in monitor only mode, do not continue to allocate resources. + if (monitor_mode_) { + logprintf(5, "Log: Running in monitor-only mode. " + "Will not allocate any memory nor run any stress test. " + "Only polling ECC errors.\n"); + return true; + } + + // Allocate the memory to test. + if (!AllocateMemory()) + return false; + + logprintf(5, "Stats: Starting SAT, %dM, %d seconds\n", + static_cast(size_/kMegabyte), + runtime_seconds_); + + if (!InitializePatterns()) + return false; + + // Initialize memory allocation. + pages_ = size_ / page_length_; + + // Allocate page queue depending on queue implementation switch. + if (pe_q_implementation_ == SAT_FINELOCK) { + finelock_q_ = new FineLockPEQueue(pages_, page_length_); + if (finelock_q_ == NULL) + return false; + finelock_q_->set_os(os_); + os_->set_err_log_callback(finelock_q_->get_err_log_callback()); + } else if (pe_q_implementation_ == SAT_ONELOCK) { + empty_ = new PageEntryQueue(pages_); + valid_ = new PageEntryQueue(pages_); + if ((empty_ == NULL) || (valid_ == NULL)) + return false; + } + + if (!InitializePages()) { + logprintf(0, "Process Error: Initialize Pages failed\n"); + return false; + } + + return true; +} + +// Constructor and destructor. +Sat::Sat() { + // Set defaults, command line might override these. + runtime_seconds_ = 20; + page_length_ = kSatPageSize; + disk_pages_ = kSatDiskPage; + pages_ = 0; + size_mb_ = 0; + size_ = size_mb_ * kMegabyte; + freepages_ = 0; + paddr_base_ = 0; + + user_break_ = false; + verbosity_ = 8; + Logger::GlobalLogger()->SetVerbosity(verbosity_); + strict_ = 1; + warm_ = 0; + run_on_anything_ = 0; + use_logfile_ = 0; + logfile_ = 0; + // Detect 32/64 bit binary. + void *pvoid = 0; + address_mode_ = sizeof(pvoid) * 8; + error_injection_ = false; + crazy_error_injection_ = false; + max_errorcount_ = 0; // Zero means no early exit. + stop_on_error_ = false; + error_poll_ = true; + findfiles_ = false; + + do_page_map_ = false; + page_bitmap_ = 0; + page_bitmap_size_ = 0; + + // Cache coherency data initialization. + cc_test_ = false; // Flag to trigger cc threads. + cc_cacheline_count_ = 2; // Two datastructures of cache line size. + cc_inc_count_ = 1000; // Number of times to increment the shared variable. + cc_cacheline_data_ = 0; // Cache Line size datastructure. + + sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL)); + file_threads_ = 0; + net_threads_ = 0; + listen_threads_ = 0; + // Default to autodetect number of cpus, and run that many threads. + memory_threads_ = -1; + invert_threads_ = 0; + fill_threads_ = 8; + check_threads_ = 0; + cpu_stress_threads_ = 0; + disk_threads_ = 0; + total_threads_ = 0; + + region_mask_ = 0; + region_count_ = 0; + for (int i = 0; i < 32; i++) { + region_[i] = 0; + } + region_mode_ = 0; + + errorcount_ = 0; + statuscount_ = 0; + + valid_ = 0; + empty_ = 0; + finelock_q_ = 0; + // Default to use fine-grain lock for better performance. 
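+  // SAT_FINELOCK keeps valid and empty pages together in one fine-grain
+  // locked FineLockPEQueue; SAT_ONELOCK falls back to a pair of single-mutex
+  // PageEntryQueue objects (valid_/empty_) and can be selected with
+  // --coarse_grain_lock, mainly for benchmarking.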
+ pe_q_implementation_ = SAT_FINELOCK; + + os_ = 0; + patternlist_ = 0; + logfilename_[0] = 0; + + read_block_size_ = 512; + write_block_size_ = -1; + segment_size_ = -1; + cache_size_ = -1; + blocks_per_segment_ = -1; + read_threshold_ = -1; + write_threshold_ = -1; + non_destructive_ = 1; + monitor_mode_ = 0; + tag_mode_ = 0; + random_threads_ = 0; + + pause_delay_ = 600; + pause_duration_ = 15; +} + +// Destructor. +Sat::~Sat() { + // We need to have called Cleanup() at this point. + // We should probably enforce this. +} + + +#define ARG_KVALUE(argument, variable, value) \ + if (!strcmp(argv[i], argument)) { \ + variable = value; \ + continue; \ + } + +#define ARG_IVALUE(argument, variable) \ + if (!strcmp(argv[i], argument)) { \ + i++; \ + if (i < argc) \ + variable = strtoull(argv[i], NULL, 0); \ + continue; \ + } + +#define ARG_SVALUE(argument, variable) \ + if (!strcmp(argv[i], argument)) { \ + i++; \ + if (i < argc) \ + snprintf(variable, sizeof(variable), "%s", argv[i]); \ + continue; \ + } + +// Configures SAT from command line arguments. +// This will call exit() given a request for +// self-documentation or unexpected args. +bool Sat::ParseArgs(int argc, char **argv) { + int i; + uint64 filesize = page_length_ * disk_pages_; + + // Parse each argument. + for (i = 1; i < argc; i++) { + // Switch to fall back to corase-grain-lock queue. (for benchmarking) + ARG_KVALUE("--coarse_grain_lock", pe_q_implementation_, SAT_ONELOCK); + + // Set number of megabyte to use. + ARG_IVALUE("-M", size_mb_); + + // Set number of seconds to run. + ARG_IVALUE("-s", runtime_seconds_); + + // Set number of memory copy threads. + ARG_IVALUE("-m", memory_threads_); + + // Set number of memory invert threads. + ARG_IVALUE("-i", invert_threads_); + + // Set number of check-only threads. + ARG_IVALUE("-c", check_threads_); + + // Set number of cache line size datastructures. + ARG_IVALUE("--cc_inc_count", cc_inc_count_); + + // Set number of cache line size datastructures + ARG_IVALUE("--cc_line_count", cc_cacheline_count_); + + // Flag set when cache coherency tests need to be run + ARG_KVALUE("--cc_test", cc_test_, 1); + + // Set number of CPU stress threads. + ARG_IVALUE("-C", cpu_stress_threads_); + + // Set logfile name. + ARG_SVALUE("-l", logfilename_); + + // Verbosity level. + ARG_IVALUE("-v", verbosity_); + + // Set maximum number of errors to collect. Stop running after this many. + ARG_IVALUE("--max_errors", max_errorcount_); + + // Set pattern block size. + ARG_IVALUE("-p", page_length_); + + // Set pattern block size. + ARG_IVALUE("--filesize", filesize); + + // NUMA options. + ARG_KVALUE("--local_numa", region_mode_, kLocalNuma); + ARG_KVALUE("--remote_numa", region_mode_, kRemoteNuma); + + // Autodetect tempfile locations. + ARG_KVALUE("--findfiles", findfiles_, 1); + + // Inject errors to force miscompare code paths + ARG_KVALUE("--force_errors", error_injection_, true); + ARG_KVALUE("--force_errors_like_crazy", crazy_error_injection_, true); + if (crazy_error_injection_) + error_injection_ = true; + + // Stop immediately on any arror, for debugging HW problems. + ARG_KVALUE("--stop_on_errors", stop_on_error_, 1); + + // Don't use internal error polling, allow external detection. + ARG_KVALUE("--no_errors", error_poll_, 0); + + // Never check data as you go. + ARG_KVALUE("-F", strict_, 0); + + // Warm the cpu as you go. 
+ ARG_KVALUE("-W", warm_, 1); + + // Allow runnign on unknown systems with base unimplemented OsLayer + ARG_KVALUE("-A", run_on_anything_, 1); + + // Size of read blocks for disk test. + ARG_IVALUE("--read-block-size", read_block_size_); + + // Size of write blocks for disk test. + ARG_IVALUE("--write-block-size", write_block_size_); + + // Size of segment for disk test. + ARG_IVALUE("--segment-size", segment_size_); + + // Size of disk cache size for disk test. + ARG_IVALUE("--cache-size", cache_size_); + + // Number of blocks to test per segment. + ARG_IVALUE("--blocks-per-segment", blocks_per_segment_); + + // Maximum time a block read should take before warning. + ARG_IVALUE("--read-threshold", read_threshold_); + + // Maximum time a block write should take before warning. + ARG_IVALUE("--write-threshold", write_threshold_); + + // Do not write anything to disk in the disk test. + ARG_KVALUE("--destructive", non_destructive_, 0); + + // Run SAT in monitor mode. No test load at all. + ARG_KVALUE("--monitor_mode", monitor_mode_, true); + + // Run SAT in address mode. Tag all cachelines by virt addr. + ARG_KVALUE("--tag_mode", tag_mode_, true); + + // Dump range map of tested pages.. + ARG_KVALUE("--do_page_map", do_page_map_, true); + + // Specify the physical address base to test. + ARG_IVALUE("--paddr_base", paddr_base_); + + // Specify the frequency for power spikes. + ARG_IVALUE("--pause_delay", pause_delay_); + + // Specify the duration of each pause (for power spikes). + ARG_IVALUE("--pause_duration", pause_duration_); + + // Disk device names + if (!strcmp(argv[i], "-d")) { + i++; + if (i < argc) { + disk_threads_++; + diskfilename_.push_back(string(argv[i])); + blocktables_.push_back(new DiskBlockTable()); + } + continue; + } + + // Set number of disk random threads for each disk write thread. + ARG_IVALUE("--random-threads", random_threads_); + + // Set a tempfile to use in a file thread. + if (!strcmp(argv[i], "-f")) { + i++; + if (i < argc) { + file_threads_++; + filename_.push_back(string(argv[i])); + } + continue; + } + + // Set a hostname to use in a network thread. + if (!strcmp(argv[i], "-n")) { + i++; + if (i < argc) { + net_threads_++; + ipaddrs_.push_back(string(argv[i])); + } + continue; + } + + // Run threads that listen for incoming SAT net connections. + ARG_KVALUE("--listen", listen_threads_, 1); + + if (CheckGoogleSpecificArgs(argc, argv, &i)) { + continue; + } + + // Default: + PrintVersion(); + PrintHelp(); + if (strcmp(argv[i], "-h") && strcmp(argv[i], "--help")) { + printf("\n Unknown argument %s\n", argv[i]); + bad_status(); + exit(1); + } + // Forget it, we printed the help, just bail. + // We don't want to print test status, or any log parser stuff. + exit(0); + } + + Logger::GlobalLogger()->SetVerbosity(verbosity_); + + // Update relevant data members with parsed input. + // Translate MB into bytes. + size_ = static_cast(size_mb_) * kMegabyte; + + // Set logfile flag. + if (strcmp(logfilename_, "")) + use_logfile_ = 1; + // Checks valid page length. + if (page_length_ && + !(page_length_ & (page_length_ - 1)) && + (page_length_ > 1023)) { + // Prints if we have changed from default. + if (page_length_ != kSatPageSize) + logprintf(12, "Log: Updating page size to %d\n", page_length_); + } else { + // Revert to default page length. + logprintf(6, "Process Error: " + "Invalid page size %d\n", page_length_); + page_length_ = kSatPageSize; + return false; + } + + // Set disk_pages_ if filesize or page size changed. 
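+  // (filesize was seeded with the default page_length_ * disk_pages_ before
+  // parsing, so this only recomputes when --filesize or -p changed one of
+  // them; disk_pages_ is floored to at least one page.)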
+ if (filesize != page_length_ * disk_pages_) { + disk_pages_ = filesize / page_length_; + if (disk_pages_ == 0) + disk_pages_ = 1; + } + + // Print each argument. + for (int i = 0; i < argc; i++) { + if (i) + cmdline_ += " "; + cmdline_ += argv[i]; + } + + return true; +} + +void Sat::PrintHelp() { + printf("Usage: ./sat(32|64) [options]\n" + " -M mbytes megabytes of ram to test\n" + " -s seconds number of seconds to run\n" + " -m threads number of memory copy threads to run\n" + " -i threads number of memory invert threads to run\n" + " -C threads number of memory CPU stress threads to run\n" + " --findfiles find locations to do disk IO automatically\n" + " -d device add a direct write disk thread with block " + "device (or file) 'device'\n" + " -f filename add a disk thread with " + "tempfile 'filename'\n" + " -l logfile log output to file 'logfile'\n" + " --max_errors n exit early after finding 'n' errors\n" + " -v level verbosity (0-20), default is 8\n" + " -W Use more CPU-stressful memory copy\n" + " -A run in degraded mode on incompatible systems\n" + " -p pagesize size in bytes of memory chunks\n" + " --filesize size size of disk IO tempfiles\n" + " -n ipaddr add a network thread connecting to " + "system at 'ipaddr'\n" + " --listen run a thread to listen for and respond " + "to network threads.\n" + " --no_errors run without checking for ECC or other errors\n" + " --force_errors inject false errors to test error handling\n" + " --force_errors_like_crazy inject a lot of false errors " + "to test error handling\n" + " -F don't result check each transaction\n" + "--stop_on_errors Stop after finding the first error.\n" + " --read-block-size size of block for reading (-d)\n" + " --write-block-size size of block for writing (-d). If not " + "defined, the size of block for writing will be defined as the " + "size of block for reading\n" + " --segment-size size of segments to split disk into (-d)\n" + " --cache-size size of disk cache (-d)\n" + " --blocks-per-segment number of blocks to read/write per " + "segment per iteration (-d)\n" + " --read-threshold maximum time (in us) a block read should " + "take (-d)\n" + " --write-threshold maximum time (in us) a block write " + "should take (-d)\n" + " --random-threads number of random threads for each disk " + "write thread (-d)\n" + " --destructive write/wipe disk partition (-d)\n" + " --monitor_mode only do ECC error polling, no stress load.\n" + " --cc_test do the cache coherency testing\n" + " --cc_inc_count number of times to increment the " + "cacheline's member\n" + " --cc_line_count number of cache line sized datastructures " + "to allocate for the cache coherency threads to operate\n" + " --paddr_base allocate memory starting from this address\n" + " --pause_delay delay (in seconds) between power spikes\n" + " --pause_duration duration (in seconds) of each pause\n" + " --local_numa : choose memory regions associated with " + "each CPU to be tested by that CPU\n" + "--remote_numa : choose memory regions not associated with " + "each CPU to be tested by that CPU\n"); +} + +bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) { + // Do nothing, no google-specific argument on public stressapptest + return false; +} + +void Sat::GoogleOsOptions(std::map *options) { + // Do nothing, no OS-specific argument on public stressapptest +} + +namespace { + // This counts the bits set in a bitmask. + // This is used to determine number of cores in an available mask. 
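+  // For example, countbits(0x0000000f) == 4 for a four-core affinity mask.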
+ int countbits(uint32 bitfield) { + int numbits = 0; + for (int i = 0; i < 32; i++) { + if (bitfield & (1 << i)) { + numbits++; + } + } + return numbits; + } +} + +// Launch the SAT task threads. Returns 0 on error. +void Sat::InitializeThreads() { + // Memory copy threads. + AcquireWorkerLock(); + + logprintf(12, "Log: Starting worker threads\n"); + WorkerVector *memory_vector = new WorkerVector(); + + // Error polling thread. + // This may detect ECC corrected errors, disk problems, or + // any other errors normally hidden from userspace. + WorkerVector *error_vector = new WorkerVector(); + if (error_poll_) { + ErrorPollThread *thread = new ErrorPollThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &continuous_status_); + + error_vector->insert(error_vector->end(), thread); + } else { + logprintf(5, "Log: Skipping error poll thread due to --no_errors flag\n"); + } + workers_map_.insert(make_pair(kErrorType, error_vector)); + + // Only start error poll threads for monitor-mode SAT, + // skip all other types of worker threads. + if (monitor_mode_) { + ReleaseWorkerLock(); + return; + } + + for (int i = 0; i < memory_threads_; i++) { + CopyThread *thread = new CopyThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &power_spike_status_); + + if ((region_count_ > 1) && (region_mode_)) { + int32 region = region_find(i % region_count_); + cpu_set_t *cpuset = os_->FindCoreMask(region); + sat_assert(cpuset); + int32 cpu_mask = cpuset_to_uint32(cpuset); + if (region_mode_ == kLocalNuma) { + // Choose regions associated with this CPU. + thread->set_cpu_mask(cpu_mask); + thread->set_tag(1 << region); + } else if (region_mode_ == kRemoteNuma) { + // Choose regions not associated with this CPU.. + thread->set_cpu_mask(cpu_mask); + thread->set_tag(region_mask_ & ~(1 << region)); + } + } else { + int cores = countbits(thread->AvailableCpus()); + // Don't restrict thread location if we have more than one + // thread per core. Not so good for performance. + if (cpu_stress_threads_ + memory_threads_ <= cores) { + // Place a thread on alternating cores first. + // This assures interleaved core use with no overlap. + int nthcore = i; + int nthbit = (((2 * nthcore) % cores) + + (((2 * nthcore) / cores) % 2)) % cores; + if (thread->AvailableCpus() != ((1 << cores) - 1)) { + // We are assuming the bits are contiguous. + // Complain if this is not so. + logprintf(0, "Log: cores = %x, expected %x\n", + thread->AvailableCpus(), ((1 << (cores + 1)) - 1)); + } + + // Set thread affinity. + thread->set_cpu_mask(1 << nthbit); + } + } + memory_vector->insert(memory_vector->end(), thread); + } + workers_map_.insert(make_pair(kMemoryType, memory_vector)); + + // File IO threads. + WorkerVector *fileio_vector = new WorkerVector(); + for (int i = 0; i < file_threads_; i++) { + FileThread *thread = new FileThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &power_spike_status_); + thread->SetFile(filename_[i].c_str()); + // Set disk threads high priority. They don't take much processor time, + // but blocking them will delay disk IO. + thread->SetPriority(WorkerThread::High); + + fileio_vector->insert(fileio_vector->end(), thread); + } + workers_map_.insert(make_pair(kFileIOType, fileio_vector)); + + // Net IO threads. + WorkerVector *netio_vector = new WorkerVector(); + WorkerVector *netslave_vector = new WorkerVector(); + if (listen_threads_ > 0) { + // Create a network slave thread. This listens for connections. 
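+    // (Enabled with --listen; it answers the NetworkThreads that a peer
+    // machine starts against this host with -n.)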
+ NetworkListenThread *thread = new NetworkListenThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &continuous_status_); + + netslave_vector->insert(netslave_vector->end(), thread); + } + for (int i = 0; i < net_threads_; i++) { + NetworkThread *thread = new NetworkThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &continuous_status_); + thread->SetIP(ipaddrs_[i].c_str()); + + netio_vector->insert(netio_vector->end(), thread); + } + workers_map_.insert(make_pair(kNetIOType, netio_vector)); + workers_map_.insert(make_pair(kNetSlaveType, netslave_vector)); + + // Result check threads. + WorkerVector *check_vector = new WorkerVector(); + for (int i = 0; i < check_threads_; i++) { + CheckThread *thread = new CheckThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &continuous_status_); + + check_vector->insert(check_vector->end(), thread); + } + workers_map_.insert(make_pair(kCheckType, check_vector)); + + // Memory invert threads. + logprintf(12, "Log: Starting invert threads\n"); + WorkerVector *invert_vector = new WorkerVector(); + for (int i = 0; i < invert_threads_; i++) { + InvertThread *thread = new InvertThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &continuous_status_); + + invert_vector->insert(invert_vector->end(), thread); + } + workers_map_.insert(make_pair(kInvertType, invert_vector)); + + // Disk stress threads. + WorkerVector *disk_vector = new WorkerVector(); + WorkerVector *random_vector = new WorkerVector(); + logprintf(12, "Log: Starting disk stress threads\n"); + for (int i = 0; i < disk_threads_; i++) { + // Creating write threads + DiskThread *thread = new DiskThread(blocktables_[i]); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &power_spike_status_); + thread->SetDevice(diskfilename_[i].c_str()); + if (thread->SetParameters(read_block_size_, write_block_size_, + segment_size_, cache_size_, + blocks_per_segment_, + read_threshold_, write_threshold_, + non_destructive_)) { + disk_vector->insert(disk_vector->end(), thread); + } else { + logprintf(12, "Log: DiskThread::SetParameters() failed\n"); + delete thread; + } + + for (int j = 0; j < random_threads_; j++) { + // Creating random threads + RandomDiskThread *rthread = new RandomDiskThread(blocktables_[i]); + rthread->InitThread(total_threads_++, this, os_, patternlist_, + &power_spike_status_); + rthread->SetDevice(diskfilename_[i].c_str()); + if (rthread->SetParameters(read_block_size_, write_block_size_, + segment_size_, cache_size_, + blocks_per_segment_, + read_threshold_, write_threshold_, + non_destructive_)) { + random_vector->insert(random_vector->end(), rthread); + } else { + logprintf(12, "Log: RandomDiskThread::SetParameters() failed\n"); + delete rthread; + } + } + } + + workers_map_.insert(make_pair(kDiskType, disk_vector)); + workers_map_.insert(make_pair(kRandomDiskType, random_vector)); + + // CPU stress threads. + WorkerVector *cpu_vector = new WorkerVector(); + logprintf(12, "Log: Starting cpu stress threads\n"); + for (int i = 0; i < cpu_stress_threads_; i++) { + CpuStressThread *thread = new CpuStressThread(); + thread->InitThread(total_threads_++, this, os_, patternlist_, + &continuous_status_); + + // Don't restrict thread location if we have more than one + // thread per core. Not so good for performance. + int cores = countbits(thread->AvailableCpus()); + if (cpu_stress_threads_ + memory_threads_ <= cores) { + // Place a thread on alternating cores first. 
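+      // For example, with cores == 4 the nthbit formula below maps nthcore
+      // 0, 1, 2, 3 to cores 0, 2, 1, 3: even-numbered cores first, then the
+      // odd ones.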
+      // Go in reverse order for CPU stress threads. This assures interleaved
+      // core use with no overlap.
+      int nthcore = (cores - 1) - i;
+      int nthbit = (((2 * nthcore) % cores) +
+                    (((2 * nthcore) / cores) % 2)) % cores;
+      if (thread->AvailableCpus() != ((1 << cores) - 1)) {
+        logprintf(0, "Log: cores = %x, expected %x\n",
+                  thread->AvailableCpus(), ((1 << cores) - 1));
+      }
+
+      // Set thread affinity.
+      thread->set_cpu_mask(1 << nthbit);
+    }
+
+
+    cpu_vector->insert(cpu_vector->end(), thread);
+  }
+  workers_map_.insert(make_pair(kCPUType, cpu_vector));
+
+  // CPU Cache Coherency Threads - one for each core available.
+  if (cc_test_) {
+    WorkerVector *cc_vector = new WorkerVector();
+    logprintf(12, "Log: Starting cpu cache coherency threads\n");
+
+    // Allocate the shared data structure to be worked on by the threads.
+    cc_cacheline_data_ = reinterpret_cast<cc_cacheline_data*>(
+        malloc(sizeof(cc_cacheline_data) * cc_cacheline_count_));
+    sat_assert(cc_cacheline_data_ != NULL);
+
+    // Initialize the structure.
+    memset(cc_cacheline_data_, 0,
+           sizeof(cc_cacheline_data) * cc_cacheline_count_);
+
+    int num_cpus = CpuCount();
+    // Allocate all the nums once so that we get a single chunk
+    // of contiguous memory.
+    int *num;
+    int err_result = posix_memalign(
+        reinterpret_cast<void**>(&num),
+        kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_);
+    sat_assert(err_result == 0);
+
+    int cline;
+    for (cline = 0; cline < cc_cacheline_count_; cline++) {
+      memset(num, 0, sizeof(num_cpus) * num_cpus);
+      cc_cacheline_data_[cline].num = num;
+      num += num_cpus;
+    }
+
+    int tnum;
+    for (tnum = 0; tnum < num_cpus; tnum++) {
+      CpuCacheCoherencyThread *thread =
+          new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_,
+                                      tnum, cc_inc_count_);
+      thread->InitThread(total_threads_++, this, os_, patternlist_,
+                         &continuous_status_);
+      // Pin the thread to a particular core.
+      thread->set_cpu_mask(1 << tnum);
+
+      // Insert the thread into the vector.
+      cc_vector->insert(cc_vector->end(), thread);
+    }
+    workers_map_.insert(make_pair(kCCType, cc_vector));
+  }
+  ReleaseWorkerLock();
+}
+
+// Return the number of cpus actually present in the machine.
+int Sat::CpuCount() {
+  return sysconf(_SC_NPROCESSORS_CONF);
+}
+
+// Notify and reap worker threads.
+void Sat::JoinThreads() {
+  logprintf(12, "Log: Joining worker threads\n");
+  power_spike_status_.StopWorkers();
+  continuous_status_.StopWorkers();
+
+  AcquireWorkerLock();
+  for (WorkerMap::const_iterator map_it = workers_map_.begin();
+       map_it != workers_map_.end(); ++map_it) {
+    for (WorkerVector::const_iterator it = map_it->second->begin();
+         it != map_it->second->end(); ++it) {
+      logprintf(12, "Log: Joining thread %d\n", (*it)->ThreadID());
+      (*it)->JoinThread();
+    }
+  }
+  ReleaseWorkerLock();
+
+  QueueStats();
+
+  // Finish up result checking.
+  // Spawn check threads to minimize check time.
+  logprintf(12, "Log: Finished countdown, beginning result check\n");
+  WorkerStatus reap_check_status;
+  WorkerVector reap_check_vector;
+
+  // No need for check threads for monitor mode.
+  if (!monitor_mode_) {
+    // Initialize the check threads.
+    for (int i = 0; i < fill_threads_; i++) {
+      CheckThread *thread = new CheckThread();
+      thread->InitThread(total_threads_++, this, os_, patternlist_,
+                         &reap_check_status);
+      logprintf(12, "Log: Finished countdown, beginning result check\n");
+      reap_check_vector.push_back(thread);
+    }
+  }
+
+  reap_check_status.Initialize();
+  // Check threads should be marked to stop ASAP.
+ reap_check_status.StopWorkers(); + + // Spawn the check threads. + for (WorkerVector::const_iterator it = reap_check_vector.begin(); + it != reap_check_vector.end(); ++it) { + logprintf(12, "Log: Spawning thread %d\n", (*it)->ThreadID()); + (*it)->SpawnThread(); + } + + // Join the check threads. + for (WorkerVector::const_iterator it = reap_check_vector.begin(); + it != reap_check_vector.end(); ++it) { + logprintf(12, "Log: Joining thread %d\n", (*it)->ThreadID()); + (*it)->JoinThread(); + } + + // Reap all children. Stopped threads should have already ended. + // Result checking threads will end when they have finished + // result checking. + logprintf(12, "Log: Join all outstanding threads\n"); + + // Find all errors. + errorcount_ = GetTotalErrorCount(); + + AcquireWorkerLock(); + for (WorkerMap::const_iterator map_it = workers_map_.begin(); + map_it != workers_map_.end(); ++map_it) { + for (WorkerVector::const_iterator it = map_it->second->begin(); + it != map_it->second->end(); ++it) { + logprintf(12, "Log: Reaping thread status %d\n", (*it)->ThreadID()); + if ((*it)->GetStatus() != 1) { + logprintf(0, "Process Error: Thread %d failed with status %d at " + "%.2f seconds\n", + (*it)->ThreadID(), (*it)->GetStatus(), + (*it)->GetRunDurationUSec()*1.0/1000000); + bad_status(); + } + int priority = 12; + if ((*it)->GetErrorCount()) + priority = 5; + logprintf(priority, "Log: Thread %d found %lld hardware incidents\n", + (*it)->ThreadID(), (*it)->GetErrorCount()); + } + } + ReleaseWorkerLock(); + + + // Add in any errors from check threads. + for (WorkerVector::const_iterator it = reap_check_vector.begin(); + it != reap_check_vector.end(); ++it) { + logprintf(12, "Log: Reaping thread status %d\n", (*it)->ThreadID()); + if ((*it)->GetStatus() != 1) { + logprintf(0, "Process Error: Thread %d failed with status %d at " + "%.2f seconds\n", + (*it)->ThreadID(), (*it)->GetStatus(), + (*it)->GetRunDurationUSec()*1.0/1000000); + bad_status(); + } + errorcount_ += (*it)->GetErrorCount(); + int priority = 12; + if ((*it)->GetErrorCount()) + priority = 5; + logprintf(priority, "Log: Thread %d found %lld hardware incidents\n", + (*it)->ThreadID(), (*it)->GetErrorCount()); + delete (*it); + } + reap_check_vector.clear(); + reap_check_status.Destroy(); +} + +// Print queuing information. 
+void Sat::QueueStats() {
+  finelock_q_->QueueAnalysis();
+}
+
+void Sat::AnalysisAllStats() {
+  float max_runtime_sec = 0.;
+  float total_data = 0.;
+  float total_bandwidth = 0.;
+  float thread_runtime_sec = 0.;
+
+  for (WorkerMap::const_iterator map_it = workers_map_.begin();
+       map_it != workers_map_.end(); ++map_it) {
+    for (WorkerVector::const_iterator it = map_it->second->begin();
+         it != map_it->second->end(); ++it) {
+      thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000;
+      total_data += (*it)->GetMemoryCopiedData();
+      total_data += (*it)->GetDeviceCopiedData();
+      if (thread_runtime_sec > max_runtime_sec) {
+        max_runtime_sec = thread_runtime_sec;
+      }
+    }
+  }
+
+  total_bandwidth = total_data / max_runtime_sec;
+
+  logprintf(0, "Stats: Completed: %.2fM in %.2fs %.2fMB/s, "
+            "with %d hardware incidents, %d errors\n",
+            total_data,
+            max_runtime_sec,
+            total_bandwidth,
+            errorcount_,
+            statuscount_);
+}
+
+void Sat::MemoryStats() {
+  float memcopy_data = 0.;
+  float memcopy_bandwidth = 0.;
+  WorkerMap::const_iterator mem_it = workers_map_.find(
+      static_cast<int>(kMemoryType));
+  WorkerMap::const_iterator file_it = workers_map_.find(
+      static_cast<int>(kFileIOType));
+  sat_assert(mem_it != workers_map_.end());
+  sat_assert(file_it != workers_map_.end());
+  for (WorkerVector::const_iterator it = mem_it->second->begin();
+       it != mem_it->second->end(); ++it) {
+    memcopy_data += (*it)->GetMemoryCopiedData();
+    memcopy_bandwidth += (*it)->GetMemoryBandwidth();
+  }
+  for (WorkerVector::const_iterator it = file_it->second->begin();
+       it != file_it->second->end(); ++it) {
+    memcopy_data += (*it)->GetMemoryCopiedData();
+    memcopy_bandwidth += (*it)->GetMemoryBandwidth();
+  }
+  GoogleMemoryStats(&memcopy_data, &memcopy_bandwidth);
+  logprintf(4, "Stats: Memory Copy: %.2fM at %.2fMB/s\n",
+            memcopy_data,
+            memcopy_bandwidth);
+}
+
+void Sat::GoogleMemoryStats(float *memcopy_data,
+                            float *memcopy_bandwidth) {
+  // Do nothing, should be implemented by subclasses.
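+  // A platform-specific subclass would accumulate any extra copied-data and
+  // bandwidth totals into *memcopy_data and *memcopy_bandwidth here, since
+  // MemoryStats() calls this hook just before logging its totals.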
+}
+
+void Sat::FileStats() {
+  float file_data = 0.;
+  float file_bandwidth = 0.;
+  WorkerMap::const_iterator file_it = workers_map_.find(
+      static_cast<int>(kFileIOType));
+  sat_assert(file_it != workers_map_.end());
+  for (WorkerVector::const_iterator it = file_it->second->begin();
+       it != file_it->second->end(); ++it) {
+    file_data += (*it)->GetDeviceCopiedData();
+    file_bandwidth += (*it)->GetDeviceBandwidth();
+  }
+  logprintf(4, "Stats: File Copy: %.2fM at %.2fMB/s\n",
+            file_data,
+            file_bandwidth);
+}
+
+void Sat::CheckStats() {
+  float check_data = 0.;
+  float check_bandwidth = 0.;
+  WorkerMap::const_iterator check_it = workers_map_.find(
+      static_cast<int>(kCheckType));
+  sat_assert(check_it != workers_map_.end());
+  for (WorkerVector::const_iterator it = check_it->second->begin();
+       it != check_it->second->end(); ++it) {
+    check_data += (*it)->GetMemoryCopiedData();
+    check_bandwidth += (*it)->GetMemoryBandwidth();
+  }
+  logprintf(4, "Stats: Data Check: %.2fM at %.2fMB/s\n",
+            check_data,
+            check_bandwidth);
+}
+
+void Sat::NetStats() {
+  float net_data = 0.;
+  float net_bandwidth = 0.;
+  WorkerMap::const_iterator netio_it = workers_map_.find(
+      static_cast<int>(kNetIOType));
+  WorkerMap::const_iterator netslave_it = workers_map_.find(
+      static_cast<int>(kNetSlaveType));
+  sat_assert(netio_it != workers_map_.end());
+  sat_assert(netslave_it != workers_map_.end());
+  for (WorkerVector::const_iterator it = netio_it->second->begin();
+       it != netio_it->second->end(); ++it) {
+    net_data += (*it)->GetDeviceCopiedData();
+    net_bandwidth += (*it)->GetDeviceBandwidth();
+  }
+  for (WorkerVector::const_iterator it = netslave_it->second->begin();
+       it != netslave_it->second->end(); ++it) {
+    net_data += (*it)->GetDeviceCopiedData();
+    net_bandwidth += (*it)->GetDeviceBandwidth();
+  }
+  logprintf(4, "Stats: Net Copy: %.2fM at %.2fMB/s\n",
+            net_data,
+            net_bandwidth);
+}
+
+void Sat::InvertStats() {
+  float invert_data = 0.;
+  float invert_bandwidth = 0.;
+  WorkerMap::const_iterator invert_it = workers_map_.find(
+      static_cast<int>(kInvertType));
+  sat_assert(invert_it != workers_map_.end());
+  for (WorkerVector::const_iterator it = invert_it->second->begin();
+       it != invert_it->second->end(); ++it) {
+    invert_data += (*it)->GetMemoryCopiedData();
+    invert_bandwidth += (*it)->GetMemoryBandwidth();
+  }
+  logprintf(4, "Stats: Invert Data: %.2fM at %.2fMB/s\n",
+            invert_data,
+            invert_bandwidth);
+}
+
+void Sat::DiskStats() {
+  float disk_data = 0.;
+  float disk_bandwidth = 0.;
+  WorkerMap::const_iterator disk_it = workers_map_.find(
+      static_cast<int>(kDiskType));
+  WorkerMap::const_iterator random_it = workers_map_.find(
+      static_cast<int>(kRandomDiskType));
+  sat_assert(disk_it != workers_map_.end());
+  sat_assert(random_it != workers_map_.end());
+  for (WorkerVector::const_iterator it = disk_it->second->begin();
+       it != disk_it->second->end(); ++it) {
+    disk_data += (*it)->GetDeviceCopiedData();
+    disk_bandwidth += (*it)->GetDeviceBandwidth();
+  }
+  for (WorkerVector::const_iterator it = random_it->second->begin();
+       it != random_it->second->end(); ++it) {
+    disk_data += (*it)->GetDeviceCopiedData();
+    disk_bandwidth += (*it)->GetDeviceBandwidth();
+  }
+
+  logprintf(4, "Stats: Disk: %.2fM at %.2fMB/s\n",
+            disk_data,
+            disk_bandwidth);
+}
+
+// Process worker thread data for bandwidth information and error results.
+// More analysis methods can be added here by subclassing Sat.
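+// For instance, a hypothetical subclass could hook in extra reporting like
+// this (names below are illustrative only):
+//
+//   class MySat : public Sat {
+//    protected:
+//     virtual void RunAnalysis() {
+//       Sat::RunAnalysis();
+//       logprintf(4, "Stats: platform-specific counters go here\n");
+//     }
+//   };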
+void Sat::RunAnalysis() { + AnalysisAllStats(); + MemoryStats(); + FileStats(); + NetStats(); + CheckStats(); + InvertStats(); + DiskStats(); +} + +// Get total error count, summing across all threads.. +int64 Sat::GetTotalErrorCount() { + int64 errors = 0; + + AcquireWorkerLock(); + for (WorkerMap::const_iterator map_it = workers_map_.begin(); + map_it != workers_map_.end(); ++map_it) { + for (WorkerVector::const_iterator it = map_it->second->begin(); + it != map_it->second->end(); ++it) { + errors += (*it)->GetErrorCount(); + } + } + ReleaseWorkerLock(); + return errors; +} + + +void Sat::SpawnThreads() { + logprintf(12, "Log: Initializing WorkerStatus objects\n"); + power_spike_status_.Initialize(); + continuous_status_.Initialize(); + logprintf(12, "Log: Spawning worker threads\n"); + for (WorkerMap::const_iterator map_it = workers_map_.begin(); + map_it != workers_map_.end(); ++map_it) { + for (WorkerVector::const_iterator it = map_it->second->begin(); + it != map_it->second->end(); ++it) { + logprintf(12, "Log: Spawning thread %d\n", (*it)->ThreadID()); + (*it)->SpawnThread(); + } + } +} + +// Delete used worker thread objects. +void Sat::DeleteThreads() { + logprintf(12, "Log: Deleting worker threads\n"); + for (WorkerMap::const_iterator map_it = workers_map_.begin(); + map_it != workers_map_.end(); ++map_it) { + for (WorkerVector::const_iterator it = map_it->second->begin(); + it != map_it->second->end(); ++it) { + logprintf(12, "Log: Deleting thread %d\n", (*it)->ThreadID()); + delete (*it); + } + delete map_it->second; + } + workers_map_.clear(); + logprintf(12, "Log: Destroying WorkerStatus objects\n"); + power_spike_status_.Destroy(); + continuous_status_.Destroy(); +} + +namespace { +// Calculates the next time an action in Sat::Run() should occur, based on a +// schedule derived from a start point and a regular frequency. +// +// Using frequencies instead of intervals with their accompanying drift allows +// users to better predict when the actions will occur throughout a run. +// +// Arguments: +// frequency: seconds +// start: unixtime +// now: unixtime +// +// Returns: unixtime +inline time_t NextOccurance(time_t frequency, time_t start, time_t now) { + return start + frequency + (((now - start) / frequency) * frequency); +} +} + +// Run the actual test. +bool Sat::Run() { + // Install signal handlers to gracefully exit in the middle of a run. + // + // Why go through this whole rigmarole? It's the only standards-compliant + // (C++ and POSIX) way to handle signals in a multithreaded program. + // Specifically: + // + // 1) (C++) The value of a variable not of type "volatile sig_atomic_t" is + // unspecified upon entering a signal handler and, if modified by the + // handler, is unspecified after leaving the handler. + // + // 2) (POSIX) After the value of a variable is changed in one thread, another + // thread is only guaranteed to see the new value after both threads have + // acquired or released the same mutex or rwlock, synchronized to the + // same barrier, or similar. + // + // #1 prevents the use of #2 in a signal handler, so the signal handler must + // be called in the same thread that reads the "volatile sig_atomic_t" + // variable it sets. We enforce that by blocking the signals in question in + // the worker threads, forcing them to be handled by this thread. 
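+  // In short: block SIGINT and SIGTERM in this thread before spawning the
+  // workers (they inherit the blocked mask), restore the previous mask here
+  // once the workers are running, and have the handler do nothing but flag
+  // the request (user_break_) that the loop below polls.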
+ logprintf(12, "Log: Installing signal handlers\n"); + sigset_t new_blocked_signals; + sigemptyset(&new_blocked_signals); + sigaddset(&new_blocked_signals, SIGINT); + sigaddset(&new_blocked_signals, SIGTERM); + sigset_t prev_blocked_signals; + pthread_sigmask(SIG_BLOCK, &new_blocked_signals, &prev_blocked_signals); + sighandler_t prev_sigint_handler = signal(SIGINT, SatHandleBreak); + sighandler_t prev_sigterm_handler = signal(SIGTERM, SatHandleBreak); + + // Kick off all the worker threads. + logprintf(12, "Log: Launching worker threads\n"); + InitializeThreads(); + SpawnThreads(); + pthread_sigmask(SIG_SETMASK, &prev_blocked_signals, NULL); + + logprintf(12, "Log: Starting countdown with %d seconds\n", runtime_seconds_); + + // In seconds. + static const time_t kSleepFrequency = 5; + // All of these are in seconds. You probably want them to be >= + // kSleepFrequency and multiples of kSleepFrequency, but neither is necessary. + static const time_t kInjectionFrequency = 10; + static const time_t kPrintFrequency = 10; + + const time_t start = time(NULL); + const time_t end = start + runtime_seconds_; + time_t now = start; + time_t next_print = start + kPrintFrequency; + time_t next_pause = start + pause_delay_; + time_t next_resume = 0; + time_t next_injection; + if (crazy_error_injection_) { + next_injection = start + kInjectionFrequency; + } else { + next_injection = 0; + } + + while (now < end) { + // This is an int because it's for logprintf(). + const int seconds_remaining = end - now; + + if (user_break_) { + // Handle early exit. + logprintf(0, "Log: User exiting early (%d seconds remaining)\n", + seconds_remaining); + break; + } + + // If we have an error limit, check it here and see if we should exit. + if (max_errorcount_ != 0) { + uint64 errors = GetTotalErrorCount(); + if (errors > max_errorcount_) { + logprintf(0, "Log: Exiting early (%d seconds remaining) " + "due to excessive failures (%lld)\n", + seconds_remaining, + errors); + break; + } + } + + if (now >= next_print) { + // Print a count down message. + logprintf(5, "Log: Seconds remaining: %d\n", seconds_remaining); + next_print = NextOccurance(kPrintFrequency, start, now); + } + + if (next_injection && now >= next_injection) { + // Inject an error. + logprintf(4, "Log: Injecting error (%d seconds remaining)\n", + seconds_remaining); + struct page_entry src; + GetValid(&src); + src.pattern = patternlist_->GetPattern(0); + PutValid(&src); + next_injection = NextOccurance(kInjectionFrequency, start, now); + } + + if (next_pause && now >= next_pause) { + // Tell worker threads to pause in preparation for a power spike. + logprintf(4, "Log: Pausing worker threads in preparation for power spike " + "(%d seconds remaining)\n", seconds_remaining); + power_spike_status_.PauseWorkers(); + logprintf(12, "Log: Worker threads paused\n"); + next_pause = 0; + next_resume = now + pause_duration_; + } + + if (next_resume && now >= next_resume) { + // Tell worker threads to resume in order to cause a power spike. 
+ logprintf(4, "Log: Resuming worker threads to cause a power spike (%d " + "seconds remaining)\n", seconds_remaining); + power_spike_status_.ResumeWorkers(); + logprintf(12, "Log: Worker threads resumed\n"); + next_pause = NextOccurance(pause_delay_, start, now); + next_resume = 0; + } + + sat_sleep(NextOccurance(kSleepFrequency, start, now) - now); + now = time(NULL); + } + + JoinThreads(); + + logprintf(0, "Stats: Found %lld hardware incidents\n", errorcount_); + + if (!monitor_mode_) + RunAnalysis(); + + DeleteThreads(); + + logprintf(12, "Log: Uninstalling signal handlers\n"); + signal(SIGINT, prev_sigint_handler); + signal(SIGTERM, prev_sigterm_handler); + + return true; +} + +// Clean up all resources. +bool Sat::Cleanup() { + g_sat = NULL; + Logger::GlobalLogger()->StopThread(); + Logger::GlobalLogger()->SetStdoutOnly(); + if (logfile_) { + close(logfile_); + logfile_ = 0; + } + if (patternlist_) { + patternlist_->Destroy(); + delete patternlist_; + patternlist_ = 0; + } + if (os_) { + os_->FreeTestMem(); + delete os_; + os_ = 0; + } + if (empty_) { + delete empty_; + empty_ = 0; + } + if (valid_) { + delete valid_; + valid_ = 0; + } + if (finelock_q_) { + delete finelock_q_; + finelock_q_ = 0; + } + if (page_bitmap_) { + delete[] page_bitmap_; + } + + for (int i = 0; i < blocktables_.size(); i++) { + delete blocktables_[i]; + } + + if (cc_cacheline_data_) { + // The num integer arrays for all the cacheline structures are + // allocated as a single chunk. The pointers in the cacheline struct + // are populated accordingly. Hence calling free on the first + // cacheline's num's address is going to free the entire array. + // TODO(aganti): Refactor this to have a class for the cacheline + // structure (currently defined in worker.h) and clean this up + // in the destructor of that class. + if (cc_cacheline_data_[0].num) { + free(cc_cacheline_data_[0].num); + } + free(cc_cacheline_data_); + } + + sat_assert(0 == pthread_mutex_destroy(&worker_lock_)); + + return true; +} + + +// Pretty print really obvious results. +bool Sat::PrintResults() { + bool result = true; + + logprintf(4, "\n"); + if (statuscount_) { + logprintf(4, "Status: FAIL - test encountered procedural errors\n"); + result = false; + } else if (errorcount_) { + logprintf(4, "Status: FAIL - test discovered HW problems\n"); + result = false; + } else { + logprintf(4, "Status: PASS - please verify no corrected errors\n"); + } + logprintf(4, "\n"); + + return result; +} + +// Helper functions. +void Sat::AcquireWorkerLock() { + sat_assert(0 == pthread_mutex_lock(&worker_lock_)); +} +void Sat::ReleaseWorkerLock() { + sat_assert(0 == pthread_mutex_unlock(&worker_lock_)); +} + +void logprintf(int priority, const char *format, ...) { + va_list args; + va_start(args, format); + Logger::GlobalLogger()->VLogF(priority, format, args); + va_end(args); +} diff --git a/src/sat.h b/src/sat.h new file mode 100644 index 0000000..b1ad085 --- /dev/null +++ b/src/sat.h @@ -0,0 +1,309 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// sat.h : sat stress test object interface and data structures
+
+#ifndef STRESSAPPTEST_SAT_H_
+#define STRESSAPPTEST_SAT_H_
+
+#include <signal.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+// This file must work with autoconf on its public version,
+// so these includes are correct.
+#include "finelock_queue.h"
+#include "queue.h"
+#include "sattypes.h"
+#include "worker.h"
+#include "os.h"
+
+// SAT stress test class.
+class Sat {
+ public:
+  // Enum for page queue implementation switch.
+  enum PageQueueType { SAT_ONELOCK, SAT_FINELOCK };
+
+  Sat();
+  virtual ~Sat();
+
+  // Read configuration from arguments. Called first.
+  bool ParseArgs(int argc, char **argv);
+  virtual bool CheckGoogleSpecificArgs(int argc, char **argv, int *i);
+  // Initialize data structures, subclasses, and resources,
+  // based on command line args.
+  // Called after ParseArgs().
+  bool Initialize();
+
+  // Execute the test. Initialize() and ParseArgs() must be called first.
+  // This must be called from a single-threaded program.
+  bool Run();
+
+  // Pretty print result summary.
+  // Called after Run().
+  // Return value is success or failure of the SAT run, *not* of this function!
+  bool PrintResults();
+
+  // Pretty print version info.
+  bool PrintVersion();
+
+  // Pretty print help.
+  virtual void PrintHelp();
+
+  // Clean up allocations and resources.
+  // Called last.
+  bool Cleanup();
+
+  // Abort Run(). Only for use by Run()-installed signal handlers.
+  void Break() { user_break_ = true; }
+
+  // Fetch and return empty and full pages into the empty and full pools.
+  bool GetValid(struct page_entry *pe);
+  bool PutValid(struct page_entry *pe);
+  bool GetEmpty(struct page_entry *pe);
+  bool PutEmpty(struct page_entry *pe);
+
+  bool GetValid(struct page_entry *pe, int32 tag);
+  bool GetEmpty(struct page_entry *pe, int32 tag);
+
+  // Accessor functions.
+  int verbosity() const { return verbosity_; }
+  int logfile() const { return logfile_; }
+  int page_length() const { return page_length_; }
+  int disk_pages() const { return disk_pages_; }
+  int strict() const { return strict_; }
+  int tag_mode() const { return tag_mode_; }
+  int status() const { return statuscount_; }
+  void bad_status() { statuscount_++; }
+  int errors() const { return errorcount_; }
+  int warm() const { return warm_; }
+  bool stop_on_error() const { return stop_on_error_; }
+  int32 region_mask() const { return region_mask_; }
+  // Semi-accessor to find the "nth" region, to avoid replicated bit searching.
+  int32 region_find(int32 num) const {
+    for (int i = 0; i < 32; i++) {
+      if ((1 << i) & region_mask_) {
+        if (num == 0)
+          return i;
+        num--;
+      }
+    }
+    return 0;
+  }
+
+  // Causes false errors for unittesting.
+  // Setting to "true" causes errors to be injected.
+  void set_error_injection(bool errors) { error_injection_ = errors; }
+  bool error_injection() const { return error_injection_; }
+
+ protected:
+  // Opens log file for writing. Returns 0 on failure.
+  bool InitializeLogfile();
+  // Checks for supported environment. Returns 0 on failure.
+  bool CheckEnvironment();
+  // Allocates size_ bytes of test memory.
+  bool AllocateMemory();
+  // Initializes datapattern reference structures.
+  bool InitializePatterns();
+  // Initializes test memory with datapatterns.
+  bool InitializePages();
+
+  // Start up worker threads.
+  virtual void InitializeThreads();
+  // Spawn worker threads.
+  void SpawnThreads();
+  // Reap worker threads.
+ void JoinThreads(); + // Run bandwidth and error analysis. + virtual void RunAnalysis(); + // Delete worker threads. + void DeleteThreads(); + + // Return the number of cpus in the system. + int CpuCount(); + + // Collect error counts from threads. + int64 GetTotalErrorCount(); + + // Command line arguments. + string cmdline_; + + // Memory and test configuration. + int runtime_seconds_; // Seconds to run. + int page_length_; // Length of each memory block. + int64 pages_; // Number of memory blocks. + int64 size_; // Size of memory tested, in bytes. + int64 size_mb_; // Size of memory tested, in MB. + int64 freepages_; // How many invalid pages we need. + int disk_pages_; // Number of pages per temp file. + uint64 paddr_base_; // Physical address base. + + // Control flags. + volatile sig_atomic_t user_break_; // User has signalled early exit. Used as + // a boolean. + int verbosity_; // How much to print. + int strict_; // Check results per transaction. + int warm_; // FPU warms CPU while coying. + int address_mode_; // 32 or 64 bit binary. + bool stop_on_error_; // Exit immendiately on any error. + bool findfiles_; // Autodetect tempfile locations. + + bool error_injection_; // Simulate errors, for unittests. + bool crazy_error_injection_; // Simulate lots of errors. + int64 max_errorcount_; // Number of errors before forced exit. + int run_on_anything_; // Ignore unknown machine ereor. + int use_logfile_; // Log to a file. + char logfilename_[255]; // Name of file to log to. + int logfile_; // File handle to log to. + + // Disk thread options. + int read_block_size_; // Size of block to read from disk. + int write_block_size_; // Size of block to write to disk. + int64 segment_size_; // Size of segment to split disk into. + int cache_size_; // Size of disk cache. + int blocks_per_segment_; // Number of blocks to test per segment. + int read_threshold_; // Maximum time (in us) a read should take + // before warning of a slow read. + int write_threshold_; // Maximum time (in us) a write should + // take before warning of a slow write. + int non_destructive_; // Whether to use non-destructive mode for + // the disk test. + + // Generic Options. + int monitor_mode_; // Switch for monitor-only mode SAT. + // This switch trumps most of the other + // argument, as SAT will only run error + // polling threads. + int tag_mode_; // Do tagging of memory and strict + // checking for misplaced cachelines. + + bool do_page_map_; // Should we print a list of used pages? + unsigned char *page_bitmap_; // Store bitmap of physical pages seen. + uint64 page_bitmap_size_; // Length of physical memory represented. + + // Cpu Cache Coherency Options. + bool cc_test_; // Flag to decide whether to start the + // cache coherency threads. + int cc_cacheline_count_; // Number of cache line size structures. + int cc_inc_count_; // Number of times to increment the shared + // cache lines structure members. + + // Thread control. + int file_threads_; // Threads of file IO. + int net_threads_; // Threads of network IO. + int listen_threads_; // Threads for network IO to connect. + int memory_threads_; // Threads of memcpy. + int invert_threads_; // Threads of invert. + int fill_threads_; // Threads of memset. + int check_threads_; // Threads of strcmp. + int cpu_stress_threads_; // Threads of CPU stress workload. + int disk_threads_; // Threads of disk test. + int random_threads_; // Number of random disk threads. + int total_threads_; // Total threads used. + bool error_poll_; // Poll for system errors. 
+
+  // Resources.
+  cc_cacheline_data *cc_cacheline_data_;  // The cache line sized data
+                                          // structure used by the ccache
+                                          // threads (in worker.h).
+  vector<string> filename_;               // Filenames for file IO.
+  vector<string> ipaddrs_;                // Addresses for network IO.
+  vector<string> diskfilename_;           // Filename for disk IO device.
+  // Block table for IO device.
+  vector<DiskBlockTable*> blocktables_;
+
+  int32 region_mask_;                // Bitmask of available NUMA regions.
+  int32 region_count_;               // Count of available NUMA regions.
+  int32 region_[32];                 // Pagecount per region.
+  int region_mode_;                  // What to do with NUMA hints?
+  static const int kLocalNuma = 1;   // Target local memory.
+  static const int kRemoteNuma = 2;  // Target remote memory.
+
+  // Results.
+  int64 errorcount_;  // Total hardware incidents seen.
+  int statuscount_;   // Total test errors seen.
+
+  // Thread type constants and types.
+  enum ThreadType {
+    kMemoryType = 0,
+    kFileIOType = 1,
+    kNetIOType = 2,
+    kNetSlaveType = 3,
+    kCheckType = 4,
+    kInvertType = 5,
+    kDiskType = 6,
+    kRandomDiskType = 7,
+    kCPUType = 8,
+    kErrorType = 9,
+    kCCType = 10
+  };
+
+  // Helper functions.
+  virtual void AcquireWorkerLock();
+  virtual void ReleaseWorkerLock();
+  pthread_mutex_t worker_lock_;  // Lock access to the worker thread structure.
+  typedef vector<WorkerThread*> WorkerVector;
+  typedef map<int, WorkerVector*> WorkerMap;
+  // Contains all worker threads.
+  WorkerMap workers_map_;
+  // Delay between power spikes.
+  time_t pause_delay_;
+  // The duration of each pause (for power spikes).
+  time_t pause_duration_;
+  // For the workers we pause and resume to create power spikes.
+  WorkerStatus power_spike_status_;
+  // For the workers we never pause.
+  WorkerStatus continuous_status_;
+
+  class OsLayer *os_;               // Os abstraction: put hacks here.
+  class PatternList *patternlist_;  // Access to global data patterns.
+
+  // RunAnalysis methods.
+  void AnalysisAllStats();  // Summary of all runs.
+  void MemoryStats();
+  void FileStats();
+  void NetStats();
+  void CheckStats();
+  void InvertStats();
+  void DiskStats();
+
+  void QueueStats();
+
+  // Physical page use reporting.
+  void AddrMapInit();
+  void AddrMapUpdate(struct page_entry *pe);
+  void AddrMapPrint();
+
+  // Additional memory data from google-specific tests.
+  virtual void GoogleMemoryStats(float *memcopy_data,
+                                 float *memcopy_bandwidth);
+
+  virtual void GoogleOsOptions(std::map<std::string, std::string> *options);
+
+  // Page queues. Only one of (valid_ + empty_) or (finelock_q_) will be used
+  // at a time. A commandline switch controls which queue implementation will
+  // be used.
+  class PageEntryQueue *valid_;        // Page queue structure, valid pages.
+  class PageEntryQueue *empty_;        // Page queue structure, free pages.
+  class FineLockPEQueue *finelock_q_;  // Page queue with fine-grain locks.
+  Sat::PageQueueType pe_q_implementation_;  // Queue implementation switch.
+
+  DISALLOW_COPY_AND_ASSIGN(Sat);
+};
+
+Sat *SatFactory();
+
+#endif  // STRESSAPPTEST_SAT_H_
diff --git a/src/sat_factory.cc b/src/sat_factory.cc
new file mode 100644
index 0000000..5cf3e4c
--- /dev/null
+++ b/src/sat_factory.cc
@@ -0,0 +1,21 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sat_factory.h : factory for SAT + +#include "sat.h" // NOLINT + +Sat *SatFactory() { + return new Sat(); +} diff --git a/src/sattypes.h b/src/sattypes.h new file mode 100644 index 0000000..2a58862 --- /dev/null +++ b/src/sattypes.h @@ -0,0 +1,156 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_SATTYPES_H_ +#define STRESSAPPTEST_SATTYPES_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_CONFIG_H // Built using autoconf +#include "stressapptest_config.h" +using namespace std; +using namespace __gnu_cxx; + +typedef signed long long int64; +typedef signed int int32; +typedef signed short int int16; +typedef signed char int8; + +typedef unsigned long long uint64; +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; + +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +inline const char* Timestamp() { + return STRESSAPPTEST_TIMESTAMP; +} + +inline const char* BuildChangelist() { + return "open source release"; +} + +#else + #include "googlesattypes.h" +#endif +// Workaround to allow 32/64 bit conversion +// without running into strict aliasing problems. +union datacast_t { + uint64 l64; + struct { + uint32 l; + uint32 h; + } l32; +}; + + +// File sync'd print to console and log +void logprintf(int priority, const char *format, ...); + +// We print to stderr ourselves first in case we're in such a bad state that the +// logger can't work. +#define sat_assert(x) \ +{\ + if (!(x)) {\ + fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ + logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ + exit(1);\ + }\ +} + +#if !defined(CPU_SETSIZE) + // Define type and macros for cpu mask operations + // Note: this code is hacked together to deal with difference + // function signatures across versions of glibc, ie those that take + // cpu_set_t versus those that take unsigned long. -johnhuang + typedef unsigned long cpu_set_t; + #define CPU_SETSIZE 32 + #define CPU_ISSET(index, cpu_set_ptr) (*(cpu_set_ptr) & 1 << (index)) + #define CPU_SET(index, cpu_set_ptr) (*(cpu_set_ptr) |= 1 << (index)) + #define CPU_ZERO(cpu_set_ptr) (*(cpu_set_ptr) = 0) + #define CPU_CLR(index, cpu_set_ptr) (*(cpu_set_ptr) &= ~(1 << (index))) +#endif + +// Make using CPUSET non-super-painful. 
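+// For example, a uint32 mask of 0xd (binary 1101) stands for CPUs 0, 2 and 3:
+// cpuset_from_uint32(0xd, &set) sets exactly those bits in the cpu_set_t, and
+// cpuset_to_uint32(&set) converts it back to 0xd.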
+static inline uint32 cpuset_to_uint32(cpu_set_t *cpuset) { + uint32 value = 0; + for (int index = 0; index < CPU_SETSIZE; index++) { + if (CPU_ISSET(index, cpuset)) { + if (index < 32) { + value |= 1 << index; + } else { + logprintf(0, "Process Error: Cpu index (%d) higher than 32\n", index); + sat_assert(0); + } + } + } + return value; +} + +static inline void cpuset_from_uint32(uint32 mask, cpu_set_t *cpuset) { + CPU_ZERO(cpuset); + for (int index = 0; index < 32; index++) { + if (mask & (1 << index)) + CPU_SET(index, cpuset); + } +} + +static const int32 kUSleepOneSecond = 1000000; + +// This is guaranteed not to use signals. +inline bool sat_usleep(int32 microseconds) { + timespec req; + req.tv_sec = microseconds / 1000000; + // Convert microseconds argument to nano seconds. + req.tv_nsec = (microseconds % 1000000) * 1000; + return nanosleep(&req, NULL) == 0; +} + +// This is guaranteed not to use signals. +inline bool sat_sleep(time_t seconds) { + timespec req; + req.tv_sec = seconds; + req.tv_nsec = 0; + return nanosleep(&req, NULL) == 0; +} + +// Get an error code description for use in error messages. +// +// Args: +// error_num: an errno error code +inline string ErrorString(int error_num) { + char buf[256]; + return string(strerror_r(error_num, buf, sizeof buf)); +} + +// Define handy constants here +static const int kTicksPerSec = 100; +static const int kMegabyte = (1024LL*1024LL); +static const int kSatDiskPageMax = 32; +static const int kSatDiskPage = 8; +static const int kSatPageSize = (1024LL*1024LL); +static const int kCacheLineSize = 64; +static const uint16_t kNetworkPort = 19996; + +#endif // STRESSAPPTEST_SATTYPES_H_ diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in new file mode 100644 index 0000000..efb8b2c --- /dev/null +++ b/src/stressapptest_config.h.in @@ -0,0 +1,188 @@ +/* src/stressapptest_config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if the `closedir' function returns void instead of `int'. */ +#undef CLOSEDIR_VOID + +/* Define to 1 if you have the header file. */ +#undef HAVE_ARPA_INET_H + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#undef HAVE_DECL_STRERROR_R + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#undef HAVE_DIRENT_H + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ +#undef HAVE_DOPRNT + +/* Define to 1 if you have the header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the `gettimeofday' function. */ +#undef HAVE_GETTIMEOFDAY + +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `memset' function. */ +#undef HAVE_MEMSET + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +#undef HAVE_NDIR_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_NETDB_H + +/* Define to 1 if you have the `select' function. */ +#undef HAVE_SELECT + +/* Define to 1 if you have the `socket' function. */ +#undef HAVE_SOCKET + +/* Define to 1 if stdbool.h conforms to C99. */ +#undef HAVE_STDBOOL_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strerror_r' function. */ +#undef HAVE_STRERROR_R + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `strtol' function. */ +#undef HAVE_STRTOL + +/* Define to 1 if you have the `strtoull' function. */ +#undef HAVE_STRTOULL + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#undef HAVE_SYS_DIR_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_IOCTL_H + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#undef HAVE_SYS_NDIR_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SELECT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `vprintf' function. */ +#undef HAVE_VPRINTF + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define as the return type of signal handlers (`int' or `void'). */ +#undef RETSIGTYPE + +/* Define to the type of arg 1 for `select'. */ +#undef SELECT_TYPE_ARG1 + +/* Define to the type of args 2, 3 and 4 for `select'. */ +#undef SELECT_TYPE_ARG234 + +/* Define to the type of arg 5 for `select'. */ +#undef SELECT_TYPE_ARG5 + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define to 1 if strerror_r returns char *. */ +#undef STRERROR_R_CHAR_P + +/* Defined if the target CPU is i686 */ +#undef STRESSAPPTEST_CPU_I686 + +/* Defined if the target CPU is PowerPC */ +#undef STRESSAPPTEST_CPU_PPC + +/* Defined if the target CPU is x86_64 */ +#undef STRESSAPPTEST_CPU_X86_64 + +/* Timestamp when ./configure was executed */ +#undef STRESSAPPTEST_TIMESTAMP + +/* Define to 1 if you can safely include both and . */ +#undef TIME_WITH_SYS_TIME + +/* Version number of package */ +#undef VERSION + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* Define to `int' if does not define. */ +#undef pid_t + +/* Define to equivalent of C99 restrict keyword, or to nothing if this is not + supported. Do not define if restrict is supported directly. */ +#undef restrict + +/* Define to `int' if does not define. */ +#undef ssize_t + +/* Define to the type of an unsigned integer type of width exactly 16 bits if + such a type exists and the standard includes do not define it. */ +#undef uint16_t + +/* Define to empty if the keyword `volatile' does not work. Warning: valid + code using `volatile' can become incorrect without. Disable with care. 
*/ +#undef volatile diff --git a/src/worker.cc b/src/worker.cc new file mode 100644 index 0000000..6a00db2 --- /dev/null +++ b/src/worker.cc @@ -0,0 +1,3258 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// worker.cc : individual tasks that can be run in combination to +// stress the system + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +// These are necessary, but on by default +// #define __USE_GNU +// #define __USE_LARGEFILE64 +#include +#include +#include +#include +#include // for gettid +// For size of block device +#include +#include +// For asynchronous I/O +#include + +#include + +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "error_diag.h" // NOLINT +#include "os.h" // NOLINT +#include "pattern.h" // NOLINT +#include "queue.h" // NOLINT +#include "sat.h" // NOLINT +#include "sattypes.h" // NOLINT +#include "worker.h" // NOLINT + +// Syscalls +// Why ubuntu, do you hate gettid so bad? +#if !defined(__NR_gettid) +# define __NR_gettid 224 +#endif + +#define gettid() syscall(__NR_gettid) +#if !defined(CPU_SETSIZE) +_syscall3(int, sched_getaffinity, pid_t, pid, + unsigned int, len, cpu_set_t*, mask) +_syscall3(int, sched_setaffinity, pid_t, pid, + unsigned int, len, cpu_set_t*, mask) +#endif + +// Linux aio syscalls. +#if !defined(__NR_io_setup) +#define __NR_io_setup 206 +#define __NR_io_destroy 207 +#define __NR_io_getevents 208 +#define __NR_io_submit 209 +#define __NR_io_cancel 210 +#endif + +#define io_setup(nr_events, ctxp) \ + syscall(__NR_io_setup, (nr_events), (ctxp)) +#define io_submit(ctx_id, nr, iocbpp) \ + syscall(__NR_io_submit, (ctx_id), (nr), (iocbpp)) +#define io_getevents(ctx_id, io_getevents, nr, events, timeout) \ + syscall(__NR_io_getevents, (ctx_id), (io_getevents), (nr), (events), \ + (timeout)) +#define io_cancel(ctx_id, iocb, result) \ + syscall(__NR_io_cancel, (ctx_id), (iocb), (result)) +#define io_destroy(ctx) \ + syscall(__NR_io_destroy, (ctx)) + +namespace { + // Get HW core ID from cpuid instruction. + inline int apicid(void) { + int cpu; +#ifdef STRESSAPPTEST_CPU_PPC + cpu = 0; +#else + __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx"); +#endif + return (cpu >> 24); + } + + // Work around the sad fact that there are two (gnu, xsi) incompatible + // versions of strerror_r floating around google. Awesome. + bool sat_strerror(int err, char *buf, int len) { + buf[0] = 0; + char *errmsg = reinterpret_cast(strerror_r(err, buf, len)); + int retval = reinterpret_cast(errmsg); + if (retval == 0) + return true; + if (retval == -1) + return false; + if (errmsg != buf) { + strncpy(buf, errmsg, len); + buf[len - 1] = 0; + } + return true; + } + + + inline uint64 addr_to_tag(void *address) { + return reinterpret_cast(address); + } +} + +#if !defined(O_DIRECT) +// Sometimes this isn't available. 
+// Disregard if it's not defined. + #define O_DIRECT 0 +#endif + +// A struct to hold captured errors, for later reporting. +struct ErrorRecord { + uint64 actual; // This is the actual value read. + uint64 reread; // This is the actual value, reread. + uint64 expected; // This is what it should have been. + uint64 *vaddr; // This is where it was (or wasn't). + char *vbyteaddr; // This is byte specific where the data was (or wasn't). + uint64 paddr; // This is the bus address, if available. + uint64 *tagvaddr; // This holds the tag value if this data was tagged. + uint64 tagpaddr; // This holds the physical address corresponding to the tag. +}; + +// This is a helper function to create new threads with pthreads. +static void *ThreadSpawnerGeneric(void *ptr) { + WorkerThread *worker = static_cast(ptr); + worker->StartRoutine(); + return NULL; +} + + +void WorkerStatus::Initialize() { + sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL)); + sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL)); + sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, + num_workers_ + 1)); +} + +void WorkerStatus::Destroy() { + sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_)); + sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_)); + sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); +} + +void WorkerStatus::PauseWorkers() { + if (SetStatus(PAUSE) != PAUSE) + WaitOnPauseBarrier(); +} + +void WorkerStatus::ResumeWorkers() { + if (SetStatus(RUN) == PAUSE) + WaitOnPauseBarrier(); +} + +void WorkerStatus::StopWorkers() { + if (SetStatus(STOP) == PAUSE) + WaitOnPauseBarrier(); +} + +bool WorkerStatus::ContinueRunning() { + // This loop is an optimization. We use it to immediately re-check the status + // after resuming from a pause, instead of returning and waiting for the next + // call to this function. + for (;;) { + switch (GetStatus()) { + case RUN: + return true; + case PAUSE: + // Wait for the other workers to call this function so that + // PauseWorkers() can return. + WaitOnPauseBarrier(); + // Wait for ResumeWorkers() to be called. + WaitOnPauseBarrier(); + break; + case STOP: + return false; + } + } +} + +bool WorkerStatus::ContinueRunningNoPause() { + return (GetStatus() != STOP); +} + +void WorkerStatus::RemoveSelf() { + // Acquire a read lock on status_rwlock_ while (status_ != PAUSE). + for (;;) { + AcquireStatusReadLock(); + if (status_ != PAUSE) + break; + // We need to obey PauseWorkers() just like ContinueRunning() would, so that + // the other threads won't wait on pause_barrier_ forever. + ReleaseStatusLock(); + // Wait for the other workers to call this function so that PauseWorkers() + // can return. + WaitOnPauseBarrier(); + // Wait for ResumeWorkers() to be called. + WaitOnPauseBarrier(); + } + + // This lock would be unnecessary if we held a write lock instead of a read + // lock on status_rwlock_, but that would also force all threads calling + // ContinueRunning() to wait on this one. Using a separate lock avoids that. + AcquireNumWorkersLock(); + // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't + // in use because (status != PAUSE). + sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); + sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_)); + --num_workers_; + ReleaseNumWorkersLock(); + + // Release status_rwlock_. + ReleaseStatusLock(); +} + + +// Parent thread class. 
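+// Typical lifecycle, as driven by Sat: construct a concrete worker type,
+// call InitThread() to attach it to a WorkerStatus, SpawnThread() to start
+// it (which runs InitPriority() and then the virtual Work() routine), and
+// JoinThread() to reap it after the WorkerStatus has been told to stop.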
+WorkerThread::WorkerThread() { + status_ = 0; + pages_copied_ = 0; + errorcount_ = 0; + runduration_usec_ = 0; + priority_ = Normal; + worker_status_ = NULL; + thread_spawner_ = &ThreadSpawnerGeneric; + tag_mode_ = false; +} + +WorkerThread::~WorkerThread() {} + +// Constructors. Just init some default values. +FillThread::FillThread() { + num_pages_to_fill_ = 0; +} + +// Initialize file name to empty. +FileThread::FileThread() { + filename_ = ""; + devicename_ = ""; + pass_ = 0; + page_io_ = true; + crc_page_ = -1; +} + +// If file thread used bounce buffer in memory, account for the extra +// copy for memory bandwidth calculation. +float FileThread::GetMemoryCopiedData() { + if (!os_->normal_mem()) + return GetCopiedData(); + else + return 0; +} + +// Initialize target hostname to be invalid. +NetworkThread::NetworkThread() { + snprintf(ipaddr_, sizeof(ipaddr_), "Unknown"); + sock_ = 0; +} + +// Initialize? +NetworkSlaveThread::NetworkSlaveThread() { +} + +// Initialize? +NetworkListenThread::NetworkListenThread() { +} + +// Init member variables. +void WorkerThread::InitThread(int thread_num_init, + class Sat *sat_init, + class OsLayer *os_init, + class PatternList *patternlist_init, + WorkerStatus *worker_status) { + sat_assert(worker_status); + worker_status->AddWorkers(1); + + thread_num_ = thread_num_init; + sat_ = sat_init; + os_ = os_init; + patternlist_ = patternlist_init; + worker_status_ = worker_status; + + cpu_mask_ = AvailableCpus(); + tag_ = 0xffffffff; + + tag_mode_ = sat_->tag_mode(); +} + + +// Use pthreads to prioritize a system thread. +bool WorkerThread::InitPriority() { + // This doesn't affect performance that much, and may not be too safe. + + bool ret = BindToCpus(cpu_mask_); + if (!ret) + logprintf(11, "Log: Bind to %x failed.\n", cpu_mask_); + + logprintf(11, "Log: Thread %d running on apic ID %d mask %x (%x).\n", + thread_num_, apicid(), CurrentCpus(), cpu_mask_); +#if 0 + if (priority_ == High) { + sched_param param; + param.sched_priority = 1; + // Set the priority; others are unchanged. + logprintf(0, "Log: Changing priority to SCHED_FIFO %d\n", + param.sched_priority); + if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { + char buf[256]; + sat_strerror(errno, buf, sizeof(buf)); + logprintf(0, "Process Error: sched_setscheduler " + "failed - error %d %s\n", + errno, buf); + } + } +#endif + return true; +} + +// Use pthreads to create a system thread. +int WorkerThread::SpawnThread() { + // Create the new thread. + int result = pthread_create(&thread_, NULL, thread_spawner_, this); + if (result) { + char buf[256]; + sat_strerror(result, buf, sizeof(buf)); + logprintf(0, "Process Error: pthread_create " + "failed - error %d %s\n", result, + buf); + status_ += 1; + return false; + } + + // 0 is pthreads success. + return true; +} + +// Kill the worker thread with SIGINT. +int WorkerThread::KillThread() { + pthread_kill(thread_, SIGINT); + return 0; +} + +// Block until thread has exited. +int WorkerThread::JoinThread() { + int result = pthread_join(thread_, NULL); + + if (result) { + logprintf(0, "Process Error: pthread_join failed - error %d\n", result); + status_ = 0; + } + + // 0 is pthreads success. + return (!result); +} + + +void WorkerThread::StartRoutine() { + InitPriority(); + StartThreadTimer(); + Work(); + StopThreadTimer(); + worker_status_->RemoveSelf(); +} + + +// Thread work loop. Execute until marked finished. +int WorkerThread::Work() { + do { + logprintf(9, "Log: ...\n"); + // Sleep for 1 second. 
+ sat_sleep(1); + } while (IsReadyToRun()); + + return 0; +} + + +// Returns CPU mask of CPUs available to this process, +// Conceptually, each bit represents a logical CPU, ie: +// mask = 3 (11b): cpu0, 1 +// mask = 13 (1101b): cpu0, 2, 3 +uint32 WorkerThread::AvailableCpus() { + cpu_set_t curr_cpus; + CPU_ZERO(&curr_cpus); + sched_getaffinity(getppid(), sizeof(curr_cpus), &curr_cpus); + return cpuset_to_uint32(&curr_cpus); +} + + +// Returns CPU mask of CPUs this thread is bound to, +// Conceptually, each bit represents a logical CPU, ie: +// mask = 3 (11b): cpu0, 1 +// mask = 13 (1101b): cpu0, 2, 3 +uint32 WorkerThread::CurrentCpus() { + cpu_set_t curr_cpus; + CPU_ZERO(&curr_cpus); + sched_getaffinity(0, sizeof(curr_cpus), &curr_cpus); + return cpuset_to_uint32(&curr_cpus); +} + + +// Bind worker thread to specified CPU(s) +// Args: +// thread_mask: cpu_set_t representing CPUs, ie +// mask = 1 (01b): cpu0 +// mask = 3 (11b): cpu0, 1 +// mask = 13 (1101b): cpu0, 2, 3 +// +// Returns true on success, false otherwise. +bool WorkerThread::BindToCpus(uint32 thread_mask) { + uint32 process_mask = AvailableCpus(); + if (thread_mask == process_mask) + return true; + + logprintf(11, "Log: available CPU mask - %x\n", process_mask); + if ((thread_mask | process_mask) != process_mask) { + // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist. + logprintf(0, "Log: requested CPUs %x not a subset of available %x\n", + thread_mask, process_mask); + return false; + } + cpu_set_t cpuset; + cpuset_from_uint32(thread_mask, &cpuset); + return (sched_setaffinity(gettid(), sizeof(cpuset), &cpuset) == 0); +} + + +// A worker thread can yield itself to give up CPU until it's scheduled again. +// Returns true on success, false on error. +bool WorkerThread::YieldSelf() { + return (sched_yield() == 0); +} + + +// Fill this page with its pattern. +bool WorkerThread::FillPage(struct page_entry *pe) { + // Error check arguments. + if (pe == 0) { + logprintf(0, "Process Error: Fill Page entry null\n"); + return 0; + } + + // Mask is the bitmask of indexes used by the pattern. + // It is the pattern size -1. Size is always a power of 2. + uint64 *memwords = static_cast(pe->addr); + int length = sat_->page_length(); + + if (tag_mode_) { + // Select tag or data as appropriate. + for (int i = 0; i < length / wordsize_; i++) { + datacast_t data; + + if ((i & 0x7) == 0) { + data.l64 = addr_to_tag(&memwords[i]); + } else { + data.l32.l = pe->pattern->pattern(i << 1); + data.l32.h = pe->pattern->pattern((i << 1) + 1); + } + memwords[i] = data.l64; + } + } else { + // Just fill in untagged data directly. + for (int i = 0; i < length / wordsize_; i++) { + datacast_t data; + + data.l32.l = pe->pattern->pattern(i << 1); + data.l32.h = pe->pattern->pattern((i << 1) + 1); + memwords[i] = data.l64; + } + } + + return 1; +} + + +// Tell the thread how many pages to fill. +void FillThread::SetFillPages(int64 num_pages_to_fill_init) { + num_pages_to_fill_ = num_pages_to_fill_init; +} + +// Fill this page with a random pattern. +bool FillThread::FillPageRandom(struct page_entry *pe) { + // Error check arguments. + if (pe == 0) { + logprintf(0, "Process Error: Fill Page entry null\n"); + return 0; + } + if ((patternlist_ == 0) || (patternlist_->Size() == 0)) { + logprintf(0, "Process Error: No data patterns available\n"); + return 0; + } + + // Choose a random pattern for this block. 
+ pe->pattern = patternlist_->GetRandomPattern(); + if (pe->pattern == 0) { + logprintf(0, "Process Error: Null data pattern\n"); + return 0; + } + + // Actually fill the page. + return FillPage(pe); +} + + +// Memory fill work loop. Execute until alloted pages filled. +int FillThread::Work() { + int result = 1; + + logprintf(9, "Log: Starting fill thread %d\n", thread_num_); + + // We want to fill num_pages_to_fill pages, and + // stop when we've filled that many. + // We also want to capture early break + struct page_entry pe; + int64 loops = 0; + while (IsReadyToRun() && (loops < num_pages_to_fill_)) { + result &= sat_->GetEmpty(&pe); + if (!result) { + logprintf(0, "Process Error: fill_thread failed to pop pages, " + "bailing\n"); + break; + } + + // Fill the page with pattern + result &= FillPageRandom(&pe); + if (!result) break; + + // Put the page back on the queue. + result &= sat_->PutValid(&pe); + if (!result) { + logprintf(0, "Process Error: fill_thread failed to push pages, " + "bailing\n"); + break; + } + loops++; + } + + // Fill in thread status. + pages_copied_ = loops; + status_ = result; + logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n", + thread_num_, status_, pages_copied_); + return 0; +} + + +// Print error information about a data miscompare. +void WorkerThread::ProcessError(struct ErrorRecord *error, + int priority, + const char *message) { + char dimm_string[256] = ""; + + int apic_id = apicid(); + uint32 cpumask = CurrentCpus(); + + // Determine if this is a write or read error. + os_->Flush(error->vaddr); + error->reread = *(error->vaddr); + + char *good = reinterpret_cast(&(error->expected)); + char *bad = reinterpret_cast(&(error->actual)); + + sat_assert(error->expected != error->actual); + unsigned int offset = 0; + for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) { + if (good[offset] != bad[offset]) + break; + } + + error->vbyteaddr = reinterpret_cast(error->vaddr) + offset; + + // Find physical address if possible. + error->paddr = os_->VirtualToPhysical(error->vbyteaddr); + + // Pretty print DIMM mapping if available. + os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string)); + + // Report parseable error. + if (priority < 5) { + // Run miscompare error through diagnoser for logging and reporting. + os_->error_diagnoser_->AddMiscompareError(dimm_string, + reinterpret_cast + (error->vaddr), 1); + + logprintf(priority, + "%s: miscompare on CPU %d(%x) at %p(0x%llx:%s): " + "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", + message, + apic_id, + cpumask, + error->vaddr, + error->paddr, + dimm_string, + error->actual, + error->reread, + error->expected); + } + + + // Overwrite incorrect data with correct data to prevent + // future miscompares when this data is reused. + *(error->vaddr) = error->expected; + os_->Flush(error->vaddr); +} + + + +// Print error information about a data miscompare. +void FileThread::ProcessError(struct ErrorRecord *error, + int priority, + const char *message) { + char dimm_string[256] = ""; + + // Determine if this is a write or read error. 
+ os_->Flush(error->vaddr); + error->reread = *(error->vaddr); + + char *good = reinterpret_cast(&(error->expected)); + char *bad = reinterpret_cast(&(error->actual)); + + sat_assert(error->expected != error->actual); + unsigned int offset = 0; + for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) { + if (good[offset] != bad[offset]) + break; + } + + error->vbyteaddr = reinterpret_cast(error->vaddr) + offset; + + // Find physical address if possible. + error->paddr = os_->VirtualToPhysical(error->vbyteaddr); + + // Pretty print DIMM mapping if available. + os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string)); + + // If crc_page_ is valid, ie checking content read back from file, + // track src/dst memory addresses. Otherwise catagorize as general + // mememory miscompare for CRC checking everywhere else. + if (crc_page_ != -1) { + int miscompare_byteoffset = static_cast(error->vbyteaddr) - + static_cast(page_recs_[crc_page_].dst); + os_->error_diagnoser_->AddHDDMiscompareError(devicename_, + crc_page_, + miscompare_byteoffset, + page_recs_[crc_page_].src, + page_recs_[crc_page_].dst); + } else { + os_->error_diagnoser_->AddMiscompareError(dimm_string, + reinterpret_cast + (error->vaddr), 1); + } + + logprintf(priority, + "%s: miscompare on %s at %p(0x%llx:%s): read:0x%016llx, " + "reread:0x%016llx expected:0x%016llx\n", + message, + devicename_.c_str(), + error->vaddr, + error->paddr, + dimm_string, + error->actual, + error->reread, + error->expected); + + // Overwrite incorrect data with correct data to prevent + // future miscompares when this data is reused. + *(error->vaddr) = error->expected; + os_->Flush(error->vaddr); +} + + +// Do a word by word result check of a region. +// Print errors on mismatches. +int WorkerThread::CheckRegion(void *addr, + class Pattern *pattern, + int64 length, + int offset, + int64 pattern_offset) { + uint64 *memblock = static_cast(addr); + const int kErrorLimit = 128; + int errors = 0; + int overflowerrors = 0; // Count of overflowed errors. + bool page_error = false; + string errormessage("Hardware Error"); + struct ErrorRecord + recorded[kErrorLimit]; // Queued errors for later printing. + + // For each word in the data region. + for (int i = 0; i < length / wordsize_; i++) { + uint64 actual = memblock[i]; + uint64 expected; + + // Determine the value that should be there. + datacast_t data; + int index = 2 * i + pattern_offset; + data.l32.l = pattern->pattern(index); + data.l32.h = pattern->pattern(index + 1); + expected = data.l64; + // Check tags if necessary. + if (tag_mode_ && ((reinterpret_cast(&memblock[i]) & 0x3f) == 0)) { + expected = addr_to_tag(&memblock[i]); + } + + + // If the value is incorrect, save an error record for later printing. + if (actual != expected) { + if (errors < kErrorLimit) { + recorded[errors].actual = actual; + recorded[errors].expected = expected; + recorded[errors].vaddr = &memblock[i]; + errors++; + } else { + page_error = true; + // If we have overflowed the error queue, just print the errors now. + logprintf(10, "Log: Error record overflow, too many miscompares!\n"); + errormessage = "Page Error"; + break; + } + } + } + + // Find if this is a whole block corruption. 
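+  // A "whole block" corruption is a contiguous run of words that matches
+  // a different pattern from the pattern list instead of the expected
+  // one, suggesting data arrived from the wrong source rather than
+  // random bit flips.  The loop below walks each alternate pattern with
+  // a small state machine (kGood -> kBad -> kGoodAgain, or kNoMatch to
+  // give up) and reports any such run as a single "Block Error".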
+ if (page_error && !tag_mode_) { + int patsize = patternlist_->Size(); + for (int pat = 0; pat < patsize; pat++) { + class Pattern *altpattern = patternlist_->GetPattern(pat); + const int kGood = 0; + const int kBad = 1; + const int kGoodAgain = 2; + const int kNoMatch = 3; + int state = kGood; + unsigned int badstart = 0; + unsigned int badend = 0; + + // Don't match against ourself! + if (pattern == altpattern) + continue; + + for (int i = 0; i < length / wordsize_; i++) { + uint64 actual = memblock[i]; + datacast_t expected; + datacast_t possible; + + // Determine the value that should be there. + int index = 2 * i + pattern_offset; + + expected.l32.l = pattern->pattern(index); + expected.l32.h = pattern->pattern(index + 1); + + possible.l32.l = pattern->pattern(index); + possible.l32.h = pattern->pattern(index + 1); + + if (state == kGood) { + if (actual == expected.l64) { + continue; + } else if (actual == possible.l64) { + badstart = i; + badend = i; + state = kBad; + continue; + } else { + state = kNoMatch; + break; + } + } else if (state == kBad) { + if (actual == possible.l64) { + badend = i; + continue; + } else if (actual == expected.l64) { + state = kGoodAgain; + continue; + } else { + state = kNoMatch; + break; + } + } else if (state == kGoodAgain) { + if (actual == expected.l64) { + continue; + } else { + state = kNoMatch; + break; + } + } + } + + if ((state == kGoodAgain) || (state == kBad)) { + unsigned int blockerrors = badend - badstart + 1; + errormessage = "Block Error"; + ProcessError(&recorded[0], 0, errormessage.c_str()); + logprintf(0, "Block Error: (%p) pattern %s instead of %s, " + "%d bytes from offset 0x%x to 0x%x\n", + &memblock[badstart], + altpattern->name(), pattern->name(), + blockerrors * wordsize_, + offset + badstart * wordsize_, + offset + badend * wordsize_); + errorcount_ += blockerrors; + return blockerrors; + } + } + } + + + // Process error queue after all errors have been recorded. + for (int err = 0; err < errors; err++) { + int priority = 5; + if (errorcount_ + err < 30) + priority = 0; // Bump up the priority for the first few errors. + ProcessError(&recorded[err], priority, errormessage.c_str()); + } + + if (page_error) { + // For each word in the data region. + int error_recount = 0; + for (int i = 0; i < length / wordsize_; i++) { + uint64 actual = memblock[i]; + uint64 expected; + datacast_t data; + // Determine the value that should be there. + int index = 2 * i + pattern_offset; + + data.l32.l = pattern->pattern(index); + data.l32.h = pattern->pattern(index + 1); + expected = data.l64; + + // Check tags if necessary. + if (tag_mode_ && ((reinterpret_cast(&memblock[i]) & 0x3f) == 0)) { + expected = addr_to_tag(&memblock[i]); + } + + // If the value is incorrect, save an error record for later printing. + if (actual != expected) { + if (error_recount < kErrorLimit) { + // We already reported these. + error_recount++; + } else { + // If we have overflowed the error queue, print the errors now. + struct ErrorRecord er; + er.actual = actual; + er.expected = expected; + er.vaddr = &memblock[i]; + + // Do the error printout. This will take a long time and + // likely change the machine state. + ProcessError(&er, 12, errormessage.c_str()); + overflowerrors++; + } + } + } + } + + // Keep track of observed errors. + errorcount_ += errors + overflowerrors; + return errors + overflowerrors; +} + +float WorkerThread::GetCopiedData() { + return pages_copied_ * sat_->page_length() / kMegabyte; +} + +// Calculate the CRC of a region. 
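+// The "CRC" here is an Adler-style checksum kept in two interleaved
+// lanes, so consecutive 64-bit words feed independent accumulators
+// (see AdlerAddrCrcC() below).  Per pair of words, roughly:
+//   a1 += w[i].low32;    b1 += a1;  a1 += w[i].high32;    b1 += a1;
+//   a2 += w[i+1].low32;  b2 += a2;  a2 += w[i+1].high32;  b2 += a2;
+// The four accumulators (a1, a2, b1, b2) make up the AdlerChecksum that
+// is compared against the pattern's precomputed value.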
+// Result check if the CRC mismatches. +int WorkerThread::CrcCheckPage(struct page_entry *srcpe) { + const int blocksize = 4096; + const int blockwords = blocksize / wordsize_; + int errors = 0; + + const AdlerChecksum *expectedcrc = srcpe->pattern->crc(); + uint64 *memblock = static_cast(srcpe->addr); + int blocks = sat_->page_length() / blocksize; + for (int currentblock = 0; currentblock < blocks; currentblock++) { + uint64 *memslice = memblock + currentblock * blockwords; + + AdlerChecksum crc; + if (tag_mode_) { + AdlerAddrCrcC(memslice, blocksize, &crc, srcpe); + } else { + CalculateAdlerChecksum(memslice, blocksize, &crc); + } + + // If the CRC does not match, we'd better look closer. + if (!crc.Equals(*expectedcrc)) { + logprintf(11, "Log: CrcCheckPage Falling through to slow compare, " + "CRC mismatch %s != %s\n", + crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + int errorcount = CheckRegion(memslice, + srcpe->pattern, + blocksize, + currentblock * blocksize, 0); + if (errorcount == 0) { + logprintf(0, "Log: CrcCheckPage CRC mismatch %s != %s, " + "but no miscompares found.\n", + crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + } + errors += errorcount; + } + } + + // For odd length transfers, we should never hit this. + int leftovers = sat_->page_length() % blocksize; + if (leftovers) { + uint64 *memslice = memblock + blocks * blockwords; + errors += CheckRegion(memslice, + srcpe->pattern, + leftovers, + blocks * blocksize, 0); + } + return errors; +} + + +// Print error information about a data miscompare. +void WorkerThread::ProcessTagError(struct ErrorRecord *error, + int priority, + const char *message) { + char dimm_string[256] = ""; + char tag_dimm_string[256] = ""; + bool read_error = false; + + int apic_id = apicid(); + uint32 cpumask = CurrentCpus(); + + // Determine if this is a write or read error. + os_->Flush(error->vaddr); + error->reread = *(error->vaddr); + + // Distinguish read and write errors. + if (error->actual != error->reread) { + read_error = true; + } + + sat_assert(error->expected != error->actual); + + error->vbyteaddr = reinterpret_cast(error->vaddr); + + // Find physical address if possible. + error->paddr = os_->VirtualToPhysical(error->vbyteaddr); + error->tagpaddr = os_->VirtualToPhysical(error->tagvaddr); + + // Pretty print DIMM mapping if available. + os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string)); + // Pretty print DIMM mapping if available. + os_->FindDimm(error->tagpaddr, tag_dimm_string, sizeof(tag_dimm_string)); + + // Report parseable error. + if (priority < 5) { + logprintf(priority, + "%s: Tag from %p(0x%llx:%s) (%s) miscompare on CPU %d(%x) at " + "%p(0x%llx:%s): " + "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", + message, + error->tagvaddr, error->tagpaddr, + tag_dimm_string, + read_error?"read error":"write error", + apic_id, + cpumask, + error->vaddr, + error->paddr, + dimm_string, + error->actual, + error->reread, + error->expected); + } + + errorcount_ += 1; + + // Overwrite incorrect data with correct data to prevent + // future miscompares when this data is reused. + *(error->vaddr) = error->expected; + os_->Flush(error->vaddr); +} + + +// Print out and log a tag error. +bool WorkerThread::ReportTagError( + uint64 *mem64, + uint64 actual, + uint64 tag) { + struct ErrorRecord er; + er.actual = actual; + + er.expected = tag; + er.vaddr = mem64; + + // Generate vaddr from tag. 
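+  // Each tagged word holds a value derived from the address it was
+  // written through (see addr_to_tag()), so the corrupted value read
+  // back ('actual') is reinterpreted as the address the stray data
+  // apparently came from, which helps localize misdirected writes.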
+ er.tagvaddr = reinterpret_cast(actual); + + ProcessTagError(&er, 0, "Hardware Error"); + return true; +} + +// C implementation of Adler memory copy, with memory tagging. +bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64, + uint64 *srcmem64, + unsigned int size_in_bytes, + AdlerChecksum *checksum, + struct page_entry *pe) { + // Use this data wrapper to access memory with 64bit read/write. + datacast_t data; + datacast_t dstdata; + unsigned int count = size_in_bytes / sizeof(data); + + if (count > ((1U) << 19)) { + // Size is too large, must be strictly less than 512 KB. + return false; + } + + uint64 a1 = 1; + uint64 a2 = 1; + uint64 b1 = 0; + uint64 b2 = 0; + + class Pattern *pattern = pe->pattern; + + unsigned int i = 0; + while (i < count) { + // Process 64 bits at a time. + if ((i & 0x7) == 0) { + data.l64 = srcmem64[i]; + dstdata.l64 = dstmem64[i]; + uint64 src_tag = addr_to_tag(&srcmem64[i]); + uint64 dst_tag = addr_to_tag(&dstmem64[i]); + // Detect if tags have been corrupted. + if (data.l64 != src_tag) + ReportTagError(&srcmem64[i], data.l64, src_tag); + if (dstdata.l64 != dst_tag) + ReportTagError(&dstmem64[i], dstdata.l64, dst_tag); + + data.l32.l = pattern->pattern(i << 1); + data.l32.h = pattern->pattern((i << 1) + 1); + a1 = a1 + data.l32.l; + b1 = b1 + a1; + a1 = a1 + data.l32.h; + b1 = b1 + a1; + + data.l64 = dst_tag; + dstmem64[i] = data.l64; + + } else { + data.l64 = srcmem64[i]; + a1 = a1 + data.l32.l; + b1 = b1 + a1; + a1 = a1 + data.l32.h; + b1 = b1 + a1; + dstmem64[i] = data.l64; + } + i++; + + data.l64 = srcmem64[i]; + a2 = a2 + data.l32.l; + b2 = b2 + a2; + a2 = a2 + data.l32.h; + b2 = b2 + a2; + dstmem64[i] = data.l64; + i++; + } + checksum->Set(a1, a2, b1, b2); + return true; +} + + +// C implementation of Adler memory crc. +bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64, + unsigned int size_in_bytes, + AdlerChecksum *checksum, + struct page_entry *pe) { + // Use this data wrapper to access memory with 64bit read/write. + datacast_t data; + unsigned int count = size_in_bytes / sizeof(data); + + if (count > ((1U) << 19)) { + // Size is too large, must be strictly less than 512 KB. + return false; + } + + uint64 a1 = 1; + uint64 a2 = 1; + uint64 b1 = 0; + uint64 b2 = 0; + + class Pattern *pattern = pe->pattern; + + unsigned int i = 0; + while (i < count) { + // Process 64 bits at a time. + if ((i & 0x7) == 0) { + data.l64 = srcmem64[i]; + uint64 src_tag = addr_to_tag(&srcmem64[i]); + // Check that tags match expected. + if (data.l64 != src_tag) + ReportTagError(&srcmem64[i], data.l64, src_tag); + + + data.l32.l = pattern->pattern(i << 1); + data.l32.h = pattern->pattern((i << 1) + 1); + a1 = a1 + data.l32.l; + b1 = b1 + a1; + a1 = a1 + data.l32.h; + b1 = b1 + a1; + + + } else { + data.l64 = srcmem64[i]; + a1 = a1 + data.l32.l; + b1 = b1 + a1; + a1 = a1 + data.l32.h; + b1 = b1 + a1; + } + i++; + + data.l64 = srcmem64[i]; + a2 = a2 + data.l32.l; + b2 = b2 + a2; + a2 = a2 + data.l32.h; + b2 = b2 + a2; + i++; + } + checksum->Set(a1, a2, b1, b2); + return true; +} + +// Copy a block of memory quickly, while keeping a CRC of the data. +// Result check if the CRC mismatches. 
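+// The copy proceeds in 4096-byte blocks: each block is copied with an
+// Adler-accumulating memcpy (AdlerAddrMemcpyC() above when tagging,
+// otherwise the plain AdlerMemcpyC()), the running checksum is compared
+// against the pattern's precomputed CRC, and only a mismatching block
+// falls back to the slow word-by-word CheckRegion() pass.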
+int WorkerThread::CrcCopyPage(struct page_entry *dstpe, + struct page_entry *srcpe) { + int errors = 0; + const int blocksize = 4096; + const int blockwords = blocksize / wordsize_; + int blocks = sat_->page_length() / blocksize; + + // Base addresses for memory copy + uint64 *targetmembase = static_cast(dstpe->addr); + uint64 *sourcemembase = static_cast(srcpe->addr); + // Remember the expected CRC + const AdlerChecksum *expectedcrc = srcpe->pattern->crc(); + + for (int currentblock = 0; currentblock < blocks; currentblock++) { + uint64 *targetmem = targetmembase + currentblock * blockwords; + uint64 *sourcemem = sourcemembase + currentblock * blockwords; + + AdlerChecksum crc; + if (tag_mode_) { + AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe); + } else { + AdlerMemcpyC(targetmem, sourcemem, blocksize, &crc); + } + + // Investigate miscompares. + if (!crc.Equals(*expectedcrc)) { + logprintf(11, "Log: CrcCopyPage Falling through to slow compare, " + "CRC mismatch %s != %s\n", crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + int errorcount = CheckRegion(sourcemem, + srcpe->pattern, + blocksize, + currentblock * blocksize, 0); + if (errorcount == 0) { + logprintf(0, "Log: CrcCopyPage CRC mismatch %s != %s, " + "but no miscompares found. Retrying with fresh data.\n", + crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + if (!tag_mode_) { + // Copy the data originally read from this region back again. + // This data should have any corruption read originally while + // calculating the CRC. + memcpy(sourcemem, targetmem, blocksize); + errorcount = CheckRegion(sourcemem, + srcpe->pattern, + blocksize, + currentblock * blocksize, 0); + if (errorcount == 0) { + logprintf(0, "Process Error: CrcCopyPage CRC mismatch %s != %s, " + "but no miscompares found on second pass.\n", + crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + } + } + } + errors += errorcount; + } + } + + // For odd length transfers, we should never hit this. + int leftovers = sat_->page_length() % blocksize; + if (leftovers) { + uint64 *targetmem = targetmembase + blocks * blockwords; + uint64 *sourcemem = sourcemembase + blocks * blockwords; + + errors += CheckRegion(sourcemem, + srcpe->pattern, + leftovers, + blocks * blocksize, 0); + int leftoverwords = leftovers / wordsize_; + for (int i = 0; i < leftoverwords; i++) { + targetmem[i] = sourcemem[i]; + } + } + + // Update pattern reference to reflect new contents. + dstpe->pattern = srcpe->pattern; + + // Clean clean clean the errors away. + if (errors) { + // TODO(nsanders): Maybe we should patch rather than fill? Filling may + // cause bad data to be propogated across the page. + FillPage(dstpe); + } + return errors; +} + + + +// Invert a block of memory quickly, traversing downwards. +int InvertThread::InvertPageDown(struct page_entry *srcpe) { + const int blocksize = 4096; + const int blockwords = blocksize / wordsize_; + int blocks = sat_->page_length() / blocksize; + + // Base addresses for memory copy + unsigned int *sourcemembase = static_cast(srcpe->addr); + + for (int currentblock = blocks-1; currentblock >= 0; currentblock--) { + unsigned int *sourcemem = sourcemembase + currentblock * blockwords; + for (int i = blockwords - 32; i >= 0; i -= 32) { + for (int index = i + 31; index >= i; --index) { + unsigned int actual = sourcemem[index]; + sourcemem[index] = ~actual; + } + OsLayer::FastFlush(&sourcemem[i]); + } + } + + return 0; +} + +// Invert a block of memory, traversing upwards. 
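+// Like InvertPageDown() above, but walking the page upwards.  Both
+// variants flip the words in 32-word bursts and flush each burst with
+// OsLayer::FastFlush(), pushing the inverted data out of the cache so
+// that later passes have to touch memory again.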
+int InvertThread::InvertPageUp(struct page_entry *srcpe) { + const int blocksize = 4096; + const int blockwords = blocksize / wordsize_; + int blocks = sat_->page_length() / blocksize; + + // Base addresses for memory copy + unsigned int *sourcemembase = static_cast(srcpe->addr); + + for (int currentblock = 0; currentblock < blocks; currentblock++) { + unsigned int *sourcemem = sourcemembase + currentblock * blockwords; + for (int i = 0; i < blockwords; i += 32) { + for (int index = i; index <= i + 31; ++index) { + unsigned int actual = sourcemem[index]; + sourcemem[index] = ~actual; + } + OsLayer::FastFlush(&sourcemem[i]); + } + } + return 0; +} + +// Copy a block of memory quickly, while keeping a CRC of the data. +// Result check if the CRC mismatches. Warm the CPU while running +int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, + struct page_entry *srcpe) { + int errors = 0; + const int blocksize = 4096; + const int blockwords = blocksize / wordsize_; + int blocks = sat_->page_length() / blocksize; + + // Base addresses for memory copy + uint64 *targetmembase = static_cast(dstpe->addr); + uint64 *sourcemembase = static_cast(srcpe->addr); + // Remember the expected CRC + const AdlerChecksum *expectedcrc = srcpe->pattern->crc(); + + for (int currentblock = 0; currentblock < blocks; currentblock++) { + uint64 *targetmem = targetmembase + currentblock * blockwords; + uint64 *sourcemem = sourcemembase + currentblock * blockwords; + + AdlerChecksum crc; + if (tag_mode_) { + AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe); + } else { + os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc); + } + + // Investigate miscompares. + if (!crc.Equals(*expectedcrc)) { + logprintf(11, "Log: CrcWarmCopyPage Falling through to slow compare, " + "CRC mismatch %s != %s\n", crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + int errorcount = CheckRegion(sourcemem, + srcpe->pattern, + blocksize, + currentblock * blocksize, 0); + if (errorcount == 0) { + logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, " + "but no miscompares found. Retrying with fresh data.\n", + crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + if (!tag_mode_) { + // Copy the data originally read from this region back again. + // This data should have any corruption read originally while + // calculating the CRC. + memcpy(sourcemem, targetmem, blocksize); + errorcount = CheckRegion(sourcemem, + srcpe->pattern, + blocksize, + currentblock * blocksize, 0); + if (errorcount == 0) { + logprintf(0, "Process Error: CrcWarmCopyPage CRC mismatch %s " + "!= %s, but no miscompares found on second pass.\n", + crc.ToHexString().c_str(), + expectedcrc->ToHexString().c_str()); + } + } + } + errors += errorcount; + } + } + + // For odd length transfers, we should never hit this. + int leftovers = sat_->page_length() % blocksize; + if (leftovers) { + uint64 *targetmem = targetmembase + blocks * blockwords; + uint64 *sourcemem = sourcemembase + blocks * blockwords; + + errors += CheckRegion(sourcemem, + srcpe->pattern, + leftovers, + blocks * blocksize, 0); + int leftoverwords = leftovers / wordsize_; + for (int i = 0; i < leftoverwords; i++) { + targetmem[i] = sourcemem[i]; + } + } + + // Update pattern reference to reflect new contents. + dstpe->pattern = srcpe->pattern; + + // Clean clean clean the errors away. + if (errors) { + // TODO(nsanders): Maybe we should patch rather than fill? Filling may + // cause bad data to be propogated across the page. 
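+    // For now the whole destination page is rewritten with its pattern,
+    // so later consumers of this page start from known-good data.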
+ FillPage(dstpe); + } + return errors; +} + + + +// Memory check work loop. Execute until done, then exhaust pages. +int CheckThread::Work() { + struct page_entry pe; + int result = 1; + int64 loops = 0; + + logprintf(9, "Log: Starting Check thread %d\n", thread_num_); + + // We want to check all the pages, and + // stop when there aren't any left. + while (1) { + result &= sat_->GetValid(&pe); + if (!result) { + if (IsReadyToRunNoPause()) + logprintf(0, "Process Error: check_thread failed to pop pages, " + "bailing\n"); + else + result = 1; + break; + } + + // Do the result check. + CrcCheckPage(&pe); + + // Push pages back on the valid queue if we are still going, + // throw them out otherwise. + if (IsReadyToRunNoPause()) + result &= sat_->PutValid(&pe); + else + result &= sat_->PutEmpty(&pe); + if (!result) { + logprintf(0, "Process Error: check_thread failed to push pages, " + "bailing\n"); + break; + } + loops++; + } + + pages_copied_ = loops; + status_ = result; + logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n", + thread_num_, status_, pages_copied_); + return 1; +} + + +// Memory copy work loop. Execute until marked done. +int CopyThread::Work() { + struct page_entry src; + struct page_entry dst; + int result = 1; + int64 loops = 0; + + logprintf(9, "Log: Starting copy thread %d: cpu %x, mem %x\n", + thread_num_, cpu_mask_, tag_); + + while (IsReadyToRun()) { + // Pop the needed pages. + result &= sat_->GetValid(&src, tag_); + result &= sat_->GetEmpty(&dst, tag_); + if (!result) { + logprintf(0, "Process Error: copy_thread failed to pop pages, " + "bailing\n"); + break; + } + + // Force errors for unittests. + if (sat_->error_injection()) { + if (loops == 8) { + char *addr = reinterpret_cast(src.addr); + int offset = random() % sat_->page_length(); + addr[offset] = 0xba; + } + } + + // We can use memcpy, or CRC check while we copy. + if (sat_->warm()) { + CrcWarmCopyPage(&dst, &src); + } else if (sat_->strict()) { + CrcCopyPage(&dst, &src); + } else { + memcpy(dst.addr, src.addr, sat_->page_length()); + dst.pattern = src.pattern; + } + + result &= sat_->PutValid(&dst); + result &= sat_->PutEmpty(&src); + + // Copy worker-threads yield themselves at the end of each copy loop, + // to avoid threads from preempting each other in the middle of the inner + // copy-loop. Cooperations between Copy worker-threads results in less + // unnecessary cache thrashing (which happens when context-switching in the + // middle of the inner copy-loop). + YieldSelf(); + + if (!result) { + logprintf(0, "Process Error: copy_thread failed to push pages, " + "bailing\n"); + break; + } + loops++; + } + + pages_copied_ = loops; + status_ = result; + logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n", + thread_num_, status_, pages_copied_); + return 1; +} + + + +// Memory invert work loop. Execute until marked done. +int InvertThread::Work() { + struct page_entry src; + int result = 1; + int64 loops = 0; + + logprintf(9, "Log: Starting invert thread %d\n", thread_num_); + + while (IsReadyToRun()) { + // Pop the needed pages. 
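+    // Only a valid (pattern-filled) page is needed; the four in-place
+    // inversions below (up, down, down, up) cancel out, so the page
+    // still holds its original pattern when it is pushed back on the
+    // valid queue and re-checked.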
+ result &= sat_->GetValid(&src); + if (!result) { + logprintf(0, "Process Error: invert_thread failed to pop pages, " + "bailing\n"); + break; + } + + if (sat_->strict()) + CrcCheckPage(&src); + + // For the same reason CopyThread yields itself (see YieldSelf comment + // in CopyThread::Work(), InvertThread yields itself after each invert + // operation to improve cooperation between different worker threads + // stressing the memory/cache. + InvertPageUp(&src); + YieldSelf(); + InvertPageDown(&src); + YieldSelf(); + InvertPageDown(&src); + YieldSelf(); + InvertPageUp(&src); + YieldSelf(); + + if (sat_->strict()) + CrcCheckPage(&src); + + result &= sat_->PutValid(&src); + if (!result) { + logprintf(0, "Process Error: invert_thread failed to push pages, " + "bailing\n"); + break; + } + loops++; + } + + pages_copied_ = loops * 2; + status_ = result; + logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n", + thread_num_, status_, pages_copied_); + return 1; +} + + +// Set file name to use for File IO. +void FileThread::SetFile(const char *filename_init) { + filename_ = filename_init; + devicename_ = os_->FindFileDevice(filename_); +} + +// Open the file for access. +bool FileThread::OpenFile(int *pfile) { + int fd = open(filename_.c_str(), + O_RDWR | O_CREAT | O_SYNC | O_DIRECT, + 0644); + if (fd < 0) { + logprintf(0, "Process Error: Failed to create file %s!!\n", + filename_.c_str()); + pages_copied_ = 0; + status_ = 0; + return 0; + } + *pfile = fd; + return 1; +} + +// Close the file. +bool FileThread::CloseFile(int fd) { + close(fd); + return 1; +} + +// Check sector tagging. +bool FileThread::SectorTagPage(struct page_entry *src, int block) { + int page_length = sat_->page_length(); + struct FileThread::SectorTag *tag = + (struct FileThread::SectorTag *)(src->addr); + + // Tag each sector. + unsigned char magic = ((0xba + thread_num_) & 0xff); + for (int sec = 0; sec < page_length / 512; sec++) { + tag[sec].magic = magic; + tag[sec].block = block & 0xff; + tag[sec].sector = sec & 0xff; + tag[sec].pass = pass_ & 0xff; + } + return true; +} + +bool FileThread::WritePageToFile(int fd, struct page_entry *src) { + int page_length = sat_->page_length(); + // Fill the file with our data. + int64 size = write(fd, src->addr, page_length); + + if (size != page_length) { + os_->ErrorReport(devicename_.c_str(), "write-error", 1); + errorcount_++; + logprintf(0, "Block Error: file_thread failed to write, " + "bailing\n"); + return false; + } + return true; +} + +// Write the data to the file. +bool FileThread::WritePages(int fd) { + int strict = sat_->strict(); + + // Start fresh at beginning of file for each batch of pages. + lseek(fd, 0, SEEK_SET); + for (int i = 0; i < sat_->disk_pages(); i++) { + struct page_entry src; + if (!GetValidPage(&src)) + return false; + // Save expected pattern. + page_recs_[i].pattern = src.pattern; + page_recs_[i].src = src.addr; + + // Check data correctness. + if (strict) + CrcCheckPage(&src); + + SectorTagPage(&src, i); + + bool result = WritePageToFile(fd, &src); + + if (!PutEmptyPage(&src)) + return false; + + if (!result) + return false; + } + return true; +} + +// Copy data from file into memory block. +bool FileThread::ReadPageFromFile(int fd, struct page_entry *dst) { + int page_length = sat_->page_length(); + + // Do the actual read. 
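+  // The file was opened with O_DIRECT (see OpenFile() above), so the
+  // destination buffer must be suitably aligned: either a SAT page or
+  // the 512-byte-aligned bounce buffer set up in PagePrepare().
+  // A short read is treated as an I/O error.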
+ int64 size = read(fd, dst->addr, page_length); + if (size != page_length) { + os_->ErrorReport(devicename_.c_str(), "read-error", 1); + logprintf(0, "Block Error: file_thread failed to read, " + "bailing\n"); + errorcount_++; + return false; + } + return true; +} + +// Check sector tagging. +bool FileThread::SectorValidatePage(const struct PageRec &page, + struct page_entry *dst, int block) { + // Error injection. + static int calls = 0; + calls++; + + // Do sector tag compare. + int firstsector = -1; + int lastsector = -1; + bool badsector = false; + int page_length = sat_->page_length(); + + // Cast data block into an array of tagged sectors. + struct FileThread::SectorTag *tag = + (struct FileThread::SectorTag *)(dst->addr); + + sat_assert(sizeof(*tag) == 512); + + // Error injection. + if (sat_->error_injection()) { + if (calls == 2) { + for (int badsec = 8; badsec < 17; badsec++) + tag[badsec].pass = 27; + } + if (calls == 18) { + (static_cast(dst->addr))[27] = 0xbadda7a; + } + } + + // Check each sector for the correct tag we added earlier, + // then revert the tag to the to normal data pattern. + unsigned char magic = ((0xba + thread_num_) & 0xff); + for (int sec = 0; sec < page_length / 512; sec++) { + // Check magic tag. + if ((tag[sec].magic != magic) || + (tag[sec].block != (block & 0xff)) || + (tag[sec].sector != (sec & 0xff)) || + (tag[sec].pass != (pass_ & 0xff))) { + // Offset calculation for tag location. + int offset = sec * sizeof(SectorTag); + if (tag[sec].block != (block & 0xff)) + offset += 1 * sizeof(uint8); + else if (tag[sec].sector != (sec & 0xff)) + offset += 2 * sizeof(uint8); + else if (tag[sec].pass != (pass_ & 0xff)) + offset += 3 * sizeof(uint8); + + // Run sector tag error through diagnoser for logging and reporting. + errorcount_ += 1; + os_->error_diagnoser_->AddHDDSectorTagError(devicename_, tag[sec].block, + offset, + tag[sec].sector, + page.src, page.dst); + + logprintf(5, "Sector Error: Sector tag @ 0x%x, pass %d/%d. " + "sec %x/%x, block %d/%d, magic %x/%x, File: %s \n", + block * page_length + 512 * sec, + (pass_ & 0xff), (unsigned int)tag[sec].pass, + sec, (unsigned int)tag[sec].sector, + block, (unsigned int)tag[sec].block, + magic, (unsigned int)tag[sec].magic, + filename_.c_str()); + + // Keep track of first and last bad sector. + if (firstsector == -1) + firstsector = (block * page_length / 512) + sec; + lastsector = (block * page_length / 512) + sec; + badsector = true; + } + // Patch tag back to proper pattern. + unsigned int *addr = (unsigned int *)(&tag[sec]); + *addr = dst->pattern->pattern(512 * sec / sizeof(*addr)); + } + + // If we found sector errors: + if (badsector == true) { + logprintf(5, "Log: file sector miscompare at offset %x-%x. File: %s\n", + firstsector * 512, + ((lastsector + 1) * 512) - 1, + filename_.c_str()); + + // Either exit immediately, or patch the data up and continue. + if (sat_->stop_on_error()) { + exit(1); + } else { + // Patch up bad pages. + for (int block = (firstsector * 512) / page_length; + block <= (lastsector * 512) / page_length; + block++) { + unsigned int *memblock = static_cast(dst->addr); + int length = page_length / wordsize_; + for (int i = 0; i < length; i++) { + memblock[i] = dst->pattern->pattern(i); + } + } + } + } + return true; +} + + + +// Get memory for an incoming data transfer.. +bool FileThread::PagePrepare() { + // We can only do direct IO to SAT pages if it is normal mem. + page_io_ = os_->normal_mem(); + + // Init a local buffer if we need it. 
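+  // When the SAT pages are not ordinary cacheable memory, direct I/O
+  // cannot target them, so a sector-aligned (512-byte) bounce buffer is
+  // allocated here and data is staged through it instead.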
+ if (!page_io_) { + local_page_ = static_cast(memalign(512, sat_->page_length())); + if (!local_page_) { + logprintf(0, "Process Error: disk thread memalign returned 0\n"); + status_ += 1; + return false; + } + } + return true; +} + + +// Remove memory allocated for data transfer. +bool FileThread::PageTeardown() { + // Free a local buffer if we need to. + if (!page_io_) { + free(local_page_); + } + return true; +} + + + +// Get memory for an incoming data transfer.. +bool FileThread::GetEmptyPage(struct page_entry *dst) { + if (page_io_) { + if (!sat_->GetEmpty(dst)) + return false; + } else { + dst->addr = local_page_; + dst->offset = 0; + dst->pattern = 0; + } + return true; +} + +// Get memory for an outgoing data transfer.. +bool FileThread::GetValidPage(struct page_entry *src) { + struct page_entry tmp; + if (!sat_->GetValid(&tmp)) + return false; + if (page_io_) { + *src = tmp; + return true; + } else { + src->addr = local_page_; + src->offset = 0; + CrcCopyPage(src, &tmp); + if (!sat_->PutValid(&tmp)) + return false; + } + return true; +} + + +// Throw out a used empty page. +bool FileThread::PutEmptyPage(struct page_entry *src) { + if (page_io_) { + if (!sat_->PutEmpty(src)) + return false; + } + return true; +} + +// Throw out a used, filled page. +bool FileThread::PutValidPage(struct page_entry *src) { + if (page_io_) { + if (!sat_->PutValid(src)) + return false; + } + return true; +} + + + +// Copy data from file into memory blocks. +bool FileThread::ReadPages(int fd) { + int page_length = sat_->page_length(); + int strict = sat_->strict(); + int result = 1; + + + // Read our data back out of the file, into it's new location. + lseek(fd, 0, SEEK_SET); + for (int i = 0; i < sat_->disk_pages(); i++) { + struct page_entry dst; + if (!GetEmptyPage(&dst)) + return false; + // Retrieve expected pattern. + dst.pattern = page_recs_[i].pattern; + // Update page recordpage record. + page_recs_[i].dst = dst.addr; + + // Read from the file into destination page. + if (!ReadPageFromFile(fd, &dst)) { + PutEmptyPage(&dst); + return false; + } + + SectorValidatePage(page_recs_[i], &dst, i); + + // Ensure that the transfer ended up with correct data. + if (strict) { + // Record page index currently CRC checked. + crc_page_ = i; + int errors = CrcCheckPage(&dst); + if (errors) { + logprintf(5, "Log: file miscompare at block %d, " + "offset %x-%x. File: %s\n", + i, i * page_length, ((i + 1) * page_length) - 1, + filename_.c_str()); + result = false; + } + crc_page_ = -1; + errorcount_ += errors; + } + if (!PutValidPage(&dst)) + return false; + } + return result; +} + + +// File IO work loop. Execute until marked done. +int FileThread::Work() { + int result = 1; + int fileresult = 1; + int64 loops = 0; + + logprintf(9, "Log: Starting file thread %d, file %s, device %s\n", + thread_num_, + filename_.c_str(), + devicename_.c_str()); + + if (!PagePrepare()) + return 0; + + // Open the data IO file. + int fd = 0; + if (!OpenFile(&fd)) + return 0; + + pass_ = 0; + + // Load patterns into page records. + page_recs_ = new struct PageRec[sat_->disk_pages()]; + for (int i = 0; i < sat_->disk_pages(); i++) { + page_recs_[i].pattern = new struct Pattern(); + } + + // Loop until done. + while (IsReadyToRun()) { + // Do the file write. + if (!(fileresult &= WritePages(fd))) + break; + + // Do the file read. + if (!(fileresult &= ReadPages(fd))) + break; + + loops++; + pass_ = loops; + } + + pages_copied_ = loops * sat_->disk_pages(); + status_ = result; + + // Clean up. 
+ CloseFile(fd); + PageTeardown(); + + logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n", + thread_num_, status_, pages_copied_); + return 1; +} + +bool NetworkThread::IsNetworkStopSet() { + return !IsReadyToRunNoPause(); +} + +bool NetworkSlaveThread::IsNetworkStopSet() { + // This thread has no completion status. + // It finishes whever there is no more data to be + // passed back. + return true; +} + +// Set ip name to use for Network IO. +void NetworkThread::SetIP(const char *ipaddr_init) { + strncpy(ipaddr_, ipaddr_init, 256); +} + +// Create a socket. +// Return 0 on error. +bool NetworkThread::CreateSocket(int *psocket) { + int sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock == -1) { + logprintf(0, "Process Error: Cannot open socket\n"); + pages_copied_ = 0; + status_ = 0; + return false; + } + *psocket = sock; + return true; +} + +// Close the socket. +bool NetworkThread::CloseSocket(int sock) { + close(sock); + return true; +} + +// Initiate the tcp connection. +bool NetworkThread::Connect(int sock) { + struct sockaddr_in dest_addr; + dest_addr.sin_family = AF_INET; + dest_addr.sin_port = htons(kNetworkPort); + memset(&(dest_addr.sin_zero), '\0', sizeof(dest_addr.sin_zero)); + + // Translate dot notation to u32. + if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) { + logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_); + pages_copied_ = 0; + status_ = 0; + return false; + } + + if (-1 == connect(sock, reinterpret_cast(&dest_addr), + sizeof(struct sockaddr))) { + logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_); + pages_copied_ = 0; + status_ = 0; + return false; + } + return true; +} + +// Initiate the tcp connection. +bool NetworkListenThread::Listen() { + struct sockaddr_in sa; + + memset(&(sa.sin_zero), '\0', sizeof(sa.sin_zero)); + + sa.sin_family = AF_INET; + sa.sin_addr.s_addr = INADDR_ANY; + sa.sin_port = htons(kNetworkPort); + + if (-1 == bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) { + char buf[256]; + sat_strerror(errno, buf, sizeof(buf)); + logprintf(0, "Process Error: Cannot bind socket: %s\n", buf); + pages_copied_ = 0; + status_ = 0; + return false; + } + listen(sock_, 3); + return true; +} + +// Wait for a connection from a network traffic generation thread. +bool NetworkListenThread::Wait() { + fd_set rfds; + struct timeval tv; + int retval; + + // Watch sock_ to see when it has input. + FD_ZERO(&rfds); + FD_SET(sock_, &rfds); + // Wait up to five seconds. + tv.tv_sec = 5; + tv.tv_usec = 0; + + retval = select(sock_ + 1, &rfds, NULL, NULL, &tv); + + return (retval > 0); +} + +// Wait for a connection from a network traffic generation thread. +bool NetworkListenThread::GetConnection(int *pnewsock) { + struct sockaddr_in sa; + socklen_t size = sizeof(struct sockaddr_in); + + int newsock = accept(sock_, reinterpret_cast(&sa), &size); + if (newsock < 0) { + logprintf(0, "Process Error: Did not receive connection\n"); + pages_copied_ = 0; + status_ = 0; + return false; + } + *pnewsock = newsock; + return true; +} + +bool NetworkThread::SendPage(int sock, struct page_entry *src) { + int page_length = sat_->page_length(); + char *address = static_cast(src->addr); + + // Send our data over the network. 
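+  // send() on a stream socket may accept fewer bytes than requested, so
+  // loop until the whole page has been handed to the kernel, advancing
+  // the buffer offset by the amount already transferred.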
+ int size = page_length; + while (size) { + int transferred = send(sock, address + (page_length - size), size, 0); + if ((transferred == 0) || (transferred == -1)) { + if (!IsNetworkStopSet()) { + char buf[256] = ""; + sat_strerror(errno, buf, sizeof(buf)); + logprintf(0, "Process Error: Thread %d, " + "Network write failed, bailing. (%s)\n", + thread_num_, buf); + } + return false; + } + size = size - transferred; + } + return true; +} + + +bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) { + int page_length = sat_->page_length(); + char *address = static_cast(dst->addr); + + // Maybe we will get our data back again, maybe not. + int size = page_length; + while (size) { + int transferred = recv(sock, address + (page_length - size), size, 0); + if ((transferred == 0) || (transferred == -1)) { + // Typically network slave thread should exit as network master + // thread stops sending data. + if (IsNetworkStopSet()) { + int err = errno; + if (transferred == 0 && err == 0) { + // Two system setups will not sync exactly, + // allow early exit, but log it. + logprintf(0, "Log: Net thread did not recieve any data, exitting.\n"); + } else { + char buf[256] = ""; + sat_strerror(err, buf, sizeof(buf)); + // Print why we failed. + logprintf(0, "Process Error: Thread %d, " + "Network read failed, bailing (%s).\n", + thread_num_, buf); + // Print arguments and results. + logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n", + sock, address + (page_length - size), + size, transferred, err); + if ((transferred == 0) && + (page_length - size < 512) && + (page_length - size > 0)) { + // Print null terminated data received, to see who's been + // sending us supicious unwanted data. + address[page_length - size] = 0; + logprintf(0, "Log: received %d bytes: '%s'\n", + page_length - size, address); + } + } + } + return false; + } + size = size - transferred; + } + return true; +} + + +// Network IO work loop. Execute until marked done. +int NetworkThread::Work() { + logprintf(9, "Log: Starting network thread %d, ip %s\n", + thread_num_, + ipaddr_); + + // Make a socket. + int sock = 0; + if (!CreateSocket(&sock)) + return 0; + + // Network IO loop requires network slave thread to have already initialized. + // We will sleep here for awhile to ensure that the slave thread will be + // listening by the time we connect. + // Sleep for 15 seconds. + sat_sleep(15); + logprintf(9, "Log: Starting execution of network thread %d, ip %s\n", + thread_num_, + ipaddr_); + + + // Connect to a slave thread. + if (!Connect(sock)) + return 0; + + // Loop until done. + int result = 1; + int strict = sat_->strict(); + int64 loops = 0; + while (IsReadyToRun()) { + struct page_entry src; + struct page_entry dst; + result &= sat_->GetValid(&src); + result &= sat_->GetEmpty(&dst); + if (!result) { + logprintf(0, "Process Error: net_thread failed to pop pages, " + "bailing\n"); + break; + } + + // Check data correctness. + if (strict) + CrcCheckPage(&src); + + // Do the network write. + if (!(result &= SendPage(sock, &src))) + break; + + // Update pattern reference to reflect new contents. + dst.pattern = src.pattern; + + // Do the network read. + if (!(result &= ReceivePage(sock, &dst))) + break; + + // Ensure that the transfer ended up with correct data. + if (strict) + CrcCheckPage(&dst); + + // Return all of our pages to the queue. 
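+    // The received copy (dst) goes back on the valid queue with the
+    // pattern reference set above, while the page that was sent (src)
+    // is recycled as an empty page.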
+ result &= sat_->PutValid(&dst); + result &= sat_->PutEmpty(&src); + if (!result) { + logprintf(0, "Process Error: net_thread failed to push pages, " + "bailing\n"); + break; + } + loops++; + } + + pages_copied_ = loops; + status_ = result; + + // Clean up. + CloseSocket(sock); + + logprintf(9, "Log: Completed %d: network thread status %d, " + "%d pages copied\n", + thread_num_, status_, pages_copied_); + return 1; +} + +// Spawn slave threads for incoming connections. +bool NetworkListenThread::SpawnSlave(int newsock, int threadid) { + logprintf(12, "Log: Listen thread spawning slave\n"); + + // Spawn slave thread, to reflect network traffic back to sender. + ChildWorker *child_worker = new ChildWorker; + child_worker->thread.SetSock(newsock); + child_worker->thread.InitThread(threadid, sat_, os_, patternlist_, + &child_worker->status); + child_worker->status.Initialize(); + child_worker->thread.SpawnThread(); + child_workers_.push_back(child_worker); + + return true; +} + +// Reap slave threads. +bool NetworkListenThread::ReapSlaves() { + bool result = true; + // Gather status and reap threads. + logprintf(12, "Log: Joining all outstanding threads\n"); + + for (int i = 0; i < child_workers_.size(); i++) { + NetworkSlaveThread& child_thread = child_workers_[i]->thread; + logprintf(12, "Log: Joining slave thread %d\n", i); + child_thread.JoinThread(); + if (child_thread.GetStatus() != 1) { + logprintf(0, "Process Error: Slave Thread %d failed with status %d\n", i, + child_thread.GetStatus()); + result = false; + } + errorcount_ += child_thread.GetErrorCount(); + logprintf(9, "Log: Slave Thread %d found %lld miscompares\n", i, + child_thread.GetErrorCount()); + pages_copied_ += child_thread.GetPageCount(); + } + + return result; +} + +// Network listener IO work loop. Execute until marked done. +int NetworkListenThread::Work() { + int result = 1; + logprintf(9, "Log: Starting network listen thread %d\n", + thread_num_); + + // Make a socket. + sock_ = 0; + if (!CreateSocket(&sock_)) + return 0; + logprintf(9, "Log: Listen thread created sock\n"); + + // Allows incoming connections to be queued up by socket library. + int newsock = 0; + Listen(); + logprintf(12, "Log: Listen thread waiting for incoming connections\n"); + + // Wait on incoming connections, and spawn worker threads for them. + int threadcount = 0; + while (IsReadyToRun()) { + // Poll for connections that we can accept(). + if (Wait()) { + // Accept those connections. + logprintf(12, "Log: Listen thread found incoming connection\n"); + if (GetConnection(&newsock)) { + SpawnSlave(newsock, threadcount); + threadcount++; + } + } + } + + // Gather status and join spawned threads. + ReapSlaves(); + + // Delete the child workers. + for (ChildVector::iterator it = child_workers_.begin(); + it != child_workers_.end(); ++it) { + (*it)->status.Destroy(); + delete *it; + } + child_workers_.clear(); + + CloseSocket(sock_); + + status_ = result; + logprintf(9, + "Log: Completed %d: network listen thread status %d, " + "%d pages copied\n", + thread_num_, status_, pages_copied_); + return 1; +} + +// Set network reflector socket struct. +void NetworkSlaveThread::SetSock(int sock) { + sock_ = sock; +} + +// Network reflector IO work loop. Execute until marked done. +int NetworkSlaveThread::Work() { + logprintf(9, "Log: Starting network slave thread %d\n", + thread_num_); + + // Verify that we have a socket. + int sock = sock_; + if (!sock) + return 0; + + // Loop until done. 
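+  // The slave is a pure reflector: every page received is sent straight
+  // back through a single sector-aligned bounce buffer, and it is the
+  // master thread's CRC check that decides whether the round trip
+  // corrupted anything.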
+ int result = 1; + int64 loops = 0; + // Init a local buffer for storing data. + void *local_page = static_cast(memalign(512, sat_->page_length())); + if (!local_page) { + logprintf(0, "Process Error: Net Slave thread memalign returned 0\n"); + status_ += 1; + return 0; + } + + struct page_entry page; + page.addr = local_page; + + // This thread will continue to run as long as the thread on the other end of + // the socket is still sending and receiving data. + while (1) { + // Do the network read. + if (!ReceivePage(sock, &page)) + break; + + // Do the network write. + if (!SendPage(sock, &page)) + break; + + loops++; + } + + pages_copied_ = loops; + status_ = result; + + // Clean up. + CloseSocket(sock); + + logprintf(9, + "Log: Completed %d: network slave thread status %d, " + "%d pages copied\n", + thread_num_, status_, pages_copied_); + return result; +} + +// Thread work loop. Execute until marked finished. +int ErrorPollThread::Work() { + logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_); + + // This calls a generic error polling function in the Os abstraction layer. + do { + errorcount_ += os_->ErrorPoll(); + os_->ErrorWait(); + } while (IsReadyToRun()); + + logprintf(9, "Log: Finished system error poll thread %d: %d errors\n", + thread_num_, errorcount_); + status_ = 1; + return 1; +} + +// Worker thread to heat up CPU. +int CpuStressThread::Work() { + logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_); + + do { + // Run ludloff's platform/CPU-specific assembly workload. + os_->CpuStressWorkload(); + YieldSelf(); + } while (IsReadyToRun()); + + logprintf(9, "Log: Finished CPU stress thread %d:\n", + thread_num_); + status_ = 1; + return 1; +} + +CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, + int cacheline_count, + int thread_num, + int inc_count) { + cc_cacheline_data_ = data; + cc_cacheline_count_ = cacheline_count; + cc_thread_num_ = thread_num; + cc_inc_count_ = inc_count; +} + +// Worked thread to test the cache coherency of the CPUs +int CpuCacheCoherencyThread::Work() { + logprintf(9, "Log: Starting the Cache Coherency thread %d\n", + cc_thread_num_); + uint64 time_start, time_end; + struct timeval tv; + + unsigned int seed = static_cast(gettid()); + gettimeofday(&tv, NULL); // Get the timestamp before increments. + time_start = tv.tv_sec * 1000000ULL + tv.tv_usec; + + uint64 total_inc = 0; // Total increments done by the thread. + while (IsReadyToRun()) { + for (int i = 0; i < cc_inc_count_; i++) { + // Choose a datastructure in random and increment the appropriate + // member in that according to the offset (which is the same as the + // thread number. + int r = rand_r(&seed); + r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0)); + // Increment the member of the randomely selected structure. + (cc_cacheline_data_[r].num[cc_thread_num_])++; + } + + total_inc += cc_inc_count_; + + // Calculate if the local counter matches with the global value + // in all the cache line structures for this particular thread. + int cc_global_num = 0; + for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) { + cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_]; + // Reset the cachline member's value for the next run. 
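+      // Zeroing the slot after folding it into cc_global_num means every
+      // pass must account for exactly cc_inc_count_ increments; any
+      // shortfall indicates an update lost to a coherency problem.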
+ cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0; + } + if (sat_->error_injection()) + cc_global_num = -1; + + if (cc_global_num != cc_inc_count_) { + errorcount_++; + logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n", + cc_global_num, cc_inc_count_); + } + } + gettimeofday(&tv, NULL); // Get the timestamp at the end. + time_end = tv.tv_sec * 1000000ULL + tv.tv_usec; + + uint64 us_elapsed = time_end - time_start; + // inc_rate is the no. of increments per second. + double inc_rate = total_inc * 1e6 / us_elapsed; + + logprintf(4, "Stats: CC Thread(%d): Time=%llu us," + " Increments=%llu, Increments/sec = %.6lf\n", + cc_thread_num_, us_elapsed, total_inc, inc_rate); + logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n", + cc_thread_num_); + status_ = 1; + return 1; +} + +DiskThread::DiskThread(DiskBlockTable *block_table) { + read_block_size_ = kSectorSize; // default 1 sector (512 bytes) + write_block_size_ = kSectorSize; // this assumes read and write block size + // are the same + segment_size_ = -1; // use the entire disk as one segment + cache_size_ = 16 * 1024 * 1024; // assume 16MiB cache by default + // Use a queue such that 3/2 times as much data as the cache can hold + // is written before it is read so that there is little chance the read + // data is in the cache. + queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2; + blocks_per_segment_ = 32; + + read_threshold_ = 100000; // 100ms is a reasonable limit for + write_threshold_ = 100000; // reading/writing a sector + + read_timeout_ = 5000000; // 5 seconds should be long enough for a + write_timeout_ = 5000000; // timout for reading/writing + + device_sectors_ = 0; + non_destructive_ = 0; + + aio_ctx_ = 0; + block_table_ = block_table; + update_block_table_ = 1; +} + +DiskThread::~DiskThread() { +} + +// Set filename for device file (in /dev). +void DiskThread::SetDevice(const char *device_name) { + device_name_ = device_name; +} + +// Set various parameters that control the behaviour of the test. +// -1 is used as a sentinel value on each parameter (except non_destructive) +// to indicate that the parameter not be set. +bool DiskThread::SetParameters(int read_block_size, + int write_block_size, + int64 segment_size, + int64 cache_size, + int blocks_per_segment, + int64 read_threshold, + int64 write_threshold, + int non_destructive) { + if (read_block_size != -1) { + // Blocks must be aligned to the disk's sector size. + if (read_block_size % kSectorSize != 0) { + logprintf(0, "Process Error: Block size must be a multiple of %d " + "(thread %d).\n", kSectorSize, thread_num_); + return false; + } + + read_block_size_ = read_block_size; + } + + if (write_block_size != -1) { + // Write blocks must be aligned to the disk's sector size and to the + // block size. + if (write_block_size % kSectorSize != 0) { + logprintf(0, "Process Error: Write block size must be a multiple " + "of %d (thread %d).\n", kSectorSize, thread_num_); + return false; + } + if (write_block_size % read_block_size_ != 0) { + logprintf(0, "Process Error: Write block size must be a multiple " + "of the read block size, which is %d (thread %d).\n", + read_block_size_, thread_num_); + return false; + } + + write_block_size_ = write_block_size; + + } else { + // Make sure write_block_size_ is still valid. 
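+    // No explicit write block size was given: fall back to the read
+    // block size if the current write size is smaller, otherwise just
+    // confirm the existing write size is still a multiple of the
+    // (possibly updated) read block size.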
+ if (read_block_size_ > write_block_size_) { + logprintf(5, "Log: Assuming write block size equal to read block size, " + "which is %d (thread %d).\n", read_block_size_, + thread_num_); + write_block_size_ = read_block_size_; + } else { + if (write_block_size_ % read_block_size_ != 0) { + logprintf(0, "Process Error: Write block size (defined as %d) must " + "be a multiple of the read block size, which is %d " + "(thread %d).\n", write_block_size_, read_block_size_, + thread_num_); + return false; + } + } + } + + if (cache_size != -1) { + cache_size_ = cache_size; + } + + if (blocks_per_segment != -1) { + if (blocks_per_segment <= 0) { + logprintf(0, "Process Error: Blocks per segment must be greater than " + "zero.\n (thread %d)", thread_num_); + return false; + } + + blocks_per_segment_ = blocks_per_segment; + } + + if (read_threshold != -1) { + if (read_threshold <= 0) { + logprintf(0, "Process Error: Read threshold must be greater than " + "zero (thread %d).\n", thread_num_); + return false; + } + + read_threshold_ = read_threshold; + } + + if (write_threshold != -1) { + if (write_threshold <= 0) { + logprintf(0, "Process Error: Write threshold must be greater than " + "zero (thread %d).\n", thread_num_); + return false; + } + + write_threshold_ = write_threshold; + } + + if (segment_size != -1) { + // Segments must be aligned to the disk's sector size. + if (segment_size % kSectorSize != 0) { + logprintf(0, "Process Error: Segment size must be a multiple of %d" + " (thread %d).\n", kSectorSize, thread_num_); + return false; + } + + segment_size_ = segment_size / kSectorSize; + } + + non_destructive_ = non_destructive; + + // Having a queue of 150% of blocks that will fit in the disk's cache + // should be enough to force out the oldest block before it is read and hence, + // making sure the data comes form the disk and not the cache. + queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2; + // Updating DiskBlockTable parameters + if (update_block_table_) { + block_table_->SetParameters(kSectorSize, write_block_size_, + device_sectors_, segment_size_, + device_name_); + } + return true; +} + +bool DiskThread::OpenDevice(int *pfile) { + int fd = open(device_name_.c_str(), + O_RDWR | O_SYNC | O_DIRECT | O_LARGEFILE, + 0); + if (fd < 0) { + logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n", + device_name_.c_str(), thread_num_); + return false; + } + *pfile = fd; + + return GetDiskSize(fd); +} + +// Retrieves the size (in bytes) of the disk/file. +bool DiskThread::GetDiskSize(int fd) { + struct stat device_stat; + if (fstat(fd, &device_stat) == -1) { + logprintf(0, "Process Error: Unable to fstat disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + return false; + } + + // For a block device, an ioctl is needed to get the size since the size + // of the device file (i.e. /dev/sdb) is 0. + if (S_ISBLK(device_stat.st_mode)) { + uint64 block_size = 0; + + if (ioctl(fd, BLKGETSIZE64, &block_size) == -1) { + logprintf(0, "Process Error: Unable to ioctl disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + return false; + } + + // If an Elephant is initialized with status DEAD its size will be zero. + if (block_size == 0) { + os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1); + ++errorcount_; + status_ = 1; // Avoid a procedural error. 
+ return false; + } + + device_sectors_ = block_size / kSectorSize; + + } else if (S_ISREG(device_stat.st_mode)) { + device_sectors_ = device_stat.st_size / kSectorSize; + + } else { + logprintf(0, "Process Error: %s is not a regular file or block " + "device (thread %d).\n", device_name_.c_str(), + thread_num_); + return false; + } + + logprintf(12, "Log: Device sectors: %lld on disk %s (thread %d).\n", + device_sectors_, device_name_.c_str(), thread_num_); + + if (update_block_table_) { + block_table_->SetParameters(kSectorSize, write_block_size_, + device_sectors_, segment_size_, + device_name_); + } + + return true; +} + +bool DiskThread::CloseDevice(int fd) { + close(fd); + return true; +} + +// Return the time in microseconds. +int64 DiskThread::GetTime() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000 + tv.tv_usec; +} + +bool DiskThread::DoWork(int fd) { + int64 block_num = 0; + blocks_written_ = 0; + blocks_read_ = 0; + int64 num_segments; + + if (segment_size_ == -1) { + num_segments = 1; + } else { + num_segments = device_sectors_ / segment_size_; + if (device_sectors_ % segment_size_ != 0) + num_segments++; + } + + // Disk size should be at least 3x cache size. See comment later for + // details. + sat_assert(device_sectors_ * kSectorSize > 3 * cache_size_); + + // This disk test works by writing blocks with a certain pattern to + // disk, then reading them back and verifying it against the pattern + // at a later time. A failure happens when either the block cannot + // be written/read or when the read block is different than what was + // written. If a block takes too long to write/read, then a warning + // is given instead of an error since taking too long is not + // necessarily an error. + // + // To prevent the read blocks from coming from the disk cache, + // enough blocks are written before read such that a block would + // be ejected from the disk cache by the time it is read. + // + // TODO(amistry): Implement some sort of read/write throttling. The + // flood of asynchronous I/O requests when a drive is + // unplugged is causing the application and kernel to + // become unresponsive. + + while (IsReadyToRun()) { + // Write blocks to disk. + logprintf(16, "Write phase for disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + while (IsReadyToRunNoPause() && + in_flight_sectors_.size() < queue_size_ + 1) { + // Confine testing to a particular segment of the disk. + int64 segment = (block_num / blocks_per_segment_) % num_segments; + if (block_num % blocks_per_segment_ == 0) { + logprintf(20, "Log: Starting to write segment %lld out of " + "%lld on disk %s (thread %d).\n", + segment, num_segments, device_name_.c_str(), + thread_num_); + } + block_num++; + + BlockData *block = block_table_->GetUnusedBlock(segment); + + // If an unused sequence of sectors could not be found, skip to the + // next block to process. Soon, a new segment will come and new + // sectors will be able to be allocated. This effectively puts a + // minumim on the disk size at 3x the stated cache size, or 48MiB + // if a cache size is not given (since the cache is set as 16MiB + // by default). Given that todays caches are at the low MiB range + // and drive sizes at the mid GB, this shouldn't pose a problem. + // The 3x minimum comes from the following: + // 1. In order to allocate 'y' blocks from a segment, the + // segment must contain at least 2y blocks or else an + // allocation may not succeed. + // 2. Assume the entire disk is one segment. + // 3. 
A full write phase consists of writing blocks corresponding to + // 3/2 cache size. + // 4. Therefore, the one segment must have 2 * 3/2 * cache + // size worth of blocks = 3 * cache size worth of blocks + // to complete. + // In non-destructive mode, don't write anything to disk. + if (!non_destructive_) { + if (!WriteBlockToDisk(fd, block)) { + block_table_->RemoveBlock(block); + continue; + } + } + + block->SetBlockAsInitialized(); + + blocks_written_++; + in_flight_sectors_.push(block); + } + + // Verify blocks on disk. + logprintf(20, "Read phase for disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) { + BlockData *block = in_flight_sectors_.front(); + in_flight_sectors_.pop(); + ValidateBlockOnDisk(fd, block); + block_table_->RemoveBlock(block); + blocks_read_++; + } + } + + pages_copied_ = blocks_written_ + blocks_read_; + return true; +} + +// Do an asynchronous disk I/O operation. +bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, + int64 offset, int64 timeout) { + // Use the Linux native asynchronous I/O interface for reading/writing. + // A read/write consists of three basic steps: + // 1. create an io context. + // 2. prepare and submit an io request to the context + // 3. wait for an event on the context. + + struct { + const int opcode; + const char *op_str; + const char *error_str; + } operations[2] = { + { IOCB_CMD_PREAD, "read", "disk-read-error" }, + { IOCB_CMD_PWRITE, "write", "disk-write-error" } + }; + + struct iocb cb; + memset(&cb, 0, sizeof(cb)); + + cb.aio_fildes = fd; + cb.aio_lio_opcode = operations[op].opcode; + cb.aio_buf = (__u64)buf; + cb.aio_nbytes = size; + cb.aio_offset = offset; + + struct iocb *cbs[] = { &cb }; + if (io_submit(aio_ctx_, 1, cbs) != 1) { + logprintf(0, "Process Error: Unable to submit async %s " + "on disk %s (thread %d).\n", + operations[op].op_str, device_name_.c_str(), + thread_num_); + return false; + } + + struct io_event event; + memset(&event, 0, sizeof(event)); + struct timespec tv; + tv.tv_sec = timeout / 1000000; + tv.tv_nsec = (timeout % 1000000) * 1000; + if (io_getevents(aio_ctx_, 1, 1, &event, &tv) != 1) { + // A ctrl-c from the keyboard will cause io_getevents to fail with an + // EINTR error code. This is not an error and so don't treat it as such, + // but still log it. + if (errno == EINTR) { + logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n", + operations[op].op_str, device_name_.c_str(), + thread_num_); + } else { + os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); + errorcount_ += 1; + logprintf(0, "Hardware Error: Timeout doing async %s to sectors " + "starting at %lld on disk %s (thread %d).\n", + operations[op].op_str, offset / kSectorSize, + device_name_.c_str(), thread_num_); + } + + // Don't bother checking return codes since io_cancel seems to always fail. + // Since io_cancel is always failing, destroying and recreating an I/O + // context is a workaround for canceling an in-progress I/O operation. + // TODO(amistry): Find out why io_cancel isn't working and make it work. + io_cancel(aio_ctx_, &cb, &event); + io_destroy(aio_ctx_); + aio_ctx_ = 0; + if (io_setup(5, &aio_ctx_)) { + logprintf(0, "Process Error: Unable to create aio context on disk %s" + " (thread %d).\n", + device_name_.c_str(), thread_num_); + } + + return false; + } + + // event.res contains the number of bytes written/read or + // error if < 0, I think. 
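// The size check that follows keys off event.res. With the raw Linux AIO
// syscalls used here, event.res holds the operation's return value: the number
// of bytes actually transferred on success, or a negative errno value (such as
// -EIO) on failure. A minimal decoding sketch assuming that convention (the
// log text is illustrative, not part of the implementation):
//
//   int64 res = static_cast<int64>(event.res);
//   if (res < 0) {
//     char errtxt[256] = "";
//     sat_strerror(static_cast<int>(-res), errtxt, sizeof(errtxt));
//     logprintf(0, "Log: async %s returned errno %lld (%s).\n",
//               operations[op].op_str, -res, errtxt);
//   } else if (res != size) {
//     logprintf(0, "Log: short async %s: %lld of %lld bytes transferred.\n",
//               operations[op].op_str, res, size);
//   }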
+ if (event.res != size) { + errorcount_++; + os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); + + if (event.res < 0) { + switch (event.res) { + case -EIO: + logprintf(0, "Hardware Error: Low-level I/O error while doing %s to " + "sectors starting at %lld on disk %s (thread %d).\n", + operations[op].op_str, offset / kSectorSize, + device_name_.c_str(), thread_num_); + break; + default: + logprintf(0, "Hardware Error: Unknown error while doing %s to " + "sectors starting at %lld on disk %s (thread %d).\n", + operations[op].op_str, offset / kSectorSize, + device_name_.c_str(), thread_num_); + } + } else { + logprintf(0, "Hardware Error: Unable to %s to sectors starting at " + "%lld on disk %s (thread %d).\n", + operations[op].op_str, offset / kSectorSize, + device_name_.c_str(), thread_num_); + } + return false; + } + + return true; +} + +// Write a block to disk. +bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { + memset(block_buffer_, 0, block->GetSize()); + + // Fill block buffer with a pattern + struct page_entry pe; + if (!sat_->GetValid(&pe)) { + // Even though a valid page could not be obatined, it is not an error + // since we can always fill in a pattern directly, albeit slower. + unsigned int *memblock = static_cast(block_buffer_); + block->SetPattern(patternlist_->GetRandomPattern()); + + logprintf(11, "Log: Warning, using pattern fill fallback in " + "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + + for (int i = 0; i < block->GetSize()/wordsize_; i++) { + memblock[i] = block->GetPattern()->pattern(i); + } + } else { + memcpy(block_buffer_, pe.addr, block->GetSize()); + block->SetPattern(pe.pattern); + sat_->PutValid(&pe); + } + + logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s" + " (thread %d).\n", + block->GetSize()/kSectorSize, block->GetAddress(), + device_name_.c_str(), thread_num_); + + int64 start_time = GetTime(); + + if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(), + block->GetAddress() * kSectorSize, write_timeout_)) { + return false; + } + + int64 end_time = GetTime(); + logprintf(12, "Log: Writing time: %lld us (thread %d).\n", + end_time - start_time, thread_num_); + if (end_time - start_time > write_threshold_) { + logprintf(5, "Log: Write took %lld us which is longer than threshold " + "%lld us on disk %s (thread %d).\n", + end_time - start_time, write_threshold_, device_name_.c_str(), + thread_num_); + } + + return true; +} + +// Verify a block on disk. +bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { + int64 blocks = block->GetSize() / read_block_size_; + int64 bytes_read = 0; + int64 current_blocks; + int64 current_bytes; + uint64 address = block->GetAddress(); + + logprintf(20, "Log: Reading sectors starting at %lld on disk %s " + "(thread %d).\n", + address, device_name_.c_str(), thread_num_); + + // Read block from disk and time the read. If it takes longer than the + // threshold, complain. + if (lseek(fd, address * kSectorSize, SEEK_SET) == -1) { + logprintf(0, "Process Error: Unable to seek to sector %lld in " + "DiskThread::ValidateSectorsOnDisk on disk %s " + "(thread %d).\n", address, device_name_.c_str(), thread_num_); + return false; + } + int64 start_time = GetTime(); + + // Split a large write-sized block into small read-sized blocks and + // read them in groups of randomly-sized multiples of read block size. 
+ // This assures all data written on disk by this particular block + // will be tested using a random reading pattern. + + while (blocks != 0) { + // Test all read blocks in a written block. + current_blocks = (random() % blocks) + 1; + current_bytes = current_blocks * read_block_size_; + + memset(block_buffer_, 0, current_bytes); + + logprintf(20, "Log: Reading %lld sectors starting at sector %lld on " + "disk %s (thread %d)\n", + current_bytes / kSectorSize, + (address * kSectorSize + bytes_read) / kSectorSize, + device_name_.c_str(), thread_num_); + + if (!AsyncDiskIO(ASYNC_IO_READ, fd, block_buffer_, current_bytes, + address * kSectorSize + bytes_read, + write_timeout_)) { + return false; + } + + int64 end_time = GetTime(); + logprintf(20, "Log: Reading time: %lld us (thread %d).\n", + end_time - start_time, thread_num_); + if (end_time - start_time > read_threshold_) { + logprintf(5, "Log: Read took %lld us which is longer than threshold " + "%lld us on disk %s (thread %d).\n", + end_time - start_time, read_threshold_, + device_name_.c_str(), thread_num_); + } + + // In non-destructive mode, don't compare the block to the pattern since + // the block was never written to disk in the first place. + if (!non_destructive_) { + if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes, + 0, bytes_read)) { + os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1); + errorcount_ += 1; + logprintf(0, "Hardware Error: Pattern mismatch in block starting at " + "sector %lld in DiskThread::ValidateSectorsOnDisk on " + "disk %s (thread %d).\n", + address, device_name_.c_str(), thread_num_); + } + } + + bytes_read += current_blocks * read_block_size_; + blocks -= current_blocks; + } + + return true; +} + +int DiskThread::Work() { + int fd; + + logprintf(9, "Log: Starting disk thread %d, disk %s\n", + thread_num_, device_name_.c_str()); + + srandom(time(NULL)); + + if (!OpenDevice(&fd)) { + return 0; + } + + // Allocate a block buffer aligned to 512 bytes since the kernel requires it + // when using direst IO. 
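// O_DIRECT requires the user buffer to be aligned, and 512 bytes matches the
// sector size used throughout this thread. A minimal sketch of the same
// allocation done with posix_memalign, the interface the later "standardize on
// posix_memalign" change in this series switches to (the error text is
// illustrative):
//
//   void *buf = NULL;
//   int err = posix_memalign(&buf, kBufferAlignment, write_block_size_);
//   if (err != 0) {  // Returns an errno value directly, not -1 plus errno.
//     logprintf(0, "Process Error: posix_memalign returned %d (thread %d).\n",
//               err, thread_num_);
//     return 0;
//   }
//   block_buffer_ = buf;  // Released with free() once the work loop is done.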
+ block_buffer_ = memalign(kBufferAlignment, write_block_size_); + if (block_buffer_ == NULL) { + CloseDevice(fd); + logprintf(0, "Process Error: Unable to allocate memory for buffers " + "for disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + return 0; + } + + if (io_setup(5, &aio_ctx_)) { + logprintf(0, "Process Error: Unable to create aio context for disk %s" + " (thread %d).\n", + device_name_.c_str(), thread_num_); + return 0; + } + + DoWork(fd); + + status_ = 1; + + io_destroy(aio_ctx_); + CloseDevice(fd); + free(block_buffer_); + + logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, " + "%d pages copied\n", + thread_num_, device_name_.c_str(), status_, pages_copied_); + return 1; +} + +RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table) + : DiskThread(block_table) { + update_block_table_ = 0; +} + +RandomDiskThread::~RandomDiskThread() { +} + +bool RandomDiskThread::DoWork(int fd) { + blocks_read_ = 0; + blocks_written_ = 0; + logprintf(11, "Random phase for disk %s (thread %d).\n", + device_name_.c_str(), thread_num_); + while (IsReadyToRun()) { + BlockData *block = block_table_->GetRandomBlock(); + if (block == NULL) { + logprintf(12, "No block available for device %s (thread %d).\n", + device_name_.c_str(), thread_num_); + } else { + ValidateBlockOnDisk(fd, block); + block_table_->ReleaseBlock(block); + blocks_read_++; + } + } + pages_copied_ = blocks_read_; + return true; +} + +MemoryRegionThread::MemoryRegionThread() { + error_injection_ = false; + pages_ = NULL; +} + +MemoryRegionThread::~MemoryRegionThread() { + if (pages_ != NULL) + delete pages_; +} + +bool MemoryRegionThread::SetRegion(void *region, int64 size) { + int plength = sat_->page_length(); + int npages = size / plength; + if (size % plength) { + logprintf(0, "Process Error: region size is not a multiple of SAT " + "page length\n"); + return false; + } else { + if (pages_ != NULL) + delete pages_; + pages_ = new PageEntryQueue(npages); + char *base_addr = reinterpret_cast(region); + region_ = base_addr; + for (int i = 0; i < npages; i++) { + struct page_entry pe; + init_pe(&pe); + pe.addr = reinterpret_cast(base_addr + i * plength); + pe.offset = i * plength; + + pages_->Push(&pe); + } + return true; + } +} + +void MemoryRegionThread::ProcessError(struct ErrorRecord *error, + int priority, + const char *message) { + uint32 buffer_offset; + if (phase_ == kPhaseCopy) { + // If the error occurred on the Copy Phase, it means that + // the source data (i.e., the main memory) is wrong. so + // just pass it to the original ProcessError to call a + // bad-dimm error + WorkerThread::ProcessError(error, priority, message); + } else if (phase_ == kPhaseCheck) { + // A error on the Check Phase means that the memory region tested + // has an error. Gathering more information and then reporting + // the error. + // Determine if this is a write or read error. + os_->Flush(error->vaddr); + error->reread = *(error->vaddr); + char *good = reinterpret_cast(&(error->expected)); + char *bad = reinterpret_cast(&(error->actual)); + sat_assert(error->expected != error->actual); + unsigned int offset = 0; + for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) { + if (good[offset] != bad[offset]) + break; + } + + error->vbyteaddr = reinterpret_cast(error->vaddr) + offset; + + buffer_offset = error->vbyteaddr - region_; + + // Find physical address if possible. 
+ error->paddr = os_->VirtualToPhysical(error->vbyteaddr); + logprintf(priority, + "%s: miscompare on %s, CRC check at %p(0x%llx), " + "offset %llx: read:0x%016llx, reread:0x%016llx " + "expected:0x%016llx\n", + message, + identifier_.c_str(), + error->vaddr, + error->paddr, + buffer_offset, + error->actual, + error->reread, + error->expected); + } else { + logprintf(0, "Process Error: memory region thread raised an " + "unexpected error."); + } +} + +int MemoryRegionThread::Work() { + struct page_entry source_pe; + struct page_entry memregion_pe; + int result = 1; + int64 loops = 0; + const uint64 error_constant = 0x00ba00000000ba00LL; + + // For error injection. + int64 *addr = 0x0; + int offset = 0; + int64 data = 0; + + logprintf(9, "Log: Starting Memory Region thread %d\n", thread_num_); + + while (IsReadyToRun()) { + // Getting pages from SAT and queue. + phase_ = kPhaseNoPhase; + result &= sat_->GetValid(&source_pe); + if (!result) { + logprintf(0, "Process Error: memory region thread failed to pop " + "pages from SAT, bailing\n"); + break; + } + + result &= pages_->PopRandom(&memregion_pe); + if (!result) { + logprintf(0, "Process Error: memory region thread failed to pop " + "pages from queue, bailing\n"); + break; + } + + // Error injection for CRC copy. + if ((sat_->error_injection() || error_injection_) && loops == 1) { + addr = reinterpret_cast(source_pe.addr); + offset = random() % (sat_->page_length() / wordsize_); + data = addr[offset]; + addr[offset] = error_constant; + } + + // Copying SAT page into memory region. + phase_ = kPhaseCopy; + CrcCopyPage(&memregion_pe, &source_pe); + memregion_pe.pattern = source_pe.pattern; + + // Error injection for CRC Check. + if ((sat_->error_injection() || error_injection_) && loops == 2) { + addr = reinterpret_cast(memregion_pe.addr); + offset = random() % (sat_->page_length() / wordsize_); + data = addr[offset]; + addr[offset] = error_constant; + } + + // Checking page content in memory region. + phase_ = kPhaseCheck; + CrcCheckPage(&memregion_pe); + + phase_ = kPhaseNoPhase; + // Storing pages on their proper queues. + result &= sat_->PutValid(&source_pe); + if (!result) { + logprintf(0, "Process Error: memory region thread failed to push " + "pages into SAT, bailing\n"); + break; + } + result &= pages_->Push(&memregion_pe); + if (!result) { + logprintf(0, "Process Error: memory region thread failed to push " + "pages into queue, bailing\n"); + break; + } + + if ((sat_->error_injection() || error_injection_) && + loops >= 1 && loops <= 2) { + addr[offset] = data; + } + + loops++; + YieldSelf(); + } + + pages_copied_ = loops; + status_ = result; + logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d " + "pages checked\n", thread_num_, status_, pages_copied_); + return 1; +} diff --git a/src/worker.h b/src/worker.h new file mode 100644 index 0000000..b85f926 --- /dev/null +++ b/src/worker.h @@ -0,0 +1,782 @@ +// Copyright 2006 Google Inc. All Rights Reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
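// The MemoryRegionThread::Work() loop in worker.cc above deliberately corrupts
// one word of a page on selected iterations and restores it after the check
// phase, proving that the CRC checking path really fires. A minimal sketch of
// that inject/restore pattern, reusing the same constant; the helper name and
// signature are illustrative:
//
//   #include <stdlib.h>  // random()
//
//   static const int64 kInjectConstant = 0x00ba00000000ba00LL;
//
//   // Corrupt one random word in 'page' of 'words' elements. The caller
//   // writes *saved back at the returned offset after the check has run.
//   static int InjectError(int64 *page, int words, int64 *saved) {
//     int offset = random() % words;
//     *saved = page[offset];
//     page[offset] = kInjectConstant;
//     return offset;
//   }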
+ +// worker.h : worker thread interface + +// This file contains the Worker Thread class interface +// for the SAT test. Worker Threads implement a repetative +// task used to test or stress the system. + +#ifndef STRESSAPPTEST_WORKER_H_ +#define STRESSAPPTEST_WORKER_H_ + +#include + +#include +#include + +#include + +#include +#include +#include +#include + +// This file must work with autoconf on its public version, +// so these includes are correct. +#include "disk_blocks.h" +#include "queue.h" +#include "sattypes.h" + + +// Global Datastruture shared by the Cache Coherency Worker Threads. +struct cc_cacheline_data { + int *num; +}; + +// Typical usage: +// (Other workflows may be possible, see function comments for details.) +// - Control thread creates object. +// - Control thread calls AddWorkers(1) for each worker thread. +// - Control thread calls Initialize(). +// - Control thread launches worker threads. +// - Every worker thread frequently calls ContinueRunning(). +// - Control thread periodically calls PauseWorkers(), effectively sleeps, and +// then calls ResumeWorkers(). +// - Some worker threads may exit early, before StopWorkers() is called. They +// call RemoveSelf() after their last call to ContinueRunning(). +// - Control thread eventually calls StopWorkers(). +// - Worker threads exit. +// - Control thread joins worker threads. +// - Control thread calls Destroy(). +// - Control thread destroys object. +// +// Threadsafety: +// - ContinueRunning() may be called concurrently by different workers, but not +// by a single worker. +// - No other methods may ever be called concurrently, with themselves or +// eachother. +// - This object may be used by multiple threads only between Initialize() and +// Destroy(). +// +// TODO(matthewb): Move this class and its unittest to their own files. +class WorkerStatus { + public: + //-------------------------------- + // Methods for the control thread. + //-------------------------------- + + WorkerStatus() : num_workers_(0), status_(RUN) {} + + // Called by the control thread to increase the worker count. Must be called + // before Initialize(). The worker count is 0 upon object initialization. + void AddWorkers(int num_new_workers) { + // No need to lock num_workers_mutex_ because this is before Initialize(). + num_workers_ += num_new_workers; + } + + // Called by the control thread. May not be called multiple times. If + // called, Destroy() must be called before destruction. + void Initialize(); + + // Called by the control thread after joining all worker threads. Must be + // called iff Initialize() was called. No methods may be called after calling + // this. + void Destroy(); + + // Called by the control thread to tell the workers to pause. Does not return + // until all workers have called ContinueRunning() or RemoveSelf(). May only + // be called between Initialize() and Stop(). Must not be called multiple + // times without ResumeWorkers() having been called inbetween. + void PauseWorkers(); + + // Called by the control thread to tell the workers to resume from a pause. + // May only be called between Initialize() and Stop(). May only be called + // directly after PauseWorkers(). + void ResumeWorkers(); + + // Called by the control thread to tell the workers to stop. May only be + // called between Initialize() and Destroy(). May only be called once. + void StopWorkers(); + + //-------------------------------- + // Methods for the worker threads. 
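// A minimal control-thread sketch of the "Typical usage" sequence described
// above, using only the methods declared in this section; the worker body,
// worker count, and pause cadence are illustrative:
//
//   WorkerStatus status;
//   status.AddWorkers(2);          // Before Initialize(), one per worker.
//   status.Initialize();
//   // ... spawn two worker threads, each of which runs:
//   //       do { /* one unit of test work */ } while (status.ContinueRunning());
//   status.PauseWorkers();         // Returns once every worker has parked.
//   // ... sample counters, sleep, etc. ...
//   status.ResumeWorkers();
//   status.StopWorkers();          // Workers fall out of their loops.
//   // ... join the worker threads ...
//   status.Destroy();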
+ //-------------------------------- + + // Called by worker threads to decrease the worker count by one. May only be + // called between Initialize() and Destroy(). May wait for ResumeWorkers() + // when called after PauseWorkers(). + void RemoveSelf(); + + // Called by worker threads between Initialize() and Destroy(). May be called + // any number of times. Return value is whether or not the worker should + // continue running. When called after PauseWorkers(), does not return until + // ResumeWorkers() or StopWorkers() has been called. Number of distinct + // calling threads must match the worker count (see AddWorkers() and + // RemoveSelf()). + bool ContinueRunning(); + + // TODO(matthewb): Is this functionality really necessary? Remove it if not. + // + // This is a hack! It's like ContinueRunning(), except it won't pause. If + // any worker threads use this exclusively in place of ContinueRunning() then + // PauseWorkers() should never be used! + bool ContinueRunningNoPause(); + + private: + enum Status { RUN, PAUSE, STOP }; + + void WaitOnPauseBarrier() { + int error = pthread_barrier_wait(&pause_barrier_); + if (error != PTHREAD_BARRIER_SERIAL_THREAD) + sat_assert(error == 0); + } + + void AcquireNumWorkersLock() { + sat_assert(0 == pthread_mutex_lock(&num_workers_mutex_)); + } + + void ReleaseNumWorkersLock() { + sat_assert(0 == pthread_mutex_unlock(&num_workers_mutex_)); + } + + void AcquireStatusReadLock() { + sat_assert(0 == pthread_rwlock_rdlock(&status_rwlock_)); + } + + void AcquireStatusWriteLock() { + sat_assert(0 == pthread_rwlock_wrlock(&status_rwlock_)); + } + + void ReleaseStatusLock() { + sat_assert(0 == pthread_rwlock_unlock(&status_rwlock_)); + } + + Status GetStatus() { + AcquireStatusReadLock(); + Status status = status_; + ReleaseStatusLock(); + return status; + } + + // Returns the previous status. + Status SetStatus(Status status) { + AcquireStatusWriteLock(); + Status prev_status = status_; + status_ = status; + ReleaseStatusLock(); + return prev_status; + } + + pthread_mutex_t num_workers_mutex_; + int num_workers_; + + pthread_rwlock_t status_rwlock_; + Status status_; + + // Guaranteed to not be in use when (status_ != PAUSE). + pthread_barrier_t pause_barrier_; + + DISALLOW_COPY_AND_ASSIGN(WorkerStatus); +}; + + +// This is a base class for worker threads. +// Each thread repeats a specific +// task on various blocks of memory. +class WorkerThread { + public: + // Enum to mark a thread as low/med/high priority. + enum Priority { + Low, + Normal, + High, + }; + WorkerThread(); + virtual ~WorkerThread(); + + // Initialize values and thread ID number. + void InitThread(int thread_num_init, + class Sat *sat_init, + class OsLayer *os_init, + class PatternList *patternlist_init, + WorkerStatus *worker_status); + + // This function is DEPRECATED, it does nothing. + void SetPriority(Priority priority) { priority_ = priority; } + // Spawn the worker thread, by running Work(). + int SpawnThread(); + // Only for ThreadSpawnerGeneric(). + void StartRoutine(); + bool InitPriority(); + + // Wait for the thread to complete its cleanup. + virtual int JoinThread(); + // Kill worker thread with SIGINT. + virtual int KillThread(); + + // This is the task function that the thread executes. + // This is implemented per subclass. + virtual int Work(); + + // Starts per-WorkerThread timer. + void StartThreadTimer() {gettimeofday(&start_time_, NULL);} + // Reads current timer value and returns run duration without recording it. 
+ int64 ReadThreadTimer() { + struct timeval end_time_; + gettimeofday(&end_time_, NULL); + return (end_time_.tv_sec - start_time_.tv_sec)*1000000 + + (end_time_.tv_usec - start_time_.tv_usec); + } + // Stops per-WorkerThread timer and records thread run duration. + // Start/Stop ThreadTimer repetitively has cumulative effect, ie the timer + // is effectively paused and restarted, so runduration_usec accumulates on. + void StopThreadTimer() { + runduration_usec_ += ReadThreadTimer(); + } + + // Acccess member variables. + int GetStatus() {return status_;} + int64 GetErrorCount() {return errorcount_;} + int64 GetPageCount() {return pages_copied_;} + int64 GetRunDurationUSec() {return runduration_usec_;} + + // Returns bandwidth defined as pages_copied / thread_run_durations. + float GetCopiedData(); + // Calculate worker thread specific copied data. + virtual float GetMemoryCopiedData() {return 0;} + virtual float GetDeviceCopiedData() {return 0;} + // Calculate worker thread specific bandwidth. + virtual float GetMemoryBandwidth() + {return GetMemoryCopiedData() / ( + runduration_usec_ * 1.0 / 1000000);} + virtual float GetDeviceBandwidth() + {return GetDeviceCopiedData() / ( + runduration_usec_ * 1.0 / 1000000);} + + void set_cpu_mask(int32 mask) {cpu_mask_ = mask;} + void set_tag(int32 tag) {tag_ = tag;} + + // Returns CPU mask, where each bit represents a logical cpu. + uint32 AvailableCpus(); + // Returns CPU mask of CPUs this thread is bound to, + uint32 CurrentCpus(); + + int ThreadID() {return thread_num_;} + + // Bind worker thread to specified CPU(s) + bool BindToCpus(uint32 thread_mask); + + protected: + // This function dictates whether the main work loop + // continues, waits, or terminates. + // All work loops should be of the form: + // do { + // // work. + // } while (IsReadyToRun()); + virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); } + // TODO(matthewb): Is this function really necessary? Remove it if not. + // + // Like IsReadyToRun(), except it won't pause. + virtual bool IsReadyToRunNoPause() { + return worker_status_->ContinueRunningNoPause(); + } + + // These are functions used by the various work loops. + // Pretty print and log a data miscompare. + virtual void ProcessError(struct ErrorRecord *er, + int priority, + const char *message); + + // Compare a region of memory with a known data patter, and report errors. + virtual int CheckRegion(void *addr, + class Pattern *pat, + int64 length, + int offset, + int64 patternoffset); + + // Fast compare a block of memory. + virtual int CrcCheckPage(struct page_entry *srcpe); + + // Fast copy a block of memory, while verifying correctness. + virtual int CrcCopyPage(struct page_entry *dstpe, + struct page_entry *srcpe); + + // Fast copy a block of memory, while verifying correctness, and heating CPU. + virtual int CrcWarmCopyPage(struct page_entry *dstpe, + struct page_entry *srcpe); + + // Fill a page with its specified pattern. + virtual bool FillPage(struct page_entry *pe); + + // Copy with address tagging. + virtual bool AdlerAddrMemcpyC(uint64 *dstmem64, + uint64 *srcmem64, + unsigned int size_in_bytes, + AdlerChecksum *checksum, + struct page_entry *pe); + // Crc data with address tagging. + virtual bool AdlerAddrCrcC(uint64 *srcmem64, + unsigned int size_in_bytes, + AdlerChecksum *checksum, + struct page_entry *pe); + // Report a mistagged cacheline. + bool ReportTagError(uint64 *mem64, + uint64 actual, + uint64 tag); + // Print out the error record of the tag mismatch. 
+ void ProcessTagError(struct ErrorRecord *error, + int priority, + const char *message); + + // A worker thread can yield itself to give up CPU until it's scheduled again + bool YieldSelf(); + + protected: + // General state variables that all subclasses need. + int thread_num_; // Thread ID. + volatile int status_; // Error status. + volatile int64 pages_copied_; // Recorded for memory bandwidth calc. + volatile int64 errorcount_; // Miscompares seen by this thread. + + volatile uint32 cpu_mask_; // Cores this thread is allowed to run on. + volatile uint32 tag_; // Tag hint for memory this thread can use. + + bool tag_mode_; // Tag cachelines with vaddr. + + // Thread timing variables. + struct timeval start_time_; // Worker thread start time. + volatile int64 runduration_usec_; // Worker run duration in u-seconds. + + // Function passed to pthread_create. + void *(*thread_spawner_)(void *args); + pthread_t thread_; // Pthread thread ID. + Priority priority_; // Worker thread priority. + class Sat *sat_; // Reference to parent stest object. + class OsLayer *os_; // Os abstraction: put hacks here. + class PatternList *patternlist_; // Reference to data patterns. + + // Work around style guide ban on sizeof(int). + static const uint64 iamint_ = 0; + static const int wordsize_ = sizeof(iamint_); + + private: + WorkerStatus *worker_status_; + + DISALLOW_COPY_AND_ASSIGN(WorkerThread); +}; + +// Worker thread to perform File IO. +class FileThread : public WorkerThread { + public: + FileThread(); + // Set filename to use for file IO. + virtual void SetFile(const char *filename_init); + virtual int Work(); + + // Calculate worker thread specific bandwidth. + virtual float GetDeviceCopiedData() + {return GetCopiedData()*2;} + virtual float GetMemoryCopiedData(); + + protected: + // Record of where these pages were sourced from, and what + // potentially broken components they passed through. + struct PageRec { + struct Pattern *pattern; // This is the data it should contain. + void *src; // This is the memory location the data was sourced from. + void *dst; // This is where it ended up. + }; + + // These are functions used by the various work loops. + // Pretty print and log a data miscompare. Disks require + // slightly different error handling. + virtual void ProcessError(struct ErrorRecord *er, + int priority, + const char *message); + + virtual bool OpenFile(int *pfile); + virtual bool CloseFile(int fd); + + // Read and write whole file to disk. + virtual bool WritePages(int fd); + virtual bool ReadPages(int fd); + + // Read and write pages to disk. + virtual bool WritePageToFile(int fd, struct page_entry *src); + virtual bool ReadPageFromFile(int fd, struct page_entry *dst); + + // Sector tagging support. + virtual bool SectorTagPage(struct page_entry *src, int block); + virtual bool SectorValidatePage(const struct PageRec &page, + struct page_entry *dst, + int block); + + // Get memory for an incoming data transfer.. + virtual bool PagePrepare(); + // Remove memory allocated for data transfer. + virtual bool PageTeardown(); + + // Get memory for an incoming data transfer.. + virtual bool GetEmptyPage(struct page_entry *dst); + // Get memory for an outgoing data transfer.. + virtual bool GetValidPage(struct page_entry *dst); + // Throw out a used empty page. + virtual bool PutEmptyPage(struct page_entry *src); + // Throw out a used, filled page. + virtual bool PutValidPage(struct page_entry *src); + + + struct PageRec *page_recs_; // Array of page records. 
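// The SectorTag structure declared near the end of this class pads its four
// one-byte fields out to 512 bytes, so one tag overlays exactly one disk
// sector. A plausible stamping sketch consistent with that layout; the helper
// name and the magic constant are illustrative, not taken from the
// implementation:
//
//   static void TagSectors(SectorTag *tags, int page_bytes,
//                          uint8 block, uint8 pass) {
//     int sectors = page_bytes / static_cast<int>(sizeof(SectorTag));  // 512B.
//     for (int i = 0; i < sectors; i++) {
//       tags[i].magic = 0xd5;            // Illustrative magic value.
//       tags[i].block = block;
//       tags[i].sector = static_cast<uint8>(i);
//       tags[i].pass = pass;
//     }
//   }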
+ int crc_page_; // Page currently being CRC checked. + string filename_; // Name of file to access. + string devicename_; // Name of device file is on. + + bool page_io_; // Use page pool for IO. + void *local_page_; // malloc'd page fon non-pool IO. + int pass_; // Number of writes to the file so far. + + // Tag to detect file corruption. + struct SectorTag { + volatile uint8 magic; + volatile uint8 block; + volatile uint8 sector; + volatile uint8 pass; + char pad[512-4]; + }; + + DISALLOW_COPY_AND_ASSIGN(FileThread); +}; + + +// Worker thread to perform Network IO. +class NetworkThread : public WorkerThread { + public: + NetworkThread(); + // Set hostname to use for net IO. + virtual void SetIP(const char *ipaddr_init); + virtual int Work(); + + // Calculate worker thread specific bandwidth. + virtual float GetDeviceCopiedData() + {return GetCopiedData()*2;} + + protected: + // IsReadyToRunNoPause() wrapper, for NetworkSlaveThread to override. + virtual bool IsNetworkStopSet(); + virtual bool CreateSocket(int *psocket); + virtual bool CloseSocket(int sock); + virtual bool Connect(int sock); + virtual bool SendPage(int sock, struct page_entry *src); + virtual bool ReceivePage(int sock, struct page_entry *dst); + char ipaddr_[256]; + int sock_; + + private: + DISALLOW_COPY_AND_ASSIGN(NetworkThread); +}; + +// Worker thread to reflect Network IO. +class NetworkSlaveThread : public NetworkThread { + public: + NetworkSlaveThread(); + // Set socket for IO. + virtual void SetSock(int sock); + virtual int Work(); + + protected: + virtual bool IsNetworkStopSet(); + + private: + DISALLOW_COPY_AND_ASSIGN(NetworkSlaveThread); +}; + +// Worker thread to detect incoming Network IO. +class NetworkListenThread : public NetworkThread { + public: + NetworkListenThread(); + virtual int Work(); + + private: + virtual bool Listen(); + virtual bool Wait(); + virtual bool GetConnection(int *pnewsock); + virtual bool SpawnSlave(int newsock, int threadid); + virtual bool ReapSlaves(); + + // For serviced incoming connections. + struct ChildWorker { + WorkerStatus status; + NetworkSlaveThread thread; + }; + typedef vector ChildVector; + ChildVector child_workers_; + + DISALLOW_COPY_AND_ASSIGN(NetworkListenThread); +}; + +// Worker thread to perform Memory Copy. +class CopyThread : public WorkerThread { + public: + CopyThread() {} + virtual int Work(); + // Calculate worker thread specific bandwidth. + virtual float GetMemoryCopiedData() + {return GetCopiedData()*2;} + + private: + DISALLOW_COPY_AND_ASSIGN(CopyThread); +}; + +// Worker thread to perform Memory Invert. +class InvertThread : public WorkerThread { + public: + InvertThread() {} + virtual int Work(); + // Calculate worker thread specific bandwidth. + virtual float GetMemoryCopiedData() + {return GetCopiedData()*4;} + + private: + virtual int InvertPageUp(struct page_entry *srcpe); + virtual int InvertPageDown(struct page_entry *srcpe); + DISALLOW_COPY_AND_ASSIGN(InvertThread); +}; + +// Worker thread to fill blank pages on startup. +class FillThread : public WorkerThread { + public: + FillThread(); + // Set how many pages this thread should fill before exiting. + virtual void SetFillPages(int64 num_pages_to_fill_init); + virtual int Work(); + + private: + // Fill a page with the data pattern in pe->pattern. + virtual bool FillPageRandom(struct page_entry *pe); + int64 num_pages_to_fill_; + DISALLOW_COPY_AND_ASSIGN(FillThread); +}; + +// Worker thread to verify page data matches pattern data. 
+// Thread will check and replace pages until "done" flag is set, +// then it will check and discard pages until no more remain. +class CheckThread : public WorkerThread { + public: + CheckThread() {} + virtual int Work(); + // Calculate worker thread specific bandwidth. + virtual float GetMemoryCopiedData() + {return GetCopiedData();} + + private: + DISALLOW_COPY_AND_ASSIGN(CheckThread); +}; + + +// Worker thread to poll for system error messages. +// Thread will check for messages until "done" flag is set. +class ErrorPollThread : public WorkerThread { + public: + ErrorPollThread() {} + virtual int Work(); + + private: + DISALLOW_COPY_AND_ASSIGN(ErrorPollThread); +}; + +// Computation intensive worker thread to stress CPU. +class CpuStressThread : public WorkerThread { + public: + CpuStressThread() {} + virtual int Work(); + + private: + DISALLOW_COPY_AND_ASSIGN(CpuStressThread); +}; + +// Worker thread that tests the correctness of the +// CPU Cache Coherency Protocol. +class CpuCacheCoherencyThread : public WorkerThread { + public: + CpuCacheCoherencyThread(cc_cacheline_data *cc_data, + int cc_cacheline_count_, + int cc_thread_num_, + int cc_inc_count_); + virtual int Work(); + + protected: + cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline. + int cc_local_num_; // Local counter for each thread. + int cc_cacheline_count_; // Number of cache lines to operate on. + int cc_thread_num_; // The integer id of the thread which is + // used as an index into the integer array + // of the cacheline datastructure. + int cc_inc_count_; // Number of times to increment the counter. + + private: + DISALLOW_COPY_AND_ASSIGN(CpuCacheCoherencyThread); +}; + +// Worker thread to perform disk test. +class DiskThread : public WorkerThread { + public: + explicit DiskThread(DiskBlockTable *block_table); + virtual ~DiskThread(); + // Calculate disk thread specific bandwidth. + virtual float GetDeviceCopiedData() { + return (blocks_written_ * write_block_size_ + + blocks_read_ * read_block_size_) / kMegabyte;} + + // Set filename for device file (in /dev). + virtual void SetDevice(const char *device_name); + // Set various parameters that control the behaviour of the test. + virtual bool SetParameters(int read_block_size, + int write_block_size, + int64 segment_size, + int64 cache_size, + int blocks_per_segment, + int64 read_threshold, + int64 write_threshold, + int non_destructive); + + virtual int Work(); + + virtual float GetMemoryCopiedData() {return 0;} + + protected: + static const int kSectorSize = 512; // Size of sector on disk. + static const int kBufferAlignment = 512; // Buffer alignment required by the + // kernel. + static const int kBlockRetry = 100; // Number of retries to allocate + // sectors. + + enum IoOp { + ASYNC_IO_READ = 0, + ASYNC_IO_WRITE = 1 + }; + + virtual bool OpenDevice(int *pfile); + virtual bool CloseDevice(int fd); + + // Retrieves the size (in bytes) of the disk/file. + virtual bool GetDiskSize(int fd); + + // Retrieves the current time in microseconds. + virtual int64 GetTime(); + + // Do an asynchronous disk I/O operation. + virtual bool AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, + int64 offset, int64 timeout); + + // Write a block to disk. + virtual bool WriteBlockToDisk(int fd, BlockData *block); + + // Verify a block on disk. + virtual bool ValidateBlockOnDisk(int fd, BlockData *block); + + // Main work loop. + virtual bool DoWork(int fd); + + int read_block_size_; // Size of blocks read from disk, in bytes. 
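// Worked example tying together the sizing members declared here: queue_size_
// is set to 1.5x the number of write blocks that fit in cache_size_, so one
// full write phase covers 1.5x cache_size_ bytes before reads begin; since
// allocating y blocks from a segment requires a segment of at least 2y blocks,
// the device must hold 3x cache_size_ in the single-segment case -- the 48MiB
// minimum quoted in worker.cc for the default 16MiB cache.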
+ int write_block_size_; // Size of blocks written to disk, in bytes. + int64 blocks_read_; // Number of blocks read in work loop. + int64 blocks_written_; // Number of blocks written in work loop. + int64 segment_size_; // Size of disk segments (in bytes) that the disk + // will be split into where testing can be + // confined to a particular segment. + // Allows for control of how evenly the disk will + // be tested. Smaller segments imply more even + // testing (less random). + int blocks_per_segment_; // Number of blocks that will be tested per + // segment. + int cache_size_; // Size of disk cache, in bytes. + int queue_size_; // Length of in-flight-blocks queue, in blocks. + int non_destructive_; // Use non-destructive mode or not. + int update_block_table_; // If true, assume this is the thread + // responsible for writing the data in the disk + // for this block device and, therefore, + // update the block table. If false, just use + // the block table to get data. + + // read/write times threshold for reporting a problem + int64 read_threshold_; // Maximum time a read should take (in us) before + // a warning is given. + int64 write_threshold_; // Maximum time a write should take (in us) before + // a warning is given. + int64 read_timeout_; // Maximum time a read can take before a timeout + // and the aborting of the read operation. + int64 write_timeout_; // Maximum time a write can take before a timeout + // and the aborting of the write operation. + + string device_name_; // Name of device file to access. + int64 device_sectors_; // Number of sectors on the device. + + std::queue in_flight_sectors_; // Queue of sectors written but + // not verified. + void *block_buffer_; // Pointer to aligned block buffer. + + aio_context_t aio_ctx_; // Asynchronous I/O context for Linux native AIO. + + DiskBlockTable *block_table_; // Disk Block Table, shared by all disk + // threads that read / write at the same + // device + + DISALLOW_COPY_AND_ASSIGN(DiskThread); +}; + +class RandomDiskThread : public DiskThread { + public: + explicit RandomDiskThread(DiskBlockTable *block_table); + virtual ~RandomDiskThread(); + // Main work loop. + virtual bool DoWork(int fd); + protected: + DISALLOW_COPY_AND_ASSIGN(RandomDiskThread); +}; + +// Worker thread to perform checks in a specific memory region. +class MemoryRegionThread : public WorkerThread { + public: + MemoryRegionThread(); + ~MemoryRegionThread(); + virtual int Work(); + void ProcessError(struct ErrorRecord *error, int priority, + const char *message); + bool SetRegion(void *region, int64 size); + // Calculate worker thread specific bandwidth. + virtual float GetMemoryCopiedData() + {return GetCopiedData();} + virtual float GetDeviceCopiedData() + {return GetCopiedData() * 2;} + void SetIdentifier(string identifier) { + identifier_ = identifier; + } + + protected: + // Page queue for this particular memory region. + char *region_; + PageEntryQueue *pages_; + bool error_injection_; + int phase_; + string identifier_; + static const int kPhaseNoPhase = 0; + static const int kPhaseCopy = 1; + static const int kPhaseCheck = 2; + + private: + DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread); +}; + +#endif // STRESSAPPTEST_WORKER_H_ -- cgit v1.2.3 From b4c2ce302942068ceb3bf480e08601b2ab25154c Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Tue, 27 Oct 2009 05:56:42 +0000 Subject: Update to stressapptest 1.0.1. Fix non-x86 build. Fix lock initialization. Standardize on posix_memalign. Do not require -A. 
Add STRESSAPPTEST_OS_BSD config for future BSD support. Improved read error/data corruption handling. --- src/finelock_queue.cc | 11 +++++-- src/os.cc | 26 +++++++++++---- src/os.h | 36 ++++++++++++--------- src/sattypes.h | 2 ++ src/stressapptest_config.h.in | 9 ++++++ src/worker.cc | 75 ++++++++++++++++++++++++++----------------- 6 files changed, 106 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/finelock_queue.cc b/src/finelock_queue.cc index 27cc37d..569903a 100644 --- a/src/finelock_queue.cc +++ b/src/finelock_queue.cc @@ -45,8 +45,15 @@ FineLockPEQueue::FineLockPEQueue( queue_metric_ = kTouch; { // Init all the page locks. - for (int64 i = 0; i < q_size_; i++) - pthread_mutex_init(&(pagelocks_[i]), NULL); + for (int64 i = 0; i < q_size_; i++) { + pthread_mutex_init(&(pagelocks_[i]), NULL); + // Pages start out owned (locked) by Sat::InitializePages. + // A locked state indicates that the page state is unknown, + // and the lock should not be aquired. As InitializePages creates + // the page records, they will be inserted and unlocked, at which point + // they are ready to be aquired and filled by worker threads. + sat_assert(pthread_mutex_lock(&(pagelocks_[i])) == 0); + } } { // Init the random number generator. diff --git a/src/os.cc b/src/os.cc index 5c8c8e0..89947b7 100644 --- a/src/os.cc +++ b/src/os.cc @@ -1,10 +1,19 @@ // Copyright 2006 Google Inc. All Rights Reserved. -// Author: nsanders -// -// os.cc : os and machine specific implementation -// Copyright 2006 Google Inc. -// for open source release under GPL +// Author: nsanders, menderico + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// os.cc : os and machine specific implementation // This file includes an abstracted interface // for linux-distro specific and HW specific // interfaces. @@ -85,8 +94,13 @@ bool OsLayer::Initialize() { // Machine type detected. Can we implement all these functions correctly? bool OsLayer::IsSupported() { + if (kOpenSource) { + // There are no explicitly supported systems in open source version. + return true; + } + // This is the default empty implementation. - // SAT won't really run correctly. + // SAT won't report full error information. return false; } diff --git a/src/os.h b/src/os.h index 6ace58c..5faa84d 100644 --- a/src/os.h +++ b/src/os.h @@ -104,7 +104,7 @@ class OsLayer { inline static void FastFlush(void *vaddr) { #ifdef STRESSAPPTEST_CPU_PPC asm volatile("dcbf 0,%0; sync" : : "r" (vaddr)); -#else +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) // Put mfence before and after clflush to make sure: // 1. The write before the clflush is committed to memory bus; // 2. The read after the clflush is hitting the memory bus. @@ -117,6 +117,8 @@ class OsLayer { asm volatile("mfence"); asm volatile("clflush (%0)" :: "r" (vaddr)); asm volatile("mfence"); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." 
#endif } @@ -125,24 +127,26 @@ class OsLayer { inline static uint64 GetTimestamp(void) { uint64 tsc; #ifdef STRESSAPPTEST_CPU_PPC - uint32 tbl, tbu, temp; - __asm __volatile( - "1:\n" - "mftbu %2\n" - "mftb %0\n" - "mftbu %1\n" - "cmpw %2,%1\n" - "bne 1b\n" - : "=r"(tbl), "=r"(tbu), "=r"(temp) - : - : "cc"); - - tsc = (static_cast(tbu) << 32) | static_cast(tbl); -#else + uint32 tbl, tbu, temp; + __asm __volatile( + "1:\n" + "mftbu %2\n" + "mftb %0\n" + "mftbu %1\n" + "cmpw %2,%1\n" + "bne 1b\n" + : "=r"(tbl), "=r"(tbu), "=r"(temp) + : + : "cc"); + + tsc = (static_cast(tbu) << 32) | static_cast(tbl); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) datacast_t data; __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); tsc = data.l64; - +#else + #warning "Unsupported CPU type: your build may not function correctly" + tsc = 0; #endif return (tsc); } diff --git a/src/sattypes.h b/src/sattypes.h index 2a58862..47fa79f 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -51,7 +51,9 @@ inline const char* BuildChangelist() { return "open source release"; } +static const bool kOpenSource = true; #else +static const bool kOpenSource = false; #include "googlesattypes.h" #endif // Workaround to allow 32/64 bit conversion diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index efb8b2c..7680a29 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -151,6 +151,15 @@ /* Defined if the target CPU is x86_64 */ #undef STRESSAPPTEST_CPU_X86_64 +/* Defined if the target OS is BSD based */ +#undef STRESSAPPTEST_OS_BSD + +/* Defined if the target OS is OSX */ +#undef STRESSAPPTEST_OS_DARWIN + +/* Defined if the target OS is Linux */ +#undef STRESSAPPTEST_OS_LINUX + /* Timestamp when ./configure was executed */ #undef STRESSAPPTEST_TIMESTAMP diff --git a/src/worker.cc b/src/worker.cc index 6a00db2..39322d2 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -16,7 +16,6 @@ // stress the system #include -#include #include #include #include @@ -40,6 +39,7 @@ #include #include #include // for gettid + // For size of block device #include #include @@ -64,7 +64,7 @@ // Syscalls // Why ubuntu, do you hate gettid so bad? #if !defined(__NR_gettid) -# define __NR_gettid 224 + #define __NR_gettid 224 #endif #define gettid() syscall(__NR_gettid) @@ -100,10 +100,11 @@ namespace { // Get HW core ID from cpuid instruction. inline int apicid(void) { int cpu; -#ifdef STRESSAPPTEST_CPU_PPC - cpu = 0; -#else +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx"); +#else + #warning "Unsupported CPU type: unable to determine core ID." + cpu = 0; #endif return (cpu >> 24); } @@ -268,6 +269,7 @@ FileThread::FileThread() { pass_ = 0; page_io_ = true; crc_page_ = -1; + local_page_ = NULL; } // If file thread used bounce buffer in memory, account for the extra @@ -611,7 +613,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, (error->vaddr), 1); logprintf(priority, - "%s: miscompare on CPU %d(%x) at %p(0x%llx:%s): " + "%s: miscompare on CPU %d(0x%x) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, apic_id, @@ -975,13 +977,13 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, // Report parseable error. 
if (priority < 5) { logprintf(priority, - "%s: Tag from %p(0x%llx:%s) (%s) miscompare on CPU %d(%x) at " - "%p(0x%llx:%s): " + "%s: Tag from %p(0x%llx:%s) (%s) " + "miscompare on CPU %d(0x%x) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, error->tagvaddr, error->tagpaddr, tag_dimm_string, - read_error?"read error":"write error", + read_error ? "read error" : "write error", apic_id, cpumask, error->vaddr, @@ -1199,10 +1201,19 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - logprintf(0, "Process Error: CrcCopyPage CRC mismatch %s != %s, " + int apic_id = apicid(); + uint32 cpumask = CurrentCpus(); + logprintf(0, "Process Error: CPU %d(0x%x) CrcCopyPage " + "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", + apic_id, cpumask, crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); + struct ErrorRecord er; + er.actual = sourcemem[0]; + er.expected = 0x0; + er.vaddr = sourcemem; + ProcessError(&er, 0, "Hardware Error"); } } } @@ -1486,8 +1497,6 @@ int CopyThread::Work() { return 1; } - - // Memory invert work loop. Execute until marked done. int InvertThread::Work() { struct page_entry src; @@ -1749,8 +1758,6 @@ bool FileThread::SectorValidatePage(const struct PageRec &page, return true; } - - // Get memory for an incoming data transfer.. bool FileThread::PagePrepare() { // We can only do direct IO to SAT pages if it is normal mem. @@ -1758,9 +1765,11 @@ bool FileThread::PagePrepare() { // Init a local buffer if we need it. if (!page_io_) { - local_page_ = static_cast(memalign(512, sat_->page_length())); - if (!local_page_) { - logprintf(0, "Process Error: disk thread memalign returned 0\n"); + int result = posix_memalign(&local_page_, 512, sat_->page_length()); + if (result) { + logprintf(0, "Process Error: disk thread posix_memalign " + "returned %d (fail)\n", + result); status_ += 1; return false; } @@ -2311,14 +2320,16 @@ int NetworkSlaveThread::Work() { return 0; // Loop until done. - int result = 1; int64 loops = 0; // Init a local buffer for storing data. - void *local_page = static_cast(memalign(512, sat_->page_length())); - if (!local_page) { - logprintf(0, "Process Error: Net Slave thread memalign returned 0\n"); + void *local_page = NULL; + int result = posix_memalign(&local_page, 512, sat_->page_length()); + if (result) { + logprintf(0, "Process Error: net slave posix_memalign " + "returned %d (fail)\n", + result); status_ += 1; - return 0; + return false; } struct page_entry page; @@ -2339,7 +2350,8 @@ int NetworkSlaveThread::Work() { } pages_copied_ = loops; - status_ = result; + // No results provided from this type of thread. + status_ = 1; // Clean up. CloseSocket(sock); @@ -2348,7 +2360,7 @@ int NetworkSlaveThread::Work() { "Log: Completed %d: network slave thread status %d, " "%d pages copied\n", thread_num_, status_, pages_copied_); - return result; + return status_; } // Thread work loop. Execute until marked finished. @@ -2475,6 +2487,8 @@ DiskThread::DiskThread(DiskBlockTable *block_table) { aio_ctx_ = 0; block_table_ = block_table; update_block_table_ = 1; + + block_buffer_ = NULL; } DiskThread::~DiskThread() { @@ -3027,13 +3041,16 @@ int DiskThread::Work() { // Allocate a block buffer aligned to 512 bytes since the kernel requires it // when using direst IO. 
- block_buffer_ = memalign(kBufferAlignment, write_block_size_); - if (block_buffer_ == NULL) { + + int result = posix_memalign(&block_buffer_, kBufferAlignment, + sat_->page_length()); + if (result) { CloseDevice(fd); logprintf(0, "Process Error: Unable to allocate memory for buffers " - "for disk %s (thread %d).\n", - device_name_.c_str(), thread_num_); - return 0; + "for disk %s (thread %d) posix memalign returned %d.\n", + device_name_.c_str(), thread_num_, result); + status_ += 1; + return false; } if (io_setup(5, &aio_ctx_)) { -- cgit v1.2.3 From 749e58e072824dd5b48c0221708a9ec1cabc0426 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Tue, 27 Oct 2009 06:16:43 +0000 Subject: Add manpage contributed by Michael Prokop . Fix typos --- src/worker.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/worker.cc b/src/worker.cc index 39322d2..08b5a4e 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -2099,7 +2099,7 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) { if (transferred == 0 && err == 0) { // Two system setups will not sync exactly, // allow early exit, but log it. - logprintf(0, "Log: Net thread did not recieve any data, exitting.\n"); + logprintf(0, "Log: Net thread did not receive any data, exiting.\n"); } else { char buf[256] = ""; sat_strerror(err, buf, sizeof(buf)); -- cgit v1.2.3 From eea0aac32a3f522ea51d389f44dcd8abcfc5a6e0 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Fri, 12 Mar 2010 03:35:04 +0000 Subject: Update stressapptest to 1.0.2 * Fix -d option on 32 bit build. * Fix -d bandwidth calculation * DiskThread general cleanup * Add libaio dependency for cross platform builds * Allow > 32 cores * Add support for --tag_mode and -W on the same run * General error handling cleanup * Improve checksum generation failure handling - printout core, node, dimm. * -H option specifies minimum required hugepage allocation --- src/os.cc | 32 ++- src/os.h | 42 ++-- src/os_factory.cc | 1 - src/sat.cc | 70 ++++--- src/sat.h | 1 + src/sattypes.h | 75 ++++--- src/stressapptest_config.h.in | 6 + src/worker.cc | 469 ++++++++++++++++++++++++------------------ src/worker.h | 88 +++++--- 9 files changed, 473 insertions(+), 311 deletions(-) (limited to 'src') diff --git a/src/os.cc b/src/os.cc index 89947b7..4784028 100644 --- a/src/os.cc +++ b/src/os.cc @@ -52,6 +52,7 @@ OsLayer::OsLayer() { testmem_ = 0; testmemsize_ = 0; totalmemsize_ = 0; + min_hugepages_bytes_ = 0; error_injection_ = false; normal_mem_ = true; time_initialized_ = 0; @@ -178,13 +179,22 @@ cpu_set_t *OsLayer::FindCoreMask(int32 region) { for (int i = 0; i < num_cpus_per_node_; ++i) { CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]); } - logprintf(5, "Log: Region %d mask 0x%08X\n", - region, cpuset_to_uint32(&cpu_sets_[region])); cpu_sets_valid_[region] = true; + logprintf(5, "Log: Region %d mask 0x%s\n", + region, FindCoreMaskFormat(region).c_str()); } return &cpu_sets_[region]; } +// Return cores associated with a given region in hex string. +string OsLayer::FindCoreMaskFormat(int32 region) { + cpu_set_t* mask = FindCoreMask(region); + string format = cpuset_format(mask); + if (format.size() < 8) + format = string(8 - format.size(), '0') + format; + return format; +} + // Report an error in an easily parseable way. bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) { time_t now = time(NULL); @@ -246,16 +256,20 @@ int64 OsLayer::FindFreeMemSize() { } // We want to leave enough stuff for things to run. 
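// Worked example of the target-size policy introduced in this hunk: with
// 4096MiB of physical memory the default target is about 95% - 192MiB, or
// roughly 3699MiB; with 1024MiB (below the 2GB threshold) it is 85%, roughly
// 870MiB; and when hugepages are present, the -H minimum carried in
// min_hugepages_bytes_ overrides both.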
- // If more than 2GB is present, leave 192M + 5% for other stuff. + // If the user specified a minimum amount of memory to expect, require that. + // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff. // If less than 2GB is present use 85% of what's available. // These are fairly arbitrary numbers that seem to work OK. // // TODO(nsanders): is there a more correct way to determine target // memory size? - if (physsize < 2048LL * kMegabyte) + if (hugepagesize > 0 && min_hugepages_bytes_ > 0) { + minsize = min_hugepages_bytes_; + } else if (physsize < 2048LL * kMegabyte) { minsize = ((pages * 85) / 100) * pagesize; - else + } else { minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + } // Use hugepage sizing if available. if (hugepagesize > 0) { @@ -325,10 +339,16 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { if (shmaddr == reinterpret_cast(-1)) { int err = errno; char errtxt[256] = ""; - shmctl(shmid, IPC_RMID, NULL); strerror_r(err, errtxt, sizeof(errtxt)); logprintf(0, "Log: failed to attach shared mem object - err %d (%s).\n", err, errtxt); + if (shmctl(shmid, IPC_RMID, NULL) < 0) { + int err = errno; + char errtxt[256] = ""; + strerror_r(err, errtxt, sizeof(errtxt)); + logprintf(0, "Log: failed to remove shared mem object - err %d (%s).\n", + err, errtxt); + } goto hugepage_failover; } use_hugepages_ = true; diff --git a/src/os.h b/src/os.h index 5faa84d..9ed04d5 100644 --- a/src/os.h +++ b/src/os.h @@ -50,6 +50,12 @@ class OsLayer { OsLayer(); virtual ~OsLayer(); + // Set the minimum amount of hugepages that should be available for testing. + // Must be set before Initialize(). + void SetMinimumHugepagesSize(int64 min_bytes) { + min_hugepages_bytes_ = min_bytes; + } + // Initializes data strctures and open files. // Returns false on error. virtual bool Initialize(); @@ -75,6 +81,8 @@ class OsLayer { virtual int32 FindRegion(uint64 paddr); // Find cpu cores associated with a region. Either NUMA or arbitrary. virtual cpu_set_t *FindCoreMask(int32 region); + // Return cpu cores associated with a region in a hex string. + virtual string FindCoreMaskFormat(int32 region); // Returns the HD device that contains this file. virtual string FindFileDevice(string filename); @@ -228,21 +236,22 @@ class OsLayer { ErrCallback get_err_log_callback() { return err_log_callback_; } protected: - void *testmem_; // Location of test memory. - int64 testmemsize_; // Size of test memory. - int64 totalmemsize_; // Size of available memory. - bool error_injection_; // Do error injection? - bool normal_mem_; // Memory DMA capable? - bool use_hugepages_; // Use hugepage shmem? - int shmid_; // Handle to shmem - - int64 regionsize_; // Size of memory "regions" - int regioncount_; // Number of memory "regions" - int num_cpus_; // Number of cpus in the system. - int num_nodes_; // Number of nodes in the system. - int num_cpus_per_node_; // Number of cpus per node in the system. - - time_t time_initialized_; // Start time of test. + void *testmem_; // Location of test memory. + int64 testmemsize_; // Size of test memory. + int64 totalmemsize_; // Size of available memory. + int64 min_hugepages_bytes_; // Minimum hugepages size. + bool error_injection_; // Do error injection? + bool normal_mem_; // Memory DMA capable? + bool use_hugepages_; // Use hugepage shmem? + int shmid_; // Handle to shmem + + int64 regionsize_; // Size of memory "regions" + int regioncount_; // Number of memory "regions" + int num_cpus_; // Number of cpus in the system. 
+ int num_nodes_; // Number of nodes in the system. + int num_cpus_per_node_; // Number of cpus per node in the system. + + time_t time_initialized_; // Start time of test. vector cpu_sets_; // Cache for cpu masks. vector cpu_sets_valid_; // If the cpu mask cache is valid. @@ -263,7 +272,8 @@ class OsLayer { DISALLOW_COPY_AND_ASSIGN(OsLayer); }; -// Selects and returns the proper OS and hardware interface. +// Selects and returns the proper OS and hardware interface. Does not call +// OsLayer::Initialize() on the new object. OsLayer *OsLayerFactory(const std::map &options); #endif // STRESSAPPTEST_OS_H_ NOLINT diff --git a/src/os_factory.cc b/src/os_factory.cc index 8acf573..359f7ee 100644 --- a/src/os_factory.cc +++ b/src/os_factory.cc @@ -36,6 +36,5 @@ OsLayer *OsLayerFactory(const std::map &options) { logprintf(0, "Process Error: Can't allocate memory\n"); return 0; } - os->Initialize(); return os; } diff --git a/src/sat.cc b/src/sat.cc index e714ba2..06b4c65 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -582,12 +582,24 @@ bool Sat::Initialize() { bad_status(); return false; } - if (error_injection_) os_->set_error_injection(true); + + if (min_hugepages_mbytes_ > 0) + os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte); + + if (!os_->Initialize()) { + logprintf(0, "Process Error: Failed to initialize OS layer\n"); + bad_status(); + delete os_; + return false; + } // Checks that OS/Build/Platform is supported. if (!CheckEnvironment()) return false; + if (error_injection_) + os_->set_error_injection(true); + // Run SAT in monitor only mode, do not continue to allocate resources. if (monitor_mode_) { logprintf(5, "Log: Running in monitor-only mode. " @@ -641,6 +653,7 @@ Sat::Sat() { pages_ = 0; size_mb_ = 0; size_ = size_mb_ * kMegabyte; + min_hugepages_mbytes_ = 0; freepages_ = 0; paddr_base_ = 0; @@ -765,6 +778,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of megabyte to use. ARG_IVALUE("-M", size_mb_); + // Set minimum megabytes of hugepages to require. + ARG_IVALUE("-H", min_hugepages_mbytes_); + // Set number of seconds to run. ARG_IVALUE("-s", runtime_seconds_); @@ -972,6 +988,7 @@ bool Sat::ParseArgs(int argc, char **argv) { void Sat::PrintHelp() { printf("Usage: ./sat(32|64) [options]\n" " -M mbytes megabytes of ram to test\n" + " -H mbytes minimum megabytes of hugepages to require\n" " -s seconds number of seconds to run\n" " -m threads number of memory copy threads to run\n" " -i threads number of memory invert threads to run\n" @@ -1037,20 +1054,6 @@ void Sat::GoogleOsOptions(std::map *options) { // Do nothing, no OS-specific argument on public stressapptest } -namespace { - // This counts the bits set in a bitmask. - // This is used to determine number of cores in an available mask. - int countbits(uint32 bitfield) { - int numbits = 0; - for (int i = 0; i < 32; i++) { - if (bitfield & (1 << i)) { - numbits++; - } - } - return numbits; - } -} - // Launch the SAT task threads. Returns 0 on error. void Sat::InitializeThreads() { // Memory copy threads. @@ -1090,18 +1093,19 @@ void Sat::InitializeThreads() { int32 region = region_find(i % region_count_); cpu_set_t *cpuset = os_->FindCoreMask(region); sat_assert(cpuset); - int32 cpu_mask = cpuset_to_uint32(cpuset); if (region_mode_ == kLocalNuma) { // Choose regions associated with this CPU. - thread->set_cpu_mask(cpu_mask); + thread->set_cpu_mask(cpuset); thread->set_tag(1 << region); } else if (region_mode_ == kRemoteNuma) { // Choose regions not associated with this CPU.. 
- thread->set_cpu_mask(cpu_mask); + thread->set_cpu_mask(cpuset); thread->set_tag(region_mask_ & ~(1 << region)); } } else { - int cores = countbits(thread->AvailableCpus()); + cpu_set_t available_cpus; + thread->AvailableCpus(&available_cpus); + int cores = cpuset_count(&available_cpus); // Don't restrict thread location if we have more than one // thread per core. Not so good for performance. if (cpu_stress_threads_ + memory_threads_ <= cores) { @@ -1110,15 +1114,18 @@ void Sat::InitializeThreads() { int nthcore = i; int nthbit = (((2 * nthcore) % cores) + (((2 * nthcore) / cores) % 2)) % cores; - if (thread->AvailableCpus() != ((1 << cores) - 1)) { + cpu_set_t all_cores; + cpuset_set_ab(&all_cores, 0, cores); + if (!cpuset_isequal(&available_cpus, &all_cores)) { // We are assuming the bits are contiguous. // Complain if this is not so. - logprintf(0, "Log: cores = %x, expected %x\n", - thread->AvailableCpus(), ((1 << (cores + 1)) - 1)); + logprintf(0, "Log: cores = %s, expected %s\n", + cpuset_format(&available_cpus).c_str(), + cpuset_format(&all_cores).c_str()); } // Set thread affinity. - thread->set_cpu_mask(1 << nthbit); + thread->set_cpu_mask_to_cpu(nthbit); } } memory_vector->insert(memory_vector->end(), thread); @@ -1238,7 +1245,9 @@ void Sat::InitializeThreads() { // Don't restrict thread location if we have more than one // thread per core. Not so good for performance. - int cores = countbits(thread->AvailableCpus()); + cpu_set_t available_cpus; + thread->AvailableCpus(&available_cpus); + int cores = cpuset_count(&available_cpus); if (cpu_stress_threads_ + memory_threads_ <= cores) { // Place a thread on alternating cores first. // Go in reverse order for CPU stress threads. This assures interleaved @@ -1246,13 +1255,16 @@ void Sat::InitializeThreads() { int nthcore = (cores - 1) - i; int nthbit = (((2 * nthcore) % cores) + (((2 * nthcore) / cores) % 2)) % cores; - if (thread->AvailableCpus() != ((1 << cores) - 1)) { - logprintf(0, "Log: cores = %x, expected %x\n", - thread->AvailableCpus(), ((1 << (cores + 1)) - 1)); + cpu_set_t all_cores; + cpuset_set_ab(&all_cores, 0, cores); + if (!cpuset_isequal(&available_cpus, &all_cores)) { + logprintf(0, "Log: cores = %s, expected %s\n", + cpuset_format(&available_cpus).c_str(), + cpuset_format(&all_cores).c_str()); } // Set thread affinity. - thread->set_cpu_mask(1 << nthbit); + thread->set_cpu_mask_to_cpu(nthbit); } @@ -1298,7 +1310,7 @@ void Sat::InitializeThreads() { thread->InitThread(total_threads_++, this, os_, patternlist_, &continuous_status_); // Pin the thread to a particular core. - thread->set_cpu_mask(1 << tnum); + thread->set_cpu_mask_to_cpu(tnum); // Insert the thread into the vector. cc_vector->insert(cc_vector->end(), thread); diff --git a/src/sat.h b/src/sat.h index b1ad085..950270f 100644 --- a/src/sat.h +++ b/src/sat.h @@ -147,6 +147,7 @@ class Sat { int64 pages_; // Number of memory blocks. int64 size_; // Size of memory tested, in bytes. int64 size_mb_; // Size of memory tested, in MB. + int64 min_hugepages_mbytes_; // Minimum hugepages size. int64 freepages_; // How many invalid pages we need. int disk_pages_; // Number of pages per temp file. uint64 paddr_base_; // Physical address base. 
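For reference, the core-interleaving formula used in Sat::InitializeThreads() above, nthbit = (((2 * nthcore) % cores) + (((2 * nthcore) / cores) % 2)) % cores, walks the even-numbered cores first and then the odd-numbered ones. A minimal standalone sketch, assuming a hypothetical 8-core machine with a contiguous CPU mask (illustration only, not part of this patch):

    // Prints the core chosen for each memory thread by the interleaving
    // formula from Sat::InitializeThreads(), for a hypothetical 8-core box.
    #include <cstdio>

    int main() {
      const int cores = 8;  // Assumed core count for the illustration.
      for (int nthcore = 0; nthcore < cores; ++nthcore) {
        int nthbit = (((2 * nthcore) % cores) +
                      (((2 * nthcore) / cores) % 2)) % cores;
        printf("memory thread %d -> cpu %d\n", nthcore, nthbit);
      }
      return 0;
    }

For cores = 8 this yields the placement order 0, 2, 4, 6, 1, 3, 5, 7, which is the "place a thread on alternating cores first" behavior the surrounding comments describe.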
diff --git a/src/sattypes.h b/src/sattypes.h index 47fa79f..96bf13b 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #ifdef HAVE_CONFIG_H // Built using autoconf @@ -86,36 +87,64 @@ void logprintf(int priority, const char *format, ...); // Note: this code is hacked together to deal with difference // function signatures across versions of glibc, ie those that take // cpu_set_t versus those that take unsigned long. -johnhuang - typedef unsigned long cpu_set_t; - #define CPU_SETSIZE 32 - #define CPU_ISSET(index, cpu_set_ptr) (*(cpu_set_ptr) & 1 << (index)) - #define CPU_SET(index, cpu_set_ptr) (*(cpu_set_ptr) |= 1 << (index)) + typedef uint64 cpu_set_t; + #define CPU_SETSIZE (sizeof(cpu_set_t) * 8) + #define CPU_ISSET(index, cpu_set_ptr) (*(cpu_set_ptr) & 1ull << (index)) + #define CPU_SET(index, cpu_set_ptr) (*(cpu_set_ptr) |= 1ull << (index)) #define CPU_ZERO(cpu_set_ptr) (*(cpu_set_ptr) = 0) - #define CPU_CLR(index, cpu_set_ptr) (*(cpu_set_ptr) &= ~(1 << (index))) + #define CPU_CLR(index, cpu_set_ptr) (*(cpu_set_ptr) &= ~(1ull << (index))) #endif -// Make using CPUSET non-super-painful. -static inline uint32 cpuset_to_uint32(cpu_set_t *cpuset) { - uint32 value = 0; - for (int index = 0; index < CPU_SETSIZE; index++) { - if (CPU_ISSET(index, cpuset)) { - if (index < 32) { - value |= 1 << index; - } else { - logprintf(0, "Process Error: Cpu index (%d) higher than 32\n", index); - sat_assert(0); - } - } - } - return value; +static inline bool cpuset_isequal(const cpu_set_t *c1, const cpu_set_t *c2) { + for (int i = 0; i < CPU_SETSIZE; ++i) + if ((CPU_ISSET(i, c1) != 0) != (CPU_ISSET(i, c2) != 0)) + return false; + return true; +} + +static inline bool cpuset_issubset(const cpu_set_t *c1, const cpu_set_t *c2) { + for (int i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, c1) && !CPU_ISSET(i, c2)) + return false; + return true; +} + +static inline int cpuset_count(const cpu_set_t *cpuset) { + int count = 0; + for (int i = 0; i < CPU_SETSIZE; ++i) + if (CPU_ISSET(i, cpuset)) + ++count; + return count; } -static inline void cpuset_from_uint32(uint32 mask, cpu_set_t *cpuset) { +static inline void cpuset_set_ab(cpu_set_t *cpuset, int a, int b) { CPU_ZERO(cpuset); - for (int index = 0; index < 32; index++) { - if (mask & (1 << index)) - CPU_SET(index, cpuset); + for (int i = a; i < b; ++i) + CPU_SET(i, cpuset); +} + +static inline string cpuset_format(const cpu_set_t *cpuset) { + string format; + int digit = 0, last_non_zero_size = 1; + for (int i = 0; i < CPU_SETSIZE; ++i) { + if (CPU_ISSET(i, cpuset)) { + digit |= 1 << (i & 3); + } + if ((i & 3) == 3) { + format += char(digit <= 9 ? '0' + digit: 'A' + digit - 10); + if (digit) { + last_non_zero_size = format.size(); + digit = 0; + } + } + } + if (digit) { + format += char(digit <= 9 ? '0' + digit: 'A' + digit - 10); + last_non_zero_size = format.size(); } + format.erase(last_non_zero_size); + reverse(format.begin(), format.end()); + return format; } static const int32 kUSleepOneSecond = 1000000; diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 7680a29..535bb34 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -26,6 +26,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H +/* Define to 1 if you have the header file. */ +#undef HAVE_LIBAIO_H + /* Define to 1 if you have the header file. */ #undef HAVE_MALLOC_H @@ -41,6 +44,9 @@ /* Define to 1 if you have the header file. 
*/ #undef HAVE_NETDB_H +/* Define to 1 if you have the header file. */ +#undef HAVE_PTHREAD_H + /* Define to 1 if you have the `select' function. */ #undef HAVE_SELECT diff --git a/src/worker.cc b/src/worker.cc index 08b5a4e..c568064 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -44,7 +44,7 @@ #include #include // For asynchronous I/O -#include +#include #include @@ -77,25 +77,9 @@ _syscall3(int, sched_setaffinity, pid_t, pid, // Linux aio syscalls. #if !defined(__NR_io_setup) -#define __NR_io_setup 206 -#define __NR_io_destroy 207 -#define __NR_io_getevents 208 -#define __NR_io_submit 209 -#define __NR_io_cancel 210 +#error "No aio headers inculded, please install libaio." #endif -#define io_setup(nr_events, ctxp) \ - syscall(__NR_io_setup, (nr_events), (ctxp)) -#define io_submit(ctx_id, nr, iocbpp) \ - syscall(__NR_io_submit, (ctx_id), (nr), (iocbpp)) -#define io_getevents(ctx_id, io_getevents, nr, events, timeout) \ - syscall(__NR_io_getevents, (ctx_id), (io_getevents), (nr), (events), \ - (timeout)) -#define io_cancel(ctx_id, iocb, result) \ - syscall(__NR_io_cancel, (ctx_id), (iocb), (result)) -#define io_destroy(ctx) \ - syscall(__NR_io_destroy, (ctx)) - namespace { // Get HW core ID from cpuid instruction. inline int apicid(void) { @@ -157,7 +141,6 @@ static void *ThreadSpawnerGeneric(void *ptr) { return NULL; } - void WorkerStatus::Initialize() { sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL)); sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL)); @@ -245,10 +228,10 @@ void WorkerStatus::RemoveSelf() { // Parent thread class. WorkerThread::WorkerThread() { - status_ = 0; + status_ = false; pages_copied_ = 0; errorcount_ = 0; - runduration_usec_ = 0; + runduration_usec_ = 1; priority_ = Normal; worker_status_ = NULL; thread_spawner_ = &ThreadSpawnerGeneric; @@ -310,7 +293,7 @@ void WorkerThread::InitThread(int thread_num_init, patternlist_ = patternlist_init; worker_status_ = worker_status; - cpu_mask_ = AvailableCpus(); + AvailableCpus(&cpu_mask_); tag_ = 0xffffffff; tag_mode_ = sat_->tag_mode(); @@ -321,12 +304,15 @@ void WorkerThread::InitThread(int thread_num_init, bool WorkerThread::InitPriority() { // This doesn't affect performance that much, and may not be too safe. - bool ret = BindToCpus(cpu_mask_); + bool ret = BindToCpus(&cpu_mask_); if (!ret) - logprintf(11, "Log: Bind to %x failed.\n", cpu_mask_); + logprintf(11, "Log: Bind to %s failed.\n", + cpuset_format(&cpu_mask_).c_str()); - logprintf(11, "Log: Thread %d running on apic ID %d mask %x (%x).\n", - thread_num_, apicid(), CurrentCpus(), cpu_mask_); + logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n", + thread_num_, apicid(), + CurrentCpusFormat().c_str(), + cpuset_format(&cpu_mask_).c_str()); #if 0 if (priority_ == High) { sched_param param; @@ -356,7 +342,7 @@ int WorkerThread::SpawnThread() { logprintf(0, "Process Error: pthread_create " "failed - error %d %s\n", result, buf); - status_ += 1; + status_ = false; return false; } @@ -365,18 +351,17 @@ int WorkerThread::SpawnThread() { } // Kill the worker thread with SIGINT. -int WorkerThread::KillThread() { - pthread_kill(thread_, SIGINT); - return 0; +bool WorkerThread::KillThread() { + return (pthread_kill(thread_, SIGINT) == 0); } // Block until thread has exited. -int WorkerThread::JoinThread() { +bool WorkerThread::JoinThread() { int result = pthread_join(thread_, NULL); if (result) { logprintf(0, "Process Error: pthread_join failed - error %d\n", result); - status_ = 0; + status_ = false; } // 0 is pthreads success. 
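For context on the new libaio dependency introduced above: the hand-rolled __NR_io_* syscall wrappers are removed and the code now expects the public libaio interface. A minimal sketch of that interface follows; the file path and buffer here are placeholders and nothing below is taken from worker.cc (the real DiskThread opens the device with O_DIRECT | O_SYNC, which is why it aligns its buffers). Link with -laio.

    #include <fcntl.h>
    #include <libaio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main() {
      io_context_t ctx = 0;
      if (io_setup(5, &ctx) != 0) return 1;           // Create the AIO context.

      int fd = open("/tmp/aio_sketch", O_CREAT | O_RDWR, 0600);
      if (fd < 0) return 1;
      void *buf = NULL;
      if (posix_memalign(&buf, 512, 4096)) return 1;  // Aligned, as O_DIRECT would need.

      struct iocb cb;
      io_prep_pwrite(&cb, fd, buf, 4096, 0);          // Fill in one write request.
      struct iocb *cbs[1] = { &cb };
      if (io_submit(ctx, 1, cbs) != 1) return 1;      // Queue it.

      struct io_event event;
      io_getevents(ctx, 1, 1, &event, NULL);          // Wait for completion.

      io_destroy(ctx);
      close(fd);
      unlink("/tmp/aio_sketch");
      free(buf);
      return 0;
    }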
@@ -394,14 +379,14 @@ void WorkerThread::StartRoutine() { // Thread work loop. Execute until marked finished. -int WorkerThread::Work() { +bool WorkerThread::Work() { do { logprintf(9, "Log: ...\n"); // Sleep for 1 second. sat_sleep(1); } while (IsReadyToRun()); - return 0; + return false; } @@ -409,11 +394,9 @@ int WorkerThread::Work() { // Conceptually, each bit represents a logical CPU, ie: // mask = 3 (11b): cpu0, 1 // mask = 13 (1101b): cpu0, 2, 3 -uint32 WorkerThread::AvailableCpus() { - cpu_set_t curr_cpus; - CPU_ZERO(&curr_cpus); - sched_getaffinity(getppid(), sizeof(curr_cpus), &curr_cpus); - return cpuset_to_uint32(&curr_cpus); +bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) { + CPU_ZERO(cpuset); + return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0; } @@ -421,11 +404,9 @@ uint32 WorkerThread::AvailableCpus() { // Conceptually, each bit represents a logical CPU, ie: // mask = 3 (11b): cpu0, 1 // mask = 13 (1101b): cpu0, 2, 3 -uint32 WorkerThread::CurrentCpus() { - cpu_set_t curr_cpus; - CPU_ZERO(&curr_cpus); - sched_getaffinity(0, sizeof(curr_cpus), &curr_cpus); - return cpuset_to_uint32(&curr_cpus); +bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) { + CPU_ZERO(cpuset); + return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0; } @@ -437,21 +418,22 @@ uint32 WorkerThread::CurrentCpus() { // mask = 13 (1101b): cpu0, 2, 3 // // Returns true on success, false otherwise. -bool WorkerThread::BindToCpus(uint32 thread_mask) { - uint32 process_mask = AvailableCpus(); - if (thread_mask == process_mask) +bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) { + cpu_set_t process_mask; + AvailableCpus(&process_mask); + if (cpuset_isequal(thread_mask, &process_mask)) return true; - logprintf(11, "Log: available CPU mask - %x\n", process_mask); - if ((thread_mask | process_mask) != process_mask) { + logprintf(11, "Log: available CPU mask - %s\n", + cpuset_format(&process_mask).c_str()); + if (!cpuset_issubset(thread_mask, &process_mask)) { // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist. - logprintf(0, "Log: requested CPUs %x not a subset of available %x\n", - thread_mask, process_mask); + logprintf(0, "Log: requested CPUs %s not a subset of available %s\n", + cpuset_format(thread_mask).c_str(), + cpuset_format(&process_mask).c_str()); return false; } - cpu_set_t cpuset; - cpuset_from_uint32(thread_mask, &cpuset); - return (sched_setaffinity(gettid(), sizeof(cpuset), &cpuset) == 0); + return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0); } @@ -533,8 +515,8 @@ bool FillThread::FillPageRandom(struct page_entry *pe) { // Memory fill work loop. Execute until alloted pages filled. -int FillThread::Work() { - int result = 1; +bool FillThread::Work() { + bool result = true; logprintf(9, "Log: Starting fill thread %d\n", thread_num_); @@ -544,7 +526,7 @@ int FillThread::Work() { struct page_entry pe; int64 loops = 0; while (IsReadyToRun() && (loops < num_pages_to_fill_)) { - result &= sat_->GetEmpty(&pe); + result = result && sat_->GetEmpty(&pe); if (!result) { logprintf(0, "Process Error: fill_thread failed to pop pages, " "bailing\n"); @@ -552,11 +534,11 @@ int FillThread::Work() { } // Fill the page with pattern - result &= FillPageRandom(&pe); + result = result && FillPageRandom(&pe); if (!result) break; // Put the page back on the queue. 
- result &= sat_->PutValid(&pe); + result = result && sat_->PutValid(&pe); if (!result) { logprintf(0, "Process Error: fill_thread failed to push pages, " "bailing\n"); @@ -570,7 +552,7 @@ int FillThread::Work() { status_ = result; logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n", thread_num_, status_, pages_copied_); - return 0; + return result; } @@ -581,7 +563,6 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, char dimm_string[256] = ""; int apic_id = apicid(); - uint32 cpumask = CurrentCpus(); // Determine if this is a write or read error. os_->Flush(error->vaddr); @@ -613,11 +594,11 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, (error->vaddr), 1); logprintf(priority, - "%s: miscompare on CPU %d(0x%x) at %p(0x%llx:%s): " + "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, apic_id, - cpumask, + CurrentCpusFormat().c_str(), error->vaddr, error->paddr, dimm_string, @@ -950,7 +931,6 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, bool read_error = false; int apic_id = apicid(); - uint32 cpumask = CurrentCpus(); // Determine if this is a write or read error. os_->Flush(error->vaddr); @@ -978,14 +958,14 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, if (priority < 5) { logprintf(priority, "%s: Tag from %p(0x%llx:%s) (%s) " - "miscompare on CPU %d(0x%x) at %p(0x%llx:%s): " + "miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, error->tagvaddr, error->tagpaddr, tag_dimm_string, read_error ? "read error" : "write error", apic_id, - cpumask, + CurrentCpusFormat().c_str(), error->vaddr, error->paddr, dimm_string, @@ -1090,6 +1070,46 @@ bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64, return true; } +// x86_64 SSE2 assembly implementation of Adler memory copy, with address +// tagging added as a second step. This is useful for debugging failures +// that only occur when SSE / nontemporal writes are used. +bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64, + uint64 *srcmem64, + unsigned int size_in_bytes, + AdlerChecksum *checksum, + struct page_entry *pe) { + // Do ASM copy, ignore checksum. + AdlerChecksum ignored_checksum; + os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum); + + // Force cache flush. + int length = size_in_bytes / sizeof(*dstmem64); + for (int i = 0; i < length; i += sizeof(*dstmem64)) { + os_->FastFlush(dstmem64 + i); + os_->FastFlush(srcmem64 + i); + } + // Check results. + AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe); + // Patch up address tags. + TagAddrC(dstmem64, size_in_bytes); + return true; +} + +// Retag pages.. +bool WorkerThread::TagAddrC(uint64 *memwords, + unsigned int size_in_bytes) { + // Mask is the bitmask of indexes used by the pattern. + // It is the pattern size -1. Size is always a power of 2. + + // Select tag or data as appropriate. + int length = size_in_bytes / wordsize_; + for (int i = 0; i < length; i += 8) { + datacast_t data; + data.l64 = addr_to_tag(&memwords[i]); + memwords[i] = data.l64; + } + return true; +} // C implementation of Adler memory crc. 
bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64, @@ -1122,15 +1142,12 @@ bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64, if (data.l64 != src_tag) ReportTagError(&srcmem64[i], data.l64, src_tag); - data.l32.l = pattern->pattern(i << 1); data.l32.h = pattern->pattern((i << 1) + 1); a1 = a1 + data.l32.l; b1 = b1 + a1; a1 = a1 + data.l32.h; b1 = b1 + a1; - - } else { data.l64 = srcmem64[i]; a1 = a1 + data.l32.l; @@ -1202,11 +1219,10 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, currentblock * blocksize, 0); if (errorcount == 0) { int apic_id = apicid(); - uint32 cpumask = CurrentCpus(); - logprintf(0, "Process Error: CPU %d(0x%x) CrcCopyPage " + logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, cpumask, + apic_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; @@ -1317,7 +1333,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, AdlerChecksum crc; if (tag_mode_) { - AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe); + AdlerAddrMemcpyWarm(targetmem, sourcemem, blocksize, &crc, srcpe); } else { os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc); } @@ -1346,10 +1362,18 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - logprintf(0, "Process Error: CrcWarmCopyPage CRC mismatch %s " - "!= %s, but no miscompares found on second pass.\n", + int apic_id = apicid(); + logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage " + "CRC mismatch %s != %s, " + "but no miscompares found on second pass.\n", + apic_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); + struct ErrorRecord er; + er.actual = sourcemem[0]; + er.expected = 0x0; + er.vaddr = sourcemem; + ProcessError(&er, 0, "Hardware Error"); } } } @@ -1388,23 +1412,23 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, // Memory check work loop. Execute until done, then exhaust pages. -int CheckThread::Work() { +bool CheckThread::Work() { struct page_entry pe; - int result = 1; + bool result = true; int64 loops = 0; logprintf(9, "Log: Starting Check thread %d\n", thread_num_); // We want to check all the pages, and // stop when there aren't any left. - while (1) { - result &= sat_->GetValid(&pe); + while (true) { + result = result && sat_->GetValid(&pe); if (!result) { if (IsReadyToRunNoPause()) logprintf(0, "Process Error: check_thread failed to pop pages, " "bailing\n"); else - result = 1; + result = true; break; } @@ -1414,9 +1438,9 @@ int CheckThread::Work() { // Push pages back on the valid queue if we are still going, // throw them out otherwise. if (IsReadyToRunNoPause()) - result &= sat_->PutValid(&pe); + result = result && sat_->PutValid(&pe); else - result &= sat_->PutEmpty(&pe); + result = result && sat_->PutEmpty(&pe); if (!result) { logprintf(0, "Process Error: check_thread failed to push pages, " "bailing\n"); @@ -1429,24 +1453,24 @@ int CheckThread::Work() { status_ = result; logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n", thread_num_, status_, pages_copied_); - return 1; + return result; } // Memory copy work loop. Execute until marked done. 
-int CopyThread::Work() { +bool CopyThread::Work() { struct page_entry src; struct page_entry dst; - int result = 1; + bool result = true; int64 loops = 0; - logprintf(9, "Log: Starting copy thread %d: cpu %x, mem %x\n", - thread_num_, cpu_mask_, tag_); + logprintf(9, "Log: Starting copy thread %d: cpu %s, mem %x\n", + thread_num_, cpuset_format(&cpu_mask_).c_str(), tag_); while (IsReadyToRun()) { // Pop the needed pages. - result &= sat_->GetValid(&src, tag_); - result &= sat_->GetEmpty(&dst, tag_); + result = result && sat_->GetValid(&src, tag_); + result = result && sat_->GetEmpty(&dst, tag_); if (!result) { logprintf(0, "Process Error: copy_thread failed to pop pages, " "bailing\n"); @@ -1472,8 +1496,8 @@ int CopyThread::Work() { dst.pattern = src.pattern; } - result &= sat_->PutValid(&dst); - result &= sat_->PutEmpty(&src); + result = result && sat_->PutValid(&dst); + result = result && sat_->PutEmpty(&src); // Copy worker-threads yield themselves at the end of each copy loop, // to avoid threads from preempting each other in the middle of the inner @@ -1494,20 +1518,20 @@ int CopyThread::Work() { status_ = result; logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n", thread_num_, status_, pages_copied_); - return 1; + return result; } // Memory invert work loop. Execute until marked done. -int InvertThread::Work() { +bool InvertThread::Work() { struct page_entry src; - int result = 1; + bool result = true; int64 loops = 0; logprintf(9, "Log: Starting invert thread %d\n", thread_num_); while (IsReadyToRun()) { // Pop the needed pages. - result &= sat_->GetValid(&src); + result = result && sat_->GetValid(&src); if (!result) { logprintf(0, "Process Error: invert_thread failed to pop pages, " "bailing\n"); @@ -1533,7 +1557,7 @@ int InvertThread::Work() { if (sat_->strict()) CrcCheckPage(&src); - result &= sat_->PutValid(&src); + result = result && sat_->PutValid(&src); if (!result) { logprintf(0, "Process Error: invert_thread failed to push pages, " "bailing\n"); @@ -1546,7 +1570,7 @@ int InvertThread::Work() { status_ = result; logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n", thread_num_, status_, pages_copied_); - return 1; + return result; } @@ -1565,17 +1589,16 @@ bool FileThread::OpenFile(int *pfile) { logprintf(0, "Process Error: Failed to create file %s!!\n", filename_.c_str()); pages_copied_ = 0; - status_ = 0; - return 0; + return false; } *pfile = fd; - return 1; + return true; } // Close the file. bool FileThread::CloseFile(int fd) { close(fd); - return 1; + return true; } // Check sector tagging. @@ -1615,7 +1638,7 @@ bool FileThread::WritePages(int fd) { int strict = sat_->strict(); // Start fresh at beginning of file for each batch of pages. - lseek(fd, 0, SEEK_SET); + lseek64(fd, 0, SEEK_SET); for (int i = 0; i < sat_->disk_pages(); i++) { struct page_entry src; if (!GetValidPage(&src)) @@ -1770,7 +1793,7 @@ bool FileThread::PagePrepare() { logprintf(0, "Process Error: disk thread posix_memalign " "returned %d (fail)\n", result); - status_ += 1; + status_ = false; return false; } } @@ -1839,17 +1862,14 @@ bool FileThread::PutValidPage(struct page_entry *src) { return true; } - - // Copy data from file into memory blocks. bool FileThread::ReadPages(int fd) { int page_length = sat_->page_length(); int strict = sat_->strict(); - int result = 1; - + bool result = true; // Read our data back out of the file, into it's new location. 
- lseek(fd, 0, SEEK_SET); + lseek64(fd, 0, SEEK_SET); for (int i = 0; i < sat_->disk_pages(); i++) { struct page_entry dst; if (!GetEmptyPage(&dst)) @@ -1888,11 +1908,9 @@ bool FileThread::ReadPages(int fd) { return result; } - // File IO work loop. Execute until marked done. -int FileThread::Work() { - int result = 1; - int fileresult = 1; +bool FileThread::Work() { + bool result = true; int64 loops = 0; logprintf(9, "Log: Starting file thread %d, file %s, device %s\n", @@ -1900,13 +1918,17 @@ int FileThread::Work() { filename_.c_str(), devicename_.c_str()); - if (!PagePrepare()) - return 0; + if (!PagePrepare()) { + status_ = false; + return false; + } // Open the data IO file. int fd = 0; - if (!OpenFile(&fd)) - return 0; + if (!OpenFile(&fd)) { + status_ = false; + return false; + } pass_ = 0; @@ -1919,11 +1941,11 @@ int FileThread::Work() { // Loop until done. while (IsReadyToRun()) { // Do the file write. - if (!(fileresult &= WritePages(fd))) + if (!(result = result && WritePages(fd))) break; // Do the file read. - if (!(fileresult &= ReadPages(fd))) + if (!(result = result && ReadPages(fd))) break; loops++; @@ -1939,7 +1961,7 @@ int FileThread::Work() { logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n", thread_num_, status_, pages_copied_); - return 1; + return result; } bool NetworkThread::IsNetworkStopSet() { @@ -1965,7 +1987,7 @@ bool NetworkThread::CreateSocket(int *psocket) { if (sock == -1) { logprintf(0, "Process Error: Cannot open socket\n"); pages_copied_ = 0; - status_ = 0; + status_ = false; return false; } *psocket = sock; @@ -1989,7 +2011,7 @@ bool NetworkThread::Connect(int sock) { if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) { logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_); pages_copied_ = 0; - status_ = 0; + status_ = false; return false; } @@ -1997,7 +2019,7 @@ bool NetworkThread::Connect(int sock) { sizeof(struct sockaddr))) { logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_); pages_copied_ = 0; - status_ = 0; + status_ = false; return false; } return true; @@ -2018,7 +2040,7 @@ bool NetworkListenThread::Listen() { sat_strerror(errno, buf, sizeof(buf)); logprintf(0, "Process Error: Cannot bind socket: %s\n", buf); pages_copied_ = 0; - status_ = 0; + status_ = false; return false; } listen(sock_, 3); @@ -2052,13 +2074,14 @@ bool NetworkListenThread::GetConnection(int *pnewsock) { if (newsock < 0) { logprintf(0, "Process Error: Did not receive connection\n"); pages_copied_ = 0; - status_ = 0; + status_ = false; return false; } *pnewsock = newsock; return true; } +// Send a page, return false if a page was not sent. bool NetworkThread::SendPage(int sock, struct page_entry *src) { int page_length = sat_->page_length(); char *address = static_cast(src->addr); @@ -2074,6 +2097,7 @@ bool NetworkThread::SendPage(int sock, struct page_entry *src) { logprintf(0, "Process Error: Thread %d, " "Network write failed, bailing. (%s)\n", thread_num_, buf); + status_ = false; } return false; } @@ -2082,7 +2106,7 @@ bool NetworkThread::SendPage(int sock, struct page_entry *src) { return true; } - +// Receive a page. Return false if a page was not received. 
bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) { int page_length = sat_->page_length(); char *address = static_cast(dst->addr); @@ -2107,6 +2131,7 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) { logprintf(0, "Process Error: Thread %d, " "Network read failed, bailing (%s).\n", thread_num_, buf); + status_ = false; // Print arguments and results. logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n", sock, address + (page_length - size), @@ -2129,9 +2154,9 @@ bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) { return true; } - // Network IO work loop. Execute until marked done. -int NetworkThread::Work() { +// Return true if the thread ran as expected. +bool NetworkThread::Work() { logprintf(9, "Log: Starting network thread %d, ip %s\n", thread_num_, ipaddr_); @@ -2139,7 +2164,7 @@ int NetworkThread::Work() { // Make a socket. int sock = 0; if (!CreateSocket(&sock)) - return 0; + return false; // Network IO loop requires network slave thread to have already initialized. // We will sleep here for awhile to ensure that the slave thread will be @@ -2153,17 +2178,17 @@ int NetworkThread::Work() { // Connect to a slave thread. if (!Connect(sock)) - return 0; + return false; // Loop until done. - int result = 1; + bool result = true; int strict = sat_->strict(); int64 loops = 0; while (IsReadyToRun()) { struct page_entry src; struct page_entry dst; - result &= sat_->GetValid(&src); - result &= sat_->GetEmpty(&dst); + result = result && sat_->GetValid(&src); + result = result && sat_->GetEmpty(&dst); if (!result) { logprintf(0, "Process Error: net_thread failed to pop pages, " "bailing\n"); @@ -2175,14 +2200,14 @@ int NetworkThread::Work() { CrcCheckPage(&src); // Do the network write. - if (!(result &= SendPage(sock, &src))) + if (!(result = result && SendPage(sock, &src))) break; // Update pattern reference to reflect new contents. dst.pattern = src.pattern; // Do the network read. - if (!(result &= ReceivePage(sock, &dst))) + if (!(result = result && ReceivePage(sock, &dst))) break; // Ensure that the transfer ended up with correct data. @@ -2190,8 +2215,8 @@ int NetworkThread::Work() { CrcCheckPage(&dst); // Return all of our pages to the queue. - result &= sat_->PutValid(&dst); - result &= sat_->PutEmpty(&src); + result = result && sat_->PutValid(&dst); + result = result && sat_->PutEmpty(&src); if (!result) { logprintf(0, "Process Error: net_thread failed to push pages, " "bailing\n"); @@ -2209,7 +2234,7 @@ int NetworkThread::Work() { logprintf(9, "Log: Completed %d: network thread status %d, " "%d pages copied\n", thread_num_, status_, pages_copied_); - return 1; + return result; } // Spawn slave threads for incoming connections. @@ -2253,15 +2278,17 @@ bool NetworkListenThread::ReapSlaves() { } // Network listener IO work loop. Execute until marked done. -int NetworkListenThread::Work() { - int result = 1; +// Return false on fatal software error. +bool NetworkListenThread::Work() { logprintf(9, "Log: Starting network listen thread %d\n", thread_num_); // Make a socket. sock_ = 0; - if (!CreateSocket(&sock_)) - return 0; + if (!CreateSocket(&sock_)) { + status_ = false; + return false; + } logprintf(9, "Log: Listen thread created sock\n"); // Allows incoming connections to be queued up by socket library. 
@@ -2296,12 +2323,12 @@ int NetworkListenThread::Work() { CloseSocket(sock_); - status_ = result; + status_ = true; logprintf(9, "Log: Completed %d: network listen thread status %d, " "%d pages copied\n", thread_num_, status_, pages_copied_); - return 1; + return true; } // Set network reflector socket struct. @@ -2310,14 +2337,17 @@ void NetworkSlaveThread::SetSock(int sock) { } // Network reflector IO work loop. Execute until marked done. -int NetworkSlaveThread::Work() { +// Return false on fatal software error. +bool NetworkSlaveThread::Work() { logprintf(9, "Log: Starting network slave thread %d\n", thread_num_); // Verify that we have a socket. int sock = sock_; - if (!sock) - return 0; + if (!sock) { + status_ = false; + return false; + } // Loop until done. int64 loops = 0; @@ -2328,7 +2358,7 @@ int NetworkSlaveThread::Work() { logprintf(0, "Process Error: net slave posix_memalign " "returned %d (fail)\n", result); - status_ += 1; + status_ = false; return false; } @@ -2351,7 +2381,7 @@ int NetworkSlaveThread::Work() { pages_copied_ = loops; // No results provided from this type of thread. - status_ = 1; + status_ = true; // Clean up. CloseSocket(sock); @@ -2360,11 +2390,11 @@ int NetworkSlaveThread::Work() { "Log: Completed %d: network slave thread status %d, " "%d pages copied\n", thread_num_, status_, pages_copied_); - return status_; + return true; } // Thread work loop. Execute until marked finished. -int ErrorPollThread::Work() { +bool ErrorPollThread::Work() { logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_); // This calls a generic error polling function in the Os abstraction layer. @@ -2375,12 +2405,13 @@ int ErrorPollThread::Work() { logprintf(9, "Log: Finished system error poll thread %d: %d errors\n", thread_num_, errorcount_); - status_ = 1; - return 1; + status_ = true; + return true; } // Worker thread to heat up CPU. -int CpuStressThread::Work() { +// This thread does not evaluate pass/fail or software error. +bool CpuStressThread::Work() { logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_); do { @@ -2391,8 +2422,8 @@ int CpuStressThread::Work() { logprintf(9, "Log: Finished CPU stress thread %d:\n", thread_num_); - status_ = 1; - return 1; + status_ = true; + return true; } CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, @@ -2406,7 +2437,8 @@ CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, } // Worked thread to test the cache coherency of the CPUs -int CpuCacheCoherencyThread::Work() { +// Return false on fatal sw error. +bool CpuCacheCoherencyThread::Work() { logprintf(9, "Log: Starting the Cache Coherency thread %d\n", cc_thread_num_); uint64 time_start, time_end; @@ -2459,8 +2491,8 @@ int CpuCacheCoherencyThread::Work() { cc_thread_num_, us_elapsed, total_inc, inc_rate); logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n", cc_thread_num_); - status_ = 1; - return 1; + status_ = true; + return true; } DiskThread::DiskThread(DiskBlockTable *block_table) { @@ -2489,9 +2521,14 @@ DiskThread::DiskThread(DiskBlockTable *block_table) { update_block_table_ = 1; block_buffer_ = NULL; + + blocks_written_ = 0; + blocks_read_ = 0; } DiskThread::~DiskThread() { + if (block_buffer_) + free(block_buffer_); } // Set filename for device file (in /dev). @@ -2616,6 +2653,7 @@ bool DiskThread::SetParameters(int read_block_size, return true; } +// Open a device, return false on failure. 
bool DiskThread::OpenDevice(int *pfile) { int fd = open(device_name_.c_str(), O_RDWR | O_SYNC | O_DIRECT | O_LARGEFILE, @@ -2631,6 +2669,7 @@ bool DiskThread::OpenDevice(int *pfile) { } // Retrieves the size (in bytes) of the disk/file. +// Return false on failure. bool DiskThread::GetDiskSize(int fd) { struct stat device_stat; if (fstat(fd, &device_stat) == -1) { @@ -2654,7 +2693,7 @@ bool DiskThread::GetDiskSize(int fd) { if (block_size == 0) { os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1); ++errorcount_; - status_ = 1; // Avoid a procedural error. + status_ = true; // Avoid a procedural error. return false; } @@ -2694,11 +2733,12 @@ int64 DiskThread::GetTime() { return tv.tv_sec * 1000000 + tv.tv_usec; } +// Do randomized reads and (possibly) writes on a device. +// Return false on fatal error, either SW or HW. bool DiskThread::DoWork(int fd) { int64 block_num = 0; - blocks_written_ = 0; - blocks_read_ = 0; int64 num_segments; + bool result = true; if (segment_size_ == -1) { num_segments = 1; @@ -2731,13 +2771,15 @@ bool DiskThread::DoWork(int fd) { while (IsReadyToRun()) { // Write blocks to disk. - logprintf(16, "Write phase for disk %s (thread %d).\n", + logprintf(16, "Log: Write phase %sfor disk %s (thread %d).\n", + non_destructive_ ? "(disabled) " : "", device_name_.c_str(), thread_num_); while (IsReadyToRunNoPause() && in_flight_sectors_.size() < queue_size_ + 1) { // Confine testing to a particular segment of the disk. int64 segment = (block_num / blocks_per_segment_) % num_segments; - if (block_num % blocks_per_segment_ == 0) { + if (!non_destructive_ && + (block_num % blocks_per_segment_ == 0)) { logprintf(20, "Log: Starting to write segment %lld out of " "%lld on disk %s (thread %d).\n", segment, num_segments, device_name_.c_str(), @@ -2768,33 +2810,37 @@ bool DiskThread::DoWork(int fd) { if (!non_destructive_) { if (!WriteBlockToDisk(fd, block)) { block_table_->RemoveBlock(block); - continue; + return false; } + blocks_written_++; } + // Block is either initialized by writing, or in nondestructive case, + // initialized by being added into the datastructure for later reading. block->SetBlockAsInitialized(); - blocks_written_++; in_flight_sectors_.push(block); } // Verify blocks on disk. - logprintf(20, "Read phase for disk %s (thread %d).\n", + logprintf(20, "Log: Read phase for disk %s (thread %d).\n", device_name_.c_str(), thread_num_); while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) { BlockData *block = in_flight_sectors_.front(); in_flight_sectors_.pop(); - ValidateBlockOnDisk(fd, block); + if (!ValidateBlockOnDisk(fd, block)) + return false; block_table_->RemoveBlock(block); blocks_read_++; } } pages_copied_ = blocks_written_ + blocks_read_; - return true; + return result; } // Do an asynchronous disk I/O operation. +// Return false if the IO is not set up. bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, int64 offset, int64 timeout) { // Use the Linux native asynchronous I/O interface for reading/writing. 
@@ -2808,8 +2854,8 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, const char *op_str; const char *error_str; } operations[2] = { - { IOCB_CMD_PREAD, "read", "disk-read-error" }, - { IOCB_CMD_PWRITE, "write", "disk-write-error" } + { IO_CMD_PREAD, "read", "disk-read-error" }, + { IO_CMD_PWRITE, "write", "disk-write-error" } }; struct iocb cb; @@ -2817,16 +2863,19 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, cb.aio_fildes = fd; cb.aio_lio_opcode = operations[op].opcode; - cb.aio_buf = (__u64)buf; - cb.aio_nbytes = size; - cb.aio_offset = offset; + cb.u.c.buf = buf; + cb.u.c.nbytes = size; + cb.u.c.offset = offset; struct iocb *cbs[] = { &cb }; if (io_submit(aio_ctx_, 1, cbs) != 1) { + int error = errno; + char buf[256]; + sat_strerror(error, buf, sizeof(buf)); logprintf(0, "Process Error: Unable to submit async %s " - "on disk %s (thread %d).\n", + "on disk %s (thread %d). Error %d, %s\n", operations[op].op_str, device_name_.c_str(), - thread_num_); + thread_num_, error, buf); return false; } @@ -2839,7 +2888,8 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, // A ctrl-c from the keyboard will cause io_getevents to fail with an // EINTR error code. This is not an error and so don't treat it as such, // but still log it. - if (errno == EINTR) { + int error = errno; + if (error == EINTR) { logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n", operations[op].op_str, device_name_.c_str(), thread_num_); @@ -2860,9 +2910,12 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, io_destroy(aio_ctx_); aio_ctx_ = 0; if (io_setup(5, &aio_ctx_)) { + int error = errno; + char buf[256]; + sat_strerror(error, buf, sizeof(buf)); logprintf(0, "Process Error: Unable to create aio context on disk %s" - " (thread %d).\n", - device_name_.c_str(), thread_num_); + " (thread %d) Error %d, %s\n", + device_name_.c_str(), thread_num_, error, buf); } return false; @@ -2901,6 +2954,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, } // Write a block to disk. +// Return false if the block is not written. bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { memset(block_buffer_, 0, block->GetSize()); @@ -2951,6 +3005,8 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { } // Verify a block on disk. +// Return true if the block was read, also increment errorcount +// if the block had data errors or performance problems. bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { int64 blocks = block->GetSize() / read_block_size_; int64 bytes_read = 0; @@ -2964,7 +3020,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { // Read block from disk and time the read. If it takes longer than the // threshold, complain. - if (lseek(fd, address * kSectorSize, SEEK_SET) == -1) { + if (lseek64(fd, address * kSectorSize, SEEK_SET) == -1) { logprintf(0, "Process Error: Unable to seek to sector %lld in " "DiskThread::ValidateSectorsOnDisk on disk %s " "(thread %d).\n", address, device_name_.c_str(), thread_num_); @@ -2976,7 +3032,6 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { // read them in groups of randomly-sized multiples of read block size. // This assures all data written on disk by this particular block // will be tested using a random reading pattern. - while (blocks != 0) { // Test all read blocks in a written block. 
current_blocks = (random() % blocks) + 1; @@ -3027,7 +3082,9 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { return true; } -int DiskThread::Work() { +// Direct device access thread. +// Return false on software error. +bool DiskThread::Work() { int fd; logprintf(9, "Log: Starting disk thread %d, disk %s\n", @@ -3036,42 +3093,43 @@ int DiskThread::Work() { srandom(time(NULL)); if (!OpenDevice(&fd)) { - return 0; + status_ = false; + return false; } // Allocate a block buffer aligned to 512 bytes since the kernel requires it // when using direst IO. - - int result = posix_memalign(&block_buffer_, kBufferAlignment, + int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment, sat_->page_length()); - if (result) { + if (memalign_result) { CloseDevice(fd); logprintf(0, "Process Error: Unable to allocate memory for buffers " "for disk %s (thread %d) posix memalign returned %d.\n", - device_name_.c_str(), thread_num_, result); - status_ += 1; + device_name_.c_str(), thread_num_, memalign_result); + status_ = false; return false; } if (io_setup(5, &aio_ctx_)) { + CloseDevice(fd); logprintf(0, "Process Error: Unable to create aio context for disk %s" " (thread %d).\n", device_name_.c_str(), thread_num_); - return 0; + status_ = false; + return false; } - DoWork(fd); + bool result = DoWork(fd); - status_ = 1; + status_ = result; io_destroy(aio_ctx_); CloseDevice(fd); - free(block_buffer_); logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, " "%d pages copied\n", thread_num_, device_name_.c_str(), status_, pages_copied_); - return 1; + return result; } RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table) @@ -3082,15 +3140,14 @@ RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table) RandomDiskThread::~RandomDiskThread() { } +// Workload for random disk thread. bool RandomDiskThread::DoWork(int fd) { - blocks_read_ = 0; - blocks_written_ = 0; - logprintf(11, "Random phase for disk %s (thread %d).\n", + logprintf(11, "Log: Random phase for disk %s (thread %d).\n", device_name_.c_str(), thread_num_); while (IsReadyToRun()) { BlockData *block = block_table_->GetRandomBlock(); if (block == NULL) { - logprintf(12, "No block available for device %s (thread %d).\n", + logprintf(12, "Log: No block available for device %s (thread %d).\n", device_name_.c_str(), thread_num_); } else { ValidateBlockOnDisk(fd, block); @@ -3112,6 +3169,8 @@ MemoryRegionThread::~MemoryRegionThread() { delete pages_; } +// Set a region of memory or MMIO to be tested. +// Return false if region could not be mapped. bool MemoryRegionThread::SetRegion(void *region, int64 size) { int plength = sat_->page_length(); int npages = size / plength; @@ -3137,6 +3196,8 @@ bool MemoryRegionThread::SetRegion(void *region, int64 size) { } } +// More detailed error printout for hardware errors in memory or MMIO +// regions. void MemoryRegionThread::ProcessError(struct ErrorRecord *error, int priority, const char *message) { @@ -3187,10 +3248,12 @@ void MemoryRegionThread::ProcessError(struct ErrorRecord *error, } } -int MemoryRegionThread::Work() { +// Workload for testion memory or MMIO regions. +// Return false on software error. +bool MemoryRegionThread::Work() { struct page_entry source_pe; struct page_entry memregion_pe; - int result = 1; + bool result = true; int64 loops = 0; const uint64 error_constant = 0x00ba00000000ba00LL; @@ -3204,14 +3267,14 @@ int MemoryRegionThread::Work() { while (IsReadyToRun()) { // Getting pages from SAT and queue. 
phase_ = kPhaseNoPhase; - result &= sat_->GetValid(&source_pe); + result = result && sat_->GetValid(&source_pe); if (!result) { logprintf(0, "Process Error: memory region thread failed to pop " "pages from SAT, bailing\n"); break; } - result &= pages_->PopRandom(&memregion_pe); + result = result && pages_->PopRandom(&memregion_pe); if (!result) { logprintf(0, "Process Error: memory region thread failed to pop " "pages from queue, bailing\n"); @@ -3245,13 +3308,13 @@ int MemoryRegionThread::Work() { phase_ = kPhaseNoPhase; // Storing pages on their proper queues. - result &= sat_->PutValid(&source_pe); + result = result && sat_->PutValid(&source_pe); if (!result) { logprintf(0, "Process Error: memory region thread failed to push " "pages into SAT, bailing\n"); break; } - result &= pages_->Push(&memregion_pe); + result = result && pages_->Push(&memregion_pe); if (!result) { logprintf(0, "Process Error: memory region thread failed to push " "pages into queue, bailing\n"); @@ -3271,5 +3334,5 @@ int MemoryRegionThread::Work() { status_ = result; logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d " "pages checked\n", thread_num_, status_, pages_copied_); - return 1; + return result; } diff --git a/src/worker.h b/src/worker.h index b85f926..7aae5f2 100644 --- a/src/worker.h +++ b/src/worker.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include @@ -207,11 +207,11 @@ class WorkerThread { virtual ~WorkerThread(); // Initialize values and thread ID number. - void InitThread(int thread_num_init, - class Sat *sat_init, - class OsLayer *os_init, - class PatternList *patternlist_init, - WorkerStatus *worker_status); + virtual void InitThread(int thread_num_init, + class Sat *sat_init, + class OsLayer *os_init, + class PatternList *patternlist_init, + WorkerStatus *worker_status); // This function is DEPRECATED, it does nothing. void SetPriority(Priority priority) { priority_ = priority; } @@ -222,13 +222,13 @@ class WorkerThread { bool InitPriority(); // Wait for the thread to complete its cleanup. - virtual int JoinThread(); + virtual bool JoinThread(); // Kill worker thread with SIGINT. - virtual int KillThread(); + virtual bool KillThread(); // This is the task function that the thread executes. // This is implemented per subclass. - virtual int Work(); + virtual bool Work(); // Starts per-WorkerThread timer. void StartThreadTimer() {gettimeofday(&start_time_, NULL);} @@ -247,13 +247,13 @@ class WorkerThread { } // Acccess member variables. - int GetStatus() {return status_;} + bool GetStatus() {return status_;} int64 GetErrorCount() {return errorcount_;} int64 GetPageCount() {return pages_copied_;} int64 GetRunDurationUSec() {return runduration_usec_;} // Returns bandwidth defined as pages_copied / thread_run_durations. - float GetCopiedData(); + virtual float GetCopiedData(); // Calculate worker thread specific copied data. virtual float GetMemoryCopiedData() {return 0;} virtual float GetDeviceCopiedData() {return 0;} @@ -265,18 +265,31 @@ class WorkerThread { {return GetDeviceCopiedData() / ( runduration_usec_ * 1.0 / 1000000);} - void set_cpu_mask(int32 mask) {cpu_mask_ = mask;} + void set_cpu_mask(cpu_set_t *mask) { + memcpy(&cpu_mask_, mask, sizeof(*mask)); + } + + void set_cpu_mask_to_cpu(int cpu_num) { + cpuset_set_ab(&cpu_mask_, cpu_num, cpu_num + 1); + } + void set_tag(int32 tag) {tag_ = tag;} // Returns CPU mask, where each bit represents a logical cpu. 
- uint32 AvailableCpus(); + bool AvailableCpus(cpu_set_t *cpuset); // Returns CPU mask of CPUs this thread is bound to, - uint32 CurrentCpus(); + bool CurrentCpus(cpu_set_t *cpuset); + // Returns Current Cpus mask as string. + string CurrentCpusFormat() { + cpu_set_t current_cpus; + CurrentCpus(¤t_cpus); + return cpuset_format(¤t_cpus); + } int ThreadID() {return thread_num_;} // Bind worker thread to specified CPU(s) - bool BindToCpus(uint32 thread_mask); + bool BindToCpus(const cpu_set_t *cpuset); protected: // This function dictates whether the main work loop @@ -326,17 +339,26 @@ class WorkerThread { unsigned int size_in_bytes, AdlerChecksum *checksum, struct page_entry *pe); + // SSE copy with address tagging. + virtual bool AdlerAddrMemcpyWarm(uint64 *dstmem64, + uint64 *srcmem64, + unsigned int size_in_bytes, + AdlerChecksum *checksum, + struct page_entry *pe); // Crc data with address tagging. virtual bool AdlerAddrCrcC(uint64 *srcmem64, unsigned int size_in_bytes, AdlerChecksum *checksum, struct page_entry *pe); + // Setup tagging on an existing page. + virtual bool TagAddrC(uint64 *memwords, + unsigned int size_in_bytes); // Report a mistagged cacheline. - bool ReportTagError(uint64 *mem64, + virtual bool ReportTagError(uint64 *mem64, uint64 actual, uint64 tag); // Print out the error record of the tag mismatch. - void ProcessTagError(struct ErrorRecord *error, + virtual void ProcessTagError(struct ErrorRecord *error, int priority, const char *message); @@ -346,11 +368,11 @@ class WorkerThread { protected: // General state variables that all subclasses need. int thread_num_; // Thread ID. - volatile int status_; // Error status. + volatile bool status_; // Error status. volatile int64 pages_copied_; // Recorded for memory bandwidth calc. volatile int64 errorcount_; // Miscompares seen by this thread. - volatile uint32 cpu_mask_; // Cores this thread is allowed to run on. + cpu_set_t cpu_mask_; // Cores this thread is allowed to run on. volatile uint32 tag_; // Tag hint for memory this thread can use. bool tag_mode_; // Tag cachelines with vaddr. @@ -383,7 +405,7 @@ class FileThread : public WorkerThread { FileThread(); // Set filename to use for file IO. virtual void SetFile(const char *filename_init); - virtual int Work(); + virtual bool Work(); // Calculate worker thread specific bandwidth. virtual float GetDeviceCopiedData() @@ -466,7 +488,7 @@ class NetworkThread : public WorkerThread { NetworkThread(); // Set hostname to use for net IO. virtual void SetIP(const char *ipaddr_init); - virtual int Work(); + virtual bool Work(); // Calculate worker thread specific bandwidth. virtual float GetDeviceCopiedData() @@ -493,7 +515,7 @@ class NetworkSlaveThread : public NetworkThread { NetworkSlaveThread(); // Set socket for IO. virtual void SetSock(int sock); - virtual int Work(); + virtual bool Work(); protected: virtual bool IsNetworkStopSet(); @@ -506,7 +528,7 @@ class NetworkSlaveThread : public NetworkThread { class NetworkListenThread : public NetworkThread { public: NetworkListenThread(); - virtual int Work(); + virtual bool Work(); private: virtual bool Listen(); @@ -530,7 +552,7 @@ class NetworkListenThread : public NetworkThread { class CopyThread : public WorkerThread { public: CopyThread() {} - virtual int Work(); + virtual bool Work(); // Calculate worker thread specific bandwidth. 
virtual float GetMemoryCopiedData() {return GetCopiedData()*2;} @@ -543,7 +565,7 @@ class CopyThread : public WorkerThread { class InvertThread : public WorkerThread { public: InvertThread() {} - virtual int Work(); + virtual bool Work(); // Calculate worker thread specific bandwidth. virtual float GetMemoryCopiedData() {return GetCopiedData()*4;} @@ -560,7 +582,7 @@ class FillThread : public WorkerThread { FillThread(); // Set how many pages this thread should fill before exiting. virtual void SetFillPages(int64 num_pages_to_fill_init); - virtual int Work(); + virtual bool Work(); private: // Fill a page with the data pattern in pe->pattern. @@ -575,7 +597,7 @@ class FillThread : public WorkerThread { class CheckThread : public WorkerThread { public: CheckThread() {} - virtual int Work(); + virtual bool Work(); // Calculate worker thread specific bandwidth. virtual float GetMemoryCopiedData() {return GetCopiedData();} @@ -590,7 +612,7 @@ class CheckThread : public WorkerThread { class ErrorPollThread : public WorkerThread { public: ErrorPollThread() {} - virtual int Work(); + virtual bool Work(); private: DISALLOW_COPY_AND_ASSIGN(ErrorPollThread); @@ -600,7 +622,7 @@ class ErrorPollThread : public WorkerThread { class CpuStressThread : public WorkerThread { public: CpuStressThread() {} - virtual int Work(); + virtual bool Work(); private: DISALLOW_COPY_AND_ASSIGN(CpuStressThread); @@ -614,7 +636,7 @@ class CpuCacheCoherencyThread : public WorkerThread { int cc_cacheline_count_, int cc_thread_num_, int cc_inc_count_); - virtual int Work(); + virtual bool Work(); protected: cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline. @@ -651,7 +673,7 @@ class DiskThread : public WorkerThread { int64 write_threshold, int non_destructive); - virtual int Work(); + virtual bool Work(); virtual float GetMemoryCopiedData() {return 0;} @@ -727,7 +749,7 @@ class DiskThread : public WorkerThread { // not verified. void *block_buffer_; // Pointer to aligned block buffer. - aio_context_t aio_ctx_; // Asynchronous I/O context for Linux native AIO. + io_context_t aio_ctx_; // Asynchronous I/O context for Linux native AIO. DiskBlockTable *block_table_; // Disk Block Table, shared by all disk // threads that read / write at the same @@ -751,7 +773,7 @@ class MemoryRegionThread : public WorkerThread { public: MemoryRegionThread(); ~MemoryRegionThread(); - virtual int Work(); + virtual bool Work(); void ProcessError(struct ErrorRecord *error, int priority, const char *message); bool SetRegion(void *region, int64 size); -- cgit v1.2.3 From 6d1e64db329883e43dbca06471c093fc23dc9a2e Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Fri, 14 May 2010 03:47:11 +0000 Subject: Update stressapptest to 1.0.3. * /dev/shm support to allow >1.4G memory usage for 32 bit app. * Some arm support. * x86 SSE support. 
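The /dev/shm item above refers to the general trick of backing test memory with a tmpfs file so a 32-bit binary is not limited by what it can map at once: the buffer lives in /dev/shm and windows of it are mapped and unmapped on demand. A generic sketch of that technique, with a made-up path and sizes, and not the actual code added to os.cc in this change:

    #define _FILE_OFFSET_BITS 64   // 64-bit off_t even on 32-bit builds.
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main() {
      const off_t total   = 3LL * 1024 * 1024 * 1024;  // 3 GB backing store.
      const size_t window = 256u * 1024 * 1024;        // 256 MB mapped at a time.

      int fd = open("/dev/shm/sat_sketch", O_CREAT | O_RDWR, 0600);
      if (fd < 0 || ftruncate(fd, total) != 0) return 1;

      for (off_t offset = 0; offset < total; offset += window) {
        char *buf = static_cast<char*>(mmap(NULL, window,
                                            PROT_READ | PROT_WRITE,
                                            MAP_SHARED, fd, offset));
        if (buf == MAP_FAILED) return 1;
        buf[0] = 0x5a;             // Touch the window, then move on.
        munmap(buf, window);
      }
      close(fd);
      unlink("/dev/shm/sat_sketch");
      return 0;
    }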
--- src/adler32memcpy.cc | 124 ++++++++++++-------- src/disk_blocks.h | 2 +- src/finelock_queue.cc | 12 +- src/finelock_queue.h | 6 +- src/logger.cc | 5 +- src/logger.h | 2 +- src/os.cc | 259 +++++++++++++++++++++++++++++++++++------- src/os.h | 21 +++- src/pattern.cc | 2 +- src/pattern.h | 2 +- src/sat.cc | 31 +---- src/sat.h | 2 +- src/stressapptest_config.h.in | 3 + src/worker.cc | 28 +++-- 14 files changed, 351 insertions(+), 148 deletions(-) (limited to 'src') diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc index 529dcc4..69324f7 100644 --- a/src/adler32memcpy.cc +++ b/src/adler32memcpy.cc @@ -225,19 +225,41 @@ bool AdlerMemcpyWarmC(uint64 *dstmem64, uint64 *srcmem64, // x86_64 SSE2 assembly implementation of fast and stressful Adler memory copy. bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, unsigned int size_in_bytes, AdlerChecksum *checksum) { -// Use assembly implementation only with 64bit compilation. -#ifndef STRESSAPPTEST_CPU_X86_64 - // Fall back to C implementation for 32bit compilation. - return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum); -#else +// Use assembly implementation where supported. +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + +// Pull a bit of tricky preprocessing to make the inline asm both +// 32 bit and 64 bit. +#ifdef STRESSAPPTEST_CPU_I686 // Instead of coding both, x86... +#define rAX "%%eax" +#define rCX "%%ecx" +#define rDX "%%edx" +#define rBX "%%ebx" +#define rSP "%%esp" +#define rBP "%%ebp" +#define rSI "%%esi" +#define rDI "%%edi" +#endif + +#ifdef STRESSAPPTEST_CPU_X86_64 // ...and x64, we use rXX macros. +#define rAX "%%rax" +#define rCX "%%rcx" +#define rDX "%%rdx" +#define rBX "%%rbx" +#define rSP "%%rsp" +#define rBP "%%rbp" +#define rSI "%%rsi" +#define rDI "%%rdi" +#endif + // Elements 0 to 3 are used for holding checksum terms a1, a2, // b1, b2 respectively. These elements are filled by asm code. // Elements 4 and 5 are used by asm code to for ANDing MMX data and removing // 2 words from each MMX register (A MMX reg has 4 words, by ANDing we are // setting word index 0 and word index 2 to zero). // Element 6 and 7 are used for setting a1 and a2 to 1. - volatile uint64 checksum_arr[] = {0, 0, 0, 0, - 0x00000000ffffffffUL, 0x00000000ffffffffUL, 1, 1}; + volatile uint64 checksum_arr[] __attribute__ ((aligned(16))) = + {0, 0, 0, 0, 0x00000000ffffffffUL, 0x00000000ffffffffUL, 1, 1}; if ((size_in_bytes >> 19) > 0) { // Size is too large. Must be less than 2^19 bytes = 512 KB. @@ -245,23 +267,24 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, } // Number of 32-bit words which are not added to a1/a2 in the main loop. - uint64 remaining_words = (size_in_bytes % 48) / 4; + uint32 remaining_words = (size_in_bytes % 48) / 4; // Since we are moving 48 bytes at a time number of iterations = total size/48 // is value of counter. - uint64 num_of_48_byte_units = size_in_bytes / 48; + uint32 num_of_48_byte_units = size_in_bytes / 48; - asm volatile( + asm volatile ( // Source address is in ESI (extended source index) // destination is in EDI (extended destination index) - // and counter is already in ECX (extended counter index). - "cmp $0, %%ecx;" // Compare counter to zero. + // and counter is already in ECX (extended counter + // index). + "cmp $0, " rCX ";" // Compare counter to zero. "jz END;" // XMM6 is initialized with 1 and XMM7 with 0. 
- "prefetchnta 0(%%rsi);" - "prefetchnta 64(%%rsi);" - "movdqu 48(%%rax), %%xmm6;" + "prefetchnta 0(" rSI ");" + "prefetchnta 64(" rSI ");" + "movdqu 48(" rAX "), %%xmm6;" "xorps %%xmm7, %%xmm7;" // Start of the loop which copies 48 bytes from source to dst each time. @@ -269,28 +292,28 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // Make 6 moves each of 16 bytes from srcmem to XMM registers. // We are using 2 words out of 4 words in each XMM register, - // word index 0 and word index 2) - "movdqa 0(%%rsi), %%xmm0;" - "movdqu 4(%%rsi), %%xmm1;" // Be careful to use unaligned move here. - "movdqa 16(%%rsi), %%xmm2;" - "movdqu 20(%%rsi), %%xmm3;" - "movdqa 32(%%rsi), %%xmm4;" - "movdqu 36(%%rsi), %%xmm5;" + // word index 0 and word index 2 + "movdqa 0(" rSI "), %%xmm0;" + "movdqu 4(" rSI "), %%xmm1;" // Be careful to use unaligned move here. + "movdqa 16(" rSI "), %%xmm2;" + "movdqu 20(" rSI "), %%xmm3;" + "movdqa 32(" rSI "), %%xmm4;" + "movdqu 36(" rSI "), %%xmm5;" // Move 3 * 16 bytes from XMM registers to dstmem. // Note: this copy must be performed before pinsrw instructions since // they will modify the XMM registers. - "movntdq %%xmm0, 0(%%rdi);" - "movntdq %%xmm2, 16(%%rdi);" - "movntdq %%xmm4, 32(%%rdi);" + "movntdq %%xmm0, 0(" rDI ");" + "movntdq %%xmm2, 16(" rDI ");" + "movntdq %%xmm4, 32(" rDI ");" // Sets the word[1] and word[3] of XMM0 to XMM5 to zero. - "andps 32(%%rax), %%xmm0;" - "andps 32(%%rax), %%xmm1;" - "andps 32(%%rax), %%xmm2;" - "andps 32(%%rax), %%xmm3;" - "andps 32(%%rax), %%xmm4;" - "andps 32(%%rax), %%xmm5;" + "andps 32(" rAX "), %%xmm0;" + "andps 32(" rAX "), %%xmm1;" + "andps 32(" rAX "), %%xmm2;" + "andps 32(" rAX "), %%xmm3;" + "andps 32(" rAX "), %%xmm4;" + "andps 32(" rAX "), %%xmm5;" // Add XMM0 to XMM6 and then add XMM6 to XMM7. // Repeat this for XMM1, ..., XMM5. @@ -311,43 +334,43 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, "paddq %%xmm6, %%xmm7;" // Increment ESI and EDI by 48 bytes and decrement counter by 1. - "add $48, %%rsi;" - "add $48, %%rdi;" - "prefetchnta 0(%%rsi);" - "prefetchnta 64(%%rsi);" - "dec %%rcx;" + "add $48, " rSI ";" + "add $48, " rDI ";" + "prefetchnta 0(" rSI ");" + "prefetchnta 64(" rSI ");" + "dec " rCX ";" "jnz TOP;" // Now only remaining_words 32-bit words are left. // make a loop, add first two words to a1 and next two to a2 (just like // above loop, the only extra thing we are doing is rechecking - // %rdx (=remaining_words) everytime we add a number to a1/a2. + // rDX (=remaining_words) everytime we add a number to a1/a2. "REM_IS_STILL_NOT_ZERO:\n" // Unless remaining_words becomes less than 4 words(16 bytes) // there is not much issue and remaining_words will always // be a multiple of four by assumption. - "cmp $4, %%rdx;" + "cmp $4, " rDX ";" // In case for some weird reasons if remaining_words becomes // less than 4 but not zero then also break the code and go off to END. "jl END;" // Otherwise just go on and copy data in chunks of 4-words at a time till // whole data (<48 bytes) is copied. - "movdqa 0(%%rsi), %%xmm0;" // Copy next 4-words to XMM0 and to XMM1. + "movdqa 0(" rSI "), %%xmm0;" // Copy next 4-words to XMM0 and to XMM1. - "movdqa 0(%%rsi), %%xmm5;" // Accomplish movdqu 4(%%rsi) without + "movdqa 0(" rSI "), %%xmm5;" // Accomplish movdqu 4(%rSI) without "pshufd $0x39, %%xmm5, %%xmm1;" // indexing off memory boundary. - "movntdq %%xmm0, 0(%%rdi);" // Copy 4-words to destination. 
- "andps 32(%%rax), %%xmm0;" - "andps 32(%%rax), %%xmm1;" + "movntdq %%xmm0, 0(" rDI ");" // Copy 4-words to destination. + "andps 32(" rAX "), %%xmm0;" + "andps 32(" rAX "), %%xmm1;" "paddq %%xmm0, %%xmm6;" "paddq %%xmm6, %%xmm7;" "paddq %%xmm1, %%xmm6;" "paddq %%xmm6, %%xmm7;" - "add $16, %%rsi;" - "add $16, %%rdi;" - "sub $4, %%rdx;" - // Decrement %%rdx by 4 since %%rdx is number of 32-bit + "add $16, " rSI ";" + "add $16, " rDI ";" + "sub $4, " rDX ";" + // Decrement %rDX by 4 since %rDX is number of 32-bit // words left after considering all 48-byte units. "jmp REM_IS_STILL_NOT_ZERO;" @@ -356,8 +379,8 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // 64 bit numbers and have to be converted to 64 bit numbers) // seems like Adler128 (since size of each part is 4 byte rather than // 1 byte). - "movdqa %%xmm6, 0(%%rax);" - "movdqa %%xmm7, 16(%%rax);" + "movdqa %%xmm6, 0(" rAX ");" + "movdqa %%xmm7, 16(" rAX ");" "sfence;" // No output registers. @@ -376,5 +399,8 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // that there is no problem with memory this just mean that data was copied // from src to dst and checksum was calculated successfully). return true; +#else + // Fall back to C implementation for anything else. + return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum); #endif } diff --git a/src/disk_blocks.h b/src/disk_blocks.h index f4ca93f..cb634c9 100644 --- a/src/disk_blocks.h +++ b/src/disk_blocks.h @@ -100,7 +100,7 @@ class DiskBlockTable { typedef vector PosToAddrVector; PosToAddrVector pos_to_addr_; AddrToBlockMap addr_to_block_; - int64 nelems_; + uint64 nelems_; int sector_size_; // Sector size, in bytes int write_block_size_; // Block size, in bytes string device_name_; // Device name diff --git a/src/finelock_queue.cc b/src/finelock_queue.cc index 569903a..8d914b8 100644 --- a/src/finelock_queue.cc +++ b/src/finelock_queue.cc @@ -45,7 +45,7 @@ FineLockPEQueue::FineLockPEQueue( queue_metric_ = kTouch; { // Init all the page locks. - for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { pthread_mutex_init(&(pagelocks_[i]), NULL); // Pages start out owned (locked) by Sat::InitializePages. // A locked state indicates that the page state is unknown, @@ -147,7 +147,7 @@ int64 FineLockPEQueue::getC(int64 m) { // Destructor: Clean-up allocated memory and destroy pthread locks. FineLockPEQueue::~FineLockPEQueue() { - int64 i; + uint64 i; for (i = 0; i < q_size_; i++) pthread_mutex_destroy(&(pagelocks_[i])); delete[] pagelocks_; @@ -173,11 +173,11 @@ bool FineLockPEQueue::QueueAnalysis() { } // Bucketize the page counts by highest bit set. - for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { uint32 readcount = pages_[i].touch; int b = 0; for (b = 0; b < 31; b++) { - if (readcount < (1 << b)) + if (readcount < (1u << b)) break; } @@ -271,7 +271,7 @@ bool FineLockPEQueue::GetPageFromPhysical(uint64 paddr, struct page_entry *pe) { // Traverse through array until finding a page // that contains the address we want.. - for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { uint64 page_addr = pages_[i].paddr; // This assumes linear vaddr. if ((page_addr <= paddr) && (page_addr + page_size_ > paddr)) { @@ -335,7 +335,7 @@ bool FineLockPEQueue::GetRandomWithPredicateTag(struct page_entry *pe, uint64 next_try = 1; // Traverse through array until finding a page meeting given predicate. 
- for (int64 i = 0; i < q_size_; i++) { + for (uint64 i = 0; i < q_size_; i++) { uint64 index = (next_try + first_try) % q_size_; // Go through the loop linear conguentially. We are offsetting by // 'first_try' so this path will be a different sequence for every diff --git a/src/finelock_queue.h b/src/finelock_queue.h index 54b154e..2de5a46 100644 --- a/src/finelock_queue.h +++ b/src/finelock_queue.h @@ -57,7 +57,9 @@ class FineLockPEQueue { uint64 GetRandom64FromSlot(int slot); // Helper function to check index range, returns true if index is valid. - bool valid_index(int64 index) { return index >= 0 && index < q_size_; } + bool valid_index(int64 index) { + return index >= 0 && static_cast(index) < q_size_; + } // Returns true if page entry is valid, false otherwise. static bool page_is_valid(struct page_entry *pe) { @@ -85,7 +87,7 @@ class FineLockPEQueue { pthread_mutex_t *pagelocks_; // Per-page-entry locks. struct page_entry *pages_; // Where page entries are held. - int64 q_size_; // Size of the queue. + uint64 q_size_; // Size of the queue. int64 page_size_; // For calculating array index from offset. enum { diff --git a/src/logger.cc b/src/logger.cc index 81f1e3e..e4ecb03 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -38,7 +38,7 @@ void Logger::VLogF(int priority, const char *format, va_list args) { } char buffer[4096]; int length = vsnprintf(buffer, sizeof buffer, format, args); - if (length >= sizeof buffer) { + if (static_cast(length) >= sizeof buffer) { length = sizeof buffer; buffer[sizeof buffer - 1] = '\n'; } @@ -96,7 +96,8 @@ void Logger::QueueLogLine(string *line) { namespace { void WriteToFile(const string& line, int fd) { - LOGGER_ASSERT(write(fd, line.data(), line.size()) == line.size()); + LOGGER_ASSERT(write(fd, line.data(), line.size()) == + static_cast(line.size())); } } diff --git a/src/logger.h b/src/logger.h index 3eaea57..1d70107 100644 --- a/src/logger.h +++ b/src/logger.h @@ -28,7 +28,7 @@ // Attempts to log additional lines will block when the queue reaches this size. // Due to how the logging thread works, up to twice this many log lines may be // outstanding at any point. -static const int kMaxQueueSize = 250; +static const size_t kMaxQueueSize = 250; // This is only for use by the Logger class, do not use it elsewhere! diff --git a/src/os.cc b/src/os.cc index 4784028..1340d6b 100644 --- a/src/os.cc +++ b/src/os.cc @@ -53,8 +53,12 @@ OsLayer::OsLayer() { testmemsize_ = 0; totalmemsize_ = 0; min_hugepages_bytes_ = 0; - error_injection_ = false; normal_mem_ = true; + use_hugepages_ = false; + use_posix_shm_ = false; + dynamic_mapped_shmem_ = false; + shmid_ = 0; + time_initialized_ = 0; regionsize_ = 0; @@ -64,6 +68,13 @@ OsLayer::OsLayer() { num_cpus_per_node_ = 0; error_diagnoser_ = 0; err_log_callback_ = 0; + error_injection_ = false; + + void *pvoid = 0; + address_mode_ = sizeof(pvoid) * 8; + + has_clflush_ = false; + has_sse2_ = false; } // OsLayer cleanup. @@ -75,8 +86,9 @@ OsLayer::~OsLayer() { // OsLayer initialization. bool OsLayer::Initialize() { time_initialized_ = time(NULL); - use_hugepages_ = false; - shmid_ = 0; + // Detect asm support. + GetFeatures(); + if (num_cpus_ == 0) { num_nodes_ = 1; num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN); @@ -129,13 +141,53 @@ list OsLayer::FindFileDevices() { return locations; } + +// Get HW core features from cpuid instruction. 
+void OsLayer::GetFeatures() { +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // CPUID features documented at: + // http://www.sandpile.org/ia32/cpuid.htm + int ax, bx, cx, dx; + __asm__ __volatile__ ( + "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1)); + has_clflush_ = (dx >> 19) & 1; + has_sse2_ = (dx >> 26) & 1; + + logprintf(9, "Log: has clflush: %s, has sse2: %s\n", + has_clflush_ ? "true" : "false", + has_sse2_ ? "true" : "false"); +#elif defined(STRESSAPPTEST_CPU_PPC) + // All PPC implementations have cache flush instructions. + has_clflush_ = true; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) +#warning "Unsupported CPU type ARMV7A: unable to determine feature set." +#else +#warning "Unsupported CPU type: unable to determine feature set." +#endif +} + + // We need to flush the cacheline here. void OsLayer::Flush(void *vaddr) { // Use the generic flush. This function is just so we can override // this if we are so inclined. - FastFlush(vaddr); + if (has_clflush_) + FastFlush(vaddr); +} + + +// Run C or ASM copy as appropriate.. +bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, + unsigned int size_in_bytes, + AdlerChecksum *checksum) { + if (has_sse2_) { + return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum); + } else { + return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum); + } } + // Translate user virtual to physical address. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { char tmpbuf[256]; @@ -317,65 +369,155 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { // Try hugepages first. void *buf = 0; + sat_assert(length >= 0); + if (paddr_base) logprintf(0, "Process Error: non zero paddr_base %#llx is not supported," " ignore.\n", paddr_base); - { // Allocate hugepage mapped memory. - int shmid; - void *shmaddr; - - if ((shmid = shmget(2, length, - SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { - int err = errno; - char errtxt[256] = ""; - strerror_r(err, errtxt, sizeof(errtxt)); - logprintf(12, "Log: failed to allocate shared mem object - err %d (%s)\n", - err, errtxt); - goto hugepage_failover; - } + // Determine optimal memory allocation path. + bool prefer_hugepages = false; + bool prefer_posix_shm = false; + bool prefer_dynamic_mapping = false; - shmaddr = shmat(shmid, NULL, NULL); - if (shmaddr == reinterpret_cast(-1)) { - int err = errno; - char errtxt[256] = ""; - strerror_r(err, errtxt, sizeof(errtxt)); - logprintf(0, "Log: failed to attach shared mem object - err %d (%s).\n", - err, errtxt); - if (shmctl(shmid, IPC_RMID, NULL) < 0) { + // Are there enough hugepages? + int64 hugepagesize = FindHugePages() * 2 * kMegabyte; + // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory? + if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) { + prefer_dynamic_mapping = true; + prefer_posix_shm = true; + logprintf(3, "Log: Prefer POSIX shared memory allocation.\n"); + logprintf(3, "Log: You may need to run " + "'sudo mount -o remount,size=100\% /dev/shm.'\n"); + } else if (hugepagesize >= length) { + prefer_hugepages = true; + logprintf(3, "Log: Prefer using hugepace allocation.\n"); + } else { + logprintf(3, "Log: Prefer plain malloc memory allocation.\n"); + } + + // Allocate hugepage mapped memory. + if (prefer_hugepages) { + do { // Allow break statement. 
+ int shmid; + void *shmaddr; + + if ((shmid = shmget(2, length, + SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { int err = errno; - char errtxt[256] = ""; - strerror_r(err, errtxt, sizeof(errtxt)); - logprintf(0, "Log: failed to remove shared mem object - err %d (%s).\n", - err, errtxt); + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to allocate shared hugepage " + "object - err %d (%s)\n", + err, errtxt.c_str()); + logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n"); + break; } - goto hugepage_failover; - } - use_hugepages_ = true; - shmid_ = shmid; - buf = shmaddr; - logprintf(0, "Log: Using hugepages 0x%x at %p.\n", shmid, shmaddr); + + shmaddr = shmat(shmid, NULL, NULL); + if (shmaddr == reinterpret_cast(-1)) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Log: failed to attach shared " + "hugepage object - err %d (%s).\n", + err, errtxt.c_str()); + if (shmctl(shmid, IPC_RMID, NULL) < 0) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Log: failed to remove shared " + "hugepage object - err %d (%s).\n", + err, errtxt.c_str()); + } + break; + } + use_hugepages_ = true; + shmid_ = shmid; + buf = shmaddr; + logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n", + shmid, shmaddr); + } while (0); } - hugepage_failover: + if ((!use_hugepages_) && prefer_posix_shm) { + do { + int shm_object; + void *shmaddr = NULL; + + shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU); + if (shm_object < 0) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to allocate shared " + "smallpage object - err %d (%s)\n", + err, errtxt.c_str()); + break; + } + + if (0 > ftruncate(shm_object, length)) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to ftruncate shared " + "smallpage object - err %d (%s)\n", + err, errtxt.c_str()); + break; + } + + // 32 bit linux apps can only use ~1.4G of address space. + // Use dynamic mapping for allocations larger than that. + // Currently perf hit is ~10% for this. + if (prefer_dynamic_mapping) { + dynamic_mapped_shmem_ = true; + } else { + // Do a full mapping here otherwise. + shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, + shm_object, NULL); + if (shmaddr == reinterpret_cast(-1)) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Log: failed to map shared " + "smallpage object - err %d (%s).\n", + err, errtxt.c_str()); + break; + } + } + + use_posix_shm_ = true; + shmid_ = shm_object; + buf = shmaddr; + char location_message[256] = ""; + if (dynamic_mapped_shmem_) { + sprintf(location_message, "mapped as needed"); + } else { + sprintf(location_message, "at %p", shmaddr); + } + logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n", + shm_object, location_message); + } while (0); + shm_unlink("/stressapptest"); + } - if (!use_hugepages_) { + if (!use_hugepages_ && !use_posix_shm_) { // Use memalign to ensure that blocks are aligned enough for disk direct IO. buf = static_cast(memalign(4096, length)); - if (buf) + if (buf) { logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); - else + } else { logprintf(0, "Process Error: memalign returned 0\n"); + if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { + logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " + "bit process. 
Please setup shared memory.\n"); + } + } } testmem_ = buf; - if (buf) { + if (buf || dynamic_mapped_shmem_) { testmemsize_ = length; } else { testmemsize_ = 0; } - return (buf != 0); + return (buf != 0) || dynamic_mapped_shmem_; } // Free the test memory. @@ -384,6 +526,11 @@ void OsLayer::FreeTestMem() { if (use_hugepages_) { shmdt(testmem_); shmctl(shmid_, IPC_RMID, NULL); + } else if (use_posix_shm_) { + if (!dynamic_mapped_shmem_) { + munmap(testmem_, testmemsize_); + } + close(shmid_); } else { free(testmem_); } @@ -396,11 +543,37 @@ void OsLayer::FreeTestMem() { // Prepare the target memory. It may requre mapping in, or this may be a noop. void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) { sat_assert((offset + length) <= testmemsize_); + if (dynamic_mapped_shmem_) { + // TODO(nsanders): Check if we can support MAP_NONBLOCK, + // and evaluate performance hit from not using it. + void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, + shmid_, offset); + if (mapping == MAP_FAILED) { + string errtxt = ErrorString(errno); + logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. " + "error: %s.\n", + offset, length, errtxt.c_str()); + sat_assert(0); + } + return mapping; + } + return reinterpret_cast(reinterpret_cast(testmem_) + offset); } // Release the test memory resources, if any. void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) { + if (dynamic_mapped_shmem_) { + int retval = munmap(addr, length); + if (retval == -1) { + string errtxt = ErrorString(errno); + logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. " + "error: %s.\n", + addr, length, errtxt.c_str()); + sat_assert(0); + } + } } // No error polling on unknown systems. @@ -453,7 +626,7 @@ uint32 OsLayer::PciRead(int fd, uint32 offset, int width) { logprintf(0, "Process Error: Can't seek %x\n", offset); return 0; } - if (read(fd, &datacast, size) != size) { + if (read(fd, &datacast, size) != static_cast(size)) { logprintf(0, "Process Error: Can't read %x\n", offset); return 0; } @@ -502,7 +675,7 @@ void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) { logprintf(0, "Process Error: Can't seek %x\n", offset); return; } - if (write(fd, &datacast, size) != size) { + if (write(fd, &datacast, size) != static_cast(size)) { logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset); return; } diff --git a/src/os.h b/src/os.h index 9ed04d5..28c8a2a 100644 --- a/src/os.h +++ b/src/os.h @@ -125,6 +125,8 @@ class OsLayer { asm volatile("mfence"); asm volatile("clflush (%0)" :: "r" (vaddr)); asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes." #else #warning "Unsupported CPU type: Unable to force cache flushes." #endif @@ -152,6 +154,9 @@ class OsLayer { datacast_t data; __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); tsc = data.l64; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: your build may not function correctly" + tsc = 0; #else #warning "Unsupported CPU type: your build may not function correctly" tsc = 0; @@ -181,6 +186,8 @@ class OsLayer { // Returns 32 for 32-bit, 64 for 64-bit. virtual int AddressMode(); + // Update OsLayer state regarding cpu support for various features. + virtual void GetFeatures(); // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file. 
virtual int PciOpen(int bus, int device, int function); @@ -217,12 +224,10 @@ class OsLayer { // Detect all PCI Devices. virtual PCIDevices GetPCIDevices(); - // Default platform dependent warm Adler memcpy to C implementation - // for compatibility. + // Disambiguate between different "warm" memcopies. virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, - AdlerChecksum *checksum) - {return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);} + AdlerChecksum *checksum); // Store a callback to use to print // app-specific info about the last error location. @@ -237,12 +242,14 @@ class OsLayer { protected: void *testmem_; // Location of test memory. - int64 testmemsize_; // Size of test memory. + uint64 testmemsize_; // Size of test memory. int64 totalmemsize_; // Size of available memory. int64 min_hugepages_bytes_; // Minimum hugepages size. bool error_injection_; // Do error injection? bool normal_mem_; // Memory DMA capable? bool use_hugepages_; // Use hugepage shmem? + bool use_posix_shm_; // Use 4k page shmem? + bool dynamic_mapped_shmem_; // Conserve virtual address space. int shmid_; // Handle to shmem int64 regionsize_; // Size of memory "regions" @@ -250,6 +257,10 @@ class OsLayer { int num_cpus_; // Number of cpus in the system. int num_nodes_; // Number of nodes in the system. int num_cpus_per_node_; // Number of cpus per node in the system. + int address_mode_; // Are we running 32 or 64 bit? + bool has_sse2_; // Do we have sse2 instructions? + bool has_clflush_; // Do we have clflush instructions? + time_t time_initialized_; // Start time of test. diff --git a/src/pattern.cc b/src/pattern.cc index 2fb552a..9f22674 100644 --- a/src/pattern.cc +++ b/src/pattern.cc @@ -393,7 +393,7 @@ int PatternList::Destroy() { // Return pattern numbered "i" Pattern *PatternList::GetPattern(int i) { - if (i < size_) { + if (static_cast(i) < size_) { return &patterns_[i]; } diff --git a/src/pattern.h b/src/pattern.h index b1168aa..181f839 100644 --- a/src/pattern.h +++ b/src/pattern.h @@ -102,7 +102,7 @@ class PatternList { private: vector patterns_; int weightcount_; // Total count of pattern weights. - int size_; + unsigned int size_; int initialized_; DISALLOW_COPY_AND_ASSIGN(PatternList); }; diff --git a/src/sat.cc b/src/sat.cc index 06b4c65..bed62b7 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -164,26 +164,6 @@ bool Sat::CheckEnvironment() { return false; } - if ((address_mode_ == 32) && - (os_->normal_mem()) && - (size_ >= 1499 * kMegabyte)) { - if (run_on_anything_) { - int64 new_size_mb = 1499; - logprintf(1, "Log: 32 bit binary: reducing from %lldMB to %lldMB\n", - size_mb_, - new_size_mb); - size_mb_ = new_size_mb; - size_ = size_mb_ * kMegabyte; - } else { - logprintf(0, "Process Error: %dMB test memory too large " - "for 32 bit binary.\n", - static_cast(size_ / kMegabyte)); - logprintf(0, "Log: Command line option '-A' bypasses this error.\n"); - bad_status(); - return false; - } - } - // If platform is 32 bit Xeon, floor memory size to multiple of 4. if (address_mode_ == 32) { size_mb_ = (size_mb_ / 4) * 4; @@ -350,7 +330,7 @@ void Sat::AddrMapUpdate(struct page_entry *pe) { for (int i = 0; i < page_length_; i += 4096) { uint64 paddr = os_->VirtualToPhysical(base + i); - int offset = paddr / 4096 / 8; + uint32 offset = paddr / 4096 / 8; unsigned char mask = 1 << ((paddr / 4096) % 8); if (offset >= arraysize) { @@ -969,7 +949,8 @@ bool Sat::ParseArgs(int argc, char **argv) { } // Set disk_pages_ if filesize or page size changed. 
- if (filesize != page_length_ * disk_pages_) { + if (filesize != static_cast(page_length_) * + static_cast(disk_pages_)) { disk_pages_ = filesize / page_length_; if (disk_pages_ == 0) disk_pages_ = 1; @@ -1014,7 +995,7 @@ void Sat::PrintHelp() { " --force_errors_like_crazy inject a lot of false errors " "to test error handling\n" " -F don't result check each transaction\n" - "--stop_on_errors Stop after finding the first error.\n" + " --stop_on_errors Stop after finding the first error.\n" " --read-block-size size of block for reading (-d)\n" " --write-block-size size of block for writing (-d). If not " "defined, the size of block for writing will be defined as the " @@ -1041,7 +1022,7 @@ void Sat::PrintHelp() { " --pause_duration duration (in seconds) of each pause\n" " --local_numa : choose memory regions associated with " "each CPU to be tested by that CPU\n" - "--remote_numa : choose memory regions not associated with " + " --remote_numa : choose memory regions not associated with " "each CPU to be tested by that CPU\n"); } @@ -1850,7 +1831,7 @@ bool Sat::Cleanup() { delete[] page_bitmap_; } - for (int i = 0; i < blocktables_.size(); i++) { + for (size_t i = 0; i < blocktables_.size(); i++) { delete blocktables_[i]; } diff --git a/src/sat.h b/src/sat.h index 950270f..b48f519 100644 --- a/src/sat.h +++ b/src/sat.h @@ -164,7 +164,7 @@ class Sat { bool error_injection_; // Simulate errors, for unittests. bool crazy_error_injection_; // Simulate lots of errors. - int64 max_errorcount_; // Number of errors before forced exit. + uint64 max_errorcount_; // Number of errors before forced exit. int run_on_anything_; // Ignore unknown machine ereor. int use_logfile_; // Log to a file. char logfilename_[255]; // Name of file to log to. diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 535bb34..b78857c 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -148,6 +148,9 @@ /* Define to 1 if strerror_r returns char *. */ #undef STRERROR_R_CHAR_P +/* Defined if the target CPU is armv7a */ +#undef STRESSAPPTEST_CPU_ARMV7A + /* Defined if the target CPU is i686 */ #undef STRESSAPPTEST_CPU_I686 diff --git a/src/worker.cc b/src/worker.cc index c568064..2fab28e 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -86,6 +86,9 @@ namespace { int cpu; #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: unable to determine core ID." + cpu = 0; #else #warning "Unsupported CPU type: unable to determine core ID." cpu = 0; @@ -1953,7 +1956,6 @@ bool FileThread::Work() { } pages_copied_ = loops * sat_->disk_pages(); - status_ = result; // Clean up. CloseFile(fd); @@ -1961,7 +1963,10 @@ bool FileThread::Work() { logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n", thread_num_, status_, pages_copied_); - return result; + // Failure to read from device indicates hardware, + // rather than procedural SW error. + status_ = true; + return true; } bool NetworkThread::IsNetworkStopSet() { @@ -2259,7 +2264,7 @@ bool NetworkListenThread::ReapSlaves() { // Gather status and reap threads. 
logprintf(12, "Log: Joining all outstanding threads\n"); - for (int i = 0; i < child_workers_.size(); i++) { + for (size_t i = 0; i < child_workers_.size(); i++) { NetworkSlaveThread& child_thread = child_workers_[i]->thread; logprintf(12, "Log: Joining slave thread %d\n", i); child_thread.JoinThread(); @@ -2689,7 +2694,7 @@ bool DiskThread::GetDiskSize(int fd) { return false; } - // If an Elephant is initialized with status DEAD its size will be zero. + // Zero size indicates nonworking device.. if (block_size == 0) { os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1); ++errorcount_; @@ -2734,11 +2739,11 @@ int64 DiskThread::GetTime() { } // Do randomized reads and (possibly) writes on a device. -// Return false on fatal error, either SW or HW. +// Return false on fatal SW error, true on SW success, +// regardless of whether HW failed. bool DiskThread::DoWork(int fd) { int64 block_num = 0; int64 num_segments; - bool result = true; if (segment_size_ == -1) { num_segments = 1; @@ -2775,7 +2780,8 @@ bool DiskThread::DoWork(int fd) { non_destructive_ ? "(disabled) " : "", device_name_.c_str(), thread_num_); while (IsReadyToRunNoPause() && - in_flight_sectors_.size() < queue_size_ + 1) { + in_flight_sectors_.size() < + static_cast(queue_size_ + 1)) { // Confine testing to a particular segment of the disk. int64 segment = (block_num / blocks_per_segment_) % num_segments; if (!non_destructive_ && @@ -2810,7 +2816,7 @@ bool DiskThread::DoWork(int fd) { if (!non_destructive_) { if (!WriteBlockToDisk(fd, block)) { block_table_->RemoveBlock(block); - return false; + return true; } blocks_written_++; } @@ -2829,14 +2835,14 @@ bool DiskThread::DoWork(int fd) { BlockData *block = in_flight_sectors_.front(); in_flight_sectors_.pop(); if (!ValidateBlockOnDisk(fd, block)) - return false; + return true; block_table_->RemoveBlock(block); blocks_read_++; } } pages_copied_ = blocks_written_ + blocks_read_; - return result; + return true; } // Do an asynchronous disk I/O operation. @@ -2923,7 +2929,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, // event.res contains the number of bytes written/read or // error if < 0, I think. - if (event.res != size) { + if (event.res != static_cast(size)) { errorcount_++; os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); -- cgit v1.2.3 From 3f2289d841acdcf4af731f538fc8ed85bab83434 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Fri, 11 Nov 2011 00:34:37 +0000 Subject: Update to stressapptest 1.0.4 * Build changes only. * fix configure issue which wrongly reported pthreads as missing if libaio was nto installed * add --with-static to specify static linking or not. --- src/Makefile.in | 125 ++++++++++++++++++++++++++++-------------- src/stressapptest_config.h.in | 20 ++++++- 2 files changed, 102 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/Makefile.in b/src/Makefile.in index 7cd6f2a..f62d1ac 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -1,8 +1,9 @@ -# Makefile.in generated by automake 1.10.1 from Makefile.am. +# Makefile.in generated by automake 1.11.1 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. 
# This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -16,8 +17,9 @@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c @@ -44,8 +46,8 @@ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ mkinstalldirs = $(install_sh) -d CONFIG_HEADER = stressapptest_config.h CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" -binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am__objects_1 = main.$(OBJEXT) am__objects_2 = os.$(OBJEXT) os_factory.$(OBJEXT) pattern.$(OBJEXT) \ @@ -60,6 +62,7 @@ stressapptest_LDADD = $(LDADD) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles +am__mv = mv -f CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) CXXLD = $(CXX) @@ -114,6 +117,7 @@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ SET_MAKE = @SET_MAKE@ @@ -172,6 +176,7 @@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_DEFAULT_SOURCE_EXT = .cc @@ -192,14 +197,14 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ - && exit 0; \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ - cd $(top_srcdir) && \ - $(AUTOMAKE) --foreign src/Makefile + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ @@ -217,6 +222,7 @@ $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): stressapptest_config.h: stamp-h1 @if test ! 
-f $@; then \ @@ -228,7 +234,7 @@ stamp-h1: $(srcdir)/stressapptest_config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status src/stressapptest_config.h $(srcdir)/stressapptest_config.h.in: $(am__configure_deps) - cd $(top_srcdir) && $(AUTOHEADER) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ @@ -237,23 +243,37 @@ distclean-hdr: install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ - if test -f $$p \ - ; then \ - f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ - echo " $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ - $(INSTALL_PROGRAM_ENV) $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ - else :; fi; \ - done + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p; \ + then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ - echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ - rm -f "$(DESTDIR)$(bindir)/$$f"; \ - done + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) @@ -283,14 +303,14 @@ distclean-compile: .cc.o: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< .cc.obj: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` @@ -300,14 +320,14 @@ ID: $(HEADERS) $(SOURCES) $(LISP) 
$(TAGS_FILES) unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ - $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) stressapptest_config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) - tags=; \ + set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) stressapptest_config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ @@ -315,29 +335,34 @@ TAGS: $(HEADERS) $(SOURCES) stressapptest_config.h.in $(TAGS_DEPENDENCIES) \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ - if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$tags $$unique; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) stressapptest_config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) - tags=; \ list='$(SOURCES) $(HEADERS) stressapptest_config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$tags$$unique" \ + test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$tags $$unique + $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ - && cd $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) $$here + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags @@ -358,13 +383,17 @@ distdir: $(DISTFILES) if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ - cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @@ -395,6 +424,7 @@ clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @@ -415,6 +445,8 @@ dvi-am: html: html-am +html-am: + info: info-am info-am: @@ -423,18 +455,28 @@ install-data-am: install-dvi: install-dvi-am +install-dvi-am: + install-exec-am: install-binPROGRAMS install-html: install-html-am +install-html-am: + install-info: install-info-am +install-info-am: + install-man: install-pdf: install-pdf-am +install-pdf-am: + install-ps: install-ps-am +install-ps-am: + installcheck-am: maintainer-clean: maintainer-clean-am @@ -456,7 +498,7 @@ ps-am: uninstall-am: uninstall-binPROGRAMS -.MAKE: install-am install-strip +.MAKE: all install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ clean-generic ctags distclean distclean-compile \ @@ -471,6 +513,7 @@ uninstall-am: uninstall-binPROGRAMS mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-binPROGRAMS + # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index b78857c..6ae6e5a 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -91,6 +91,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_SELECT_H +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_SHM_H + /* Define to 1 if you have the header file. */ #undef HAVE_SYS_SOCKET_H @@ -127,6 +130,9 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME +/* Define to the home page for this package. */ +#undef PACKAGE_URL + /* Define to the version of this package. */ #undef PACKAGE_VERSION @@ -190,9 +196,19 @@ /* Define to `int' if does not define. */ #undef pid_t -/* Define to equivalent of C99 restrict keyword, or to nothing if this is not - supported. Do not define if restrict is supported directly. */ +/* Define to the equivalent of the C99 'restrict' keyword, or to + nothing if this is not supported. Do not define if restrict is + supported directly. */ #undef restrict +/* Work around a bug in Sun C++: it does not support _Restrict or + __restrict__, even though the corresponding Sun C compiler ends up with + "#define restrict _Restrict" or "#define restrict __restrict__" in the + previous line. Perhaps some future version of Sun C++ will work with + restrict; if so, hopefully it defines __RESTRICT like Sun C does. */ +#if defined __SUNPRO_CC && !defined __RESTRICT +# define _Restrict +# define __restrict__ +#endif /* Define to `int' if does not define. */ #undef ssize_t -- cgit v1.2.3 From aeef09178c954ee910caa6df29199eb0accb7ede Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Mon, 7 Jan 2013 20:07:28 +0000 Subject: Add better ARM/Android support, support a wider variety of configure options. 
--- src/os.cc | 60 +++++++++ src/os.h | 10 ++ src/sat.cc | 15 ++- src/sattypes.h | 13 +- src/stressapptest_config.h.in | 24 +++- src/stressapptest_config_android.h | 243 +++++++++++++++++++++++++++++++++++++ src/worker.cc | 84 +++++++++++-- src/worker.h | 8 ++ 8 files changed, 439 insertions(+), 18 deletions(-) create mode 100644 src/stressapptest_config_android.h (limited to 'src') diff --git a/src/os.cc b/src/os.cc index 1340d6b..8032cfc 100644 --- a/src/os.cc +++ b/src/os.cc @@ -32,7 +32,9 @@ #include #include #include +#ifdef HAVE_SYS_SHM_H #include +#endif #include #ifndef SHM_HUGETLB @@ -75,6 +77,8 @@ OsLayer::OsLayer() { has_clflush_ = false; has_sse2_ = false; + + use_flush_page_cache_ = false; } // OsLayer cleanup. @@ -167,6 +171,46 @@ void OsLayer::GetFeatures() { } +// Enable FlushPageCache to be functional instead of a NOP. +void OsLayer::ActivateFlushPageCache(void) { + logprintf(9, "Log: page cache will be flushed as needed\n"); + use_flush_page_cache_ = true; +} + +// Flush the page cache to ensure reads come from the disk. +bool OsLayer::FlushPageCache(void) { + if (!use_flush_page_cache_) + return true; + + // First, ask the kernel to write the cache to the disk. + sync(); + + // Second, ask the kernel to empty the cache by writing "1" to + // "/proc/sys/vm/drop_caches". + static const char *drop_caches_file = "/proc/sys/vm/drop_caches"; + int dcfile = open(drop_caches_file, O_WRONLY); + if (dcfile < 0) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to open %s - err %d (%s)\n", + drop_caches_file, err, errtxt.c_str()); + return false; + } + + ssize_t bytes_written = write(dcfile, "1", 1); + close(dcfile); + + if (bytes_written != 1) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(3, "Log: failed to write %s - err %d (%s)\n", + drop_caches_file, err, errtxt.c_str()); + return false; + } + return true; +} + + // We need to flush the cacheline here. void OsLayer::Flush(void *vaddr) { // Use the generic flush. This function is just so we can override @@ -396,6 +440,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { logprintf(3, "Log: Prefer plain malloc memory allocation.\n"); } +#ifdef HAVE_SYS_SHM_H // Allocate hugepage mapped memory. if (prefer_hugepages) { do { // Allow break statement. @@ -495,6 +540,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { } while (0); shm_unlink("/stressapptest"); } +#endif // HAVE_SYS_SHM_H if (!use_hugepages_ && !use_posix_shm_) { // Use memalign to ensure that blocks are aligned enough for disk direct IO. @@ -524,8 +570,10 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { void OsLayer::FreeTestMem() { if (testmem_) { if (use_hugepages_) { +#ifdef HAVE_SYS_SHM_H shmdt(testmem_); shmctl(shmid_, IPC_RMID, NULL); +#endif } else if (use_posix_shm_) { if (!dynamic_mapped_shmem_) { munmap(testmem_, testmemsize_); @@ -546,9 +594,15 @@ void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) { if (dynamic_mapped_shmem_) { // TODO(nsanders): Check if we can support MAP_NONBLOCK, // and evaluate performance hit from not using it. 
+#ifdef HAVE_MMAP64 void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, shmid_, offset); +#else + void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, + shmid_, offset); +#endif if (mapping == MAP_FAILED) { string errtxt = ErrorString(errno); logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. " @@ -750,9 +804,15 @@ bool OsLayer::CpuStressWorkload() { // Initialize array with random numbers. for (int i = 0; i < 100; i++) { +#ifdef HAVE_RAND_R float_arr[i] = rand_r(&seed); if (rand_r(&seed) % 2) float_arr[i] *= -1.0; +#else + float_arr[i] = rand(); + if (rand() % 2) + float_arr[i] *= -1.0; +#endif } // Calculate moving average. diff --git a/src/os.h b/src/os.h index 28c8a2a..b043b61 100644 --- a/src/os.h +++ b/src/os.h @@ -101,6 +101,15 @@ class OsLayer { // This will output a machine readable line regarding the error. virtual bool ErrorReport(const char *part, const char *symptom, int count); + // Flushes page cache. Used to circumvent the page cache when doing disk + // I/O. This will be a NOP until ActivateFlushPageCache() is called, which + // is typically done when opening a file with O_DIRECT fails. + // Returns false on error, true on success or NOP. + // Subclasses may implement this in machine specific ways.. + virtual bool FlushPageCache(void); + // Enable FlushPageCache() to actually do the flush instead of being a NOP. + virtual void ActivateFlushPageCache(void); + // Flushes cacheline. Used to distinguish read or write errors. // Subclasses may implement this in machine specific ways.. // Takes a pointer, and flushed the cacheline containing that pointer. @@ -260,6 +269,7 @@ class OsLayer { int address_mode_; // Are we running 32 or 64 bit? bool has_sse2_; // Do we have sse2 instructions? bool has_clflush_; // Do we have clflush instructions? + bool use_flush_page_cache_; // Do we need to flush the page cache? time_t time_initialized_; // Start time of test. diff --git a/src/sat.cc b/src/sat.cc index bed62b7..ede951d 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -77,7 +77,14 @@ bool Sat::InitializeLogfile() { // Open logfile. if (use_logfile_) { logfile_ = open(logfilename_, - O_WRONLY | O_CREAT | O_DSYNC, +#if defined(O_DSYNC) + O_DSYNC | +#elif defined(O_SYNC) + O_SYNC | +#elif defined(O_FSYNC) + O_FSYNC | +#endif + O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (logfile_ < 0) { printf("Fatal Error: cannot open file %s for logging\n", @@ -1271,9 +1278,15 @@ void Sat::InitializeThreads() { // Allocate all the nums once so that we get a single chunk // of contiguous memory. 
int *num; +#ifdef HAVE_POSIX_MEMALIGN int err_result = posix_memalign( reinterpret_cast(&num), kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_); +#else + num = reinterpret_cast(memalign(kCacheLineSize, + sizeof(*num) * num_cpus * cc_cacheline_count_)); + int err_result = (num == 0); +#endif sat_assert(err_result == 0); int cline; diff --git a/src/sattypes.h b/src/sattypes.h index 96bf13b..c9341d0 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -26,9 +26,13 @@ #include #ifdef HAVE_CONFIG_H // Built using autoconf +#ifdef __ANDROID__ +#include "stressapptest_config_android.h" +#else #include "stressapptest_config.h" -using namespace std; using namespace __gnu_cxx; +#endif +using namespace std; typedef signed long long int64; typedef signed int int32; @@ -172,7 +176,14 @@ inline bool sat_sleep(time_t seconds) { // error_num: an errno error code inline string ErrorString(int error_num) { char buf[256]; +#ifdef STRERROR_R_CHAR_P return string(strerror_r(error_num, buf, sizeof buf)); +#else + if (strerror_r(error_num, buf, sizeof buf)) + return "unknown failure"; + else + return string(buf); +#endif } // Define handy constants here diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 6ae6e5a..97f306e 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -20,6 +20,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_FCNTL_H +/* Define to 1 if you have the `ftruncate' function. */ +#undef HAVE_FTRUNCATE + /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -29,24 +32,36 @@ /* Define to 1 if you have the header file. */ #undef HAVE_LIBAIO_H -/* Define to 1 if you have the header file. */ -#undef HAVE_MALLOC_H - /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H /* Define to 1 if you have the `memset' function. */ #undef HAVE_MEMSET +/* Define to 1 if you have the `mmap64' function. */ +#undef HAVE_MMAP64 + +/* Define to 1 if you have the `munmap' function. */ +#undef HAVE_MUNMAP + /* Define to 1 if you have the header file, and it defines `DIR'. */ #undef HAVE_NDIR_H /* Define to 1 if you have the header file. */ #undef HAVE_NETDB_H +/* Define to 1 if you have the `posix_memalign' function. */ +#undef HAVE_POSIX_MEMALIGN + /* Define to 1 if you have the header file. */ #undef HAVE_PTHREAD_H +/* Define to 1 if you have the `rand_r' function. */ +#undef HAVE_RAND_R + +/* Define to 1 if you have the `sched_getaffinity' function. */ +#undef HAVE_SCHED_GETAFFINITY + /* Define to 1 if you have the `select' function. */ #undef HAVE_SELECT @@ -210,6 +225,9 @@ # define __restrict__ #endif +/* Define to `unsigned int' if does not define. */ +#undef size_t + /* Define to `int' if does not define. */ #undef ssize_t diff --git a/src/stressapptest_config_android.h b/src/stressapptest_config_android.h new file mode 100644 index 0000000..83a4866 --- /dev/null +++ b/src/stressapptest_config_android.h @@ -0,0 +1,243 @@ +/* src/stressapptest_config.h. Generated from stressapptest_config.h.in by configure. */ +/* src/stressapptest_config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if the `closedir' function returns void instead of `int'. */ +#define CLOSEDIR_VOID 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#define HAVE_DECL_STRERROR_R 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. 
+ */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ +/* #undef HAVE_DOPRNT */ + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `ftruncate' function. */ +#define HAVE_FTRUNCATE 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#define HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBAIO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `memset' function. */ +#define HAVE_MEMSET 1 + +/* Define to 1 if you have the `mmap64' function. */ +/* #undef HAVE_MMAP64 */ + +/* Define to 1 if you have the `munmap' function. */ +#define HAVE_MUNMAP 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_NETDB_H 1 + +/* Define to 1 if you have the `posix_memalign' function. */ +/* #undef HAVE_POSIX_MEMALIGN */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the `rand_r' function. */ +/* #undef HAVE_RAND_R */ + +/* Define to 1 if you have the `sched_getaffinity' function. */ +#define HAVE_SCHED_GETAFFINITY 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `socket' function. */ +#define HAVE_SOCKET 1 + +/* Define to 1 if stdbool.h conforms to C99. */ +/* #undef HAVE_STDBOOL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strerror_r' function. */ +#define HAVE_STRERROR_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strtol' function. */ +#define HAVE_STRTOL 1 + +/* Define to 1 if you have the `strtoull' function. */ +#define HAVE_STRTOULL 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_SHM_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `vprintf' function. */ +#define HAVE_VPRINTF 1 + +/* Define to 1 if the system has the type `_Bool'. */ +#define HAVE__BOOL 1 + +/* Name of package */ +#define PACKAGE "stressapptest" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "opensource@google.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "stressapptest" + +/* Define to the full name and version of this package. 
*/ +#define PACKAGE_STRING "stressapptest 1.0.4_autoconf" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "stressapptest" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.0.4_autoconf" + +/* Define as the return type of signal handlers (`int' or `void'). */ +#define RETSIGTYPE void + +/* Define to the type of arg 1 for `select'. */ +#define SELECT_TYPE_ARG1 int + +/* Define to the type of args 2, 3 and 4 for `select'. */ +#define SELECT_TYPE_ARG234 (fd_set *) + +/* Define to the type of arg 5 for `select'. */ +#define SELECT_TYPE_ARG5 (struct timeval *) + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define to 1 if strerror_r returns char *. */ +/* #undef STRERROR_R_CHAR_P */ + +/* Defined if the target CPU is armv7a */ +#define STRESSAPPTEST_CPU_ARMV7A /**/ + +/* Defined if the target CPU is i686 */ +/* #undef STRESSAPPTEST_CPU_I686 */ + +/* Defined if the target CPU is PowerPC */ +/* #undef STRESSAPPTEST_CPU_PPC */ + +/* Defined if the target CPU is x86_64 */ +/* #undef STRESSAPPTEST_CPU_X86_64 */ + +/* Defined if the target OS is BSD based */ +/* #undef STRESSAPPTEST_OS_BSD */ + +/* Defined if the target OS is OSX */ +/* #undef STRESSAPPTEST_OS_DARWIN */ + +/* Defined if the target OS is Linux */ +#define STRESSAPPTEST_OS_LINUX /**/ + +/* Timestamp when ./configure was executed */ +#ifndef STRESSAPPTEST_TIMESTAMP +#define STRESSAPPTEST_TIMESTAMP "Android version" +#endif + +/* Define to 1 if you can safely include both and . */ +#define TIME_WITH_SYS_TIME 1 + +/* Version number of package */ +#define VERSION "1.0.4_autoconf" + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to `int' if does not define. */ +/* #undef pid_t */ + +/* Define to the equivalent of the C99 'restrict' keyword, or to + nothing if this is not supported. Do not define if restrict is + supported directly. */ +#define restrict __restrict +/* Work around a bug in Sun C++: it does not support _Restrict or + __restrict__, even though the corresponding Sun C compiler ends up with + "#define restrict _Restrict" or "#define restrict __restrict__" in the + previous line. Perhaps some future version of Sun C++ will work with + restrict; if so, hopefully it defines __RESTRICT like Sun C does. */ +#if defined __SUNPRO_CC && !defined __RESTRICT +# define _Restrict +# define __restrict__ +#endif + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to `int' if does not define. */ +/* #undef ssize_t */ + +/* Define to the type of an unsigned integer type of width exactly 16 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint16_t */ + +/* Define to empty if the keyword `volatile' does not work. Warning: valid + code using `volatile' can become incorrect without. Disable with care. 
*/ +/* #undef volatile */ diff --git a/src/worker.cc b/src/worker.cc index 2fab28e..dcf4dcb 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -44,7 +44,9 @@ #include #include // For asynchronous I/O +#ifdef HAVE_LIBAIO_H #include +#endif #include @@ -75,11 +77,6 @@ _syscall3(int, sched_setaffinity, pid_t, pid, unsigned int, len, cpu_set_t*, mask) #endif -// Linux aio syscalls. -#if !defined(__NR_io_setup) -#error "No aio headers inculded, please install libaio." -#endif - namespace { // Get HW core ID from cpuid instruction. inline int apicid(void) { @@ -147,14 +144,18 @@ static void *ThreadSpawnerGeneric(void *ptr) { void WorkerStatus::Initialize() { sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL)); sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL)); +#ifdef _POSIX_BARRIERS sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_ + 1)); +#endif } void WorkerStatus::Destroy() { sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_)); sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_)); +#ifdef _POSIX_BARRIERS sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); +#endif } void WorkerStatus::PauseWorkers() { @@ -219,8 +220,10 @@ void WorkerStatus::RemoveSelf() { AcquireNumWorkersLock(); // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't // in use because (status != PAUSE). +#ifdef _POSIX_BARRIERS sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_)); +#endif --num_workers_; ReleaseNumWorkersLock(); @@ -399,7 +402,11 @@ bool WorkerThread::Work() { // mask = 13 (1101b): cpu0, 2, 3 bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) { CPU_ZERO(cpuset); +#ifdef HAVE_SCHED_GETAFFINITY return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0; +#else + return 0; +#endif } @@ -409,7 +416,11 @@ bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) { // mask = 13 (1101b): cpu0, 2, 3 bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) { CPU_ZERO(cpuset); +#ifdef HAVE_SCHED_GETAFFINITY return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0; +#else + return 0; +#endif } @@ -436,7 +447,11 @@ bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) { cpuset_format(&process_mask).c_str()); return false; } +#ifdef HAVE_SCHED_GETAFFINITY return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0); +#else + return 0; +#endif } @@ -1585,15 +1600,21 @@ void FileThread::SetFile(const char *filename_init) { // Open the file for access. bool FileThread::OpenFile(int *pfile) { - int fd = open(filename_.c_str(), - O_RDWR | O_CREAT | O_SYNC | O_DIRECT, - 0644); + bool no_O_DIRECT = false; + int flags = O_RDWR | O_CREAT | O_SYNC; + int fd = open(filename_.c_str(), flags | O_DIRECT, 0644); + if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { + no_O_DIRECT = true; + fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + } if (fd < 0) { logprintf(0, "Process Error: Failed to create file %s!!\n", filename_.c_str()); pages_copied_ = 0; return false; } + if (no_O_DIRECT) + os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL *pfile = fd; return true; } @@ -1664,7 +1685,7 @@ bool FileThread::WritePages(int fd) { if (!result) return false; } - return true; + return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. } // Copy data from file into memory block. @@ -1791,7 +1812,12 @@ bool FileThread::PagePrepare() { // Init a local buffer if we need it. 
if (!page_io_) { +#ifdef HAVE_POSIX_MEMALIGN int result = posix_memalign(&local_page_, 512, sat_->page_length()); +#else + local_page_ = memalign(512, sat_->page_length()); + int result = (local_page_ == 0); +#endif if (result) { logprintf(0, "Process Error: disk thread posix_memalign " "returned %d (fail)\n", @@ -2358,7 +2384,12 @@ bool NetworkSlaveThread::Work() { int64 loops = 0; // Init a local buffer for storing data. void *local_page = NULL; +#ifdef HAVE_POSIX_MEMALIGN int result = posix_memalign(&local_page, 512, sat_->page_length()); +#else + local_page = memalign(512, sat_->page_length()); + int result = (local_page == 0); +#endif if (result) { logprintf(0, "Process Error: net slave posix_memalign " "returned %d (fail)\n", @@ -2459,7 +2490,11 @@ bool CpuCacheCoherencyThread::Work() { // Choose a datastructure in random and increment the appropriate // member in that according to the offset (which is the same as the // thread number. +#ifdef HAVE_RAND_R int r = rand_r(&seed); +#else + int r = rand(); +#endif r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0)); // Increment the member of the randomely selected structure. (cc_cacheline_data_[r].num[cc_thread_num_])++; @@ -2521,7 +2556,9 @@ DiskThread::DiskThread(DiskBlockTable *block_table) { device_sectors_ = 0; non_destructive_ = 0; +#ifdef HAVE_LIBAIO_H aio_ctx_ = 0; +#endif block_table_ = block_table; update_block_table_ = 1; @@ -2660,14 +2697,20 @@ bool DiskThread::SetParameters(int read_block_size, // Open a device, return false on failure. bool DiskThread::OpenDevice(int *pfile) { - int fd = open(device_name_.c_str(), - O_RDWR | O_SYNC | O_DIRECT | O_LARGEFILE, - 0); + bool no_O_DIRECT = false; + int flags = O_RDWR | O_SYNC | O_LARGEFILE; + int fd = open(device_name_.c_str(), flags | O_DIRECT, 0); + if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { + no_O_DIRECT = true; + fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + } if (fd < 0) { logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n", device_name_.c_str(), thread_num_); return false; } + if (no_O_DIRECT) + os_->ActivateFlushPageCache(); *pfile = fd; return GetDiskSize(fd); @@ -2827,6 +2870,8 @@ bool DiskThread::DoWork(int fd) { in_flight_sectors_.push(block); } + if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. + return false; // Verify blocks on disk. logprintf(20, "Log: Read phase for disk %s (thread %d).\n", @@ -2849,6 +2894,7 @@ bool DiskThread::DoWork(int fd) { // Return false if the IO is not set up. bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, int64 offset, int64 timeout) { +#ifdef HAVE_LIBAIO_H // Use the Linux native asynchronous I/O interface for reading/writing. // A read/write consists of three basic steps: // 1. create an io context. @@ -2957,6 +3003,9 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, } return true; +#else // !HAVE_LIBAIO_H + return false; +#endif } // Write a block to disk. @@ -3104,9 +3153,14 @@ bool DiskThread::Work() { } // Allocate a block buffer aligned to 512 bytes since the kernel requires it - // when using direst IO. + // when using direct IO. 
+#ifdef HAVE_POSIX_MEMALIGN int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment, sat_->page_length()); +#else + block_buffer_ = memalign(kBufferAlignment, sat_->page_length()); + int memalign_result = (block_buffer_ == 0); +#endif if (memalign_result) { CloseDevice(fd); logprintf(0, "Process Error: Unable to allocate memory for buffers " @@ -3116,6 +3170,7 @@ bool DiskThread::Work() { return false; } +#ifdef HAVE_LIBAIO_H if (io_setup(5, &aio_ctx_)) { CloseDevice(fd); logprintf(0, "Process Error: Unable to create aio context for disk %s" @@ -3124,12 +3179,15 @@ bool DiskThread::Work() { status_ = false; return false; } +#endif bool result = DoWork(fd); status_ = result; +#ifdef HAVE_LIBAIO_H io_destroy(aio_ctx_); +#endif CloseDevice(fd); logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, " diff --git a/src/worker.h b/src/worker.h index 7aae5f2..0ec4c1d 100644 --- a/src/worker.h +++ b/src/worker.h @@ -26,7 +26,9 @@ #include #include +#ifdef HAVE_LIBAIO_H #include +#endif #include #include @@ -138,9 +140,11 @@ class WorkerStatus { enum Status { RUN, PAUSE, STOP }; void WaitOnPauseBarrier() { +#ifdef _POSIX_BARRIERS int error = pthread_barrier_wait(&pause_barrier_); if (error != PTHREAD_BARRIER_SERIAL_THREAD) sat_assert(error == 0); +#endif } void AcquireNumWorkersLock() { @@ -185,8 +189,10 @@ class WorkerStatus { pthread_rwlock_t status_rwlock_; Status status_; +#ifdef _POSIX_BARRIERS // Guaranteed to not be in use when (status_ != PAUSE). pthread_barrier_t pause_barrier_; +#endif DISALLOW_COPY_AND_ASSIGN(WorkerStatus); }; @@ -749,7 +755,9 @@ class DiskThread : public WorkerThread { // not verified. void *block_buffer_; // Pointer to aligned block buffer. +#ifdef HAVE_LIBAIO_H io_context_t aio_ctx_; // Asynchronous I/O context for Linux native AIO. +#endif DiskBlockTable *block_table_; // Disk Block Table, shared by all disk // threads that read / write at the same -- cgit v1.2.3 From 1dbb40a37bee31db4125b2f6ae0a5be1a87f063d Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Mon, 7 Jan 2013 22:02:45 +0000 Subject: This patch replaces the existing OsLayer::VirtualToPhysical stub with an actual implementation that translates addresses via the Linux /proc/self/pagemap interface. This causes failing memory locations to be reported with both virtual and physical address, and facilitates isolating a defective memory module. Like many other functions currently implemented in the generic OsLayer class, this functionality is specific to Linux and would need to be moved to a respective subclass if proper multi-OS support ever gets implemented. Signed-off-by: Julius Werner --- src/os.cc | 20 ++++++++++++++++++-- src/os.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/os.cc b/src/os.cc index 8032cfc..944ff88 100644 --- a/src/os.cc +++ b/src/os.cc @@ -129,8 +129,24 @@ int OsLayer::AddressMode() { // Translates user virtual to physical address. uint64 OsLayer::VirtualToPhysical(void *vaddr) { - // Needs platform specific implementation. 
- return 0; + uint64 frame, shift; + off64_t off = ((uintptr_t)vaddr) / getpagesize() * 8; + int fd = open(kPagemapPath, O_RDONLY); + if (fd < 0 || lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) { + int err = errno; + string errtxt = ErrorString(err); + logprintf(0, "Error: failed to access %s with errno %d (%s)\n", + kPagemapPath, err, errtxt.c_str()); + if (fd >= 0) + close(fd); + return 0; + } + close(fd); + if (!(frame & (1LL << 63)) || (frame & (1LL << 62))) + return 0; + shift = (frame >> 55) & 0x3f; + frame = (frame & 0x007fffffffffffffLL) << shift; + return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1)); } // Returns the HD device that contains this file. diff --git a/src/os.h b/src/os.h index b043b61..3f3d3e9 100644 --- a/src/os.h +++ b/src/os.h @@ -27,6 +27,7 @@ #include "adler32memcpy.h" // NOLINT #include "sattypes.h" // NOLINT +const char kPagemapPath[] = "/proc/self/pagemap"; const char kSysfsPath[] = "/sys/bus/pci/devices"; struct PCIDevice { -- cgit v1.2.3 From 75884d4d5d55df09c56232f460011d5a5d42d547 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Mon, 7 Jan 2013 22:13:27 +0000 Subject: Fix handling of cpuid and PIC on i386 systems The current cpuid logic clobbers %ebx. this is OK if the code is not PIC, but if you're building PIEs, it'll fail because %ebx is the PIC register and gcc doesn't let you clobber it. Thanks to vapier@chromium.org --- src/os.cc | 11 ++++++++++- src/worker.cc | 12 +++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/os.cc b/src/os.cc index 944ff88..225e86c 100644 --- a/src/os.cc +++ b/src/os.cc @@ -169,7 +169,16 @@ void OsLayer::GetFeatures() { // http://www.sandpile.org/ia32/cpuid.htm int ax, bx, cx, dx; __asm__ __volatile__ ( - "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1)); +# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__) + "xchg %%ebx, %%esi;" + "cpuid;" + "xchg %%esi, %%ebx;" + : "=S" (bx), +# else + "cpuid;" + : "=b" (bx), +# endif + "=a" (ax), "=c" (cx), "=d" (dx) : "a" (1)); has_clflush_ = (dx >> 19) & 1; has_sse2_ = (dx >> 26) & 1; diff --git a/src/worker.cc b/src/worker.cc index dcf4dcb..eddea6c 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -82,7 +82,17 @@ namespace { inline int apicid(void) { int cpu; #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx"); + __asm__ __volatile__ ( +# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__) + "xchg %%ebx, %%esi;" + "cpuid;" + "xchg %%esi, %%ebx;" + : "=S" (cpu) +# else + "cpuid;" + : "=b" (cpu) +# endif + : "a" (1) : "cx", "dx"); #elif defined(STRESSAPPTEST_CPU_ARMV7A) #warning "Unsupported CPU type ARMV7A: unable to determine core ID." cpu = 0; -- cgit v1.2.3 From ef1c9359bc14c3e90ed207c44d9a609d4b8654da Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Wed, 9 Jan 2013 09:10:08 +0000 Subject: 1.0.6 update: Memory mapping features and stability fixes Allow mapping of dram chips based on simple memory controllers Fix memory corruption caused by define include bug on some toolchains This patch adds a primitive mechanism to map physical addresses to memory chips/modules in stressapptest. It assumes that the memory controller uses channel interleaving of equal-sized chunks in a simple round-robin fashion (like recent Intel chipsets in dual-channel mode seem to do). The chunk size and the amount of memory channels must be configured on the command line. 
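For illustration, the decode this patch assumes reduces to roughly the following
sketch. The struct and helper names here are invented for this example; the real
logic lives in OsLayer::FindDimm, with the options parsed in sat.cc.

  // Illustrative sketch of the assumed round-robin channel decode.
  // "DramMapping" and "DecodeAddress" are hypothetical names only.
  #include <stdint.h>
  #include <string>
  #include <vector>

  struct DramMapping {
    int interleave_size;                               // bytes per channel chunk
    int channel_width;                                 // channel width in bits
    std::vector<std::vector<std::string> > channels;   // module names per channel
  };

  std::string DecodeAddress(const DramMapping &m, uint64_t paddr) {
    // Channel: count interleave-sized chunks and round-robin over channels.
    size_t channel = (paddr / m.interleave_size) % m.channels.size();
    // Module: find the byte lane within the channel word, then spread the
    // lanes evenly over the modules listed for that channel.
    int bytes_per_channel = m.channel_width / 8;
    size_t module = (paddr % bytes_per_channel) /
                    (bytes_per_channel / m.channels[channel].size());
    return m.channels[channel][module];
  }

For example, with a 64-byte interleave, two channels, and one 64-bit channel of
eight x8 modules, physical address 0x40 decodes to channel 1, module 0.
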
In addition, you can supply a comma-separated list of chip/module names per memory channel, between which the bytes of an aligned machine word are evenly distributed. If you want to disable this sub-feature and only detect channel interleaving, you can just provide only one name per channel. Signed-off-by: Julius Werner Add autoconf support for pthread barriers, substituting for POSIX defines. Signed-off-by: Nick Sanders --- src/os.cc | 37 +++++++++++++++++++++------ src/os.h | 11 ++++++++ src/sat.cc | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---- src/sat.h | 4 +++ src/worker.cc | 6 ++--- src/worker.h | 4 +-- 6 files changed, 125 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/os.cc b/src/os.cc index 225e86c..b73b18c 100644 --- a/src/os.cc +++ b/src/os.cc @@ -132,10 +132,14 @@ uint64 OsLayer::VirtualToPhysical(void *vaddr) { uint64 frame, shift; off64_t off = ((uintptr_t)vaddr) / getpagesize() * 8; int fd = open(kPagemapPath, O_RDONLY); - if (fd < 0 || lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) { + // /proc/self/pagemap is available in kernel >= 2.6.25 + if (fd < 0) + return 0; + + if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) { int err = errno; string errtxt = ErrorString(err); - logprintf(0, "Error: failed to access %s with errno %d (%s)\n", + logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n", kPagemapPath, err, errtxt.c_str()); if (fd >= 0) close(fd); @@ -257,12 +261,31 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, } -// Translate user virtual to physical address. +// Translate physical address to memory module name. +// Assumes simple round-robin interleaving between memory channels of +// 'interleave_size_' sized chunks, with repeated 'channel_width_' +// blocks with bits distributed from each chip in that channel. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { - char tmpbuf[256]; - snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown"); - snprintf(buf, len, "%s", tmpbuf); - return 0; + static const string unknown = "DIMM Unknown"; + if (!modules_) { + snprintf(buf, len, "%s", unknown.c_str()); + return 0; + } + + // Find channel by counting interleave units (typically cachelines), + // and mod by number of channels. + vector& channel = (*modules_)[ + (addr / interleave_size_) % modules_->size()]; + + // Find dram chip by finding which byte within the channel + // by address mod channel width, then divide the channel + // evenly among the listed dram chips. Note, this will not work + // with x4 dram. + int chip = (addr % (channel_width_ / 8)) / + ((channel_width_ / 8) / channel.size()); + string name = channel[chip]; + snprintf(buf, len, "%s", name.c_str()); + return 1; } diff --git a/src/os.h b/src/os.h index 3f3d3e9..86e0c4e 100644 --- a/src/os.h +++ b/src/os.h @@ -57,6 +57,14 @@ class OsLayer { min_hugepages_bytes_ = min_bytes; } + // Set parameters needed to translate physical address to memory module. + void SetDramMappingParams(int interleave_size, int channel_width, + vector< vector > *modules) { + interleave_size_ = interleave_size; + channel_width_ = channel_width; + modules_ = modules; + } + // Initializes data strctures and open files. // Returns false on error. virtual bool Initialize(); @@ -261,6 +269,9 @@ class OsLayer { bool use_posix_shm_; // Use 4k page shmem? bool dynamic_mapped_shmem_; // Conserve virtual address space. int shmid_; // Handle to shmem + vector< vector > *modules_; // Memory module names per channel. 
+ int interleave_size_; // Channel interleaving chunk size. + int channel_width_; // Channel width in bits. int64 regionsize_; // Size of memory "regions" int regioncount_; // Number of memory "regions" diff --git a/src/sat.cc b/src/sat.cc index ede951d..0679ea1 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -488,13 +488,13 @@ bool Sat::InitializePages() { for (int64 i = 0; i < pages_; i++) { struct page_entry pe; // Only get valid pages with uninitialized tags here. - char buf[256]; if (GetValid(&pe, kInvalidTag)) { int64 paddr = os_->VirtualToPhysical(pe.addr); int32 region = os_->FindRegion(paddr); - os_->FindDimm(paddr, buf, sizeof(buf)); if (i < 256) { + char buf[256]; + os_->FindDimm(paddr, buf, sizeof(buf)); logprintf(12, "Log: address: %#llx, %s\n", paddr, buf); } region_[region]++; @@ -572,6 +572,13 @@ bool Sat::Initialize() { if (min_hugepages_mbytes_ > 0) os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte); + if (modules_.size() > 0) { + logprintf(6, "Log: Decoding memory: %dx%d bit channels," + " %d byte burst size, %d modules per channel (x%d)\n", + modules_.size(), channel_width_, interleave_size_, modules_[0].size(), + channel_width_/modules_[0].size()); + os_->SetDramMappingParams(interleave_size_, channel_width_, &modules_); + } if (!os_->Initialize()) { logprintf(0, "Process Error: Failed to initialize OS layer\n"); @@ -643,6 +650,8 @@ Sat::Sat() { min_hugepages_mbytes_ = 0; freepages_ = 0; paddr_base_ = 0; + interleave_size_ = kCacheLineSize; + channel_width_ = 64; user_break_ = false; verbosity_ = 8; @@ -918,6 +927,23 @@ bool Sat::ParseArgs(int argc, char **argv) { continue; } + ARG_IVALUE("--interleave_size", interleave_size_); + ARG_IVALUE("--channel_width", channel_width_); + + if (!strcmp(argv[i], "--memory_channel")) { + i++; + if (i < argc) { + char *module = argv[i]; + modules_.push_back(vector()); + while (char* next = strchr(module, ',')) { + modules_.back().push_back(string(module, next - module)); + module = next + 1; + } + modules_.back().push_back(string(module)); + } + continue; + } + // Default: PrintVersion(); PrintHelp(); @@ -963,6 +989,44 @@ bool Sat::ParseArgs(int argc, char **argv) { disk_pages_ = 1; } + // Validate memory channel parameters if supplied + if (modules_.size()) { + if (interleave_size_ <= 0 || + interleave_size_ & (interleave_size_ - 1)) { + logprintf(6, "Process Error: " + "Interleave size %d is not a power of 2.\n", interleave_size_); + bad_status(); + return false; + } + for (uint i = 0; i < modules_.size(); i++) + if (modules_[i].size() != modules_[0].size()) { + logprintf(6, "Process Error: " + "Channels 0 and %d have a different amount of modules.\n",i); + bad_status(); + return false; + } + if (modules_[0].size() & (modules_[0].size() - 1)) { + logprintf(6, "Process Error: " + "Amount of modules per memory channel is not a power of 2.\n"); + bad_status(); + return false; + } + if (channel_width_ < 16 + || channel_width_ & (channel_width_ - 1)) { + logprintf(6, "Process Error: " + "Channel width %d is invalid.\n", channel_width_); + bad_status(); + return false; + } + if (channel_width_ / modules_[0].size() < 8) { + logprintf(6, "Process Error: " + "Chip width x%d must be x8 or greater.\n", channel_width_ / modules_[0].size()); + bad_status(); + return false; + } + } + + // Print each argument. 
for (int i = 0; i < argc; i++) { if (i) @@ -1027,10 +1091,16 @@ void Sat::PrintHelp() { " --paddr_base allocate memory starting from this address\n" " --pause_delay delay (in seconds) between power spikes\n" " --pause_duration duration (in seconds) of each pause\n" - " --local_numa : choose memory regions associated with " + " --local_numa choose memory regions associated with " + "each CPU to be tested by that CPU\n" + " --remote_numa choose memory regions not associated with " "each CPU to be tested by that CPU\n" - " --remote_numa : choose memory regions not associated with " - "each CPU to be tested by that CPU\n"); + " --interleave_size bytes size in bytes of each channel's data as interleaved " + "between memory channels\n" + " --channel_width bits width in bits of each memory channel\n" + " --memory_channel u1,u2 defines a comma-separated list of names\n" + " for dram packages in a memory channel.\n" + " Use multiple times to define multiple channels.\n"); } bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) { diff --git a/src/sat.h b/src/sat.h index b48f519..e867151 100644 --- a/src/sat.h +++ b/src/sat.h @@ -151,6 +151,10 @@ class Sat { int64 freepages_; // How many invalid pages we need. int disk_pages_; // Number of pages per temp file. uint64 paddr_base_; // Physical address base. + vector< vector > modules_; // Memory module names per channel. + int interleave_size_; // Channel interleaving chunk size in bytes. + // Usually cacheline sized. + int channel_width_; // Channel width in bits. // Control flags. volatile sig_atomic_t user_break_; // User has signalled early exit. Used as diff --git a/src/worker.cc b/src/worker.cc index eddea6c..d24b5cd 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -154,7 +154,7 @@ static void *ThreadSpawnerGeneric(void *ptr) { void WorkerStatus::Initialize() { sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL)); sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL)); -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_ + 1)); #endif @@ -163,7 +163,7 @@ void WorkerStatus::Initialize() { void WorkerStatus::Destroy() { sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_)); sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_)); -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); #endif } @@ -230,7 +230,7 @@ void WorkerStatus::RemoveSelf() { AcquireNumWorkersLock(); // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't // in use because (status != PAUSE). -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_)); #endif diff --git a/src/worker.h b/src/worker.h index 0ec4c1d..31e0225 100644 --- a/src/worker.h +++ b/src/worker.h @@ -140,7 +140,7 @@ class WorkerStatus { enum Status { RUN, PAUSE, STOP }; void WaitOnPauseBarrier() { -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS int error = pthread_barrier_wait(&pause_barrier_); if (error != PTHREAD_BARRIER_SERIAL_THREAD) sat_assert(error == 0); @@ -189,7 +189,7 @@ class WorkerStatus { pthread_rwlock_t status_rwlock_; Status status_; -#ifdef _POSIX_BARRIERS +#ifdef HAVE_PTHREAD_BARRIERS // Guaranteed to not be in use when (status_ != PAUSE). 
pthread_barrier_t pause_barrier_; #endif -- cgit v1.2.3 From 34f025ddd73b6004d53208b9b76750e7e1eeb128 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Wed, 9 Jan 2013 21:13:13 +0000 Subject: Allow ./configure for cross compile Fix regression from BARRIER detect change. --- src/stressapptest_config.h.in | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 97f306e..5412df4 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -53,6 +53,9 @@ /* Define to 1 if you have the `posix_memalign' function. */ #undef HAVE_POSIX_MEMALIGN +/* Define to 1 if the system has `pthread_barrier'. */ +#undef HAVE_PTHREAD_BARRIERS + /* Define to 1 if you have the header file. */ #undef HAVE_PTHREAD_H -- cgit v1.2.3 From 1f94f5fb389066947b68fc0eb775e375588b25eb Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Thu, 10 Jan 2013 23:42:36 +0000 Subject: Replace interleave_size with channel_hash This patch replaces the previously introduced interleave_size memory channel decoding mechanism with a more powerful channel_hash. Decoding can now be based upon an arbitrary mask of address bits, which will be XORed together to determine the target channel. Note that this drops support for more than two channels, but TripleChannel controllers will probably use much more complicated decoding mechanisms anyway. It also includes the findmask program, which offers a crude method to guess the decoding mask from an unknown memory controller for enterprising users. Use at your own risk. Signed-off-by: Julius Werner --- src/Makefile.am | 2 + src/Makefile.in | 68 +++++++++++++++++++-------- src/findmask.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/findmask.inc | 4 ++ src/os.cc | 17 +++---- src/os.h | 12 ++--- src/sat.cc | 55 +++++++++++----------- src/sat.h | 5 +- 8 files changed, 238 insertions(+), 63 deletions(-) create mode 100644 src/findmask.c create mode 100644 src/findmask.inc (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index e044974..2179b42 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,5 @@ bin_PROGRAMS = stressapptest +noinst_PROGRAMS = findmask AM_DEFAULT_SOURCE_EXT=.cc @@ -29,3 +30,4 @@ HFILES += adler32memcpy.h HFILES += logger.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) +findmask_SOURCES = findmask.c findmask.inc diff --git a/src/Makefile.in b/src/Makefile.in index f62d1ac..65470cb 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -36,6 +36,7 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ bin_PROGRAMS = stressapptest$(EXEEXT) +noinst_PROGRAMS = findmask$(EXEEXT) subdir = src DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ $(srcdir)/stressapptest_config.h.in @@ -48,7 +49,10 @@ CONFIG_HEADER = stressapptest_config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" -PROGRAMS = $(bin_PROGRAMS) +PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) +am_findmask_OBJECTS = findmask.$(OBJEXT) +findmask_OBJECTS = $(am_findmask_OBJECTS) +findmask_LDADD = $(LDADD) am__objects_1 = main.$(OBJEXT) am__objects_2 = os.$(OBJEXT) os_factory.$(OBJEXT) pattern.$(OBJEXT) \ queue.$(OBJEXT) sat.$(OBJEXT) sat_factory.$(OBJEXT) \ @@ -63,17 +67,17 @@ DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) 
$(CFLAGS) +CCLD = $(CC) +LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) CXXLD = $(CXX) CXXLINK = $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ -o $@ -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -SOURCES = $(stressapptest_SOURCES) -DIST_SOURCES = $(stressapptest_SOURCES) +SOURCES = $(findmask_SOURCES) $(stressapptest_SOURCES) +DIST_SOURCES = $(findmask_SOURCES) $(stressapptest_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -188,11 +192,12 @@ HFILES = os.h pattern.h queue.h sat.h worker.h sattypes.h \ finelock_queue.h error_diag.h disk_blocks.h adler32memcpy.h \ logger.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) +findmask_SOURCES = findmask.c findmask.inc all: stressapptest_config.h $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: -.SUFFIXES: .cc .o .obj +.SUFFIXES: .c .cc .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ @@ -277,6 +282,12 @@ uninstall-binPROGRAMS: clean-binPROGRAMS: -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) + +clean-noinstPROGRAMS: + -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) +findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES) + @rm -f findmask$(EXEEXT) + $(LINK) $(findmask_OBJECTS) $(findmask_LDADD) $(LIBS) stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) @rm -f stressapptest$(EXEEXT) $(CXXLINK) $(stressapptest_OBJECTS) $(stressapptest_LDADD) $(LIBS) @@ -290,6 +301,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adler32memcpy.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/disk_blocks.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error_diag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findmask.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/finelock_queue.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/logger.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Po@am__quote@ @@ -301,6 +313,20 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sat_factory.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/worker.Po@am__quote@ +.c.o: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + .cc.o: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @@ -431,7 +457,8 @@ maintainer-clean-generic: @echo "it 
deletes files that may require special tools to rebuild." clean: clean-am -clean-am: clean-binPROGRAMS clean-generic mostlyclean-am +clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \ + mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) @@ -501,17 +528,18 @@ uninstall-am: uninstall-binPROGRAMS .MAKE: all install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ - clean-generic ctags distclean distclean-compile \ - distclean-generic distclean-hdr distclean-tags distdir dvi \ - dvi-am html html-am info info-am install install-am \ - install-binPROGRAMS install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-pdf install-pdf-am install-ps install-ps-am \ - install-strip installcheck installcheck-am installdirs \ - maintainer-clean maintainer-clean-generic mostlyclean \ - mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-binPROGRAMS + clean-generic clean-noinstPROGRAMS ctags distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-binPROGRAMS install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-binPROGRAMS # Tell versions [3.59,3.63) of GNU make to not export all variables. diff --git a/src/findmask.c b/src/findmask.c new file mode 100644 index 0000000..d8ec300 --- /dev/null +++ b/src/findmask.c @@ -0,0 +1,138 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + * This "tool" can be used to brute force the XOR bitmask that a memory + * controller uses to interleave addresses onto its two channels. To use it, + * you need to have a bunch of addresses that are known to go to only one + * of the memory channels... easiest way to get these is to run stressapptest on + * a machine while holding a soldering iron close to the chips of one channel. + * Generate about a thousand failures and extract their physical addresses + * from the output. Write them to findmask.inc in a way that forms a valid + * definition for the addrs array. Make and run on a big machine. + * + * The program iterates over all possible bitmasks within the first NUM_BITS, + * parallelizing execution over NUM_THREADS. Every integer is masked + * onto all supplied addresses, counting the amount of times this results in + * an odd or even amount of bits. If all but NOISE addresses fall on one side, + * it will print that mask to stdout. 
Note that the script will always "find" + * the mask 0x0, and may also report masks such as 0x100000000 depending on + * your test machines memory size... you will need to use your own judgement to + * interpret the results. + * + * As the program might run for a long time, you can send SIGUSR1 to it to + * output the last mask that was processed and get a rough idea of the + * current progress. + */ + +#include +#include +#include +#include +#include + +#define NOISE 20 +#define NUM_BITS 32 +#define NUM_THREADS 128 // keep this a power of two + +static uint64_t addrs[] = { +#include "findmask.inc" +}; +static uint64_t lastmask; + +__attribute__((optimize(3, "unroll-loops"))) +void* thread_func(void* arg) { + register uint64_t mask; + register uintptr_t num = (uintptr_t)arg; + + for (mask = num; mask < (1ULL << (NUM_BITS + 1)); mask += NUM_THREADS) { + register const uint64_t* cur; + register int a = 0; + register int b = 0; + + for (cur = addrs; (char*)cur < (char*)addrs + sizeof(addrs); cur++) { +#ifdef __x86_64__ + register uint64_t addr asm("rdx") = *cur & mask; + register uint32_t tmp asm("ebx"); + + // Behold: the dark bit counting magic! + asm ( + // Fold high and low 32 bits onto each other + "MOVl %%edx, %%ebx\n\t" + "SHRq $32, %%rdx\n\t" + "XORl %%ebx, %%edx\n\t" + // Fold high and low 16 bits onto each other + "MOVl %%edx, %%ebx\n\t" + "SHRl $16, %%edx\n\t" + "XORw %%bx, %%dx\n\t" + // Fold high and low 8 bits onto each other + "XORb %%dh, %%dl\n\t" + // Invoke ancient 8086 parity flag (only counts lowest byte) + "SETnp %%bl\n\t" + "SETp %%dl\n\t" + // Stupid SET instruction can only affect the lowest byte... + "ANDl $1, %%ebx\n\t" + "ANDl $1, %%edx\n\t" + // Increment either 'a' or 'b' without needing another branch + "ADDl %%ebx, %2\n\t" + "ADDl %%edx, %1\n\t" + : "=b" (tmp), "+r"(a), "+r"(b) : "d"(addr) : "cc"); + +#else // generic processor + register uint64_t addr = *cur & mask; + register uint32_t low = (uint32_t)addr; + register uint32_t high = (uint32_t)(addr >> 32); + + // Takes about twice as long as the version above... take that GCC! + __builtin_parity(low) ^ __builtin_parity(high) ? a++ : b++; +#endif + + // Early abort: probably still the most valuable optimization in here + if (a >= NOISE && b >= NOISE) break; + } + + if (a < NOISE) b = a; + if (b < NOISE) { + printf("Found mask with just %d deviations: 0x%llx\n", b, mask); + fflush(stdout); + } + + // I'm a little paranoid about performance: don't write to memory too often + if (!(mask & 0x7ff)) lastmask = mask; + } + + return 0; +} + +void signal_handler(int signum) { + printf("Received signal... currently evaluating mask 0x%llx!\n", lastmask); + fflush(stdout); +} + +int main(int argc, char** argv) { + uintptr_t i; + pthread_t threads[NUM_THREADS]; + + signal(SIGUSR1, signal_handler); + + for (i = 0; i < NUM_THREADS; i++) + pthread_create(&threads[i], 0, thread_func, (void*)i); + + for (i = 0; i < NUM_THREADS; i++) + pthread_join(threads[i], 0); + + return 0; +} diff --git a/src/findmask.inc b/src/findmask.inc new file mode 100644 index 0000000..e76f72f --- /dev/null +++ b/src/findmask.inc @@ -0,0 +1,4 @@ +// This is the body of a uintptr_t array definition. Fill in your own addresses. 
+0x116bb312c, // example values (can be >32 bit) +0x38d3c5ad, // replace with your own +0x77c1e96d // don't forget: no comma after the last one diff --git a/src/os.cc b/src/os.cc index b73b18c..7cae23b 100644 --- a/src/os.cc +++ b/src/os.cc @@ -261,21 +261,22 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, } -// Translate physical address to memory module name. -// Assumes simple round-robin interleaving between memory channels of -// 'interleave_size_' sized chunks, with repeated 'channel_width_' +// Translate physical address to memory module/chip name. +// Assumes interleaving between two memory channels based on the XOR of +// all address bits in the 'channel_hash' mask, with repeated 'channel_width_' // blocks with bits distributed from each chip in that channel. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { static const string unknown = "DIMM Unknown"; - if (!modules_) { + if (!channels_) { snprintf(buf, len, "%s", unknown.c_str()); return 0; } - // Find channel by counting interleave units (typically cachelines), - // and mod by number of channels. - vector& channel = (*modules_)[ - (addr / interleave_size_) % modules_->size()]; + // Find channel by XORing address bits in channel_hash mask. + uint32 low = (uint32)(addr & channel_hash_); + uint32 high = (uint32)((addr & channel_hash_) >> 32); + vector& channel = (*channels_)[ + __builtin_parity(high) ^ __builtin_parity(low)]; // Find dram chip by finding which byte within the channel // by address mod channel width, then divide the channel diff --git a/src/os.h b/src/os.h index 86e0c4e..a928577 100644 --- a/src/os.h +++ b/src/os.h @@ -58,11 +58,11 @@ class OsLayer { } // Set parameters needed to translate physical address to memory module. - void SetDramMappingParams(int interleave_size, int channel_width, - vector< vector > *modules) { - interleave_size_ = interleave_size; + void SetDramMappingParams(uintptr_t channel_hash, int channel_width, + vector< vector > *channels) { + channel_hash_ = channel_hash; channel_width_ = channel_width; - modules_ = modules; + channels_ = channels; } // Initializes data strctures and open files. @@ -269,8 +269,8 @@ class OsLayer { bool use_posix_shm_; // Use 4k page shmem? bool dynamic_mapped_shmem_; // Conserve virtual address space. int shmid_; // Handle to shmem - vector< vector > *modules_; // Memory module names per channel. - int interleave_size_; // Channel interleaving chunk size. + vector< vector > *channels_; // Memory module names per channel. + uint64 channel_hash_; // Mask of address bits XORed for channel. int channel_width_; // Channel width in bits. 
int64 regionsize_; // Size of memory "regions" diff --git a/src/sat.cc b/src/sat.cc index 0679ea1..4f4e684 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -572,12 +572,12 @@ bool Sat::Initialize() { if (min_hugepages_mbytes_ > 0) os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte); - if (modules_.size() > 0) { + if (channels_.size() > 0) { logprintf(6, "Log: Decoding memory: %dx%d bit channels," - " %d byte burst size, %d modules per channel (x%d)\n", - modules_.size(), channel_width_, interleave_size_, modules_[0].size(), - channel_width_/modules_[0].size()); - os_->SetDramMappingParams(interleave_size_, channel_width_, &modules_); + "%d modules per channel (x%d), decoding hash 0x%x\n", + channels_.size(), channel_width_, channels_[0].size(), + channel_width_/channels_[0].size(), channel_hash_); + os_->SetDramMappingParams(channel_hash_, channel_width_, &channels_); } if (!os_->Initialize()) { @@ -650,7 +650,7 @@ Sat::Sat() { min_hugepages_mbytes_ = 0; freepages_ = 0; paddr_base_ = 0; - interleave_size_ = kCacheLineSize; + channel_hash_ = kCacheLineSize; channel_width_ = 64; user_break_ = false; @@ -927,19 +927,19 @@ bool Sat::ParseArgs(int argc, char **argv) { continue; } - ARG_IVALUE("--interleave_size", interleave_size_); + ARG_IVALUE("--channel_hash", channel_hash_); ARG_IVALUE("--channel_width", channel_width_); if (!strcmp(argv[i], "--memory_channel")) { i++; if (i < argc) { - char *module = argv[i]; - modules_.push_back(vector()); - while (char* next = strchr(module, ',')) { - modules_.back().push_back(string(module, next - module)); - module = next + 1; + char *channel = argv[i]; + channels_.push_back(vector()); + while (char* next = strchr(channel, ',')) { + channels_.back().push_back(string(channel, next - channel)); + channel = next + 1; } - modules_.back().push_back(string(module)); + channels_.back().push_back(string(channel)); } continue; } @@ -990,22 +990,25 @@ bool Sat::ParseArgs(int argc, char **argv) { } // Validate memory channel parameters if supplied - if (modules_.size()) { - if (interleave_size_ <= 0 || - interleave_size_ & (interleave_size_ - 1)) { + if (channels_.size()) { + if (channels_.size() == 1) { + channel_hash_ = 0; + logprintf(7, "Log: " + "Only one memory channel...deactivating interleave decoding.\n"); + } else if (channels_.size() > 2) { logprintf(6, "Process Error: " - "Interleave size %d is not a power of 2.\n", interleave_size_); + "Triple-channel mode not yet supported... 
sorry.\n"); bad_status(); return false; } - for (uint i = 0; i < modules_.size(); i++) - if (modules_[i].size() != modules_[0].size()) { + for (uint i = 0; i < channels_.size(); i++) + if (channels_[i].size() != channels_[0].size()) { logprintf(6, "Process Error: " - "Channels 0 and %d have a different amount of modules.\n",i); + "Channels 0 and %d have a different count of dram modules.\n",i); bad_status(); return false; } - if (modules_[0].size() & (modules_[0].size() - 1)) { + if (channels_[0].size() & (channels_[0].size() - 1)) { logprintf(6, "Process Error: " "Amount of modules per memory channel is not a power of 2.\n"); bad_status(); @@ -1018,9 +1021,9 @@ bool Sat::ParseArgs(int argc, char **argv) { bad_status(); return false; } - if (channel_width_ / modules_[0].size() < 8) { - logprintf(6, "Process Error: " - "Chip width x%d must be x8 or greater.\n", channel_width_ / modules_[0].size()); + if (channel_width_ / channels_[0].size() < 8) { + logprintf(6, "Process Error: Chip width x%d must be x8 or greater.\n", + channel_width_ / channels_[0].size()); bad_status(); return false; } @@ -1095,8 +1098,8 @@ void Sat::PrintHelp() { "each CPU to be tested by that CPU\n" " --remote_numa choose memory regions not associated with " "each CPU to be tested by that CPU\n" - " --interleave_size bytes size in bytes of each channel's data as interleaved " - "between memory channels\n" + " --channel_hash mask of address bits XORed to determine channel.\n" + " Mask 0x40 interleaves cachelines between channels\n" " --channel_width bits width in bits of each memory channel\n" " --memory_channel u1,u2 defines a comma-separated list of names\n" " for dram packages in a memory channel.\n" diff --git a/src/sat.h b/src/sat.h index e867151..93d6b34 100644 --- a/src/sat.h +++ b/src/sat.h @@ -151,9 +151,8 @@ class Sat { int64 freepages_; // How many invalid pages we need. int disk_pages_; // Number of pages per temp file. uint64 paddr_base_; // Physical address base. - vector< vector > modules_; // Memory module names per channel. - int interleave_size_; // Channel interleaving chunk size in bytes. - // Usually cacheline sized. + vector< vector > channels_; // Memory module names per channel. + uint64 channel_hash_; // Mask of address bits XORed for channel. int channel_width_; // Channel width in bits. // Control flags. -- cgit v1.2.3 From 2ea87b7996f4f433d5d946eaf8f0d2f6fd18c144 Mon Sep 17 00:00:00 2001 From: ewout Date: Tue, 10 Sep 2013 21:27:49 +0000 Subject: New frequency test, fixed error accounting, added logging timestamps, and miscellaneous smaller changes. * Added a CPU Frequency test for select X86 processors to verify a minimum frequency is maintained during non-pause periods. * Fixed the error accounting in WorkerThread::CheckRegion if more than 128 miscompares are found and when block errors are detected. * Updated the logger to include timestamps and the associated timezone. * Moved from apicid() to sched_getcpu() for determining the core ID. * Added the ability to reserve a specified amount of memory. This can override the requested memory allocation. * If not using POSIX shared memory or hugepages, explicitly mmap memory if the pagesize is 4kB otherwise use memalign. * Removed the OSLayer's unused PCI device handling. * Numerous refactoring changes. 
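
The memory-allocation point above boils down to roughly the following sketch.
The helper name is invented for this example and error handling is omitted; the
real logic (which first honors the POSIX shm and hugepage paths) lives in the
OsLayer allocation code.

  // Hypothetical illustration of the 4kB-page mmap / memalign choice.
  // AllocateTestMemory is not an actual stressapptest function.
  #include <malloc.h>
  #include <sys/mman.h>
  #include <unistd.h>

  void *AllocateTestMemory(size_t length) {
    long pagesize = sysconf(_SC_PAGESIZE);
    if (pagesize == 4096) {
      // 4kB pages: map anonymous memory directly so it is page aligned.
      void *buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      return (buf == MAP_FAILED) ? NULL : buf;
    }
    // Other page sizes: fall back to an aligned heap allocation.
    return memalign(pagesize, length);
  }
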
--- src/Makefile.am | 1 + src/clock.h | 29 ++++ src/disk_blocks.cc | 187 ++++++++--------------- src/disk_blocks.h | 157 +++++++++++++------ src/findmask.c | 6 +- src/logger.cc | 56 ++++--- src/logger.h | 17 ++- src/os.cc | 218 +++++++++++---------------- src/os.h | 132 ++++++++++++++-- src/sat.cc | 144 +++++++++++++++--- src/sat.h | 19 ++- src/sattypes.h | 58 ++++++- src/worker.cc | 435 +++++++++++++++++++++++++++++++++++++++++------------ src/worker.h | 100 +++++++++++- 14 files changed, 1086 insertions(+), 473 deletions(-) create mode 100644 src/clock.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 2179b42..16f539d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -28,6 +28,7 @@ HFILES += error_diag.h HFILES += disk_blocks.h HFILES += adler32memcpy.h HFILES += logger.h +HFILES += clock.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) findmask_SOURCES = findmask.c findmask.inc diff --git a/src/clock.h b/src/clock.h new file mode 100644 index 0000000..4204188 --- /dev/null +++ b/src/clock.h @@ -0,0 +1,29 @@ +// Copyright 2010 Google Inc. All Rights Reserved. +// Author: cferris + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef STRESSAPPTEST_CLOCK_H_ // NOLINT +#define STRESSAPPTEST_CLOCK_H_ + +#include + +// This class implements a clock that can be overriden for unit tests. +class Clock { + public: + virtual ~Clock() {} + + virtual time_t Now() { return time(NULL); } +}; + +#endif // STRESSAPPTEST_CLOCK_H_ NOLINT diff --git a/src/disk_blocks.cc b/src/disk_blocks.cc index c7860b0..60018f9 100644 --- a/src/disk_blocks.cc +++ b/src/disk_blocks.cc @@ -14,38 +14,51 @@ // Thread-safe container of disk blocks -#include - // This file must work with autoconf on its public version, // so these includes are correct. 
#include "disk_blocks.h" -DiskBlockTable::DiskBlockTable() { - nelems_ = 0; +#include + +// BlockData +BlockData::BlockData() : address_(0), size_(0), + references_(0), initialized_(false), + pattern_(NULL) { + pthread_mutex_init(&data_mutex_, NULL); +} + +BlockData::~BlockData() { + pthread_mutex_destroy(&data_mutex_); +} + +void BlockData::set_initialized() { + pthread_mutex_lock(&data_mutex_); + initialized_ = true; + pthread_mutex_unlock(&data_mutex_); +} + +bool BlockData::initialized() const { + pthread_mutex_lock(&data_mutex_); + bool initialized = initialized_; + pthread_mutex_unlock(&data_mutex_); + return initialized; +} + +// DiskBlockTable +DiskBlockTable::DiskBlockTable() : sector_size_(0), write_block_size_(0), + device_name_(""), device_sectors_(0), + segment_size_(0), size_(0) { pthread_mutex_init(&data_mutex_, NULL); pthread_mutex_init(¶meter_mutex_, NULL); pthread_cond_init(&data_condition_, NULL); } DiskBlockTable::~DiskBlockTable() { - CleanTable(); pthread_mutex_destroy(&data_mutex_); pthread_mutex_destroy(¶meter_mutex_); pthread_cond_destroy(&data_condition_); } -void DiskBlockTable::CleanTable() { - pthread_mutex_lock(&data_mutex_); - for (map::iterator it = - addr_to_block_.begin(); it != addr_to_block_.end(); ++it) { - delete it->second; - } - addr_to_block_.erase(addr_to_block_.begin(), addr_to_block_.end()); - nelems_ = 0; - pthread_cond_broadcast(&data_condition_); - pthread_mutex_unlock(&data_mutex_); -} - // 64-bit non-negative random number generator. Stolen from // depot/google3/base/tracecontext_unittest.cc. int64 DiskBlockTable::Random64() { @@ -58,28 +71,27 @@ int64 DiskBlockTable::Random64() { return -x; } -int64 DiskBlockTable::NumElems() { - unsigned int nelems; +uint64 DiskBlockTable::Size() { pthread_mutex_lock(&data_mutex_); - nelems = nelems_; + uint64 size = size_; pthread_mutex_unlock(&data_mutex_); - return nelems; + return size; } void DiskBlockTable::InsertOnStructure(BlockData *block) { - int64 address = block->GetAddress(); + int64 address = block->address(); StorageData *sd = new StorageData(); sd->block = block; - sd->pos = nelems_; + sd->pos = size_; // Creating new block ... pthread_mutex_lock(&data_mutex_); - if (pos_to_addr_.size() <= nelems_) { + if (pos_to_addr_.size() <= size_) { pos_to_addr_.insert(pos_to_addr_.end(), address); } else { - pos_to_addr_[nelems_] = address; + pos_to_addr_[size_] = address; } - addr_to_block_.insert(std::make_pair(address, sd)); - nelems_++; + addr_to_block_[address] = sd; + size_++; pthread_cond_broadcast(&data_condition_); pthread_mutex_unlock(&data_mutex_); } @@ -87,26 +99,28 @@ void DiskBlockTable::InsertOnStructure(BlockData *block) { int DiskBlockTable::RemoveBlock(BlockData *block) { // For write threads, check the reference counter and remove // it from the structure. - int64 address = block->GetAddress(); + int64 address = block->address(); AddrToBlockMap::iterator it = addr_to_block_.find(address); int ret = 1; if (it != addr_to_block_.end()) { int curr_pos = it->second->pos; - int last_pos = nelems_ - 1; + int last_pos = size_ - 1; AddrToBlockMap::iterator last_it = addr_to_block_.find( pos_to_addr_[last_pos]); - sat_assert(nelems_ > 0); + sat_assert(size_ > 0); sat_assert(last_it != addr_to_block_.end()); - // Everything is fine, updating ... + // Everything is fine, removing block from table. 
pthread_mutex_lock(&data_mutex_); pos_to_addr_[curr_pos] = pos_to_addr_[last_pos]; last_it->second->pos = curr_pos; delete it->second; addr_to_block_.erase(it); - nelems_--; + size_--; block->DecreaseReferenceCounter(); if (block->GetReferenceCounter() == 0) delete block; + else if (block->GetReferenceCounter() < 0) + ret = 0; pthread_cond_broadcast(&data_condition_); pthread_mutex_unlock(&data_mutex_); } else { @@ -116,18 +130,16 @@ int DiskBlockTable::RemoveBlock(BlockData *block) { } int DiskBlockTable::ReleaseBlock(BlockData *block) { - // If is a random thread, just check the reference counter. + // If caller is a random thread, just check the reference counter. int ret = 1; pthread_mutex_lock(&data_mutex_); int references = block->GetReferenceCounter(); - if (references > 0) { - if (references == 1) - delete block; - else - block->DecreaseReferenceCounter(); - } else { + if (references == 1) + delete block; + else if (references > 0) + block->DecreaseReferenceCounter(); + else ret = 0; - } pthread_mutex_unlock(&data_mutex_); return ret; } @@ -135,13 +147,13 @@ int DiskBlockTable::ReleaseBlock(BlockData *block) { BlockData *DiskBlockTable::GetRandomBlock() { struct timespec ts; struct timeval tp; - int result = 0; gettimeofday(&tp, NULL); ts.tv_sec = tp.tv_sec; ts.tv_nsec = tp.tv_usec * 1000; ts.tv_sec += 2; // Wait for 2 seconds. + int result = 0; pthread_mutex_lock(&data_mutex_); - while (!nelems_ && result != ETIMEDOUT) { + while (!size_ && result != ETIMEDOUT) { result = pthread_cond_timedwait(&data_condition_, &data_mutex_, &ts); } if (result == ETIMEDOUT) { @@ -149,13 +161,13 @@ BlockData *DiskBlockTable::GetRandomBlock() { return NULL; } else { int64 random_number = Random64(); - int64 random_pos = random_number % nelems_; + int64 random_pos = random_number % size_; int64 address = pos_to_addr_[random_pos]; AddrToBlockMap::const_iterator it = addr_to_block_.find(address); sat_assert(it != addr_to_block_.end()); BlockData *b = it->second->block; // A block is returned only if its content is written on disk. - if (b->BlockIsInitialized()) { + if (b->initialized()) { b->IncreaseReferenceCounter(); } else { b = NULL; @@ -165,45 +177,38 @@ BlockData *DiskBlockTable::GetRandomBlock() { } } -void DiskBlockTable::SetParameters( - int sector_size, int write_block_size, int64 device_sectors, - int64 segment_size, string device_name) { +void DiskBlockTable::SetParameters(int sector_size, + int write_block_size, + int64 device_sectors, + int64 segment_size, + const string& device_name) { + sat_assert(size_ == 0); pthread_mutex_lock(¶meter_mutex_); sector_size_ = sector_size; write_block_size_ = write_block_size; device_sectors_ = device_sectors; segment_size_ = segment_size; device_name_ = device_name; - CleanTable(); pthread_mutex_unlock(¶meter_mutex_); } BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { int64 sector = 0; BlockData *block = new BlockData(); - bool good_sequence = false; - int num_sectors; - if (block == NULL) { logprintf(0, "Process Error: Unable to allocate memory " "for sector data for disk %s.\n", device_name_.c_str()); return NULL; } - pthread_mutex_lock(¶meter_mutex_); - sat_assert(device_sectors_ != 0); - // Align the first sector with the beginning of a write block - num_sectors = write_block_size_ / sector_size_; - + int num_sectors = write_block_size_ / sector_size_; for (int i = 0; i < kBlockRetry && !good_sequence; i++) { good_sequence = true; - // Use the entire disk or a small segment of the disk to allocate the first // sector in the block from. 
- if (segment_size_ == -1) { sector = (Random64() & 0x7FFFFFFFFFFFFFFFLL) % ( device_sectors_ / num_sectors); @@ -213,7 +218,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { segment_size_ / num_sectors); sector *= num_sectors; sector += segment * segment_size_; - // Make sure the block is within the segment. if (sector + num_sectors > (segment + 1) * segment_size_) { good_sequence = false; @@ -229,7 +233,6 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { // now aligned to the write_block_size, it is not necessary // to check each sector, just the first block (a sector // overlap will never occur). - pthread_mutex_lock(&data_mutex_); if (addr_to_block_.find(sector) != addr_to_block_.end()) { good_sequence = false; @@ -238,7 +241,8 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { } if (good_sequence) { - block->SetParameters(sector, write_block_size_); + block->set_address(sector); + block->set_size(write_block_size_); block->IncreaseReferenceCounter(); InsertOnStructure(block); } else { @@ -248,66 +252,5 @@ BlockData *DiskBlockTable::GetUnusedBlock(int64 segment) { block = NULL; } pthread_mutex_unlock(¶meter_mutex_); - return block; } - -// BlockData - -BlockData::BlockData() { - addr_ = 0; - size_ = 0; - references_ = 0; - initialized_ = false; - pthread_mutex_init(&data_mutex_, NULL); -} - -BlockData::~BlockData() { - pthread_mutex_destroy(&data_mutex_); -} - -void BlockData::SetParameters(int64 address, int64 size) { - addr_ = address; - size_ = size; -} - -void BlockData::IncreaseReferenceCounter() { - references_++; -} - -void BlockData::DecreaseReferenceCounter() { - references_--; -} - -int BlockData::GetReferenceCounter() { - return references_; -} - -void BlockData::SetBlockAsInitialized() { - pthread_mutex_lock(&data_mutex_); - initialized_ = true; - pthread_mutex_unlock(&data_mutex_); -} - -bool BlockData::BlockIsInitialized() { - pthread_mutex_lock(&data_mutex_); - bool initialized = initialized_; - pthread_mutex_unlock(&data_mutex_); - return initialized; -} - -int64 BlockData::GetAddress() { - return addr_; -} - -int64 BlockData::GetSize() { - return size_; -} - -Pattern *BlockData::GetPattern() { - return pattern_; -} - -void BlockData::SetPattern(Pattern *p) { - pattern_ = p; -} diff --git a/src/disk_blocks.h b/src/disk_blocks.h index cb634c9..638ee9f 100644 --- a/src/disk_blocks.h +++ b/src/disk_blocks.h @@ -25,87 +25,146 @@ #include #include #include -// This file must work with autoconf on its public version, -// so these includes are correct. -#include "pattern.h" + +#include "sattypes.h" + +class Pattern; // Data about a block written to disk so that it can be verified later. +// Thread-unsafe, must be used with locks on non-const methods, +// except for initialized accessor/mutator, which are thread-safe +// (and in fact, is the only method supposed to be accessed from +// someone which is not the thread-safe DiskBlockTable). 
class BlockData { public: BlockData(); ~BlockData(); - void SetParameters(int64 address, int64 size); - void IncreaseReferenceCounter(); - void DecreaseReferenceCounter(); - int GetReferenceCounter(); - void SetBlockAsInitialized(); - bool BlockIsInitialized(); - int64 GetAddress(); - int64 GetSize(); - void SetPattern(Pattern *p); - Pattern *GetPattern(); - protected: - int64 addr_; // address of first sector in block - int64 size_; // size of block - int references_; // reference counter - bool initialized_; // flag indicating the block was written on disk + + // These are reference counters used to control how many + // threads currently have a copy of this particular block. + void IncreaseReferenceCounter() { references_++; } + void DecreaseReferenceCounter() { references_--; } + int GetReferenceCounter() const { return references_; } + + // Controls whether the block was written on disk or not. + // Once written, you cannot "un-written" then without destroying + // this object. + void set_initialized(); + bool initialized() const; + + // Accessor methods for some data related to blocks. + void set_address(uint64 address) { address_ = address; } + uint64 address() const { return address_; } + void set_size(uint64 size) { size_ = size; } + uint64 size() const { return size_; } + void set_pattern(Pattern *p) { pattern_ = p; } + Pattern *pattern() { return pattern_; } + private: + uint64 address_; // Address of first sector in block + uint64 size_; // Size of block + int references_; // Reference counter + bool initialized_; // Flag indicating the block was written on disk Pattern *pattern_; - pthread_mutex_t data_mutex_; + mutable pthread_mutex_t data_mutex_; DISALLOW_COPY_AND_ASSIGN(BlockData); }; -// Disk Block table - store data from blocks to be write / read by -// a DiskThread +// A thread-safe table used to store block data and control access +// to these blocks, letting several threads read and write blocks on +// disk. class DiskBlockTable { public: DiskBlockTable(); virtual ~DiskBlockTable(); - // Get Number of elements stored on table - int64 NumElems(); - // Clean all table data - void CleanTable(); - // Get a random block from the list. Only returns if a element - // is available (consider that other thread must have added them. - BlockData *GetRandomBlock(); - // Set all initial parameters. Assumes all existent data is + // Returns number of elements stored on table. + uint64 Size(); + + // Sets all initial parameters. Assumes all existent data is // invalid and, therefore, must be removed. void SetParameters(int sector_size, int write_block_size, int64 device_sectors, int64 segment_size, - string device_name); - // Return a new block in a unused address. + const string& device_name); + + // During the regular execution, there will be 2 types of threads: + // - Write thread: gets a large number of blocks using GetUnusedBlock, + // writes them on disk (if on destructive mode), + // reads block content ONCE from disk and them removes + // the block from queue with RemoveBlock. After a removal a + // block is not available for read threads, but it is + // only removed from memory if there is no reference for + // this block. Note that a write thread also counts as + // a reference. + // - Read threads: get one block at a time (if available) with + // GetRandomBlock, reads its content from disk, + // checking whether it is correct or not, and releases + // (Using ReleaseBlock) the block to be erased by the + // write threads. 
Since several read threads are allowed + // to read the same block, a reference counter is used to + // control when the block can be REALLY erased from + // memory, and all memory management is made by a + // DiskBlockTable instance. + + // Returns a new block in a unused address. Does not + // grant ownership of the pointer to the caller + // (use RemoveBlock to delete the block from memory instead). BlockData *GetUnusedBlock(int64 segment); - // Remove block from structure (called by write threads) + + // Removes block from structure (called by write threads). Returns + // 1 if successful, 0 otherwise. int RemoveBlock(BlockData *block); - // Release block to be erased (called by random threads) - int ReleaseBlock(BlockData *block); - protected: + // Gets a random block from the list. Only returns if an element + // is available (a write thread has got this block, written it on disk, + // and set this block as initialized). Does not grant ownership of the + // pointer to the caller (use RemoveBlock to delete the block from + // memory instead). + BlockData *GetRandomBlock(); - void InsertOnStructure(BlockData *block); - // Generate a random 64-bit integer (virtual so it could be - // override by the tests) - virtual int64 Random64(); + // Releases block to be erased (called by random threads). Returns + // 1 if successful, 0 otherwise. + int ReleaseBlock(BlockData *block); + protected: struct StorageData { BlockData *block; int pos; }; - - static const int kBlockRetry = 100; // Number of retries to allocate - // sectors. - typedef map AddrToBlockMap; typedef vector PosToAddrVector; + + // Inserts block in structure, used in tests and by other methods. + void InsertOnStructure(BlockData *block); + + // Generates a random 64-bit integer. + // Virtual method so it can be overridden by the tests. + virtual int64 Random64(); + + // Accessor methods for testing. + const PosToAddrVector& pos_to_addr() const { return pos_to_addr_; } + const AddrToBlockMap& addr_to_block() const { return addr_to_block_; } + + int sector_size() const { return sector_size_; } + int write_block_size() const { return write_block_size_; } + const string& device_name() const { return device_name_; } + int64 device_sectors() const { return device_sectors_; } + int64 segment_size() const { return segment_size_; } + + private: + // Number of retries to allocate sectors. + static const int kBlockRetry = 100; + // Actual tables. PosToAddrVector pos_to_addr_; AddrToBlockMap addr_to_block_; - uint64 nelems_; - int sector_size_; // Sector size, in bytes - int write_block_size_; // Block size, in bytes - string device_name_; // Device name - int64 device_sectors_; // Number of sectors in device - int64 segment_size_; // Segment size, in bytes + + // Configuration parameters for block selection + int sector_size_; // Sector size, in bytes + int write_block_size_; // Block size, in bytes + string device_name_; // Device name + int64 device_sectors_; // Number of sectors in device + int64 segment_size_; // Segment size in bytes + uint64 size_; // Number of elements on table pthread_mutex_t data_mutex_; pthread_cond_t data_condition_; pthread_mutex_t parameter_mutex_; diff --git a/src/findmask.c b/src/findmask.c index d8ec300..1b10988 100644 --- a/src/findmask.c +++ b/src/findmask.c @@ -38,6 +38,7 @@ * current progress. 
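// Illustrative sketch (not part of the patch) of the write-thread side
// described in the DiskBlockTable comment above: the writer owns one
// reference from GetUnusedBlock(), marks the block initialized once its
// contents are on disk (which makes it visible to GetRandomBlock() callers),
// and finally retires it with RemoveBlock(); the block is only freed once
// the last reader has released it.
bool WriteAndRetireOneBlock(DiskBlockTable *table, int64 segment) {
  BlockData *block = table->GetUnusedBlock(segment);
  if (block == NULL)
    return false;                 // No free, non-overlapping sectors found.
  // ... fill a buffer from block->pattern() and write block->size() bytes
  // at sector block->address() (the actual disk I/O is elided here) ...
  block->set_initialized();       // Now eligible for read threads.
  return table->RemoveBlock(block) == 1;
}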
*/ +#include #include #include #include @@ -106,7 +107,7 @@ void* thread_func(void* arg) { if (a < NOISE) b = a; if (b < NOISE) { - printf("Found mask with just %d deviations: 0x%llx\n", b, mask); + printf("Found mask with just %d deviations: 0x%" PRIx64 "\n", b, mask); fflush(stdout); } @@ -118,7 +119,8 @@ void* thread_func(void* arg) { } void signal_handler(int signum) { - printf("Received signal... currently evaluating mask 0x%llx!\n", lastmask); + printf("Received signal... currently evaluating mask 0x%" PRIx64 "!\n", + lastmask); fflush(stdout); } diff --git a/src/logger.cc b/src/logger.cc index e4ecb03..f13e003 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,20 @@ void Logger::VLogF(int priority, const char *format, va_list args) { return; } char buffer[4096]; - int length = vsnprintf(buffer, sizeof buffer, format, args); - if (static_cast(length) >= sizeof buffer) { - length = sizeof buffer; - buffer[sizeof buffer - 1] = '\n'; + size_t length = 0; + if (log_timestamps_) { + time_t raw_time; + time(&raw_time); + struct tm time_struct; + localtime_r(&raw_time, &time_struct); + length = strftime(buffer, sizeof(buffer), "%Y/%m/%d-%H:%M:%S(%Z) ", + &time_struct); + LOGGER_ASSERT(length); // Catch if the buffer is set too small. + } + length += vsnprintf(buffer + length, sizeof(buffer) - length, format, args); + if (length >= sizeof(buffer)) { + length = sizeof(buffer); + buffer[sizeof(buffer) - 1] = '\n'; } QueueLogLine(new string(buffer, length)); } @@ -52,19 +63,30 @@ void Logger::StartThread() { } void Logger::StopThread() { - LOGGER_ASSERT(thread_running_); + // Allow this to be called before the thread has started. + if (!thread_running_) { + return; + } thread_running_ = false; - LOGGER_ASSERT(0 == pthread_mutex_lock(&queued_lines_mutex_)); + int retval = pthread_mutex_lock(&queued_lines_mutex_); + LOGGER_ASSERT(0 == retval); bool need_cond_signal = queued_lines_.empty(); queued_lines_.push_back(NULL); - LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); + retval = pthread_mutex_unlock(&queued_lines_mutex_); + LOGGER_ASSERT(0 == retval); if (need_cond_signal) { - LOGGER_ASSERT(0 == pthread_cond_signal(&queued_lines_cond_)); + retval = pthread_cond_signal(&queued_lines_cond_); + LOGGER_ASSERT(0 == retval); } - LOGGER_ASSERT(0 == pthread_join(thread_, NULL)); + retval = pthread_join(thread_, NULL); + LOGGER_ASSERT(0 == retval); } -Logger::Logger() : verbosity_(20), log_fd_(-1), thread_running_(false) { +Logger::Logger() + : verbosity_(20), + log_fd_(-1), + thread_running_(false), + log_timestamps_(true) { LOGGER_ASSERT(0 == pthread_mutex_init(&queued_lines_mutex_, NULL)); LOGGER_ASSERT(0 == pthread_cond_init(&queued_lines_cond_, NULL)); LOGGER_ASSERT(0 == pthread_cond_init(&full_queue_cond_, NULL)); @@ -94,19 +116,15 @@ void Logger::QueueLogLine(string *line) { LOGGER_ASSERT(0 == pthread_mutex_unlock(&queued_lines_mutex_)); } -namespace { -void WriteToFile(const string& line, int fd) { - LOGGER_ASSERT(write(fd, line.data(), line.size()) == - static_cast(line.size())); -} -} - void Logger::WriteAndDeleteLogLine(string *line) { LOGGER_ASSERT(line != NULL); + ssize_t bytes_written; if (log_fd_ >= 0) { - WriteToFile(*line, log_fd_); + bytes_written = write(log_fd_, line->data(), line->size()); + LOGGER_ASSERT(bytes_written == static_cast(line->size())); } - WriteToFile(*line, 1); + bytes_written = write(STDOUT_FILENO, line->data(), line->size()); + LOGGER_ASSERT(bytes_written == 
static_cast(line->size())); delete line; } diff --git a/src/logger.h b/src/logger.h index 1d70107..21b3c6b 100644 --- a/src/logger.h +++ b/src/logger.h @@ -62,7 +62,7 @@ class Logger { // Lines with a priority numerically greater than this will not be logged. // May not be called while multiple threads are running. - void SetVerbosity(int verbosity) { + virtual void SetVerbosity(int verbosity) { verbosity_ = verbosity; } @@ -72,17 +72,22 @@ class Logger { // Args: // log_fd: The file descriptor to write to. Will not be closed by this // object. - void SetLogFd(int log_fd) { + virtual void SetLogFd(int log_fd) { LOGGER_ASSERT(log_fd >= 0); log_fd_ = log_fd; } // Set output to be written to stdout only. This is the default mode. May // not be called while multiple threads are running. - void SetStdoutOnly() { + virtual void SetStdoutOnly() { log_fd_ = -1; } + // Enable or disable logging of timestamps. + void SetTimestampLogging(bool log_ts_enabled) { + log_timestamps_ = log_ts_enabled; + } + // Logs a line, with a vprintf(3)-like interface. This will block on writing // the line to stdout/disk iff the dedicated logging thread is not running. // This will block on adding the line to the queue if doing so would exceed @@ -104,11 +109,12 @@ class Logger { // before this returns. Waits for the thread to finish before returning. void StopThread(); - private: + protected: Logger(); - ~Logger(); + virtual ~Logger(); + private: // Args: // line: Must be non-NULL. This function takes ownership of it. void QueueLogLine(string *line); @@ -127,6 +133,7 @@ class Logger { int verbosity_; int log_fd_; bool thread_running_; + bool log_timestamps_; vector queued_lines_; // This doubles as a mutex for log_fd_ when the logging thread is not running. pthread_mutex_t queued_lines_mutex_; diff --git a/src/os.cc b/src/os.cc index 7cae23b..6358398 100644 --- a/src/os.cc +++ b/src/os.cc @@ -48,6 +48,7 @@ // so these includes are correct. #include "sattypes.h" #include "error_diag.h" +#include "clock.h" // OsLayer initialization. OsLayer::OsLayer() { @@ -55,10 +56,12 @@ OsLayer::OsLayer() { testmemsize_ = 0; totalmemsize_ = 0; min_hugepages_bytes_ = 0; + reserve_mb_ = 0; normal_mem_ = true; use_hugepages_ = false; use_posix_shm_ = false; dynamic_mapped_shmem_ = false; + mmapped_allocation_ = false; shmid_ = 0; time_initialized_ = 0; @@ -79,17 +82,25 @@ OsLayer::OsLayer() { has_sse2_ = false; use_flush_page_cache_ = false; + + clock_ = NULL; } // OsLayer cleanup. OsLayer::~OsLayer() { if (error_diagnoser_) delete error_diagnoser_; + if (clock_) + delete clock_; } // OsLayer initialization. bool OsLayer::Initialize() { - time_initialized_ = time(NULL); + if (!clock_) { + clock_ = new Clock(); + } + + time_initialized_ = clock_->Now(); // Detect asm support. GetFeatures(); @@ -130,7 +141,7 @@ int OsLayer::AddressMode() { // Translates user virtual to physical address. uint64 OsLayer::VirtualToPhysical(void *vaddr) { uint64 frame, shift; - off64_t off = ((uintptr_t)vaddr) / getpagesize() * 8; + off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8; int fd = open(kPagemapPath, O_RDONLY); // /proc/self/pagemap is available in kernel >= 2.6.25 if (fd < 0) @@ -169,22 +180,10 @@ list OsLayer::FindFileDevices() { // Get HW core features from cpuid instruction. 
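// Illustrative sketch (not part of the patch) of the timestamp-prefix
// technique Logger::VLogF() now uses above: strftime() writes the
// "%Y/%m/%d-%H:%M:%S(%Z) " prefix into the line buffer first, then
// vsnprintf() appends the formatted message after it, with the same
// truncation handling as before. Hypothetical standalone helper, not the
// Logger code itself.
#include <stdarg.h>
#include <stdio.h>
#include <time.h>

size_t FormatTimestampedLine(char *buffer, size_t size,
                             const char *format, ...) {
  time_t raw_time = time(NULL);
  struct tm time_struct;
  localtime_r(&raw_time, &time_struct);
  size_t length = strftime(buffer, size, "%Y/%m/%d-%H:%M:%S(%Z) ",
                           &time_struct);
  va_list args;
  va_start(args, format);
  length += vsnprintf(buffer + length, size - length, format, args);
  va_end(args);
  if (length >= size) {            // Truncated: keep a trailing newline.
    length = size;
    buffer[size - 1] = '\n';
  }
  return length;
}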
void OsLayer::GetFeatures() { #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - // CPUID features documented at: - // http://www.sandpile.org/ia32/cpuid.htm - int ax, bx, cx, dx; - __asm__ __volatile__ ( -# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__) - "xchg %%ebx, %%esi;" - "cpuid;" - "xchg %%esi, %%ebx;" - : "=S" (bx), -# else - "cpuid;" - : "=b" (bx), -# endif - "=a" (ax), "=c" (cx), "=d" (dx) : "a" (1)); - has_clflush_ = (dx >> 19) & 1; - has_sse2_ = (dx >> 26) & 1; + unsigned int eax = 1, ebx, ecx, edx; + cpuid(&eax, &ebx, &ecx, &edx); + has_clflush_ = (edx >> 19) & 1; + has_sse2_ = (edx >> 26) & 1; logprintf(9, "Log: has clflush: %s, has sse2: %s\n", has_clflush_ ? "true" : "false", @@ -244,8 +243,9 @@ bool OsLayer::FlushPageCache(void) { void OsLayer::Flush(void *vaddr) { // Use the generic flush. This function is just so we can override // this if we are so inclined. - if (has_clflush_) - FastFlush(vaddr); + if (has_clflush_) { + OsLayer::FastFlush(vaddr); + } } @@ -266,15 +266,14 @@ bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, // all address bits in the 'channel_hash' mask, with repeated 'channel_width_' // blocks with bits distributed from each chip in that channel. int OsLayer::FindDimm(uint64 addr, char *buf, int len) { - static const string unknown = "DIMM Unknown"; if (!channels_) { - snprintf(buf, len, "%s", unknown.c_str()); - return 0; + snprintf(buf, len, "DIMM Unknown"); + return -1; } // Find channel by XORing address bits in channel_hash mask. - uint32 low = (uint32)(addr & channel_hash_); - uint32 high = (uint32)((addr & channel_hash_) >> 32); + uint32 low = static_cast(addr & channel_hash_); + uint32 high = static_cast((addr & channel_hash_) >> 32); vector& channel = (*channels_)[ __builtin_parity(high) ^ __builtin_parity(low)]; @@ -342,9 +341,17 @@ string OsLayer::FindCoreMaskFormat(int32 region) { // Report an error in an easily parseable way. bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) { - time_t now = time(NULL); + time_t now = clock_->Now(); int ttf = now - time_initialized_; - logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf); + if (strlen(symptom) && strlen(part)) { + logprintf(0, "Report Error: %s : %s : %d : %ds\n", + symptom, part, count, ttf); + } else { + // Log something so the error still shows up, but this won't break the + // parser. + logprintf(0, "Warning: Invalid Report Error: " + "%s : %s : %d : %ds\n", symptom, part, count, ttf); + } return true; } @@ -408,12 +415,31 @@ int64 OsLayer::FindFreeMemSize() { // // TODO(nsanders): is there a more correct way to determine target // memory size? - if (hugepagesize > 0 && min_hugepages_bytes_ > 0) { - minsize = min_hugepages_bytes_; - } else if (physsize < 2048LL * kMegabyte) { - minsize = ((pages * 85) / 100) * pagesize; + if (hugepagesize > 0) { + if (min_hugepages_bytes_ > 0) { + minsize = min_hugepages_bytes_; + } else { + minsize = hugepagesize; + } } else { - minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + if (physsize < 2048LL * kMegabyte) { + minsize = ((pages * 85) / 100) * pagesize; + } else { + minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte); + } + // Make sure that at least reserve_mb_ is left for the system. 
+ if (reserve_mb_ > 0) { + int64 totalsize = pages * pagesize; + int64 reserve_kb = reserve_mb_ * kMegabyte; + if (reserve_kb > totalsize) { + logprintf(0, "Procedural Error: %lld is bigger than the total memory " + "available %lld\n", reserve_kb, totalsize); + } else if (reserve_kb > totalsize - minsize) { + logprintf(5, "Warning: Overriding memory to use: original %lld, " + "current %lld\n", minsize, totalsize - reserve_kb); + minsize = totalsize - reserve_kb; + } + } } // Use hugepage sizing if available. @@ -484,7 +510,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { "'sudo mount -o remount,size=100\% /dev/shm.'\n"); } else if (hugepagesize >= length) { prefer_hugepages = true; - logprintf(3, "Log: Prefer using hugepace allocation.\n"); + logprintf(3, "Log: Prefer using hugepage allocation.\n"); } else { logprintf(3, "Log: Prefer plain malloc memory allocation.\n"); } @@ -507,7 +533,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { break; } - shmaddr = shmat(shmid, NULL, NULL); + shmaddr = shmat(shmid, NULL, 0); if (shmaddr == reinterpret_cast(-1)) { int err = errno; string errtxt = ErrorString(err); @@ -564,7 +590,7 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { // Do a full mapping here otherwise. shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE, - shm_object, NULL); + shm_object, 0); if (shmaddr == reinterpret_cast(-1)) { int err = errno; string errtxt = ErrorString(err); @@ -589,18 +615,32 @@ bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) { } while (0); shm_unlink("/stressapptest"); } -#endif // HAVE_SYS_SHM_H +#endif // HAVE_SYS_SHM_H if (!use_hugepages_ && !use_posix_shm_) { - // Use memalign to ensure that blocks are aligned enough for disk direct IO. - buf = static_cast(memalign(4096, length)); - if (buf) { - logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); - } else { - logprintf(0, "Process Error: memalign returned 0\n"); - if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { - logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " - "bit process. Please setup shared memory.\n"); + // If the page size is what SAT is expecting explicitly perform mmap() + // allocation. + if (sysconf(_SC_PAGESIZE) >= 4096) { + void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (map_buf != MAP_FAILED) { + buf = map_buf; + mmapped_allocation_ = true; + logprintf(0, "Log: Using mmap() allocation at %p.\n", buf); + } + } + if (!mmapped_allocation_) { + // Use memalign to ensure that blocks are aligned enough for disk direct + // IO. + buf = static_cast(memalign(4096, length)); + if (buf) { + logprintf(0, "Log: Using memaligned allocation at %p.\n", buf); + } else { + logprintf(0, "Process Error: memalign returned 0\n"); + if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) { + logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 " + "bit process. Please setup shared memory.\n"); + } } } } @@ -628,6 +668,8 @@ void OsLayer::FreeTestMem() { munmap(testmem_, testmemsize_); } close(shmid_); + } else if (mmapped_allocation_) { + munmap(testmem_, testmemsize_); } else { free(testmem_); } @@ -849,7 +891,9 @@ uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) { bool OsLayer::CpuStressWorkload() { double float_arr[100]; double sum = 0; +#ifdef HAVE_RAND_R unsigned int seed = 12345; +#endif // Initialize array with random numbers. 
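// Illustrative worked example of the reserve_mb_ clamp added to
// FindFreeMemSize() above (the numbers are made up): on a machine with
// 4096 MB of RAM and no hugepages, the default target is
//   minsize = 95% of 4096 MB - 192 MB  ~= 3699 MB,
// which leaves only ~397 MB for the system. Running with
// --reserve_memory 1024 makes the reserved amount 1024 * kMegabyte bytes;
// since that exceeds what the default leaves free, the clamp fires and
// minsize is overridden to 4096 MB - 1024 MB = 3072 MB.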
for (int i = 0; i < 100; i++) { @@ -858,8 +902,9 @@ bool OsLayer::CpuStressWorkload() { if (rand_r(&seed) % 2) float_arr[i] *= -1.0; #else - float_arr[i] = rand(); - if (rand() % 2) + srand(time(NULL)); + float_arr[i] = rand(); // NOLINT + if (rand() % 2) // NOLINT float_arr[i] *= -1.0; #endif } @@ -877,82 +922,3 @@ bool OsLayer::CpuStressWorkload() { logprintf(12, "Log: I'm Feeling Lucky!\n"); return true; } - -PCIDevices OsLayer::GetPCIDevices() { - PCIDevices device_list; - DIR *dir; - struct dirent *buf = new struct dirent(); - struct dirent *entry; - dir = opendir(kSysfsPath); - if (!dir) - logprintf(0, "Process Error: Cannot open %s", kSysfsPath); - while (readdir_r(dir, buf, &entry) == 0 && entry) { - PCIDevice *device; - unsigned int dev, func; - // ".", ".." or a special non-device perhaps. - if (entry->d_name[0] == '.') - continue; - - device = new PCIDevice(); - if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d", - &device->domain, &device->bus, &dev, &func) < 4) { - logprintf(0, "Process Error: Couldn't parse %s", entry->d_name); - free(device); - continue; - } - device->dev = dev; - device->func = func; - device->vendor_id = PCIGetValue(entry->d_name, "vendor"); - device->device_id = PCIGetValue(entry->d_name, "device"); - PCIGetResources(entry->d_name, device); - device_list.insert(device_list.end(), device); - } - closedir(dir); - delete buf; - return device_list; -} - -int OsLayer::PCIGetValue(string name, string object) { - int fd, len; - char filename[256]; - char buf[256]; - snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, - name.c_str(), object.c_str()); - fd = open(filename, O_RDONLY); - if (fd < 0) - return 0; - len = read(fd, buf, 256); - close(fd); - buf[len] = '\0'; - return strtol(buf, NULL, 0); // NOLINT -} - -int OsLayer::PCIGetResources(string name, PCIDevice *device) { - char filename[256]; - char buf[256]; - FILE *file; - int64 start; - int64 end; - int64 size; - int i; - snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath, - name.c_str(), "resource"); - file = fopen(filename, "r"); - if (!file) { - logprintf(0, "Process Error: impossible to find resource file for %s", - filename); - return errno; - } - for (i = 0; i < 6; i++) { - if (!fgets(buf, 256, file)) - break; - sscanf(buf, "%llx %llx", &start, &end); // NOLINT - size = 0; - if (start) - size = end - start + 1; - device->base_addr[i] = start; - device->size[i] = size; - } - fclose(file); - return 0; -} diff --git a/src/os.h b/src/os.h index a928577..13660d8 100644 --- a/src/os.h +++ b/src/os.h @@ -17,6 +17,8 @@ #define STRESSAPPTEST_OS_H_ #include +#include + #include #include #include @@ -26,9 +28,9 @@ // so these includes are correct. #include "adler32memcpy.h" // NOLINT #include "sattypes.h" // NOLINT +#include "clock.h" // NOLINT const char kPagemapPath[] = "/proc/self/pagemap"; -const char kSysfsPath[] = "/sys/bus/pci/devices"; struct PCIDevice { int32 domain; @@ -45,6 +47,8 @@ typedef vector PCIDevices; class ErrorDiag; +class Clock; + // This class implements OS/Platform specific funtions. class OsLayer { public: @@ -57,6 +61,13 @@ class OsLayer { min_hugepages_bytes_ = min_bytes; } + // Set the minium amount of memory that should not be allocated. This only + // has any affect if hugepages are not used. + // Must be set before Initialize(). + void SetReserveSize(int64 reserve_mb) { + reserve_mb_ = reserve_mb; + } + // Set parameters needed to translate physical address to memory module. 
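// Illustrative sketch (not part of the patch) of how FindDimm() above picks
// a channel from an address using the channel_hash mask handed to
// SetDramMappingParams() below, assuming the sattypes.h integer typedefs:
// the masked address bits are XOR-reduced (via parity) to a single bit that
// indexes one of the two channels. With the mask 0x40 mentioned in the
// --channel_hash help text, consecutive 64-byte cachelines alternate
// between channel 0 and channel 1.
inline int ChannelForAddress(uint64 addr, uint64 channel_hash) {
  uint32 low = static_cast<uint32>(addr & channel_hash);
  uint32 high = static_cast<uint32>((addr & channel_hash) >> 32);
  return __builtin_parity(high) ^ __builtin_parity(low);
}
// e.g. ChannelForAddress(0x000, 0x40) == 0 and ChannelForAddress(0x040, 0x40) == 1.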
void SetDramMappingParams(uintptr_t channel_hash, int channel_width, vector< vector > *channels) { @@ -77,13 +88,11 @@ class OsLayer { // Prints failed dimm. This implementation is optional for // subclasses to implement. // Takes a bus address and string, and prints the DIMM name - // into the string. Returns error status. + // into the string. Returns the DIMM number that corresponds to the + // address given, or -1 if unable to identify the DIMM number. + // Note that subclass implementations of FindDimm() MUST fill + // buf with at LEAST one non-whitespace character (provided len > 0). virtual int FindDimm(uint64 addr, char *buf, int len); - // Print dimm info, plus more available info. - virtual int FindDimmExtended(uint64 addr, char *buf, int len) { - return FindDimm(addr, buf, len); - } - // Classifies addresses according to "regions" // This may mean different things on different platforms. @@ -141,10 +150,95 @@ class OsLayer { // instruction. For example, software can use an MFENCE instruction to // insure that previous stores are included in the write-back. asm volatile("mfence"); - asm volatile("clflush (%0)" :: "r" (vaddr)); + asm volatile("clflush (%0)" : : "r" (vaddr)); + asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + #warning "Unsupported CPU type ARMV7A: Using syscall to cache flush." + // ARMv7a cachelines are 8 words (32 bytes). + syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast(vaddr) + 32, 0); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Takes a NULL-terminated + // array of addresses to flush. + inline static void FastFlushList(void **vaddrs) { +#ifdef STRESSAPPTEST_CPU_PPC + while (*vaddrs) { + asm volatile("dcbf 0,%0" : : "r" (*vaddrs++)); + } + asm volatile("sync"); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("mfence"); + while (*vaddrs) { + asm volatile("clflush (%0)" : : "r" (*vaddrs++)); + } + asm volatile("mfence"); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + while (*vaddrs) { + FastFlush(*vaddrs++); + } +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush hint, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Note that this + // will not guarantee that a flush happens, but will at least + // hint that it should. This is useful for speeding up + // parallel march algorithms. + inline static void FastFlushHint(void *vaddr) { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("dcbf 0,%0" : : "r" (vaddr)); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. 
For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. + asm volatile("clflush (%0)" : : "r" (vaddr)); +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + FastFlush(vaddr); +#else + #warning "Unsupported CPU type: Unable to force cache flushes." +#endif + } + + // Fast flush, for use in performance critical code. + // This is bound at compile time, and will not pick up + // any runtime machine configuration info. Sync's any + // transactions for ordering FastFlushHints. + inline static void FastFlushSync() { +#ifdef STRESSAPPTEST_CPU_PPC + asm volatile("sync"); +#elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + // Put mfence before and after clflush to make sure: + // 1. The write before the clflush is committed to memory bus; + // 2. The read after the clflush is hitting the memory bus. + // + // From Intel manual: + // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed + // to be ordered by any other fencing, serializing or other CLFLUSH + // instruction. For example, software can use an MFENCE instruction to + // insure that previous stores are included in the write-back. asm volatile("mfence"); #elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: Unable to force cache flushes." + // This is a NOP, FastFlushHint() always does a full flush, so there's + // nothing to do for FastFlushSync(). #else #warning "Unsupported CPU type: Unable to force cache flushes." #endif @@ -239,9 +333,6 @@ class OsLayer { // Handle to platform-specific error diagnoser. ErrorDiag *error_diagnoser_; - // Detect all PCI Devices. - virtual PCIDevices GetPCIDevices(); - // Disambiguate between different "warm" memcopies. virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, @@ -258,16 +349,27 @@ class OsLayer { } ErrCallback get_err_log_callback() { return err_log_callback_; } + // Set a clock object that can be overridden for use with unit tests. + void SetClock(Clock *clock) { + if (clock_) { + delete clock_; + } + clock_ = clock; + time_initialized_ = clock_->Now(); + } + protected: void *testmem_; // Location of test memory. uint64 testmemsize_; // Size of test memory. int64 totalmemsize_; // Size of available memory. int64 min_hugepages_bytes_; // Minimum hugepages size. + int64 reserve_mb_; // Minimum amount of memory to reserve in MB. bool error_injection_; // Do error injection? bool normal_mem_; // Memory DMA capable? bool use_hugepages_; // Use hugepage shmem? bool use_posix_shm_; // Use 4k page shmem? bool dynamic_mapped_shmem_; // Conserve virtual address space. + bool mmapped_allocation_; // Was memory allocated using mmap()? int shmid_; // Handle to shmem vector< vector > *channels_; // Memory module names per channel. uint64 channel_hash_; // Mask of address bits XORed for channel. @@ -291,9 +393,6 @@ class OsLayer { // Get file descriptor for dev msr. virtual int OpenMSR(uint32 core, uint32 address); - // Auxiliary methods for PCI device configuration - int PCIGetValue(string name, string object); - int PCIGetResources(string name, PCIDevice *device); // Look up how many hugepages there are. virtual int64 FindHugePages(); @@ -301,6 +400,9 @@ class OsLayer { // Link to find last transaction at an error location. ErrCallback err_log_callback_; + // Object to wrap the time function. 
+ Clock *clock_; + private: DISALLOW_COPY_AND_ASSIGN(OsLayer); }; diff --git a/src/sat.cc b/src/sat.cc index 4f4e684..57fd4fe 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -125,6 +125,26 @@ bool Sat::CheckEnvironment() { #error Build system regression - COPTS disregarded. #endif + // Check if the cpu frequency test is enabled and able to run. + if (cpu_freq_test_) { + if (!CpuFreqThread::CanRun()) { + logprintf(0, "Process Error: This platform does not support this " + "test.\n"); + bad_status(); + return false; + } else if (cpu_freq_threshold_ <= 0) { + logprintf(0, "Process Error: The cpu frequency test requires " + "--cpu_freq_threshold set to a value > 0\n"); + bad_status(); + return false; + } else if (cpu_freq_round_ < 0) { + logprintf(0, "Process Error: The --cpu_freq_round option must be greater" + " than or equal to zero. A value of zero means no rounding.\n"); + bad_status(); + return false; + } + } + // Use all CPUs if nothing is specified. if (memory_threads_ == -1) { memory_threads_ = os_->num_cpus(); @@ -491,12 +511,6 @@ bool Sat::InitializePages() { if (GetValid(&pe, kInvalidTag)) { int64 paddr = os_->VirtualToPhysical(pe.addr); int32 region = os_->FindRegion(paddr); - - if (i < 256) { - char buf[256]; - os_->FindDimm(paddr, buf, sizeof(buf)); - logprintf(12, "Log: address: %#llx, %s\n", paddr, buf); - } region_[region]++; pe.paddr = paddr; pe.tag = 1 << region; @@ -554,6 +568,7 @@ bool Sat::Initialize() { // Initializes sync'd log file to ensure output is saved. if (!InitializeLogfile()) return false; + Logger::GlobalLogger()->SetTimestampLogging(log_timestamps_); Logger::GlobalLogger()->StartThread(); logprintf(5, "Log: Commandline - %s\n", cmdline_.c_str()); @@ -572,6 +587,10 @@ bool Sat::Initialize() { if (min_hugepages_mbytes_ > 0) os_->SetMinimumHugepagesSize(min_hugepages_mbytes_ * kMegabyte); + + if (reserve_mb_ > 0) + os_->SetReserveSize(reserve_mb_); + if (channels_.size() > 0) { logprintf(6, "Log: Decoding memory: %dx%d bit channels," "%d modules per channel (x%d), decoding hash 0x%x\n", @@ -647,6 +666,7 @@ Sat::Sat() { pages_ = 0; size_mb_ = 0; size_ = size_mb_ * kMegabyte; + reserve_mb_ = 0; min_hugepages_mbytes_ = 0; freepages_ = 0; paddr_base_ = 0; @@ -661,6 +681,7 @@ Sat::Sat() { run_on_anything_ = 0; use_logfile_ = 0; logfile_ = 0; + log_timestamps_ = true; // Detect 32/64 bit binary. void *pvoid = 0; address_mode_ = sizeof(pvoid) * 8; @@ -678,9 +699,15 @@ Sat::Sat() { // Cache coherency data initialization. cc_test_ = false; // Flag to trigger cc threads. cc_cacheline_count_ = 2; // Two datastructures of cache line size. + cc_cacheline_size_ = 0; // Size of a cacheline (0 for auto-detect). cc_inc_count_ = 1000; // Number of times to increment the shared variable. cc_cacheline_data_ = 0; // Cache Line size datastructure. + // Cpu frequency data initialization. + cpu_freq_test_ = false; // Flag to trigger cpu frequency thread. + cpu_freq_threshold_ = 0; // Threshold, in MHz, at which a cpu fails. + cpu_freq_round_ = 10; // Round the computed frequency to this value. + sat_assert(0 == pthread_mutex_init(&worker_lock_, NULL)); file_threads_ = 0; net_threads_ = 0; @@ -774,6 +801,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of megabyte to use. ARG_IVALUE("-M", size_mb_); + // Specify the amount of megabytes to be reserved for system. + ARG_IVALUE("--reserve_memory", reserve_mb_); + // Set minimum megabytes of hugepages to require. 
ARG_IVALUE("-H", min_hugepages_mbytes_); @@ -795,8 +825,21 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set number of cache line size datastructures ARG_IVALUE("--cc_line_count", cc_cacheline_count_); + // Override the detected or assumed cache line size. + ARG_IVALUE("--cc_line_size", cc_cacheline_size_); + // Flag set when cache coherency tests need to be run - ARG_KVALUE("--cc_test", cc_test_, 1); + ARG_KVALUE("--cc_test", cc_test_, true); + + // Set when the cpu_frequency test needs to be run + ARG_KVALUE("--cpu_freq_test", cpu_freq_test_, true); + + // Set the threshold in MHz at which the cpu frequency test will fail. + ARG_IVALUE("--cpu_freq_threshold", cpu_freq_threshold_); + + // Set the rounding value for the cpu frequency test. The default is to + // round to the nearest 10s value. + ARG_IVALUE("--cpu_freq_round", cpu_freq_round_); // Set number of CPU stress threads. ARG_IVALUE("-C", cpu_stress_threads_); @@ -807,6 +850,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Verbosity level. ARG_IVALUE("-v", verbosity_); + // Turn off timestamps logging. + ARG_KVALUE("--no_timestamps", log_timestamps_, false); + // Set maximum number of errors to collect. Stop running after this many. ARG_IVALUE("--max_errors", max_errorcount_); @@ -1004,7 +1050,7 @@ bool Sat::ParseArgs(int argc, char **argv) { for (uint i = 0; i < channels_.size(); i++) if (channels_[i].size() != channels_[0].size()) { logprintf(6, "Process Error: " - "Channels 0 and %d have a different count of dram modules.\n",i); + "Channels 0 and %d have a different count of dram modules.\n", i); bad_status(); return false; } @@ -1043,6 +1089,8 @@ bool Sat::ParseArgs(int argc, char **argv) { void Sat::PrintHelp() { printf("Usage: ./sat(32|64) [options]\n" " -M mbytes megabytes of ram to test\n" + " --reserve-memory If not using hugepages, the amount of memory to " + " reserve for the system\n" " -H mbytes minimum megabytes of hugepages to require\n" " -s seconds number of seconds to run\n" " -m threads number of memory copy threads to run\n" @@ -1054,6 +1102,7 @@ void Sat::PrintHelp() { " -f filename add a disk thread with " "tempfile 'filename'\n" " -l logfile log output to file 'logfile'\n" + " --no_timestamps do not prefix timestamps to log messages\n" " --max_errors n exit early after finding 'n' errors\n" " -v level verbosity (0-20), default is 8\n" " -W Use more CPU-stressful memory copy\n" @@ -1091,6 +1140,13 @@ void Sat::PrintHelp() { "cacheline's member\n" " --cc_line_count number of cache line sized datastructures " "to allocate for the cache coherency threads to operate\n" + " --cc_line_size override the auto-detected cache line size\n" + " --cpu_freq_test enable the cpu frequency test (requires the " + "--cpu_freq_threshold argument to be set)\n" + " --cpu_freq_threshold fail the cpu frequency test if the frequency " + "goes below this value (specified in MHz)\n" + " --cpu_freq_round round the computed frequency to this value, if set" + " to zero, only round to the nearest MHz\n" " --paddr_base allocate memory starting from this address\n" " --pause_delay delay (in seconds) between power spikes\n" " --pause_duration duration (in seconds) of each pause\n" @@ -1098,12 +1154,12 @@ void Sat::PrintHelp() { "each CPU to be tested by that CPU\n" " --remote_numa choose memory regions not associated with " "each CPU to be tested by that CPU\n" - " --channel_hash mask of address bits XORed to determine channel.\n" - " Mask 0x40 interleaves cachelines between channels\n" + " --channel_hash mask of address bits XORed to 
determine channel. " + "Mask 0x40 interleaves cachelines between channels\n" " --channel_width bits width in bits of each memory channel\n" - " --memory_channel u1,u2 defines a comma-separated list of names\n" - " for dram packages in a memory channel.\n" - " Use multiple times to define multiple channels.\n"); + " --memory_channel u1,u2 defines a comma-separated list of names " + "for dram packages in a memory channel. Use multiple times to " + "define multiple channels.\n"); } bool Sat::CheckGoogleSpecificArgs(int argc, char **argv, int *i) { @@ -1348,32 +1404,45 @@ void Sat::InitializeThreads() { sizeof(cc_cacheline_data) * cc_cacheline_count_); int num_cpus = CpuCount(); + char *num; + // Calculate the number of cache lines needed just to give each core + // its own counter. + int line_size = cc_cacheline_size_; + if (line_size <= 0) { + line_size = CacheLineSize(); + if (line_size < kCacheLineSize) + line_size = kCacheLineSize; + logprintf(12, "Log: Using %d as cache line size\n", line_size); + } + // The number of cache lines needed to hold an array of num_cpus. + // "num" must be the same type as cc_cacheline_data[X].num or the memory + // size calculations will fail. + int needed_lines = (sizeof(*num) * num_cpus + line_size - 1) / line_size; // Allocate all the nums once so that we get a single chunk // of contiguous memory. - int *num; #ifdef HAVE_POSIX_MEMALIGN int err_result = posix_memalign( reinterpret_cast(&num), - kCacheLineSize, sizeof(*num) * num_cpus * cc_cacheline_count_); + line_size, line_size * needed_lines * cc_cacheline_count_); #else - num = reinterpret_cast(memalign(kCacheLineSize, - sizeof(*num) * num_cpus * cc_cacheline_count_)); + num = reinterpret_cast(memalign( + line_size, line_size * needed_lines * cc_cacheline_count_)); int err_result = (num == 0); #endif sat_assert(err_result == 0); int cline; for (cline = 0; cline < cc_cacheline_count_; cline++) { - memset(num, 0, sizeof(num_cpus) * num_cpus); + memset(num, 0, sizeof(*num) * num_cpus); cc_cacheline_data_[cline].num = num; - num += num_cpus; + num += (line_size * needed_lines) / sizeof(*num); } int tnum; for (tnum = 0; tnum < num_cpus; tnum++) { CpuCacheCoherencyThread *thread = new CpuCacheCoherencyThread(cc_cacheline_data_, cc_cacheline_count_, - tnum, cc_inc_count_); + tnum, num_cpus, cc_inc_count_); thread->InitThread(total_threads_++, this, os_, patternlist_, &continuous_status_); // Pin the thread to a particular core. @@ -1384,6 +1453,22 @@ void Sat::InitializeThreads() { } workers_map_.insert(make_pair(kCCType, cc_vector)); } + + if (cpu_freq_test_) { + // Create the frequency test thread. + logprintf(5, "Log: Running cpu frequency test: threshold set to %dMHz.\n", + cpu_freq_threshold_); + CpuFreqThread *thread = new CpuFreqThread(CpuCount(), cpu_freq_threshold_, + cpu_freq_round_); + // This thread should be paused when other threads are paused. + thread->InitThread(total_threads_++, this, os_, NULL, + &power_spike_status_); + + WorkerVector *cpu_freq_vector = new WorkerVector(); + cpu_freq_vector->insert(cpu_freq_vector->end(), thread); + workers_map_.insert(make_pair(kCPUFreqType, cpu_freq_vector)); + } + ReleaseWorkerLock(); } @@ -1392,6 +1477,19 @@ int Sat::CpuCount() { return sysconf(_SC_NPROCESSORS_CONF); } +// Return the worst case (largest) cache line size of the various levels of +// cache actually prsent in the machine. 
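// Illustrative note on the cache-line sizing in InitializeThreads() above:
// "num" is now a char array, so each core's counter needs sizeof(*num) == 1
// byte, and needed_lines rounds that up to whole cache lines. For example,
// with 80 cores and a 64-byte line,
//   needed_lines = (1 * 80 + 64 - 1) / 64 = 2,
// so each of the cc_cacheline_count_ structures gets line_size * needed_lines
// = 128 bytes, and successive structures start 128 bytes apart
// (num += (line_size * needed_lines) / sizeof(*num)).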
+int Sat::CacheLineSize() { + int max_linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + int linesize = sysconf(_SC_LEVEL2_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + linesize = sysconf(_SC_LEVEL3_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + linesize = sysconf(_SC_LEVEL4_CACHE_LINESIZE); + if (linesize > max_linesize) max_linesize = linesize; + return max_linesize; +} + // Notify and reap worker threads. void Sat::JoinThreads() { logprintf(12, "Log: Joining worker threads\n"); @@ -1974,3 +2072,9 @@ void logprintf(int priority, const char *format, ...) { Logger::GlobalLogger()->VLogF(priority, format, args); va_end(args); } + +// Stop the logging thread and verify any pending data is written to the log. +void logstop() { + Logger::GlobalLogger()->StopThread(); +} + diff --git a/src/sat.h b/src/sat.h index 93d6b34..92396d8 100644 --- a/src/sat.h +++ b/src/sat.h @@ -134,6 +134,8 @@ class Sat { // Return the number of cpus in the system. int CpuCount(); + // Return the worst-case (largest) cache line size of the system. + int CacheLineSize(); // Collect error counts from threads. int64 GetTotalErrorCount(); @@ -147,13 +149,15 @@ class Sat { int64 pages_; // Number of memory blocks. int64 size_; // Size of memory tested, in bytes. int64 size_mb_; // Size of memory tested, in MB. + int64 reserve_mb_; // Reserve at least this amount of memory + // for the system, in MB. int64 min_hugepages_mbytes_; // Minimum hugepages size. int64 freepages_; // How many invalid pages we need. int disk_pages_; // Number of pages per temp file. uint64 paddr_base_; // Physical address base. - vector< vector > channels_; // Memory module names per channel. uint64 channel_hash_; // Mask of address bits XORed for channel. int channel_width_; // Channel width in bits. + vector< vector > channels_; // Memory module names per channel. // Control flags. volatile sig_atomic_t user_break_; // User has signalled early exit. Used as @@ -172,6 +176,7 @@ class Sat { int use_logfile_; // Log to a file. char logfilename_[255]; // Name of file to log to. int logfile_; // File handle to log to. + bool log_timestamps_; // Whether to add timestamps to log lines. // Disk thread options. int read_block_size_; // Size of block to read from disk. @@ -202,9 +207,18 @@ class Sat { bool cc_test_; // Flag to decide whether to start the // cache coherency threads. int cc_cacheline_count_; // Number of cache line size structures. + int cc_cacheline_size_; // Size of a cache line. int cc_inc_count_; // Number of times to increment the shared // cache lines structure members. + // Cpu Frequency Options. + bool cpu_freq_test_; // Flag to decide whether to start the + // cpu frequency thread. + int cpu_freq_threshold_; // The MHz threshold which will cause + // the test to fail. + int cpu_freq_round_; // Round the computed frequency to this + // value. + // Thread control. int file_threads_; // Threads of file IO. int net_threads_; // Threads of network IO. @@ -252,7 +266,8 @@ class Sat { kRandomDiskType = 7, kCPUType = 8, kErrorType = 9, - kCCType = 10 + kCCType = 10, + kCPUFreqType = 11, }; // Helper functions. 
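// Illustrative note on the options added above: a run that exercises the new
// features might look something like the following (flag values are
// arbitrary examples, not recommendations):
//
//   ./stressapptest -s 600 -M 2048 --reserve_memory 512 \
//       --cpu_freq_test --cpu_freq_threshold 2000 --cpu_freq_round 10 \
//       --cc_test --cc_line_size 64 --no_timestamps
//
// As enforced in Sat::CheckEnvironment() above, --cpu_freq_test requires
// --cpu_freq_threshold to be greater than zero, and --cpu_freq_round 0
// disables rounding.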
diff --git a/src/sattypes.h b/src/sattypes.h index c9341d0..e51db31 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -27,11 +27,11 @@ #ifdef HAVE_CONFIG_H // Built using autoconf #ifdef __ANDROID__ -#include "stressapptest_config_android.h" +#include "stressapptest_config_android.h" // NOLINT #else -#include "stressapptest_config.h" -using namespace __gnu_cxx; -#endif +#include "stressapptest_config.h" // NOLINT +using namespace __gnu_cxx; //NOLINT +#endif // __ANDROID__ using namespace std; typedef signed long long int64; @@ -57,10 +57,10 @@ inline const char* BuildChangelist() { } static const bool kOpenSource = true; -#else +#else // !HAVE_CONFIG_H static const bool kOpenSource = false; - #include "googlesattypes.h" -#endif + #include "googlesattypes.h" // NOLINT +#endif // HAVE_CONFIG_H // Workaround to allow 32/64 bit conversion // without running into strict aliasing problems. union datacast_t { @@ -75,11 +75,15 @@ union datacast_t { // File sync'd print to console and log void logprintf(int priority, const char *format, ...); +// Stop the log and dump any queued lines. +void logstop(); + // We print to stderr ourselves first in case we're in such a bad state that the // logger can't work. #define sat_assert(x) \ {\ if (!(x)) {\ + logstop();\ fprintf(stderr, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ logprintf(0, "Assertion failed at %s:%d\n", __FILE__, __LINE__);\ exit(1);\ @@ -186,6 +190,46 @@ inline string ErrorString(int error_num) { #endif } +// Execute the cpuid instruction and pass back the contents of the registers. +// This only works on x86 based platforms. +inline void cpuid( + unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { + *ebx = 0; + *ecx = 0; + *edx = 0; + // CPUID features documented at: + // http://www.sandpile.org/ia32/cpuid.htm +#if defined(STRESSAPPTEST_CPU_I686) || defined(STRESSAPPTEST_CPU_X86_64) +#if defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) + // In PIC compilations using the i686 cpu type, ebx contains the address + // of the global offset table. The compiler can't properly handle constraints + // using the ebx register for this compile, so preserve the register + // ourselves. + asm( + "mov %%ebx, %%edi;" + "cpuid;" + "xchg %%edi, %%ebx;" + // Output registers. + : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) + // Input registers. + : "a" (*eax) + ); // Asm +#else + asm( + "cpuid;" + // Output registers. + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) + // Input registers. + : "a" (*eax) + ); // Asm +#endif // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) +#elif defined(STRESSAPPTEST_CPU_PPC) + return; +#else +#warning "Unsupported CPU type." +#endif +} + // Define handy constants here static const int kTicksPerSec = 100; static const int kMegabyte = (1024LL*1024LL); diff --git a/src/worker.cc b/src/worker.cc index d24b5cd..dcffd4e 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -78,31 +78,6 @@ _syscall3(int, sched_setaffinity, pid_t, pid, #endif namespace { - // Get HW core ID from cpuid instruction. - inline int apicid(void) { - int cpu; -#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) - __asm__ __volatile__ ( -# if defined(STRESSAPPTEST_CPU_I686) && defined(__PIC__) - "xchg %%ebx, %%esi;" - "cpuid;" - "xchg %%esi, %%ebx;" - : "=S" (cpu) -# else - "cpuid;" - : "=b" (cpu) -# endif - : "a" (1) : "cx", "dx"); -#elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: unable to determine core ID." 
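// Illustrative sketch (not part of the patch) of the two replacements made
// for the removed inline assembly: feature probing now goes through the
// cpuid() helper added to sattypes.h above, and the core ID comes from
// sched_getcpu() instead of the removed apicid(). Assumes _GNU_SOURCE for
// sched_getcpu().
#include <sched.h>
#include "sattypes.h"

void ProbeCpu() {
  unsigned int eax = 1, ebx = 0, ecx = 0, edx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
  bool has_clflush = (edx >> 19) & 1;   // Same bits GetFeatures() checks.
  bool has_sse2 = (edx >> 26) & 1;
  int core = sched_getcpu();            // Replaces the old apicid() helper.
  logprintf(9, "Log: core %d, clflush %d, sse2 %d\n",
            core, has_clflush, has_sse2);
}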
- cpu = 0; -#else - #warning "Unsupported CPU type: unable to determine core ID." - cpu = 0; -#endif - return (cpu >> 24); - } - // Work around the sad fact that there are two (gnu, xsi) incompatible // versions of strerror_r floating around google. Awesome. bool sat_strerror(int err, char *buf, int len) { @@ -124,7 +99,7 @@ namespace { inline uint64 addr_to_tag(void *address) { return reinterpret_cast(address); } -} +} // namespace #if !defined(O_DIRECT) // Sometimes this isn't available. @@ -183,10 +158,13 @@ void WorkerStatus::StopWorkers() { WaitOnPauseBarrier(); } -bool WorkerStatus::ContinueRunning() { +bool WorkerStatus::ContinueRunning(bool *paused) { // This loop is an optimization. We use it to immediately re-check the status // after resuming from a pause, instead of returning and waiting for the next // call to this function. + if (paused) { + *paused = false; + } for (;;) { switch (GetStatus()) { case RUN: @@ -197,6 +175,10 @@ bool WorkerStatus::ContinueRunning() { WaitOnPauseBarrier(); // Wait for ResumeWorkers() to be called. WaitOnPauseBarrier(); + // Indicate that a pause occurred. + if (paused) { + *paused = true; + } break; case STOP: return false; @@ -325,8 +307,8 @@ bool WorkerThread::InitPriority() { logprintf(11, "Log: Bind to %s failed.\n", cpuset_format(&cpu_mask_).c_str()); - logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n", - thread_num_, apicid(), + logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n", + thread_num_, sched_getcpu(), CurrentCpusFormat().c_str(), cpuset_format(&cpu_mask_).c_str()); #if 0 @@ -590,7 +572,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, const char *message) { char dimm_string[256] = ""; - int apic_id = apicid(); + int core_id = sched_getcpu(); // Determine if this is a write or read error. os_->Flush(error->vaddr); @@ -625,7 +607,7 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", message, - apic_id, + core_id, CurrentCpusFormat().c_str(), error->vaddr, error->paddr, @@ -825,6 +807,9 @@ int WorkerThread::CheckRegion(void *addr, if ((state == kGoodAgain) || (state == kBad)) { unsigned int blockerrors = badend - badstart + 1; errormessage = "Block Error"; + // It's okay for the 1st entry to be corrected multiple times, + // it will simply be reported twice. Once here and once below + // when processing the error queue. ProcessError(&recorded[0], 0, errormessage.c_str()); logprintf(0, "Block Error: (%p) pattern %s instead of %s, " "%d bytes from offset 0x%x to 0x%x\n", @@ -833,8 +818,6 @@ int WorkerThread::CheckRegion(void *addr, blockerrors * wordsize_, offset + badstart * wordsize_, offset + badend * wordsize_); - errorcount_ += blockerrors; - return blockerrors; } } } @@ -850,7 +833,6 @@ int WorkerThread::CheckRegion(void *addr, if (page_error) { // For each word in the data region. - int error_recount = 0; for (int i = 0; i < length / wordsize_; i++) { uint64 actual = memblock[i]; uint64 expected; @@ -869,21 +851,16 @@ int WorkerThread::CheckRegion(void *addr, // If the value is incorrect, save an error record for later printing. if (actual != expected) { - if (error_recount < kErrorLimit) { - // We already reported these. - error_recount++; - } else { - // If we have overflowed the error queue, print the errors now. - struct ErrorRecord er; - er.actual = actual; - er.expected = expected; - er.vaddr = &memblock[i]; - - // Do the error printout. 
This will take a long time and - // likely change the machine state. - ProcessError(&er, 12, errormessage.c_str()); - overflowerrors++; - } + // If we have overflowed the error queue, print the errors now. + struct ErrorRecord er; + er.actual = actual; + er.expected = expected; + er.vaddr = &memblock[i]; + + // Do the error printout. This will take a long time and + // likely change the machine state. + ProcessError(&er, 12, errormessage.c_str()); + overflowerrors++; } } } @@ -958,7 +935,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, char tag_dimm_string[256] = ""; bool read_error = false; - int apic_id = apicid(); + int core_id = sched_getcpu(); // Determine if this is a write or read error. os_->Flush(error->vaddr); @@ -992,7 +969,7 @@ void WorkerThread::ProcessTagError(struct ErrorRecord *error, error->tagvaddr, error->tagpaddr, tag_dimm_string, read_error ? "read error" : "write error", - apic_id, + core_id, CurrentCpusFormat().c_str(), error->vaddr, error->paddr, @@ -1110,12 +1087,18 @@ bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64, AdlerChecksum ignored_checksum; os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum); - // Force cache flush. - int length = size_in_bytes / sizeof(*dstmem64); - for (int i = 0; i < length; i += sizeof(*dstmem64)) { - os_->FastFlush(dstmem64 + i); - os_->FastFlush(srcmem64 + i); + // Force cache flush of both the source and destination addresses. + // length - length of block to flush in cachelines. + // mem_increment - number of dstmem/srcmem values per cacheline. + int length = size_in_bytes / kCacheLineSize; + int mem_increment = kCacheLineSize / sizeof(*dstmem64); + OsLayer::FastFlushSync(); + for (int i = 0; i < length; ++i) { + OsLayer::FastFlushHint(dstmem64 + (i * mem_increment)); + OsLayer::FastFlushHint(srcmem64 + (i * mem_increment)); } + OsLayer::FastFlushSync(); + // Check results. AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe); // Patch up address tags. @@ -1246,11 +1229,11 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - int apic_id = apicid(); + int core_id = sched_getcpu(); logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, CurrentCpusFormat().c_str(), + core_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; @@ -1390,11 +1373,11 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - int apic_id = apicid(); + int core_id = sched_getcpu(); logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage " "CRC mismatch %s != %s, " "but no miscompares found on second pass.\n", - apic_id, CurrentCpusFormat().c_str(), + core_id, CurrentCpusFormat().c_str(), crc.ToHexString().c_str(), expectedcrc->ToHexString().c_str()); struct ErrorRecord er; @@ -1610,12 +1593,11 @@ void FileThread::SetFile(const char *filename_init) { // Open the file for access. 
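// Illustrative sketch (not part of the patch) of the flush pattern
// AdlerAddrMemcpyWarm() switches to above: one FastFlushHint() per cacheline,
// bracketed by FastFlushSync() fences. Assumes the OsLayer flush helpers from
// this patch and the project's kCacheLineSize constant.
static void FlushBufferByCacheline(uint64 *mem, int size_in_bytes) {
  int lines = size_in_bytes / kCacheLineSize;
  int words_per_line = kCacheLineSize / sizeof(*mem);
  OsLayer::FastFlushSync();                      // Order the preceding stores.
  for (int i = 0; i < lines; ++i)
    OsLayer::FastFlushHint(mem + i * words_per_line);
  OsLayer::FastFlushSync();                      // Make the flushes visible.
}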
bool FileThread::OpenFile(int *pfile) { - bool no_O_DIRECT = false; int flags = O_RDWR | O_CREAT | O_SYNC; int fd = open(filename_.c_str(), flags | O_DIRECT, 0644); if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { - no_O_DIRECT = true; - fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT + os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL } if (fd < 0) { logprintf(0, "Process Error: Failed to create file %s!!\n", @@ -1623,8 +1605,6 @@ bool FileThread::OpenFile(int *pfile) { pages_copied_ = 0; return false; } - if (no_O_DIRECT) - os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL *pfile = fd; return true; } @@ -1695,7 +1675,7 @@ bool FileThread::WritePages(int fd) { if (!result) return false; } - return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. + return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. } // Copy data from file into memory block. @@ -2475,13 +2455,22 @@ bool CpuStressThread::Work() { CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, int cacheline_count, int thread_num, + int thread_count, int inc_count) { cc_cacheline_data_ = data; cc_cacheline_count_ = cacheline_count; cc_thread_num_ = thread_num; + cc_thread_count_ = thread_count; cc_inc_count_ = inc_count; } +// A very simple psuedorandom generator. Since the random number is based +// on only a few simple logic operations, it can be done quickly in registers +// and the compiler can inline it. +uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) { + return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial); +} + // Worked thread to test the cache coherency of the CPUs // Return false on fatal sw error. bool CpuCacheCoherencyThread::Work() { @@ -2490,7 +2479,19 @@ bool CpuCacheCoherencyThread::Work() { uint64 time_start, time_end; struct timeval tv; + // Use a slightly more robust random number for the initial + // value, so the random sequences from the simple generator will + // be more divergent. +#ifdef HAVE_RAND_R unsigned int seed = static_cast(gettid()); + uint64 r = static_cast(rand_r(&seed)); + r |= static_cast(rand_r(&seed)) << 32; +#else + srand(time(NULL)); + uint64 r = static_cast(rand()); // NOLINT + r |= static_cast(rand()) << 32; // NOLINT +#endif + gettimeofday(&tv, NULL); // Get the timestamp before increments. time_start = tv.tv_sec * 1000000ULL + tv.tv_usec; @@ -2500,14 +2501,19 @@ bool CpuCacheCoherencyThread::Work() { // Choose a datastructure in random and increment the appropriate // member in that according to the offset (which is the same as the // thread number. -#ifdef HAVE_RAND_R - int r = rand_r(&seed); -#else - int r = rand(); -#endif - r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0)); + r = SimpleRandom(r); + int cline_num = r % cc_cacheline_count_; + int offset; + // Reverse the order for odd numbered threads in odd numbered cache + // lines. This is designed for massively multi-core systems where the + // number of cores exceeds the bytes in a cache line, so "distant" cores + // get a chance to exercize cache coherency between them. + if (cline_num & cc_thread_num_ & 1) + offset = (cc_thread_count_ & ~1) - cc_thread_num_; + else + offset = cc_thread_num_; // Increment the member of the randomely selected structure. 
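// Illustrative notes on the cache-coherency changes above (not part of the
// patch). SimpleRandom() is a Galois LFSR step: shift the state right and
// XOR in the feedback polynomial whenever the bit shifted out was set.
// kRandomPolynomial itself is defined outside this hunk (in worker.h); a
// standalone equivalent using one commonly cited maximal-length 64-bit
// polynomial (an assumed value, not the project's constant) would be:
//
//   inline uint64 GaloisLfsrStep(uint64 seed) {
//     const uint64 kAssumedPoly = 0xD800000000000000ULL;  // Taps 64,63,61,60.
//     return (seed >> 1) ^ (-(seed & 1) & kAssumedPoly);
//   }
//
// The offset reversal just above mixes counters from "distant" cores into
// the same cache line: with cc_thread_count_ = 8, thread 2 (even) always
// uses offset 2, while thread 3 uses offset 3 on even-numbered cache lines
// and offset (8 & ~1) - 3 = 5 on odd-numbered ones.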
- (cc_cacheline_data_[r].num[cc_thread_num_])++; + (cc_cacheline_data_[cline_num].num[offset])++; } total_inc += cc_inc_count_; @@ -2516,14 +2522,26 @@ bool CpuCacheCoherencyThread::Work() { // in all the cache line structures for this particular thread. int cc_global_num = 0; for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) { - cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_]; + int offset; + // Perform the same offset calculation from above. + if (cline_num & cc_thread_num_ & 1) + offset = (cc_thread_count_ & ~1) - cc_thread_num_; + else + offset = cc_thread_num_; + cc_global_num += cc_cacheline_data_[cline_num].num[offset]; // Reset the cachline member's value for the next run. - cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0; + cc_cacheline_data_[cline_num].num[offset] = 0; } if (sat_->error_injection()) cc_global_num = -1; - if (cc_global_num != cc_inc_count_) { + // Since the count is only stored in a byte, to squeeze more into a + // single cache line, only compare it as a byte. In the event that there + // is something detected, the chance that it would be missed by a single + // thread is 1 in 256. If it affects all cores, that makes the chance + // of it being missed terribly minute. It seems unlikely any failure + // case would be off by more than a small number. + if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) { errorcount_++; logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n", cc_global_num, cc_inc_count_); @@ -2707,20 +2725,17 @@ bool DiskThread::SetParameters(int read_block_size, // Open a device, return false on failure. bool DiskThread::OpenDevice(int *pfile) { - bool no_O_DIRECT = false; int flags = O_RDWR | O_SYNC | O_LARGEFILE; int fd = open(device_name_.c_str(), flags | O_DIRECT, 0); if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { - no_O_DIRECT = true; - fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT + os_->ActivateFlushPageCache(); } if (fd < 0) { logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n", device_name_.c_str(), thread_num_); return false; } - if (no_O_DIRECT) - os_->ActivateFlushPageCache(); *pfile = fd; return GetDiskSize(fd); @@ -2876,11 +2891,11 @@ bool DiskThread::DoWork(int fd) { // Block is either initialized by writing, or in nondestructive case, // initialized by being added into the datastructure for later reading. - block->SetBlockAsInitialized(); + block->initialized(); in_flight_sectors_.push(block); } - if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. + if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. return false; // Verify blocks on disk. @@ -2989,8 +3004,9 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, errorcount_++; os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); - if (event.res < 0) { - switch (event.res) { + int64 result = static_cast(event.res); + if (result < 0) { + switch (result) { case -EIO: logprintf(0, "Hardware Error: Low-level I/O error while doing %s to " "sectors starting at %lld on disk %s (thread %d).\n", @@ -3013,7 +3029,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, } return true; -#else // !HAVE_LIBAIO_H +#else // !HAVE_LIBAIO_H return false; #endif } @@ -3021,7 +3037,7 @@ bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, // Write a block to disk. // Return false if the block is not written. 
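// Both FileThread::OpenFile() and DiskThread::OpenDevice() above follow the
// same pattern: try O_DIRECT first and, if the filesystem rejects it with
// EINVAL, reopen buffered and switch on explicit page-cache flushing via
// ActivateFlushPageCache(). A standalone sketch of that open path; the
// used_direct flag is a stand-in for the caller's decision to activate
// page-cache flushes, and the sketch assumes a Linux/glibc toolchain where
// O_DIRECT is defined.
#include <cerrno>
#include <cstdio>
#include <fcntl.h>

int OpenMaybeDirect(const char* path, bool* used_direct) {
  int flags = O_RDWR | O_CREAT | O_SYNC;
  int fd = open(path, flags | O_DIRECT, 0644);
  *used_direct = true;
  if (fd < 0 && errno == EINVAL) {   // EINVAL: O_DIRECT unsupported here
    fd = open(path, flags, 0644);    // retry without O_DIRECT
    *used_direct = false;            // caller should flush the page cache
  }
  if (fd < 0) perror("open");
  return fd;
}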
bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { - memset(block_buffer_, 0, block->GetSize()); + memset(block_buffer_, 0, block->size()); // Fill block buffer with a pattern struct page_entry pe; @@ -3029,30 +3045,30 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { // Even though a valid page could not be obatined, it is not an error // since we can always fill in a pattern directly, albeit slower. unsigned int *memblock = static_cast(block_buffer_); - block->SetPattern(patternlist_->GetRandomPattern()); + block->set_pattern(patternlist_->GetRandomPattern()); logprintf(11, "Log: Warning, using pattern fill fallback in " "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n", device_name_.c_str(), thread_num_); - for (int i = 0; i < block->GetSize()/wordsize_; i++) { - memblock[i] = block->GetPattern()->pattern(i); + for (unsigned int i = 0; i < block->size()/wordsize_; i++) { + memblock[i] = block->pattern()->pattern(i); } } else { - memcpy(block_buffer_, pe.addr, block->GetSize()); - block->SetPattern(pe.pattern); + memcpy(block_buffer_, pe.addr, block->size()); + block->set_pattern(pe.pattern); sat_->PutValid(&pe); } logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s" " (thread %d).\n", - block->GetSize()/kSectorSize, block->GetAddress(), + block->size()/kSectorSize, block->address(), device_name_.c_str(), thread_num_); int64 start_time = GetTime(); - if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(), - block->GetAddress() * kSectorSize, write_timeout_)) { + if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(), + block->address() * kSectorSize, write_timeout_)) { return false; } @@ -3073,11 +3089,11 @@ bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { // Return true if the block was read, also increment errorcount // if the block had data errors or performance problems. bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { - int64 blocks = block->GetSize() / read_block_size_; + int64 blocks = block->size() / read_block_size_; int64 bytes_read = 0; int64 current_blocks; int64 current_bytes; - uint64 address = block->GetAddress(); + uint64 address = block->address(); logprintf(20, "Log: Reading sectors starting at %lld on disk %s " "(thread %d).\n", @@ -3129,7 +3145,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { // In non-destructive mode, don't compare the block to the pattern since // the block was never written to disk in the first place. if (!non_destructive_) { - if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes, + if (CheckRegion(block_buffer_, block->pattern(), current_bytes, 0, bytes_read)) { os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1); errorcount_ += 1; @@ -3166,7 +3182,7 @@ bool DiskThread::Work() { // when using direct IO. #ifdef HAVE_POSIX_MEMALIGN int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment, - sat_->page_length()); + sat_->page_length()); #else block_buffer_ = memalign(kBufferAlignment, sat_->page_length()); int memalign_result = (block_buffer_ == 0); @@ -3410,3 +3426,224 @@ bool MemoryRegionThread::Work() { "pages checked\n", thread_num_, status_, pages_copied_); return result; } + +// The list of MSRs to read from each cpu. 
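// GetMsrs() below fetches each register in kCpuRegisters through the OS
// layer's ReadMSR(), whose implementation is not part of this excerpt. On
// Linux such a helper is commonly backed by the msr driver: a pread() of
// eight bytes from /dev/cpu/<n>/msr at an offset equal to the MSR address
// (0x10 TSC, 0xE8 APERF, 0xE7 MPERF here). A sketch of that approach, assuming
// the msr module is loaded and the process has the required privileges:
#include <cstdint>
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

bool ReadMsr(int cpu, uint32_t msr_addr, uint64_t* value) {
  char path[64];
  snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
  int fd = open(path, O_RDONLY);
  if (fd < 0) return false;
  // The MSR address doubles as the file offset; each register is 8 bytes.
  ssize_t got = pread(fd, value, sizeof(*value), msr_addr);
  close(fd);
  return got == static_cast<ssize_t>(sizeof(*value));
}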
+const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = { + { kMsrTscAddr, "TSC" }, + { kMsrAperfAddr, "APERF" }, + { kMsrMperfAddr, "MPERF" }, +}; + +CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round) + : num_cpus_(num_cpus), + freq_threshold_(freq_threshold), + round_(round) { + sat_assert(round >= 0); + if (round == 0) { + // If rounding is off, force rounding to the nearest MHz. + round_ = 1; + round_value_ = 0.5; + } else { + round_value_ = round/2.0; + } +} + +CpuFreqThread::~CpuFreqThread() { +} + +// Compute the difference between the currently read MSR values and the +// previously read values and store the results in delta. If any of the +// values did not increase, or the TSC value is too small, returns false. +// Otherwise, returns true. +bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous, + CpuDataType *delta) { + // Loop through the msrs. + for (int msr = 0; msr < kMsrLast; msr++) { + if (previous->msrs[msr] > current->msrs[msr]) { + logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx " + "skipping interval\n", kCpuRegisters[msr], previous->msrs[msr], + current->msrs[msr]); + return false; + } else { + delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr]; + } + } + + // Check for TSC < 1 Mcycles over interval. + if (delta->msrs[kMsrTsc] < (1000 * 1000)) { + logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n"); + return false; + } + timersub(¤t->tv, &previous->tv, &delta->tv); + + return true; +} + +// Compute the change in values of the MSRs between current and previous, +// set the frequency in MHz of the cpu. If there is an error computing +// the delta, return false. Othewise, return true. +bool CpuFreqThread::ComputeFrequency(CpuDataType *current, + CpuDataType *previous, int *freq) { + CpuDataType delta; + if (!ComputeDelta(current, previous, &delta)) { + return false; + } + + double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0; + double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000 + * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval; + + // Use the rounding value to round up properly. + int computed = static_cast(frequency + round_value_); + *freq = computed - (computed % round_); + return true; +} + +// This is the task function that the thread executes. +bool CpuFreqThread::Work() { + cpu_set_t cpuset; + if (!AvailableCpus(&cpuset)) { + logprintf(0, "Process Error: Cannot get information about the cpus.\n"); + return false; + } + + // Start off indicating the test is passing. + status_ = true; + + int curr = 0; + int prev = 1; + uint32 num_intervals = 0; + bool paused = false; + bool valid; + bool pass = true; + + vector data[2]; + data[0].resize(num_cpus_); + data[1].resize(num_cpus_); + while (IsReadyToRun(&paused)) { + if (paused) { + // Reset the intervals and restart logic after the pause. + num_intervals = 0; + } + if (num_intervals == 0) { + // If this is the first interval, then always wait a bit before + // starting to collect data. + sat_sleep(kStartupDelay); + } + + // Get the per cpu counters. + valid = true; + for (int cpu = 0; cpu < num_cpus_; cpu++) { + if (CPU_ISSET(cpu, &cpuset)) { + if (!GetMsrs(cpu, &data[curr][cpu])) { + logprintf(0, "Failed to get msrs on cpu %d.\n", cpu); + valid = false; + break; + } + } + } + if (!valid) { + // Reset the number of collected intervals since something bad happened. 
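// ComputeFrequency() above reduces to: the TSC delta divided by 1e6 and by the
// wall-clock interval gives the invariant base clock in MHz, and the
// APERF/MPERF ratio scales that to the frequency the core actually ran at;
// the result is then rounded to a multiple of round_. A standalone sketch of
// the same arithmetic with made-up example numbers:
#include <cstdint>
#include <cstdio>

int EffectiveMhz(uint64_t tsc_delta, uint64_t aperf_delta,
                 uint64_t mperf_delta, double seconds, int round_to) {
  double mhz = 1.0 * tsc_delta / 1000000.0
               * aperf_delta / mperf_delta / seconds;
  int computed = static_cast<int>(mhz + round_to / 2.0);  // round to nearest
  return computed - (computed % round_to);
}

int main() {
  // 2.0e9 TSC ticks over 1 s with the core at 80% of its reference clock:
  // 2000 MHz * 0.8 = 1600 MHz.
  printf("%d MHz\n",
         EffectiveMhz(2000000000ULL, 800000000ULL, 1000000000ULL, 1.0, 10));
  return 0;
}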
+ num_intervals = 0; + continue; + } + + num_intervals++; + + // Only compute a delta when we have at least two intervals worth of data. + if (num_intervals > 2) { + for (int cpu = 0; cpu < num_cpus_; cpu++) { + if (CPU_ISSET(cpu, &cpuset)) { + int freq; + if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu], + &freq)) { + // Reset the number of collected intervals since an unknown + // error occurred. + logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu); + num_intervals = 0; + break; + } + logprintf(15, "Cpu %d Freq %d\n", cpu, freq); + if (freq < freq_threshold_) { + errorcount_++; + pass = false; + logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz " + "threshold %d MHz.\n", cpu, freq, freq_threshold_); + } + } + } + } + + sat_sleep(kIntervalPause); + + // Swap the values in curr and prev (these values flip between 0 and 1). + curr ^= 1; + prev ^= 1; + } + + return pass; +} + + +// Get the MSR values for this particular cpu and save them in data. If +// any error is encountered, returns false. Otherwise, returns true. +bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) { + for (int msr = 0; msr < kMsrLast; msr++) { + if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) { + return false; + } + } + // Save the time at which we acquired these values. + gettimeofday(&data->tv, NULL); + + return true; +} + +// Returns true if this test can run on the current machine. Otherwise, +// returns false. +bool CpuFreqThread::CanRun() { +#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) + unsigned int eax, ebx, ecx, edx; + + // Check that the TSC feature is supported. + // This check is valid for both Intel and AMD. + eax = 1; + cpuid(&eax, &ebx, &ecx, &edx); + if (!(edx & (1 << 5))) { + logprintf(0, "Process Error: No TSC support.\n"); + return false; + } + + // Check the highest extended function level supported. + // This check is valid for both Intel and AMD. + eax = 0x80000000; + cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x80000007) { + logprintf(0, "Process Error: No invariant TSC support.\n"); + return false; + } + + // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + // This check is valid for both Intel and AMD. + eax = 0x80000007; + cpuid(&eax, &ebx, &ecx, &edx); + if ((edx & (1 << 8)) == 0) { + logprintf(0, "Process Error: No non-stop TSC support.\n"); + return false; + } + + // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 + // This check is valid for both Intel and AMD. + eax = 0x6; + cpuid(&eax, &ebx, &ecx, &edx); + if ((ecx & 1) == 0) { + logprintf(0, "Process Error: No APERF MSR support.\n"); + return false; + } + return true; +#else + logprintf(0, "Process Error: " + "cpu_freq_test is only supported on X86 processors.\n"); + return false; +#endif +} diff --git a/src/worker.h b/src/worker.h index 31e0225..6f9fde7 100644 --- a/src/worker.h +++ b/src/worker.h @@ -44,7 +44,7 @@ // Global Datastruture shared by the Cache Coherency Worker Threads. struct cc_cacheline_data { - int *num; + char *num; }; // Typical usage: @@ -127,10 +127,8 @@ class WorkerStatus { // ResumeWorkers() or StopWorkers() has been called. Number of distinct // calling threads must match the worker count (see AddWorkers() and // RemoveSelf()). - bool ContinueRunning(); + bool ContinueRunning(bool *paused); - // TODO(matthewb): Is this functionality really necessary? Remove it if not. - // // This is a hack! It's like ContinueRunning(), except it won't pause. 
If // any worker threads use this exclusively in place of ContinueRunning() then // PauseWorkers() should never be used! @@ -304,9 +302,10 @@ class WorkerThread { // do { // // work. // } while (IsReadyToRun()); - virtual bool IsReadyToRun() { return worker_status_->ContinueRunning(); } - // TODO(matthewb): Is this function really necessary? Remove it if not. - // + virtual bool IsReadyToRun(bool *paused = NULL) { + return worker_status_->ContinueRunning(paused); + } + // Like IsReadyToRun(), except it won't pause. virtual bool IsReadyToRunNoPause() { return worker_status_->ContinueRunningNoPause(); @@ -641,16 +640,27 @@ class CpuCacheCoherencyThread : public WorkerThread { CpuCacheCoherencyThread(cc_cacheline_data *cc_data, int cc_cacheline_count_, int cc_thread_num_, + int cc_thread_count_, int cc_inc_count_); virtual bool Work(); protected: + // Used by the simple random number generator as a shift feedback; + // this polynomial (x^64 + x^63 + x^61 + x^60 + 1) will produce a + // psuedorandom cycle of period 2^64-1. + static const uint64 kRandomPolynomial = 0xD800000000000000ULL; + // A very simple psuedorandom generator that can be inlined and use + // registers, to keep the CC test loop tight and focused. + static uint64 SimpleRandom(uint64 seed); + cc_cacheline_data *cc_cacheline_data_; // Datstructure for each cacheline. int cc_local_num_; // Local counter for each thread. int cc_cacheline_count_; // Number of cache lines to operate on. int cc_thread_num_; // The integer id of the thread which is // used as an index into the integer array // of the cacheline datastructure. + int cc_thread_count_; // Total number of threads being run, for + // calculations mixing up cache line access. int cc_inc_count_; // Number of times to increment the counter. private: @@ -809,4 +819,80 @@ class MemoryRegionThread : public WorkerThread { DISALLOW_COPY_AND_ASSIGN(MemoryRegionThread); }; +// Worker thread to check that the frequency of every cpu does not go below a +// certain threshold. +class CpuFreqThread : public WorkerThread { + public: + CpuFreqThread(int num_cpus, int freq_threshold, int round); + ~CpuFreqThread(); + + // This is the task function that the thread executes. + virtual bool Work(); + + // Returns true if this test can run on the current machine. Otherwise, + // returns false. + static bool CanRun(); + + private: + static const int kIntervalPause = 10; // The number of seconds to pause + // between acquiring the MSR data. + static const int kStartupDelay = 5; // The number of seconds to wait + // before acquiring MSR data. + static const int kMsrTscAddr = 0x10; // The address of the TSC MSR. + static const int kMsrAperfAddr = 0xE8; // The address of the APERF MSR. + static const int kMsrMperfAddr = 0xE7; // The address of the MPERF MSR. + + // The index values into the CpuDataType.msr[] array. + enum MsrValues { + kMsrTsc = 0, // MSR index 0 = TSC. + kMsrAperf = 1, // MSR index 1 = APERF. + kMsrMperf = 2, // MSR index 2 = MPERF. + kMsrLast, // Last MSR index. + }; + + typedef struct { + uint32 msr; // The address of the MSR. + const char *name; // A human readable string for the MSR. + } CpuRegisterType; + + typedef struct { + uint64 msrs[kMsrLast]; // The values of the MSRs. + struct timeval tv; // The time at which the MSRs were read. + } CpuDataType; + + // The set of MSR addresses and register names. + static const CpuRegisterType kCpuRegisters[kMsrLast]; + + // Compute the change in values of the MSRs between current and previous, + // set the frequency in MHz of the cpu. 
If there is an error computing + // the delta, return false. Othewise, return true. + bool ComputeFrequency(CpuDataType *current, CpuDataType *previous, + int *frequency); + + // Get the MSR values for this particular cpu and save them in data. If + // any error is encountered, returns false. Otherwise, returns true. + bool GetMsrs(int cpu, CpuDataType *data); + + // Compute the difference between the currently read MSR values and the + // previously read values and store the results in delta. If any of the + // values did not increase, or the TSC value is too small, returns false. + // Otherwise, returns true. + bool ComputeDelta(CpuDataType *current, CpuDataType *previous, + CpuDataType *delta); + + // The total number of cpus on the system. + int num_cpus_; + + // The minimum frequency that each cpu must operate at (in MHz). + int freq_threshold_; + + // The value to round the computed frequency to. + int round_; + + // Precomputed value to add to the frequency to do the rounding. + double round_value_; + + DISALLOW_COPY_AND_ASSIGN(CpuFreqThread); +}; + #endif // STRESSAPPTEST_WORKER_H_ -- cgit v1.2.3 From 3c1c63e2c8620aeb552aba19374c7af134bb63fd Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Tue, 11 Feb 2014 05:57:33 +0000 Subject: Add NEON checksum and some bugfixes * Add NEON copy and checksum for "-W" on ARM * Fix timer overflow for log runs under 32 bit. * Fix assert on checksum failure without miscompare. * Improve checksum error printout. --- src/adler32memcpy.cc | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/os.cc | 11 +++-- src/os.h | 8 ++-- src/sat.cc | 2 +- src/sattypes.h | 2 + src/worker.cc | 10 ++--- src/worker.h | 8 ++-- 7 files changed, 141 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc index 69324f7..47c6262 100644 --- a/src/adler32memcpy.cc +++ b/src/adler32memcpy.cc @@ -70,7 +70,7 @@ bool AdlerChecksum::Equals(const AdlerChecksum &other) const { // Returns string representation of the Adler checksum. string AdlerChecksum::ToHexString() const { char buffer[128]; - snprintf(buffer, sizeof(buffer), "%llx%llx%llx%llx", a1_, a2_, b1_, b2_); + snprintf(buffer, sizeof(buffer), "%016llx %016llx %016llx %016llx", a1_, a2_, b1_, b2_); return string(buffer); } @@ -395,11 +395,128 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, checksum_arr[2], checksum_arr[3]); } + // Everything went fine, so return true (this does not mean + // that there is no problem with memory this just mean that data was copied + // from src to dst and checksum was calculated successfully). + return true; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) && defined(__ARM_NEON__) + // Elements 0 to 3 are used for holding checksum terms a1, a2, + // b1, b2 respectively. These elements are filled by asm code. + // Checksum is seeded with the null checksum. + volatile uint64 checksum_arr[] __attribute__ ((aligned(16))) = + {1, 1, 0, 0}; + + if ((size_in_bytes >> 19) > 0) { + // Size is too large. Must be less than 2^19 bytes = 512 KB. + return false; + } + + // Since we are moving 64 bytes at a time number of iterations = total size/64 + uint32 blocks = size_in_bytes / 64; + + uint64 *dst = dstmem64; + uint64 *src = srcmem64; + + #define src_r "r3" + #define dst_r "r4" + #define blocks_r "r5" + #define crc_r "r6" + + asm volatile ( + "mov "src_r", %[src]; \n" + "mov "dst_r", %[dst]; \n" + "mov "crc_r", %[crc]; \n" + "mov "blocks_r", %[blocks]; \n" + + // Loop over block count. 
+ "cmp "blocks_r", #0; \n" // Compare counter to zero. + "ble END; \n" + + + // Preload upcoming cacheline. + "pld ["src_r", #0x0]; \n" + "pld ["src_r", #0x20]; \n" + + // Init checksum + "vldm "crc_r", {q0}; \n" + "vmov.i32 q1, #0; \n" + + // Start of the loop which copies 48 bytes from source to dst each time. + "TOP: \n" + + // Make 3 moves each of 16 bytes from srcmem to qX registers. + // We are using 2 words out of 4 words in each qX register, + // word index 0 and word index 2. We'll swizzle them in a bit. + // Copy it. + "vldm "src_r"!, {q8, q9, q10, q11}; \n" + "vstm "dst_r"!, {q8, q9, q10, q11}; \n" + + // Arrange it. + "vmov.i64 q12, #0; \n" + "vmov.i64 q13, #0; \n" + "vmov.i64 q14, #0; \n" + "vmov.i64 q15, #0; \n" + // This exchenges words 1,3 in the filled registers with + // words 0,2 in the empty registers. + "vtrn.32 q8, q12; \n" + "vtrn.32 q9, q13; \n" + "vtrn.32 q10, q14; \n" + "vtrn.32 q11, q15; \n" + + // Sum into q0, then into q1. + // Repeat this for q8 - q13. + // Overflow can occur only if there are more + // than 2^16 additions => more than 2^17 words => more than 2^19 bytes so + // if size_in_bytes > 2^19 than overflow occurs. + "vadd.i64 q0, q0, q8; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q12; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q9; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q13; \n" + "vadd.i64 q1, q1, q0; \n" + + "vadd.i64 q0, q0, q10; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q14; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q11; \n" + "vadd.i64 q1, q1, q0; \n" + "vadd.i64 q0, q0, q15; \n" + "vadd.i64 q1, q1, q0; \n" + + // Increment counter and loop. + "sub "blocks_r", "blocks_r", #1; \n" + "cmp "blocks_r", #0; \n" // Compare counter to zero. + "bgt TOP; \n" + + + "END:\n" + // Report checksum values A and B (both right now are two concatenated + // 64 bit numbers and have to be converted to 64 bit numbers) + // seems like Adler128 (since size of each part is 4 byte rather than + // 1 byte). + "vstm "crc_r", {q0, q1}; \n" + + // Output registers. + : + // Input registers. + : [src] "r"(src), [dst] "r"(dst), [blocks] "r"(blocks) , [crc] "r"(checksum_arr) + : "memory", "cc", "r3", "r4", "r5", "r6", "q0", "q1", "q8","q9","q10", "q11", "q12","q13","q14","q15" + ); // asm. + + if (checksum != NULL) { + checksum->Set(checksum_arr[0], checksum_arr[1], + checksum_arr[2], checksum_arr[3]); + } + // Everything went fine, so return true (this does not mean // that there is no problem with memory this just mean that data was copied // from src to dst and checksum was calculated successfully). return true; #else + #warning "No vector copy defined for this architecture." // Fall back to C implementation for anything else. return AdlerMemcpyWarmC(dstmem64, srcmem64, size_in_bytes, checksum); #endif diff --git a/src/os.cc b/src/os.cc index 6358398..7c4e3d1 100644 --- a/src/os.cc +++ b/src/os.cc @@ -79,7 +79,7 @@ OsLayer::OsLayer() { address_mode_ = sizeof(pvoid) * 8; has_clflush_ = false; - has_sse2_ = false; + has_vector_ = false; use_flush_page_cache_ = false; @@ -183,15 +183,18 @@ void OsLayer::GetFeatures() { unsigned int eax = 1, ebx, ecx, edx; cpuid(&eax, &ebx, &ecx, &edx); has_clflush_ = (edx >> 19) & 1; - has_sse2_ = (edx >> 26) & 1; + has_vector_ = (edx >> 26) & 1; // SSE2 caps bit. logprintf(9, "Log: has clflush: %s, has sse2: %s\n", has_clflush_ ? "true" : "false", - has_sse2_ ? "true" : "false"); + has_vector_ ? 
"true" : "false"); #elif defined(STRESSAPPTEST_CPU_PPC) // All PPC implementations have cache flush instructions. has_clflush_ = true; #elif defined(STRESSAPPTEST_CPU_ARMV7A) + // TODO(nsanders): add detect from /proc/cpuinfo or /proc/self/auxv. + // For now assume neon and don't run -W if you don't have it. + has_vector_ = true; // NEON. #warning "Unsupported CPU type ARMV7A: unable to determine feature set." #else #warning "Unsupported CPU type: unable to determine feature set." @@ -253,7 +256,7 @@ void OsLayer::Flush(void *vaddr) { bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem, unsigned int size_in_bytes, AdlerChecksum *checksum) { - if (has_sse2_) { + if (has_vector_) { return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum); } else { return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum); diff --git a/src/os.h b/src/os.h index 13660d8..2272e4d 100644 --- a/src/os.h +++ b/src/os.h @@ -17,6 +17,7 @@ #define STRESSAPPTEST_OS_H_ #include +#include #include #include @@ -153,7 +154,6 @@ class OsLayer { asm volatile("clflush (%0)" : : "r" (vaddr)); asm volatile("mfence"); #elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: Using syscall to cache flush." // ARMv7a cachelines are 8 words (32 bytes). syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast(vaddr) + 32, 0); #else @@ -267,10 +267,10 @@ class OsLayer { __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h)); tsc = data.l64; #elif defined(STRESSAPPTEST_CPU_ARMV7A) - #warning "Unsupported CPU type ARMV7A: your build may not function correctly" + #warning "Unsupported CPU type ARMV7A: your timer may not function correctly" tsc = 0; #else - #warning "Unsupported CPU type: your build may not function correctly" + #warning "Unsupported CPU type: your timer may not function correctly" tsc = 0; #endif return (tsc); @@ -381,7 +381,7 @@ class OsLayer { int num_nodes_; // Number of nodes in the system. int num_cpus_per_node_; // Number of cpus per node in the system. int address_mode_; // Are we running 32 or 64 bit? - bool has_sse2_; // Do we have sse2 instructions? + bool has_vector_; // Do we have sse2/neon instructions? bool has_clflush_; // Do we have clflush instructions? bool use_flush_page_cache_; // Do we need to flush the page cache? diff --git a/src/sat.cc b/src/sat.cc index 57fd4fe..56c6b66 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -1614,7 +1614,7 @@ void Sat::AnalysisAllStats() { map_it != workers_map_.end(); ++map_it) { for (WorkerVector::const_iterator it = map_it->second->begin(); it != map_it->second->end(); ++it) { - thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000; + thread_runtime_sec = (*it)->GetRunDurationUSec()*1.0/1000000.; total_data += (*it)->GetMemoryCopiedData(); total_data += (*it)->GetDeviceCopiedData(); if (thread_runtime_sec > max_runtime_sec) { diff --git a/src/sattypes.h b/src/sattypes.h index e51db31..79bb47d 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -225,6 +225,8 @@ inline void cpuid( #endif // defined(__PIC__) && defined(STRESSAPPTEST_CPU_I686) #elif defined(STRESSAPPTEST_CPU_PPC) return; +#elif defined(STRESSAPPTEST_CPU_ARMV7A) + return; #else #warning "Unsupported CPU type." 
#endif diff --git a/src/worker.cc b/src/worker.cc index dcffd4e..0864661 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -1359,10 +1359,10 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { - logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, " + logprintf(0, "Log: CrcWarmCopyPage CRC mismatch expected: %s != actual: %s, " "but no miscompares found. Retrying with fresh data.\n", - crc.ToHexString().c_str(), - expectedcrc->ToHexString().c_str()); + expectedcrc->ToHexString().c_str(), + crc.ToHexString().c_str() ); if (!tag_mode_) { // Copy the data originally read from this region back again. // This data should have any corruption read originally while @@ -1382,7 +1382,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, expectedcrc->ToHexString().c_str()); struct ErrorRecord er; er.actual = sourcemem[0]; - er.expected = 0x0; + er.expected = 0xbad; er.vaddr = sourcemem; ProcessError(&er, 0, "Hardware Error"); } @@ -1954,7 +1954,7 @@ bool FileThread::Work() { // Load patterns into page records. page_recs_ = new struct PageRec[sat_->disk_pages()]; for (int i = 0; i < sat_->disk_pages(); i++) { - page_recs_[i].pattern = new struct Pattern(); + page_recs_[i].pattern = new class Pattern(); } // Loop until done. diff --git a/src/worker.h b/src/worker.h index 6f9fde7..091d96b 100644 --- a/src/worker.h +++ b/src/worker.h @@ -240,7 +240,7 @@ class WorkerThread { int64 ReadThreadTimer() { struct timeval end_time_; gettimeofday(&end_time_, NULL); - return (end_time_.tv_sec - start_time_.tv_sec)*1000000 + + return (end_time_.tv_sec - start_time_.tv_sec)*1000000ULL + (end_time_.tv_usec - start_time_.tv_usec); } // Stops per-WorkerThread timer and records thread run duration. @@ -264,10 +264,10 @@ class WorkerThread { // Calculate worker thread specific bandwidth. virtual float GetMemoryBandwidth() {return GetMemoryCopiedData() / ( - runduration_usec_ * 1.0 / 1000000);} + runduration_usec_ * 1.0 / 1000000.);} virtual float GetDeviceBandwidth() {return GetDeviceCopiedData() / ( - runduration_usec_ * 1.0 / 1000000);} + runduration_usec_ * 1.0 / 1000000.);} void set_cpu_mask(cpu_set_t *mask) { memcpy(&cpu_mask_, mask, sizeof(*mask)); @@ -421,7 +421,7 @@ class FileThread : public WorkerThread { // Record of where these pages were sourced from, and what // potentially broken components they passed through. struct PageRec { - struct Pattern *pattern; // This is the data it should contain. + class Pattern *pattern; // This is the data it should contain. void *src; // This is the memory location the data was sourced from. void *dst; // This is where it ended up. }; -- cgit v1.2.3 From f99ecfc322bb6ccd63c4050a9a46cc7fb08200d2 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Tue, 11 Feb 2014 06:43:37 +0000 Subject: Add --printsec argument * Allows printing 'Seconds remaining' less frequently. --- src/sat.cc | 11 ++++++++--- src/sat.h | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/sat.cc b/src/sat.cc index 56c6b66..7b72ec2 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -676,6 +676,7 @@ Sat::Sat() { user_break_ = false; verbosity_ = 8; Logger::GlobalLogger()->SetVerbosity(verbosity_); + print_delay_ = 10; strict_ = 1; warm_ = 0; run_on_anything_ = 0; @@ -850,6 +851,9 @@ bool Sat::ParseArgs(int argc, char **argv) { // Verbosity level. ARG_IVALUE("-v", verbosity_); + // Chatty printout level. + ARG_IVALUE("--printsec", print_delay_); + // Turn off timestamps logging. 
ARG_KVALUE("--no_timestamps", log_timestamps_, false); @@ -1105,6 +1109,7 @@ void Sat::PrintHelp() { " --no_timestamps do not prefix timestamps to log messages\n" " --max_errors n exit early after finding 'n' errors\n" " -v level verbosity (0-20), default is 8\n" + " --printsec secs How often to print 'seconds remaining'\n" " -W Use more CPU-stressful memory copy\n" " -A run in degraded mode on incompatible systems\n" " -p pagesize size in bytes of memory chunks\n" @@ -1885,12 +1890,12 @@ bool Sat::Run() { // All of these are in seconds. You probably want them to be >= // kSleepFrequency and multiples of kSleepFrequency, but neither is necessary. static const time_t kInjectionFrequency = 10; - static const time_t kPrintFrequency = 10; + // print_delay_ determines "seconds remaining" chatty update. const time_t start = time(NULL); const time_t end = start + runtime_seconds_; time_t now = start; - time_t next_print = start + kPrintFrequency; + time_t next_print = start + print_delay_; time_t next_pause = start + pause_delay_; time_t next_resume = 0; time_t next_injection; @@ -1926,7 +1931,7 @@ bool Sat::Run() { if (now >= next_print) { // Print a count down message. logprintf(5, "Log: Seconds remaining: %d\n", seconds_remaining); - next_print = NextOccurance(kPrintFrequency, start, now); + next_print = NextOccurance(print_delay_, start, now); } if (next_injection && now >= next_injection) { diff --git a/src/sat.h b/src/sat.h index 92396d8..5cc3bec 100644 --- a/src/sat.h +++ b/src/sat.h @@ -163,8 +163,9 @@ class Sat { volatile sig_atomic_t user_break_; // User has signalled early exit. Used as // a boolean. int verbosity_; // How much to print. + int print_delay_; // Chatty update frequency. int strict_; // Check results per transaction. - int warm_; // FPU warms CPU while coying. + int warm_; // FPU warms CPU while copying. int address_mode_; // 32 or 64 bit binary. bool stop_on_error_; // Exit immendiately on any error. bool findfiles_; // Autodetect tempfile locations. -- cgit v1.2.3 From 7a6b252d2d22c45c2fb546029e95f616497f23ff Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Tue, 11 Feb 2014 08:53:31 +0000 Subject: Fix autoconf bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Correctly use autoconf host rather than target. * Use autoconf host_os. * Warn rather than error on unknown configs. * Add --disable-default-optimizations to avoid forced flags. * Fix manfile install --- src/Makefile.in | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/Makefile.in b/src/Makefile.in index 65470cb..d4ae27f 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -1,9 +1,9 @@ -# Makefile.in generated by automake 1.11.1 from Makefile.am. +# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, -# Inc. +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
@@ -34,7 +34,6 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ -target_triplet = @target@ bin_PROGRAMS = stressapptest$(EXEEXT) noinst_PROGRAMS = findmask$(EXEEXT) subdir = src @@ -175,11 +174,7 @@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ -target = @target@ target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ @@ -190,7 +185,7 @@ CFILES = os.cc os_factory.cc pattern.cc queue.cc sat.cc sat_factory.cc \ adler32memcpy.cc logger.cc HFILES = os.h pattern.h queue.h sat.h worker.h sattypes.h \ finelock_queue.h error_diag.h disk_blocks.h adler32memcpy.h \ - logger.h + logger.h clock.h stressapptest_SOURCES = $(MAINFILES) $(CFILES) $(HFILES) findmask_SOURCES = findmask.c findmask.inc all: stressapptest_config.h @@ -230,10 +225,8 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__aclocal_m4_deps): stressapptest_config.h: stamp-h1 - @if test ! -f $@; then \ - rm -f stamp-h1; \ - $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ - else :; fi + @if test ! -f $@; then rm -f stamp-h1; else :; fi + @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi stamp-h1: $(srcdir)/stressapptest_config.h.in $(top_builddir)/config.status @rm -f stamp-h1 @@ -285,10 +278,10 @@ clean-binPROGRAMS: clean-noinstPROGRAMS: -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) -findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES) +findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES) $(EXTRA_findmask_DEPENDENCIES) @rm -f findmask$(EXEEXT) $(LINK) $(findmask_OBJECTS) $(findmask_LDADD) $(LIBS) -stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) +stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) $(EXTRA_stressapptest_DEPENDENCIES) @rm -f stressapptest$(EXEEXT) $(CXXLINK) $(stressapptest_OBJECTS) $(stressapptest_LDADD) $(LIBS) @@ -440,10 +433,15 @@ install-am: all-am installcheck: installcheck-am install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi mostlyclean-generic: clean-generic: -- cgit v1.2.3 From 00fa12ee766a153432b98b7d57a9fd3edb0dda04 Mon Sep 17 00:00:00 2001 From: "nick.j.sanders" Date: Mon, 4 Aug 2014 21:04:06 +0000 Subject: Fix non-posix configure.ac http://code.google.com/p/stressapptest/issues/detail?id=35 --- src/Makefile.in | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Makefile.in b/src/Makefile.in index d4ae27f..ff320f3 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -1,9 +1,9 @@ -# Makefile.in generated by automake 1.11.3 from Makefile.am. +# Makefile.in generated by automake 1.11.1 from Makefile.am. 
# @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software -# Foundation, Inc. +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -225,8 +225,10 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__aclocal_m4_deps): stressapptest_config.h: stamp-h1 - @if test ! -f $@; then rm -f stamp-h1; else :; fi - @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi + @if test ! -f $@; then \ + rm -f stamp-h1; \ + $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ + else :; fi stamp-h1: $(srcdir)/stressapptest_config.h.in $(top_builddir)/config.status @rm -f stamp-h1 @@ -278,10 +280,10 @@ clean-binPROGRAMS: clean-noinstPROGRAMS: -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) -findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES) $(EXTRA_findmask_DEPENDENCIES) +findmask$(EXEEXT): $(findmask_OBJECTS) $(findmask_DEPENDENCIES) @rm -f findmask$(EXEEXT) $(LINK) $(findmask_OBJECTS) $(findmask_LDADD) $(LIBS) -stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) $(EXTRA_stressapptest_DEPENDENCIES) +stressapptest$(EXEEXT): $(stressapptest_OBJECTS) $(stressapptest_DEPENDENCIES) @rm -f stressapptest$(EXEEXT) $(CXXLINK) $(stressapptest_OBJECTS) $(stressapptest_LDADD) $(LIBS) @@ -433,15 +435,10 @@ install-am: all-am installcheck: installcheck-am install-strip: - if test -z '$(STRIP)'; then \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - install; \ - else \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ - fi + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: -- cgit v1.2.3 From 1c66ad6b0199d56c689b0e79ce90d419e3203e2c Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 23 Sep 2016 11:28:56 -0700 Subject: update Android build --- src/adler32memcpy.cc | 26 +++++++++++++------------- src/stressapptest_config_android.h | 2 +- src/worker.cc | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/adler32memcpy.cc b/src/adler32memcpy.cc index 47c6262..c0c6a39 100644 --- a/src/adler32memcpy.cc +++ b/src/adler32memcpy.cc @@ -423,22 +423,22 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, #define crc_r "r6" asm volatile ( - "mov "src_r", %[src]; \n" - "mov "dst_r", %[dst]; \n" - "mov "crc_r", %[crc]; \n" - "mov "blocks_r", %[blocks]; \n" + "mov " src_r ", %[src]; \n" + "mov " dst_r ", %[dst]; \n" + "mov " crc_r ", %[crc]; \n" + "mov " blocks_r ", %[blocks]; \n" // Loop over block count. - "cmp "blocks_r", #0; \n" // Compare counter to zero. + "cmp " blocks_r ", #0; \n" // Compare counter to zero. "ble END; \n" // Preload upcoming cacheline. 
- "pld ["src_r", #0x0]; \n" - "pld ["src_r", #0x20]; \n" + "pld [" src_r ", #0x0]; \n" + "pld [" src_r ", #0x20]; \n" // Init checksum - "vldm "crc_r", {q0}; \n" + "vldm " crc_r ", {q0}; \n" "vmov.i32 q1, #0; \n" // Start of the loop which copies 48 bytes from source to dst each time. @@ -448,8 +448,8 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // We are using 2 words out of 4 words in each qX register, // word index 0 and word index 2. We'll swizzle them in a bit. // Copy it. - "vldm "src_r"!, {q8, q9, q10, q11}; \n" - "vstm "dst_r"!, {q8, q9, q10, q11}; \n" + "vldm " src_r "!, {q8, q9, q10, q11}; \n" + "vstm " dst_r "!, {q8, q9, q10, q11}; \n" // Arrange it. "vmov.i64 q12, #0; \n" @@ -487,8 +487,8 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, "vadd.i64 q1, q1, q0; \n" // Increment counter and loop. - "sub "blocks_r", "blocks_r", #1; \n" - "cmp "blocks_r", #0; \n" // Compare counter to zero. + "sub " blocks_r ", " blocks_r ", #1; \n" + "cmp " blocks_r ", #0; \n" // Compare counter to zero. "bgt TOP; \n" @@ -497,7 +497,7 @@ bool AdlerMemcpyAsm(uint64 *dstmem64, uint64 *srcmem64, // 64 bit numbers and have to be converted to 64 bit numbers) // seems like Adler128 (since size of each part is 4 byte rather than // 1 byte). - "vstm "crc_r", {q0, q1}; \n" + "vstm " crc_r ", {q0, q1}; \n" // Output registers. : diff --git a/src/stressapptest_config_android.h b/src/stressapptest_config_android.h index 83a4866..0f6718e 100644 --- a/src/stressapptest_config_android.h +++ b/src/stressapptest_config_android.h @@ -52,7 +52,7 @@ #define HAVE_NETDB_H 1 /* Define to 1 if you have the `posix_memalign' function. */ -/* #undef HAVE_POSIX_MEMALIGN */ +#define HAVE_POSIX_MEMALIGN 1 /* Define to 1 if you have the header file. */ #define HAVE_PTHREAD_H 1 diff --git a/src/worker.cc b/src/worker.cc index 0864661..5b0fe59 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -2056,7 +2056,7 @@ bool NetworkListenThread::Listen() { sa.sin_addr.s_addr = INADDR_ANY; sa.sin_port = htons(kNetworkPort); - if (-1 == bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) { + if (-1 == ::bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) { char buf[256]; sat_strerror(errno, buf, sizeof(buf)); logprintf(0, "Process Error: Cannot bind socket: %s\n", buf); -- cgit v1.2.3 From 1d848e96ae0894bec29e6c5a503006c42c5719a0 Mon Sep 17 00:00:00 2001 From: Nick Sanders Date: Tue, 31 Jan 2017 14:11:21 -0800 Subject: Fix uninitialized variable channels_ could be used uninitialized, and is not guaranteed to default to zero. crbug.com/686830 --- src/os.cc | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/os.cc b/src/os.cc index 7c4e3d1..a792c14 100644 --- a/src/os.cc +++ b/src/os.cc @@ -63,6 +63,7 @@ OsLayer::OsLayer() { dynamic_mapped_shmem_ = false; mmapped_allocation_ = false; shmid_ = 0; + channels_ = NULL; time_initialized_ = 0; -- cgit v1.2.3 From 8345145a97f226a2f1e403d6638527d69e446c08 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 20 Nov 2017 23:40:54 -0500 Subject: Add STRESSAPPTEST_CPU_AARCH64 defintion for aarch64 CPU This patch adds a new macro, STRESSAPPTEST_CPU_AARCH64, for aarch64 CPUs. The file stressapptest_config.h.in is also refreshed using autoheader. 
Signed-off-by: Wei Huang --- src/stressapptest_config.h.in | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/stressapptest_config.h.in b/src/stressapptest_config.h.in index 5412df4..4ab0ec8 100644 --- a/src/stressapptest_config.h.in +++ b/src/stressapptest_config.h.in @@ -172,6 +172,9 @@ /* Define to 1 if strerror_r returns char *. */ #undef STRERROR_R_CHAR_P +/* Defined if the target CPU is aarch64 */ +#undef STRESSAPPTEST_CPU_AARCH64 + /* Defined if the target CPU is armv7a */ #undef STRESSAPPTEST_CPU_ARMV7A -- cgit v1.2.3 From 53aadb92f3822085d5c0c2ddf292bb05aabee932 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 20 Nov 2017 23:45:00 -0500 Subject: Add cpu type checking for aarch64 in sattypes.h file To avoid the warning message while compiling on aarch64 machines, this patch adds STRESSAPPTEST_CPU_AARCH64 support in CPU type checking to sattypes.h file Signed-off-by: Wei Huang --- src/sattypes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/sattypes.h b/src/sattypes.h index 79bb47d..251e8bc 100644 --- a/src/sattypes.h +++ b/src/sattypes.h @@ -227,6 +227,8 @@ inline void cpuid( return; #elif defined(STRESSAPPTEST_CPU_ARMV7A) return; +#elif defined(STRESSAPPTEST_CPU_AARCH64) + return; #else #warning "Unsupported CPU type." #endif -- cgit v1.2.3 From 82af7e15583882f6748abe76ba5d8585fe4ae52b Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 20 Nov 2017 23:49:10 -0500 Subject: Add aarch64 support in OS related code This patch adds OS related support for aarch64. Other than removing unncessary warnings, it adds two aarch64 specific functions: cache line flush and gettsc. Signed-off-by: Wei Huang --- src/os.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/os.h b/src/os.h index 2272e4d..7dd69b8 100644 --- a/src/os.h +++ b/src/os.h @@ -156,6 +156,12 @@ class OsLayer { #elif defined(STRESSAPPTEST_CPU_ARMV7A) // ARMv7a cachelines are 8 words (32 bytes). syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast(vaddr) + 32, 0); +#elif defined(STRESSAPPTEST_CPU_AARCH64) + asm volatile("dc cvau, %0" : : "r" (vaddr)); + asm volatile("dsb ish"); + asm volatile("ic ivau, %0" : : "r" (vaddr)); + asm volatile("dsb ish"); + asm volatile("isb"); #else #warning "Unsupported CPU type: Unable to force cache flushes." #endif @@ -186,7 +192,7 @@ class OsLayer { asm volatile("clflush (%0)" : : "r" (*vaddrs++)); } asm volatile("mfence"); -#elif defined(STRESSAPPTEST_CPU_ARMV7A) +#elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) while (*vaddrs) { FastFlush(*vaddrs++); } @@ -211,7 +217,7 @@ class OsLayer { // instruction. For example, software can use an MFENCE instruction to // insure that previous stores are included in the write-back. asm volatile("clflush (%0)" : : "r" (vaddr)); -#elif defined(STRESSAPPTEST_CPU_ARMV7A) +#elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) FastFlush(vaddr); #else #warning "Unsupported CPU type: Unable to force cache flushes." @@ -236,7 +242,7 @@ class OsLayer { // instruction. For example, software can use an MFENCE instruction to // insure that previous stores are included in the write-back. asm volatile("mfence"); -#elif defined(STRESSAPPTEST_CPU_ARMV7A) +#elif defined(STRESSAPPTEST_CPU_ARMV7A) || defined(STRESSAPPTEST_CPU_AARCH64) // This is a NOP, FastFlushHint() always does a full flush, so there's // nothing to do for FastFlushSync(). 
#else @@ -269,6 +275,8 @@ class OsLayer { #elif defined(STRESSAPPTEST_CPU_ARMV7A) #warning "Unsupported CPU type ARMV7A: your timer may not function correctly" tsc = 0; +#elif defined(STRESSAPPTEST_CPU_AARCH64) + __asm __volatile("mrs %0, CNTVCT_EL0" : "=r" (tsc) : : ); #else #warning "Unsupported CPU type: your timer may not function correctly" tsc = 0; -- cgit v1.2.3 From 0f05a3e13a382eda453166789f49b144a55f4b10 Mon Sep 17 00:00:00 2001 From: Nick Sanders Date: Mon, 11 Dec 2017 11:59:16 -0800 Subject: Fix CRC error reporting In the situation where CRC calculation fails, but no data miscompares were found, we print out an error, but don't increment the error count. This leads stressapptest to report "PASS" incorrectly. We'll add an error count increment to fix the issue. BUG=https://github.com/stressapptest/stressapptest/issues/56 TEST=add code to corrupt CRC, see "FAIL" as expected. Signed-off-by: Nick Sanders --- src/worker.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/worker.cc b/src/worker.cc index 5b0fe59..922d2c1 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -1238,9 +1238,11 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, expectedcrc->ToHexString().c_str()); struct ErrorRecord er; er.actual = sourcemem[0]; - er.expected = 0x0; + er.expected = 0xbad00000ull << 32; er.vaddr = sourcemem; ProcessError(&er, 0, "Hardware Error"); + errors += 1; + errorcount_ ++; } } } @@ -1385,6 +1387,8 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, er.expected = 0xbad; er.vaddr = sourcemem; ProcessError(&er, 0, "Hardware Error"); + errors ++; + errorcount_ ++; } } } -- cgit v1.2.3 From 041ac51f5816dfdd813352db984591664afebb9a Mon Sep 17 00:00:00 2001 From: ymjmsghs1 Date: Fri, 15 Dec 2017 15:38:41 +0900 Subject: Change integer 'use_logfile_' to boolean Signed-off-by: ymjmsghs1 --- src/sat.cc | 4 ++-- src/sat.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/sat.cc b/src/sat.cc index 7b72ec2..0528bf2 100644 --- a/src/sat.cc +++ b/src/sat.cc @@ -680,7 +680,7 @@ Sat::Sat() { strict_ = 1; warm_ = 0; run_on_anything_ = 0; - use_logfile_ = 0; + use_logfile_ = false; logfile_ = 0; log_timestamps_ = true; // Detect 32/64 bit binary. @@ -1015,7 +1015,7 @@ bool Sat::ParseArgs(int argc, char **argv) { // Set logfile flag. if (strcmp(logfilename_, "")) - use_logfile_ = 1; + use_logfile_ = true; // Checks valid page length. if (page_length_ && !(page_length_ & (page_length_ - 1)) && diff --git a/src/sat.h b/src/sat.h index 5cc3bec..33824b4 100644 --- a/src/sat.h +++ b/src/sat.h @@ -174,7 +174,7 @@ class Sat { bool crazy_error_injection_; // Simulate lots of errors. uint64 max_errorcount_; // Number of errors before forced exit. int run_on_anything_; // Ignore unknown machine ereor. - int use_logfile_; // Log to a file. + bool use_logfile_; // Log to a file. char logfilename_[255]; // Name of file to log to. int logfile_; // File handle to log to. bool log_timestamps_; // Whether to add timestamps to log lines. -- cgit v1.2.3 From c8ce5af2c2ac12377d1c229f8be73410acdd00ff Mon Sep 17 00:00:00 2001 From: Nick Sanders Date: Mon, 11 Jun 2018 18:43:27 -0700 Subject: Fix VirtualToPhysical reporting The kernel interface for discovering the physical address https://www.kernel.org/doc/Documentation/vm/pagemap.txt has changed. We'll update the call to use the new API. 
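For reference, the pagemap layout the change below relies on (Documentation/vm/pagemap.txt) gives each virtual page a 64-bit entry: bit 63 means the page is present, bit 62 means it is swapped out, and bits 0-54 hold the page frame number. A standalone sketch of the same translation, minus the project's error reporting; note that without CAP_SYS_ADMIN recent kernels report the PFN as zero:

#include <cstdint>
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>

// Translate a user virtual address to a physical address via
// /proc/self/pagemap; returns 0 if the page is absent, swapped, or unreadable.
uint64_t VirtToPhys(const void* vaddr) {
  long pagesize = sysconf(_SC_PAGESIZE);
  uint64_t entry = 0;
  off_t offset = (uintptr_t)vaddr / pagesize * sizeof(entry);

  int fd = open("/proc/self/pagemap", O_RDONLY);
  if (fd < 0) return 0;
  ssize_t got = pread(fd, &entry, sizeof(entry), offset);
  close(fd);
  if (got != (ssize_t)sizeof(entry)) return 0;

  if (!(entry & (1ULL << 63)) || (entry & (1ULL << 62)))
    return 0;                                  // not present, or swapped out
  uint64_t pfn = entry & ((1ULL << 55) - 1);   // bits 0-54: page frame number
  return pfn * pagesize + ((uintptr_t)vaddr & (pagesize - 1));
}

int main() {
  static int probe = 42;
  printf("%p -> 0x%llx\n", (void*)&probe,
         (unsigned long long)VirtToPhys(&probe));
  return 0;
}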
BUG=https://github.com/stressapptest/stressapptest/issues/64 TEST=stressapptest --force_errors looks legit Signed-off-by: Nick Sanders --- src/os.cc | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/os.cc b/src/os.cc index a792c14..089b92d 100644 --- a/src/os.cc +++ b/src/os.cc @@ -141,10 +141,17 @@ int OsLayer::AddressMode() { // Translates user virtual to physical address. uint64 OsLayer::VirtualToPhysical(void *vaddr) { - uint64 frame, shift; - off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8; + uint64 frame, paddr, pfnmask, pagemask; + int pagesize = sysconf(_SC_PAGESIZE); + off64_t off = ((uintptr_t)vaddr) / pagesize * 8; int fd = open(kPagemapPath, O_RDONLY); - // /proc/self/pagemap is available in kernel >= 2.6.25 + + /* + * https://www.kernel.org/doc/Documentation/vm/pagemap.txt + * API change (July 2015) + * https://patchwork.kernel.org/patch/6787991/ + */ + if (fd < 0) return 0; @@ -158,11 +165,18 @@ uint64 OsLayer::VirtualToPhysical(void *vaddr) { return 0; } close(fd); - if (!(frame & (1LL << 63)) || (frame & (1LL << 62))) + + /* Check if page is present and not swapped. */ + if (!(frame & (1ULL << 63)) || (frame & (1ULL << 62))) return 0; - shift = (frame >> 55) & 0x3f; - frame = (frame & 0x007fffffffffffffLL) << shift; - return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1)); + + /* pfn is bits 0-54. */ + pfnmask = ((1ULL << 55) - 1); + /* Pagesize had better be a power of 2. */ + pagemask = pagesize - 1; + + paddr = ((frame & pfnmask) * pagesize) | ((uintptr_t)vaddr & pagemask); + return paddr; } // Returns the HD device that contains this file. -- cgit v1.2.3 From 68cf922fd9cc702ff70bc6f666f5c65e17b7f9aa Mon Sep 17 00:00:00 2001 From: Nick Sanders Date: Thu, 9 Aug 2018 14:41:49 -0700 Subject: Fix GetRandomPattern Random weighting was not being done correctly. Oops. BUG=https://github.com/stressapptest/stressapptest/issues/62 TEST=run with instrumentation to check weights Signed-off-by: Nick Sanders --- src/pattern.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/pattern.cc b/src/pattern.cc index 9f22674..ba8f4d4 100644 --- a/src/pattern.cc +++ b/src/pattern.cc @@ -403,15 +403,17 @@ Pattern *PatternList::GetPattern(int i) { // Return a randomly selected pattern. Pattern *PatternList::GetRandomPattern() { - unsigned int target = random(); - target = target % weightcount_; - + int target = random(); unsigned int i = 0; - unsigned int sum = 0; - while (target > sum) { - sum += patterns_[i].weight(); + target = (target % weightcount_) + 1; + + do { + target -= patterns_[i].weight(); + if (target <= 0) + break; i++; - } + } while (i < size_); + if (i < size_) { return &patterns_[i]; } -- cgit v1.2.3 From 13bdf568b05f5a36e1374de45d37d80a2a0268c2 Mon Sep 17 00:00:00 2001 From: Nick Sanders Date: Wed, 18 Mar 2020 14:27:07 -0700 Subject: Print pattern name, CPU on error Print pattern name and source CPU on error, for better debuggability. BUG=None TEST=run with --force_errors Signed-off-by: Nick Sanders --- src/queue.h | 3 ++- src/worker.cc | 52 +++++++++++++++++++++++++++++++++++++++++++++------- src/worker.h | 1 + 3 files changed, 48 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/queue.h b/src/queue.h index a6296b1..d1920a5 100644 --- a/src/queue.h +++ b/src/queue.h @@ -44,6 +44,7 @@ struct page_entry { int32 tag; // These are tags for use in NUMA affinity or other uses. 
uint32 touch; // Counter of the number of reads from this page. uint64 ts; // Timestamp of the last read from this page. + uint32 lastcpu; // Last CPU to write this page. class Pattern *lastpattern; // Expected Pattern at last read. }; @@ -54,7 +55,7 @@ static inline void init_pe(struct page_entry *pe) { pe->tag = kInvalidTag; pe->touch = 0; pe->ts = 0; - pe->lastpattern = NULL; + pe->lastcpu = 0; } // This is a threadsafe randomized queue of pages for diff --git a/src/worker.cc b/src/worker.cc index 922d2c1..d8f37eb 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -117,6 +117,8 @@ struct ErrorRecord { uint64 paddr; // This is the bus address, if available. uint64 *tagvaddr; // This holds the tag value if this data was tagged. uint64 tagpaddr; // This holds the physical address corresponding to the tag. + uint32 lastcpu; // This holds the CPU recorded as probably writing this data. + const char *patternname; // This holds the pattern name of the expected data. }; // This is a helper function to create new threads with pthreads. @@ -462,6 +464,9 @@ bool WorkerThread::FillPage(struct page_entry *pe) { return 0; } + // Tag this page as written from the current CPU. + pe->lastcpu = sched_getcpu(); + // Mask is the bitmask of indexes used by the pattern. // It is the pattern size -1. Size is always a power of 2. uint64 *memwords = static_cast(pe->addr); @@ -514,6 +519,8 @@ bool FillThread::FillPageRandom(struct page_entry *pe) { // Choose a random pattern for this block. pe->pattern = patternlist_->GetRandomPattern(); + pe->lastcpu = sched_getcpu(); + if (pe->pattern == 0) { logprintf(0, "Process Error: Null data pattern\n"); return 0; @@ -604,17 +611,19 @@ void WorkerThread::ProcessError(struct ErrorRecord *error, (error->vaddr), 1); logprintf(priority, - "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " - "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", + "%s: miscompare on CPU %d(<-%d) at %p(0x%llx:%s): " + "read:0x%016llx, reread:0x%016llx expected:0x%016llx. '%s'%s.\n", message, core_id, - CurrentCpusFormat().c_str(), + error->lastcpu, error->vaddr, error->paddr, dimm_string, error->actual, error->reread, - error->expected); + error->expected, + (error->patternname) ? error->patternname : "None", + (error->reread == error->expected) ? " read error" : ""); } @@ -681,7 +690,8 @@ void FileThread::ProcessError(struct ErrorRecord *error, dimm_string, error->actual, error->reread, - error->expected); + error->expected, + (error->patternname) ? error->patternname : "None"); // Overwrite incorrect data with correct data to prevent // future miscompares when this data is reused. @@ -694,6 +704,7 @@ void FileThread::ProcessError(struct ErrorRecord *error, // Print errors on mismatches. 
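// For reference, the PatternList::GetRandomPattern() fix shown above picks a
// pattern with probability proportional to its weight by drawing a value in
// [1, total_weight] and subtracting weights until the draw is exhausted. The
// same idea in a self-contained sketch, using a hypothetical weight list:
#include <cstdlib>
#include <vector>

// Return an index from 'weights', each entry selected with probability
// weights[i] / sum(weights). Assumes a nonempty list of positive weights.
size_t WeightedPick(const std::vector<int>& weights) {
  int total = 0;
  for (int w : weights) total += w;
  int target = rand() % total + 1;   // draw in [1, total]
  size_t i = 0;
  for (; i < weights.size(); ++i) {
    target -= weights[i];
    if (target <= 0) break;          // this entry absorbed the draw
  }
  return i;
}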
int WorkerThread::CheckRegion(void *addr, class Pattern *pattern, + uint32 lastcpu, int64 length, int offset, int64 pattern_offset) { @@ -729,6 +740,8 @@ int WorkerThread::CheckRegion(void *addr, recorded[errors].actual = actual; recorded[errors].expected = expected; recorded[errors].vaddr = &memblock[i]; + recorded[errors].patternname = pattern->name(); + recorded[errors].lastcpu = lastcpu; errors++; } else { page_error = true; @@ -902,6 +915,7 @@ int WorkerThread::CrcCheckPage(struct page_entry *srcpe) { expectedcrc->ToHexString().c_str()); int errorcount = CheckRegion(memslice, srcpe->pattern, + srcpe->lastcpu, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { @@ -920,6 +934,7 @@ int WorkerThread::CrcCheckPage(struct page_entry *srcpe) { uint64 *memslice = memblock + blocks * blockwords; errors += CheckRegion(memslice, srcpe->pattern, + srcpe->lastcpu, leftovers, blocks * blocksize, 0); } @@ -1212,6 +1227,7 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, expectedcrc->ToHexString().c_str()); int errorcount = CheckRegion(sourcemem, srcpe->pattern, + srcpe->lastcpu, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { @@ -1226,6 +1242,7 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, memcpy(sourcemem, targetmem, blocksize); errorcount = CheckRegion(sourcemem, srcpe->pattern, + srcpe->lastcpu, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { @@ -1240,6 +1257,9 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, er.actual = sourcemem[0]; er.expected = 0xbad00000ull << 32; er.vaddr = sourcemem; + er.lastcpu = srcpe->lastcpu; + logprintf(0, "Process Error: lastCPU %d\n", srcpe->lastcpu); + er.patternname = srcpe->pattern->name(); ProcessError(&er, 0, "Hardware Error"); errors += 1; errorcount_ ++; @@ -1258,6 +1278,7 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, errors += CheckRegion(sourcemem, srcpe->pattern, + srcpe->lastcpu, leftovers, blocks * blocksize, 0); int leftoverwords = leftovers / wordsize_; @@ -1268,6 +1289,7 @@ int WorkerThread::CrcCopyPage(struct page_entry *dstpe, // Update pattern reference to reflect new contents. dstpe->pattern = srcpe->pattern; + dstpe->lastcpu = sched_getcpu(); // Clean clean clean the errors away. 
if (errors) { @@ -1300,6 +1322,7 @@ int InvertThread::InvertPageDown(struct page_entry *srcpe) { } } + srcpe->lastcpu = sched_getcpu(); return 0; } @@ -1322,6 +1345,8 @@ int InvertThread::InvertPageUp(struct page_entry *srcpe) { OsLayer::FastFlush(&sourcemem[i]); } } + + srcpe->lastcpu = sched_getcpu(); return 0; } @@ -1358,6 +1383,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, expectedcrc->ToHexString().c_str()); int errorcount = CheckRegion(sourcemem, srcpe->pattern, + srcpe->lastcpu, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { @@ -1372,6 +1398,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, memcpy(sourcemem, targetmem, blocksize); errorcount = CheckRegion(sourcemem, srcpe->pattern, + srcpe->lastcpu, blocksize, currentblock * blocksize, 0); if (errorcount == 0) { @@ -1386,6 +1413,8 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, er.actual = sourcemem[0]; er.expected = 0xbad; er.vaddr = sourcemem; + er.lastcpu = srcpe->lastcpu; + er.patternname = srcpe->pattern->name(); ProcessError(&er, 0, "Hardware Error"); errors ++; errorcount_ ++; @@ -1404,6 +1433,7 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, errors += CheckRegion(sourcemem, srcpe->pattern, + srcpe->lastcpu, leftovers, blocks * blocksize, 0); int leftoverwords = leftovers / wordsize_; @@ -1414,6 +1444,8 @@ int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, // Update pattern reference to reflect new contents. dstpe->pattern = srcpe->pattern; + dstpe->lastcpu = sched_getcpu(); + // Clean clean clean the errors away. if (errors) { @@ -1494,7 +1526,7 @@ bool CopyThread::Work() { // Force errors for unittests. if (sat_->error_injection()) { - if (loops == 8) { + if ((random() % 50000) == 8) { char *addr = reinterpret_cast(src.addr); int offset = random() % sat_->page_length(); addr[offset] = 0xba; @@ -1509,6 +1541,7 @@ bool CopyThread::Work() { } else { memcpy(dst.addr, src.addr, sat_->page_length()); dst.pattern = src.pattern; + dst.lastcpu = sched_getcpu(); } result = result && sat_->PutValid(&dst); @@ -1753,6 +1786,7 @@ bool FileThread::SectorValidatePage(const struct PageRec &page, tag[sec].sector, page.src, page.dst); + errorcount_ += 1; logprintf(5, "Sector Error: Sector tag @ 0x%x, pass %d/%d. " "sec %x/%x, block %d/%d, magic %x/%x, File: %s \n", block * page_length + 512 * sec, @@ -1844,6 +1878,7 @@ bool FileThread::GetEmptyPage(struct page_entry *dst) { dst->addr = local_page_; dst->offset = 0; dst->pattern = 0; + dst->lastcpu = 0; } return true; } @@ -1899,6 +1934,7 @@ bool FileThread::ReadPages(int fd) { return false; // Retrieve expected pattern. dst.pattern = page_recs_[i].pattern; + dst.lastcpu = sched_getcpu(); // Update page recordpage record. page_recs_[i].dst = dst.addr; @@ -2230,6 +2266,7 @@ bool NetworkThread::Work() { // Update pattern reference to reflect new contents. dst.pattern = src.pattern; + dst.lastcpu = sched_getcpu(); // Do the network read. if (!(result = result && ReceivePage(sock, &dst))) @@ -3149,7 +3186,7 @@ bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { // In non-destructive mode, don't compare the block to the pattern since // the block was never written to disk in the first place. 
if (!non_destructive_) { - if (CheckRegion(block_buffer_, block->pattern(), current_bytes, + if (CheckRegion(block_buffer_, block->pattern(), 0, current_bytes, 0, bytes_read)) { os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1); errorcount_ += 1; @@ -3387,6 +3424,7 @@ bool MemoryRegionThread::Work() { phase_ = kPhaseCopy; CrcCopyPage(&memregion_pe, &source_pe); memregion_pe.pattern = source_pe.pattern; + memregion_pe.lastcpu = sched_getcpu(); // Error injection for CRC Check. if ((sat_->error_injection() || error_injection_) && loops == 2) { diff --git a/src/worker.h b/src/worker.h index 091d96b..3398208 100644 --- a/src/worker.h +++ b/src/worker.h @@ -320,6 +320,7 @@ class WorkerThread { // Compare a region of memory with a known data patter, and report errors. virtual int CheckRegion(void *addr, class Pattern *pat, + uint32 lastcpu, int64 length, int offset, int64 patternoffset); -- cgit v1.2.3
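The VirtualToPhysical() change above replaces the old shift-based decoding with the documented pagemap layout: bit 63 marks the page present, bit 62 marks it swapped, and bits 0-54 hold the page frame number. Below is a minimal standalone sketch of the same lookup; it uses pread() instead of the lseek64()/read() pair in os.cc, and VirtToPhys() is an illustrative name, not the actual OsLayer method.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

// Returns 0 on any failure, mirroring the convention in OsLayer::VirtualToPhysical().
static uint64_t VirtToPhys(void *vaddr) {
  const long pagesize = sysconf(_SC_PAGESIZE);
  // One 64-bit pagemap entry per virtual page.
  const off_t offset = (uintptr_t)vaddr / pagesize * sizeof(uint64_t);

  int fd = open("/proc/self/pagemap", O_RDONLY);
  if (fd < 0) return 0;

  uint64_t entry = 0;
  ssize_t n = pread(fd, &entry, sizeof(entry), offset);
  close(fd);
  if (n != (ssize_t)sizeof(entry)) return 0;

  // Bit 63: page present.  Bit 62: page swapped.
  if (!(entry & (1ULL << 63)) || (entry & (1ULL << 62))) return 0;

  const uint64_t pfn = entry & ((1ULL << 55) - 1);  // PFN is bits 0-54.
  return pfn * pagesize + ((uintptr_t)vaddr & (pagesize - 1));
}

int main() {
  int probe = 42;
  printf("paddr of &probe: 0x%llx\n", (unsigned long long)VirtToPhys(&probe));
  return 0;
}

Since the July 2015 API change referenced in the patch, kernels report a zeroed PFN to processes without CAP_SYS_ADMIN, so both this sketch and os.cc only produce meaningful physical addresses when run with elevated privileges.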
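The GetRandomPattern() fix draws a value in [1, weightcount_] and walks the pattern list subtracting each pattern's weight until the draw is exhausted, so pattern i is returned with probability weight(i)/weightcount_. The old loop advanced the index before re-testing the accumulated sum, which effectively shifted each pattern's weight onto its successor and left the first pattern almost never chosen. A compact sketch of the corrected selection, using illustrative names (PickWeighted, weights) rather than the real PatternList members:

#include <cstdlib>
#include <numeric>
#include <vector>

// Returns an index into 'weights', chosen with probability weights[i] / sum(weights).
size_t PickWeighted(const std::vector<int> &weights) {
  const int total = std::accumulate(weights.begin(), weights.end(), 0);
  int target = (std::rand() % total) + 1;  // Draw in [1, total].

  size_t i = 0;
  do {
    target -= weights[i];
    if (target <= 0) break;
    ++i;
  } while (i < weights.size());

  return (i < weights.size()) ? i : weights.size() - 1;
}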
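The lastcpu plumbing in the third patch records, at fill or copy time, which CPU last wrote a page; CheckRegion() then carries that value into the error record so a miscompare report shows both the reading CPU and the suspected writer. The snippet below illustrates the idea with a stripped-down structure; page_entry_lite and the two helpers are illustrative, not code from worker.cc.

#include <sched.h>   // sched_getcpu() is a glibc extension; g++ defines _GNU_SOURCE by default.
#include <cstdio>

struct page_entry_lite {
  unsigned int lastcpu;      // Last CPU to write this page.
  const char *patternname;   // Name of the pattern the page was filled with.
};

void FillPageLite(page_entry_lite *pe, const char *patternname) {
  pe->lastcpu = sched_getcpu();   // Record the writer before filling the page.
  pe->patternname = patternname;
  // ... write the pattern into the page here ...
}

void ReportMiscompareLite(const page_entry_lite &pe) {
  // Detecting (reading) CPU vs. recorded writing CPU, in the same
  // "CPU %d(<-%d)" style the patch adds to ProcessError().
  std::printf("miscompare on CPU %d(<-%u), pattern '%s'\n",
              sched_getcpu(), pe.lastcpu, pe.patternname);
}

Because the writer is tagged whenever a page is filled or copied, a miscompare that repeatedly points back to the same source CPU can hint at a core or cache problem rather than a DRAM one, which is the debuggability gain the commit message describes.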