Diffstat (limited to 'unsupported/test')
 unsupported/test/CMakeLists.txt | 177
 unsupported/test/EulerAngles.cpp | 208
 unsupported/test/FFTW.cpp | 32
 unsupported/test/NonLinearOptimization.cpp | 46
 unsupported/test/alignedvector3.cpp | 29
 unsupported/test/autodiff.cpp | 206
 unsupported/test/autodiff_scalar.cpp | 83
 unsupported/test/bdcsvd.cpp | 213
 unsupported/test/cxx11_eventcount.cpp | 142
 unsupported/test/cxx11_meta.cpp | 357
 unsupported/test/cxx11_non_blocking_thread_pool.cpp | 107
 unsupported/test/cxx11_runqueue.cpp | 235
 unsupported/test/cxx11_tensor_argmax.cpp | 294
 unsupported/test/cxx11_tensor_argmax_cuda.cu | 254
 unsupported/test/cxx11_tensor_assign.cpp | 370
 unsupported/test/cxx11_tensor_broadcast_sycl.cpp | 74
 unsupported/test/cxx11_tensor_broadcasting.cpp | 194
 unsupported/test/cxx11_tensor_cast_float16_cuda.cu | 82
 unsupported/test/cxx11_tensor_casts.cpp | 115
 unsupported/test/cxx11_tensor_chipping.cpp | 425
 unsupported/test/cxx11_tensor_comparisons.cpp | 84
 unsupported/test/cxx11_tensor_complex_cuda.cu | 153
 unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu | 97
 unsupported/test/cxx11_tensor_concatenation.cpp | 137
 unsupported/test/cxx11_tensor_const.cpp | 62
 unsupported/test/cxx11_tensor_contract_cuda.cu | 216
 unsupported/test/cxx11_tensor_contraction.cpp | 545
 unsupported/test/cxx11_tensor_convolution.cpp | 149
 unsupported/test/cxx11_tensor_cuda.cu | 1287
 unsupported/test/cxx11_tensor_custom_index.cpp | 100
 unsupported/test/cxx11_tensor_custom_op.cpp | 111
 unsupported/test/cxx11_tensor_device.cu | 390
 unsupported/test/cxx11_tensor_device_sycl.cpp | 31
 unsupported/test/cxx11_tensor_dimension.cpp | 69
 unsupported/test/cxx11_tensor_empty.cpp | 40
 unsupported/test/cxx11_tensor_expr.cpp | 314
 unsupported/test/cxx11_tensor_fft.cpp | 273
 unsupported/test/cxx11_tensor_fixed_size.cpp | 261
 unsupported/test/cxx11_tensor_forced_eval.cpp | 79
 unsupported/test/cxx11_tensor_forced_eval_sycl.cpp | 70
 unsupported/test/cxx11_tensor_generator.cpp | 91
 unsupported/test/cxx11_tensor_ifft.cpp | 154
 unsupported/test/cxx11_tensor_image_patch.cpp | 757
 unsupported/test/cxx11_tensor_index_list.cpp | 386
 unsupported/test/cxx11_tensor_inflation.cpp | 81
 unsupported/test/cxx11_tensor_intdiv.cpp | 147
 unsupported/test/cxx11_tensor_io.cpp | 136
 unsupported/test/cxx11_tensor_layout_swap.cpp | 61
 unsupported/test/cxx11_tensor_lvalue.cpp | 42
 unsupported/test/cxx11_tensor_map.cpp | 277
 unsupported/test/cxx11_tensor_math.cpp | 46
 unsupported/test/cxx11_tensor_mixed_indices.cpp | 53
 unsupported/test/cxx11_tensor_morphing.cpp | 485
 unsupported/test/cxx11_tensor_notification.cpp | 81
 unsupported/test/cxx11_tensor_of_complex.cpp | 103
 unsupported/test/cxx11_tensor_of_const_values.cpp | 105
 unsupported/test/cxx11_tensor_of_float16_cuda.cu | 494
 unsupported/test/cxx11_tensor_of_strings.cpp | 152
 unsupported/test/cxx11_tensor_padding.cpp | 93
 unsupported/test/cxx11_tensor_patch.cpp | 172
 unsupported/test/cxx11_tensor_random.cpp | 78
 unsupported/test/cxx11_tensor_random_cuda.cu | 88
 unsupported/test/cxx11_tensor_reduction.cpp | 508
 unsupported/test/cxx11_tensor_reduction_cuda.cu | 157
 unsupported/test/cxx11_tensor_reduction_sycl.cpp | 138
 unsupported/test/cxx11_tensor_ref.cpp | 248
 unsupported/test/cxx11_tensor_reverse.cpp | 190
 unsupported/test/cxx11_tensor_roundings.cpp | 62
 unsupported/test/cxx11_tensor_scan.cpp | 110
 unsupported/test/cxx11_tensor_scan_cuda.cu | 79
 unsupported/test/cxx11_tensor_shuffling.cpp | 228
 unsupported/test/cxx11_tensor_simple.cpp | 327
 unsupported/test/cxx11_tensor_striding.cpp | 119
 unsupported/test/cxx11_tensor_sugar.cpp | 81
 unsupported/test/cxx11_tensor_sycl.cpp | 159
 unsupported/test/cxx11_tensor_symmetry.cpp | 818
 unsupported/test/cxx11_tensor_thread_pool.cpp | 373
 unsupported/test/cxx11_tensor_uint128.cpp | 160
 unsupported/test/cxx11_tensor_volume_patch.cpp | 112
 unsupported/test/forward_adolc.cpp | 4
 unsupported/test/jacobisvd.cpp | 198
 unsupported/test/kronecker_product.cpp | 89
 unsupported/test/levenberg_marquardt.cpp | 93
 unsupported/test/matrix_function.cpp | 6
 unsupported/test/matrix_functions.h | 42
 unsupported/test/matrix_power.cpp | 171
 unsupported/test/minres.cpp | 19
 unsupported/test/mpreal/mpreal.h | 854
 unsupported/test/mpreal_support.cpp | 10
 unsupported/test/polynomialsolver.cpp | 7
 unsupported/test/sparse_extra.cpp | 1
 unsupported/test/special_functions.cpp | 345
 unsupported/test/splines.cpp | 73
 unsupported/test/svd_common.h | 261
 94 files changed, 16962 insertions(+), 1273 deletions(-)
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 2e4cfdb2e..b5fa1c845 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -1,10 +1,26 @@
+# Generate the split test header file only if it does not yet exist,
+# in order to prevent a rebuild every time CMake is configured.
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
+ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "")
+ foreach(i RANGE 1 999)
+ file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h
+ "#ifdef EIGEN_TEST_PART_${i}\n"
+ "#define CALL_SUBTEST_${i}(FUNC) CALL_SUBTEST(FUNC)\n"
+ "#else\n"
+ "#define CALL_SUBTEST_${i}(FUNC)\n"
+ "#endif\n\n"
+ )
+ endforeach()
+endif()
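+
+# For reference, each iteration above appends a block like the following
+# (shown here for i = 1); EIGEN_TEST_PART_<i> is defined when compiling
+# part <i> of a split test, so only that part's subtests are enabled:
+#
+#   #ifdef EIGEN_TEST_PART_1
+#   #define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)
+#   #else
+#   #define CALL_SUBTEST_1(FUNC)
+#   #endif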
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")
add_custom_target(BuildUnsupported)
-include_directories(../../test ../../unsupported ../../Eigen
+include_directories(../../test ../../unsupported ../../Eigen
${CMAKE_CURRENT_BINARY_DIR}/../../test)
+find_package (Threads)
+
find_package(GoogleHash)
if(GOOGLEHASH_FOUND)
add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
@@ -28,22 +44,30 @@ endif(ADOLC_FOUND)
ei_add_test(NonLinearOptimization)
ei_add_test(NumericalDiff)
+ei_add_test(autodiff_scalar)
ei_add_test(autodiff)
+
+if (NOT CMAKE_CXX_COMPILER MATCHES "clang\\+\\+$")
ei_add_test(BVH)
+endif()
+
ei_add_test(matrix_exponential)
ei_add_test(matrix_function)
ei_add_test(matrix_power)
ei_add_test(matrix_square_root)
ei_add_test(alignedvector3)
+
ei_add_test(FFT)
+ei_add_test(EulerAngles)
+
find_package(MPFR 2.3.0)
find_package(GMP)
-if(MPFR_FOUND)
+if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11)
include_directories(${MPFR_INCLUDES} ./mpreal)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
- ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" )
+ ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()
@@ -82,9 +106,152 @@ endif()
ei_add_test(polynomialsolver)
ei_add_test(polynomialutils)
-ei_add_test(kronecker_product)
ei_add_test(splines)
ei_add_test(gmres)
ei_add_test(minres)
ei_add_test(levenberg_marquardt)
-ei_add_test(bdcsvd)
+ei_add_test(kronecker_product)
+ei_add_test(special_functions)
+
+# TODO: The following test names are prefixed with "cxx11" because historically
+# the tests depended on C++11. That is no longer the case, so they ought to be renamed.
+# FIXME: Old versions of MSVC fail to compile this code, so we just disable these tests
+# when using Visual Studio. The check should be made stricter so that the tests are
+# enabled for newer versions of MSVC.
+if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+ei_add_test(cxx11_tensor_dimension)
+ei_add_test(cxx11_tensor_map)
+ei_add_test(cxx11_tensor_assign)
+ei_add_test(cxx11_tensor_comparisons)
+ei_add_test(cxx11_tensor_forced_eval)
+ei_add_test(cxx11_tensor_math)
+ei_add_test(cxx11_tensor_const)
+ei_add_test(cxx11_tensor_intdiv)
+ei_add_test(cxx11_tensor_casts)
+ei_add_test(cxx11_tensor_empty)
+ei_add_test(cxx11_tensor_sugar)
+ei_add_test(cxx11_tensor_roundings)
+ei_add_test(cxx11_tensor_layout_swap)
+ei_add_test(cxx11_tensor_io)
+if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+  # This test requires __uint128_t, which is only available on 64-bit systems
+ ei_add_test(cxx11_tensor_uint128)
+endif()
+endif()
+
+if(EIGEN_TEST_CXX11)
+ if(EIGEN_TEST_SYCL)
+ ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_forced_eval_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
+ ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
+ endif(EIGEN_TEST_SYCL)
+  # It should be safe to always run these tests, as there is some fallback code for
+  # older compilers that don't support C++11.
+ set(CMAKE_CXX_STANDARD 11)
+
+ ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+
+ ei_add_test(cxx11_meta)
+ ei_add_test(cxx11_tensor_simple)
+# ei_add_test(cxx11_tensor_symmetry)
+ ei_add_test(cxx11_tensor_index_list)
+ ei_add_test(cxx11_tensor_mixed_indices)
+ ei_add_test(cxx11_tensor_contraction)
+ ei_add_test(cxx11_tensor_convolution)
+ ei_add_test(cxx11_tensor_expr)
+ ei_add_test(cxx11_tensor_fixed_size)
+ ei_add_test(cxx11_tensor_of_const_values)
+ ei_add_test(cxx11_tensor_of_complex)
+ ei_add_test(cxx11_tensor_of_strings)
+ ei_add_test(cxx11_tensor_lvalue)
+ ei_add_test(cxx11_tensor_broadcasting)
+ ei_add_test(cxx11_tensor_chipping)
+ ei_add_test(cxx11_tensor_concatenation)
+ ei_add_test(cxx11_tensor_inflation)
+ ei_add_test(cxx11_tensor_morphing)
+ ei_add_test(cxx11_tensor_padding)
+ ei_add_test(cxx11_tensor_patch)
+ ei_add_test(cxx11_tensor_image_patch)
+ ei_add_test(cxx11_tensor_volume_patch)
+ ei_add_test(cxx11_tensor_reduction)
+ ei_add_test(cxx11_tensor_argmax)
+ ei_add_test(cxx11_tensor_shuffling)
+ ei_add_test(cxx11_tensor_striding)
+ ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
+ ei_add_test(cxx11_tensor_ref)
+ ei_add_test(cxx11_tensor_random)
+ ei_add_test(cxx11_tensor_generator)
+ ei_add_test(cxx11_tensor_custom_op)
+ ei_add_test(cxx11_tensor_custom_index)
+ ei_add_test(cxx11_tensor_fft)
+ ei_add_test(cxx11_tensor_ifft)
+ ei_add_test(cxx11_tensor_scan)
+
+endif()
+
+# These tests need nvcc
+find_package(CUDA 7.0)
+if(CUDA_FOUND AND EIGEN_TEST_CUDA)
+  # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
+  # and -fno-check-new flags, since they trigger thousands of compilation warnings
+  # in the CUDA runtime.
+  # Also remove -ansi, which is incompatible with -std=c++11.
+ string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+
+ message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
+
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+ set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
+ endif()
+ if(EIGEN_TEST_CUDA_CLANG)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
+ endif()
+
+ set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
+ if (${CUDA_VERSION} STREQUAL "7.0")
+ set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
+ endif()
+
+ if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3))
+ set(EIGEN_CUDA_CXX11_FLAG "-std=c++11")
+ else()
+ # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11)
+ set(EIGEN_CUDA_CXX11_FLAG "")
+ endif()
+
+ set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}")
+ cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
+ set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
+
+ ei_add_test(cxx11_tensor_complex_cuda)
+ ei_add_test(cxx11_tensor_complex_cwise_ops_cuda)
+ ei_add_test(cxx11_tensor_reduction_cuda)
+ ei_add_test(cxx11_tensor_argmax_cuda)
+ ei_add_test(cxx11_tensor_cast_float16_cuda)
+ ei_add_test(cxx11_tensor_scan_cuda)
+
+ # Contractions require arch 3.0 or higher
+ if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 29)
+ ei_add_test(cxx11_tensor_device)
+ ei_add_test(cxx11_tensor_cuda)
+ ei_add_test(cxx11_tensor_contract_cuda)
+ ei_add_test(cxx11_tensor_of_float16_cuda)
+ endif()
+
+ # The random number generation code requires arch 3.5 or greater.
+ if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
+ ei_add_test(cxx11_tensor_random_cuda)
+ endif()
+
+
+ unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
+endif()
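
For reference, the GPU tests above are typically enabled by configuring the build with something like

    cmake -DEIGEN_TEST_CXX11=ON -DEIGEN_TEST_CUDA=ON -DEIGEN_CUDA_COMPUTE_ARCH=30 <path-to-eigen-source>

This is a sketch only: EIGEN_TEST_CXX11, EIGEN_TEST_CUDA and EIGEN_CUDA_COMPUTE_ARCH are the cache variables this file checks, and other architecture values work as well (the value gates the contraction and random-number tests as shown above).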
diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp
new file mode 100644
index 000000000..a8cb52864
--- /dev/null
+++ b/unsupported/test/EulerAngles.cpp
@@ -0,0 +1,208 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <unsupported/Eigen/EulerAngles>
+
+using namespace Eigen;
+
+template<typename EulerSystem, typename Scalar>
+void verify_euler_ranged(const Matrix<Scalar,3,1>& ea,
+ bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma)
+{
+ typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
+ typedef Matrix<Scalar,3,3> Matrix3;
+ typedef Matrix<Scalar,3,1> Vector3;
+ typedef Quaternion<Scalar> QuaternionType;
+ typedef AngleAxis<Scalar> AngleAxisType;
+ using std::abs;
+
+ Scalar alphaRangeStart, alphaRangeEnd;
+ Scalar betaRangeStart, betaRangeEnd;
+ Scalar gammaRangeStart, gammaRangeEnd;
+
+ if (positiveRangeAlpha)
+ {
+ alphaRangeStart = Scalar(0);
+ alphaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ alphaRangeStart = -Scalar(EIGEN_PI);
+ alphaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeBeta)
+ {
+ betaRangeStart = Scalar(0);
+ betaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ betaRangeStart = -Scalar(EIGEN_PI);
+ betaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeGamma)
+ {
+ gammaRangeStart = Scalar(0);
+ gammaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ gammaRangeStart = -Scalar(EIGEN_PI);
+ gammaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ const int i = EulerSystem::AlphaAxisAbs - 1;
+ const int j = EulerSystem::BetaAxisAbs - 1;
+ const int k = EulerSystem::GammaAxisAbs - 1;
+
+ const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1;
+ const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1;
+ const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1;
+
+ const Vector3 I = EulerAnglesType::AlphaAxisVector();
+ const Vector3 J = EulerAnglesType::BetaAxisVector();
+ const Vector3 K = EulerAnglesType::GammaAxisVector();
+
+ EulerAnglesType e(ea[0], ea[1], ea[2]);
+
+ Matrix3 m(e);
+ Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+
+  // Check that eabis is in range
+ VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd);
+ VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd);
+ VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd);
+
+ Vector3 eabis2 = m.eulerAngles(i, j, k);
+
+ // Invert the relevant axes
+ eabis2[0] *= iFactor;
+ eabis2[1] *= jFactor;
+ eabis2[2] *= kFactor;
+
+ // Saturate the angles to the correct range
+ if (positiveRangeAlpha && (eabis2[0] < 0))
+ eabis2[0] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeBeta && (eabis2[1] < 0))
+ eabis2[1] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeGamma && (eabis2[2] < 0))
+ eabis2[2] += Scalar(2 * EIGEN_PI);
+
+  VERIFY_IS_APPROX(eabis, eabis2); // Verify that our estimate matches what m.eulerAngles() returns
+
+ Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K));
+ VERIFY_IS_APPROX(m, mbis);
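+  // In symbols: the recovered angles (alpha', beta', gamma') = eabis are
+  // recomposed as M' = R_I(alpha') * R_J(beta') * R_K(gamma'), where R_A(t)
+  // is the rotation by angle t about axis A, and M' must reproduce m.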
+
+  // Tests that are only relevant when no positive range is requested
+ if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma))
+ {
+    /* If I==K and ea[1]==0, then there is no unique solution. */
+    /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */
+ if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
+ VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
+
+ // approx_or_less_than does not work for 0
+ VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
+ }
+
+ // Quaternions
+ QuaternionType q(e);
+ eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+  VERIFY_IS_APPROX(eabis, eabis2); // Verify that the Euler angles are still the same
+}
+
+template<typename EulerSystem, typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler_ranged<EulerSystem>(ea, false, false, false);
+ verify_euler_ranged<EulerSystem>(ea, false, false, true);
+ verify_euler_ranged<EulerSystem>(ea, false, true, false);
+ verify_euler_ranged<EulerSystem>(ea, false, true, true);
+ verify_euler_ranged<EulerSystem>(ea, true, false, false);
+ verify_euler_ranged<EulerSystem>(ea, true, false, true);
+ verify_euler_ranged<EulerSystem>(ea, true, true, false);
+ verify_euler_ranged<EulerSystem>(ea, true, true, true);
+}
+
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler<EulerSystemXYZ>(ea);
+ verify_euler<EulerSystemXYX>(ea);
+ verify_euler<EulerSystemXZY>(ea);
+ verify_euler<EulerSystemXZX>(ea);
+
+ verify_euler<EulerSystemYZX>(ea);
+ verify_euler<EulerSystemYZY>(ea);
+ verify_euler<EulerSystemYXZ>(ea);
+ verify_euler<EulerSystemYXY>(ea);
+
+ verify_euler<EulerSystemZXY>(ea);
+ verify_euler<EulerSystemZXZ>(ea);
+ verify_euler<EulerSystemZYX>(ea);
+ verify_euler<EulerSystemZYZ>(ea);
+}
+
+template<typename Scalar> void eulerangles()
+{
+ typedef Matrix<Scalar,3,3> Matrix3;
+ typedef Matrix<Scalar,3,1> Vector3;
+ typedef Array<Scalar,3,1> Array3;
+ typedef Quaternion<Scalar> Quaternionx;
+ typedef AngleAxis<Scalar> AngleAxisType;
+
+ Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI));
+ Quaternionx q1;
+ q1 = AngleAxisType(a, Vector3::Random().normalized());
+ Matrix3 m;
+ m = q1;
+
+ Vector3 ea = m.eulerAngles(0,1,2);
+ check_all_var(ea);
+ ea = m.eulerAngles(0,1,0);
+ check_all_var(ea);
+
+ // Check with purely random Quaternion:
+ q1.coeffs() = Quaternionx::Coefficients::Random().normalized();
+ m = q1;
+ ea = m.eulerAngles(0,1,2);
+ check_all_var(ea);
+ ea = m.eulerAngles(0,1,0);
+ check_all_var(ea);
+
+ // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi].
+ ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1);
+ check_all_var(ea);
+
+ ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
+ check_all_var(ea);
+
+ ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
+ check_all_var(ea);
+
+ ea[1] = 0;
+ check_all_var(ea);
+
+ ea.head(2).setZero();
+ check_all_var(ea);
+
+ ea.setZero();
+ check_all_var(ea);
+}
+
+void test_EulerAngles()
+{
+ for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_1( eulerangles<float>() );
+ CALL_SUBTEST_2( eulerangles<double>() );
+ }
+}
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index d3718e2d2..8b7528fb7 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -18,11 +18,11 @@ using namespace Eigen;
template < typename T>
-complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
-complex<long double> promote(float x) { return complex<long double>( x); }
-complex<long double> promote(double x) { return complex<long double>( x); }
-complex<long double> promote(long double x) { return complex<long double>( x); }
+complex<long double> promote(float x) { return complex<long double>((long double)x); }
+complex<long double> promote(double x) { return complex<long double>((long double)x); }
+complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1,typename VT2>
@@ -33,7 +33,7 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
- long double phinc = -2.*k0* pi / timebuf.size();
+ long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
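
For clarity, the accumulation above computes the textbook DFT used as the reference,

    acc[k0] = sum_{k1=0}^{N-1} timebuf[k1] * exp(-2*pi*i * k0*k1 / N),   N = timebuf.size(),

against which the forward FFT output is compared.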
@@ -54,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
- totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.;
- difpower += numext::abs2(buf1[k] - buf2[k]);
+ totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
+ difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
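
The tests below bound this relative RMS error,

    dif_rmse(b1, b2) = sqrt( sum_k |b1[k] - b2[k]|^2 / ( (1/2) * sum_k ( |b1[k]|^2 + |b2[k]|^2 ) ) ),

by test_precision<T>().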
@@ -93,19 +93,19 @@ void test_scalar_generic(int nfft)
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
@@ -121,12 +121,12 @@ void test_scalar_generic(int nfft)
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
- VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
@@ -152,10 +152,10 @@ void test_complex_generic(int nfft)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
- VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
@@ -163,12 +163,12 @@ void test_complex_generic(int nfft)
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
- VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index d7376b0f5..1d682dd83 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -12,7 +12,8 @@
// It is intended to be done for this test only.
#include <Eigen/src/Core/util/DisableStupidWarnings.h>
-using std::sqrt;
+// tolerance for checking the number of iterations; note that the macro expands
+// textually, so e.g. 602 * LM_EVAL_COUNT_TOL evaluates as (602*4)/3
+#define LM_EVAL_COUNT_TOL 4/3
int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag)
{
@@ -246,9 +247,9 @@ struct hybrj_functor : Functor<double>
int operator()(const VectorXd &x, VectorXd &fvec)
{
double temp, temp1, temp2;
- const int n = x.size();
+ const VectorXd::Index n = x.size();
assert(fvec.size()==n);
- for (int k = 0; k < n; k++)
+ for (VectorXd::Index k = 0; k < n; k++)
{
temp = (3. - 2.*x[k])*x[k];
temp1 = 0.;
@@ -261,12 +262,12 @@ struct hybrj_functor : Functor<double>
}
int df(const VectorXd &x, MatrixXd &fjac)
{
- const int n = x.size();
+ const VectorXd::Index n = x.size();
assert(fjac.rows()==n);
assert(fjac.cols()==n);
- for (int k = 0; k < n; k++)
+ for (VectorXd::Index k = 0; k < n; k++)
{
- for (int j = 0; j < n; j++)
+ for (VectorXd::Index j = 0; j < n; j++)
fjac(k,j) = 0.;
fjac(k,k) = 3.- 4.*x[k];
if (k) fjac(k,k-1) = -1.;
@@ -351,10 +352,10 @@ struct hybrd_functor : Functor<double>
int operator()(const VectorXd &x, VectorXd &fvec) const
{
double temp, temp1, temp2;
- const int n = x.size();
+ const VectorXd::Index n = x.size();
assert(fvec.size()==n);
- for (int k=0; k < n; k++)
+ for (VectorXd::Index k=0; k < n; k++)
{
temp = (3. - 2.*x[k])*x[k];
temp1 = 0.;
@@ -455,7 +456,7 @@ struct lmstr_functor : Functor<double>
assert(jac_row.size()==x.size());
double tmp1, tmp2, tmp3, tmp4;
- int i = rownb-2;
+ VectorXd::Index i = rownb-2;
tmp1 = i+1;
tmp2 = 16 - i - 1;
tmp3 = (i>=8)? tmp2 : tmp1;
@@ -1022,7 +1023,9 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev, 79);
VERIFY_IS_EQUAL(lm.njev, 72);
// check norm^2
- VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ std::cout.precision(30);
+ std::cout << lm.fvec.squaredNorm() << "\n";
+ VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1043,7 +1046,7 @@ void testNistLanczos1(void)
VERIFY_IS_EQUAL(lm.nfev, 9);
VERIFY_IS_EQUAL(lm.njev, 8);
// check norm^2
- VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -1262,8 +1265,8 @@ void testNistBoxBOD(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev, 31);
- VERIFY_IS_EQUAL(lm.njev, 25);
+ VERIFY(lm.nfev < 31); // 31
+ VERIFY(lm.njev < 25); // 25
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
// check x
@@ -1342,10 +1345,6 @@ void testNistMGH17(void)
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
- // check return value
- VERIFY_IS_EQUAL(info, 2);
- VERIFY_IS_EQUAL(lm.nfev, 602 );
- VERIFY_IS_EQUAL(lm.njev, 545 );
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
// check x
@@ -1354,6 +1353,15 @@ void testNistMGH17(void)
VERIFY_IS_APPROX(x[2], -1.4646871366E+00);
VERIFY_IS_APPROX(x[3], 1.2867534640E-02);
VERIFY_IS_APPROX(x[4], 2.2122699662E-02);
+
+ // check return value
+ VERIFY_IS_EQUAL(info, 2);
+ ++g_test_level;
+ VERIFY_IS_EQUAL(lm.nfev, 602); // 602
+ VERIFY_IS_EQUAL(lm.njev, 545); // 545
+ --g_test_level;
+ VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL);
/*
* Second try
@@ -1832,8 +1840,8 @@ void test_NonLinearOptimization()
// NIST tests, level of difficulty = "Average"
CALL_SUBTEST/*_5*/(testNistHahn1());
CALL_SUBTEST/*_6*/(testNistMisra1d());
-// CALL_SUBTEST/*_7*/(testNistMGH17());
-// CALL_SUBTEST/*_8*/(testNistLanczos1());
+ CALL_SUBTEST/*_7*/(testNistMGH17());
+ CALL_SUBTEST/*_8*/(testNistLanczos1());
// // NIST tests, level of difficulty = "Higher"
CALL_SUBTEST/*_9*/(testNistRat42());
diff --git a/unsupported/test/alignedvector3.cpp b/unsupported/test/alignedvector3.cpp
index fc2bc2135..252cb1d3f 100644
--- a/unsupported/test/alignedvector3.cpp
+++ b/unsupported/test/alignedvector3.cpp
@@ -10,6 +10,16 @@
#include "main.h"
#include <unsupported/Eigen/AlignedVector3>
+namespace Eigen {
+
+template<typename T,typename Derived>
+T test_relative_error(const AlignedVector3<T> &a, const MatrixBase<Derived> &b)
+{
+ return test_relative_error(a.coeffs().template head<3>(), b);
+}
+
+}
+
template<typename Scalar>
void alignedvector3()
{
@@ -19,8 +29,8 @@ void alignedvector3()
typedef Matrix<Scalar,3,3> Mat33;
typedef AlignedVector3<Scalar> FastType;
RefType r1(RefType::Random()), r2(RefType::Random()), r3(RefType::Random()),
- r4(RefType::Random()), r5(RefType::Random()), r6(RefType::Random());
- FastType f1(r1), f2(r2), f3(r3), f4(r4), f5(r5), f6(r6);
+ r4(RefType::Random()), r5(RefType::Random());
+ FastType f1(r1), f2(r2), f3(r3), f4(r4), f5(r5);
Mat33 m1(Mat33::Random());
VERIFY_IS_APPROX(f1,r1);
@@ -49,6 +59,21 @@ void alignedvector3()
f2.normalize();
r2.normalize();
VERIFY_IS_APPROX(f2,r2);
+
+ {
+ FastType f6 = RefType::Zero();
+ FastType f7 = FastType::Zero();
+ VERIFY_IS_APPROX(f6,f7);
+ f6 = r4+r1;
+ VERIFY_IS_APPROX(f6,r4+r1);
+ f6 -= Scalar(2)*r4;
+ VERIFY_IS_APPROX(f6,r1-r4);
+ }
+
+ std::stringstream ss1, ss2;
+ ss1 << f1;
+ ss2 << r1;
+ VERIFY(ss1.str()==ss2.str());
}
void test_alignedvector3()
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index 087e7c542..85743137e 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -16,7 +16,8 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y)
using namespace std;
// return x+std::sin(y);
EIGEN_ASM_COMMENT("mybegin");
- return static_cast<Scalar>(x*2 - pow(x,2) + 2*sqrt(y*y) - 4 * sin(x) + 2 * cos(y) - exp(-0.5*x*x));
+ // pow(float, int) promotes to pow(double, double)
+ return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0);
//return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2;
EIGEN_ASM_COMMENT("myend");
}
@@ -104,6 +105,89 @@ struct TestFunc1
}
};
+
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+/* Test functor for the C++11 features. */
+template <typename Scalar>
+struct integratorFunctor
+{
+ typedef Matrix<Scalar, 2, 1> InputType;
+ typedef Matrix<Scalar, 2, 1> ValueType;
+
+ /*
+ * Implementation starts here.
+ */
+ integratorFunctor(const Scalar gain) : _gain(gain) {}
+ integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {}
+ const Scalar _gain;
+
+ template <typename T1, typename T2>
+ void operator() (const T1 &input, T2 *output, const Scalar dt) const
+ {
+ T2 &o = *output;
+
+ /* Integrator to test the AD. */
+ o[0] = input[0] + input[1] * dt * _gain;
+ o[1] = input[1] * _gain;
+ }
+
+ /* Only needed for the test */
+ template <typename T1, typename T2, typename T3>
+ void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const
+ {
+ T2 &o = *output;
+
+ /* Integrator to test the AD. */
+ o[0] = input[0] + input[1] * dt * _gain;
+ o[1] = input[1] * _gain;
+
+ if (jacobian)
+ {
+ T3 &j = *jacobian;
+
+ j(0, 0) = 1;
+ j(0, 1) = dt * _gain;
+ j(1, 0) = 0;
+ j(1, 1) = _gain;
+ }
+ }
+
+};
+
+template<typename Func> void forward_jacobian_cpp11(const Func& f)
+{
+ typedef typename Func::ValueType::Scalar Scalar;
+ typedef typename Func::ValueType ValueType;
+ typedef typename Func::InputType InputType;
+ typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType;
+
+ InputType x = InputType::Random(InputType::RowsAtCompileTime);
+ ValueType y, yref;
+ JacobianType j, jref;
+
+ const Scalar dt = internal::random<double>();
+
+ jref.setZero();
+ yref.setZero();
+ f(x, &yref, &jref, dt);
+
+ //std::cerr << "y, yref, jref: " << "\n";
+ //std::cerr << y.transpose() << "\n\n";
+ //std::cerr << yref << "\n\n";
+ //std::cerr << jref << "\n\n";
+
+ AutoDiffJacobian<Func> autoj(f);
+ autoj(x, &y, &j, dt);
+
+ //std::cerr << "y j (via autodiff): " << "\n";
+ //std::cerr << y.transpose() << "\n\n";
+ //std::cerr << j << "\n\n";
+
+ VERIFY_IS_APPROX(y, yref);
+ VERIFY_IS_APPROX(j, jref);
+}
+#endif
+
template<typename Func> void forward_jacobian(const Func& f)
{
typename Func::InputType x = Func::InputType::Random(f.inputs());
@@ -127,8 +211,8 @@ template<typename Func> void forward_jacobian(const Func& f)
VERIFY_IS_APPROX(j, jref);
}
-
// TODO also check actual derivatives!
+template <int>
void test_autodiff_scalar()
{
Vector2f p = Vector2f::Random();
@@ -139,7 +223,9 @@ void test_autodiff_scalar()
VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y()));
}
+
// TODO also check actual derivatives!
+template <int>
void test_autodiff_vector()
{
Vector2f p = Vector2f::Random();
@@ -148,11 +234,12 @@ void test_autodiff_vector()
VectorAD ap = p.cast<AD>();
ap.x().derivatives() = Vector2f::UnitX();
ap.y().derivatives() = Vector2f::UnitY();
-
+
AD res = foo<VectorAD>(ap);
VERIFY_IS_APPROX(res.value(), foo(p));
}
+template <int>
void test_autodiff_jacobian()
{
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,2,2>()) ));
@@ -160,14 +247,121 @@ void test_autodiff_jacobian()
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
+#endif
+}
+
+
+template <int>
+void test_autodiff_hessian()
+{
+ typedef AutoDiffScalar<VectorXd> AD;
+ typedef Matrix<AD,Eigen::Dynamic,1> VectorAD;
+ typedef AutoDiffScalar<VectorAD> ADD;
+ typedef Matrix<ADD,Eigen::Dynamic,1> VectorADD;
+ VectorADD x(2);
+ double s1 = internal::random<double>(), s2 = internal::random<double>(), s3 = internal::random<double>(), s4 = internal::random<double>();
+ x(0).value()=s1;
+ x(1).value()=s2;
+
+ //set unit vectors for the derivative directions (partial derivatives of the input vector)
+ x(0).derivatives().resize(2);
+ x(0).derivatives().setZero();
+ x(0).derivatives()(0)= 1;
+ x(1).derivatives().resize(2);
+ x(1).derivatives().setZero();
+ x(1).derivatives()(1)=1;
+
+ //repeat partial derivatives for the inner AutoDiffScalar
+ x(0).value().derivatives() = VectorXd::Unit(2,0);
+ x(1).value().derivatives() = VectorXd::Unit(2,1);
+
+ //set the hessian matrix to zero
+ for(int idx=0; idx<2; idx++) {
+ x(0).derivatives()(idx).derivatives() = VectorXd::Zero(2);
+ x(1).derivatives()(idx).derivatives() = VectorXd::Zero(2);
+ }
+
+ ADD y = sin(AD(s3)*x(0) + AD(s4)*x(1));
+
+ VERIFY_IS_APPROX(y.value().derivatives()(0), y.derivatives()(0).value());
+ VERIFY_IS_APPROX(y.value().derivatives()(1), y.derivatives()(1).value());
+ VERIFY_IS_APPROX(y.value().derivatives()(0), s3*std::cos(s1*s3+s2*s4));
+ VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4));
+ VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3));
+ VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4));
+
+ ADD z = x(0)*x(1);
+ VERIFY_IS_APPROX(z.derivatives()(0).derivatives(), Vector2d(0,1));
+ VERIFY_IS_APPROX(z.derivatives()(1).derivatives(), Vector2d(1,0));
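+  // The checks above encode the analytic results: for y = sin(s3*x0 + s4*x1)
+  // at (x0,x1) = (s1,s2), dy/dx0 = s3*cos(s1*s3+s2*s4), dy/dx1 = s4*cos(s1*s3+s2*s4),
+  // and Hessian(y) = -sin(s1*s3+s2*s4) * [ s3^2  s3*s4 ; s3*s4  s4^2 ];
+  // for z = x0*x1 the Hessian is the constant matrix [ 0 1 ; 1 0 ].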
+}
+
+double bug_1222() {
+ typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
+ const double _cv1_3 = 1.0;
+ const AD chi_3 = 1.0;
+ // this line did not work, because operator+ returns ADS<DerType&>, which then cannot be converted to ADS<DerType>
+ const AD denom = chi_3 + _cv1_3;
+ return denom.value();
+}
+
+double bug_1223() {
+ using std::min;
+ typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
+
+ const double _cv1_3 = 1.0;
+ const AD chi_3 = 1.0;
+ const AD denom = 1.0;
+
+ // failed because implementation of min attempts to construct ADS<DerType&> via constructor AutoDiffScalar(const Real& value)
+ // without initializing m_derivatives (which is a reference in this case)
+ #define EIGEN_TEST_SPACE
+ const AD t = min EIGEN_TEST_SPACE (denom / chi_3, 1.0);
+
+ const AD t2 = min EIGEN_TEST_SPACE (denom / (chi_3 * _cv1_3), 1.0);
+
+ return t.value() + t2.value();
+}
+
+// regression test for some compilation issues with specializations of ScalarBinaryOpTraits
+void bug_1260() {
+ Matrix4d A;
+ Vector4d v;
+ A*v;
+}
+
+// check a compilation issue with numext::max
+double bug_1261() {
+ typedef AutoDiffScalar<Matrix2d> AD;
+ typedef Matrix<AD,2,1> VectorAD;
+
+ VectorAD v;
+ const AD maxVal = v.maxCoeff();
+ const AD minVal = v.minCoeff();
+ return maxVal.value() + minVal.value();
+}
+
+double bug_1264() {
+ typedef AutoDiffScalar<Vector2d> AD;
+ const AD s;
+ const Matrix<AD, 3, 1> v1;
+ const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1;
+ return v2(0).value();
}
void test_autodiff()
{
for(int i = 0; i < g_repeat; i++) {
- CALL_SUBTEST_1( test_autodiff_scalar() );
- CALL_SUBTEST_2( test_autodiff_vector() );
- CALL_SUBTEST_3( test_autodiff_jacobian() );
+ CALL_SUBTEST_1( test_autodiff_scalar<1>() );
+ CALL_SUBTEST_2( test_autodiff_vector<1>() );
+ CALL_SUBTEST_3( test_autodiff_jacobian<1>() );
+ CALL_SUBTEST_4( test_autodiff_hessian<1>() );
}
+
+ bug_1222();
+ bug_1223();
+ bug_1260();
+  bug_1261();
+  bug_1264(); // defined above but otherwise never invoked
}
diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp
new file mode 100644
index 000000000..4df2f5c57
--- /dev/null
+++ b/unsupported/test/autodiff_scalar.cpp
@@ -0,0 +1,83 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Christoph Hertzberg <chtz@informatik.uni-bremen.de>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <unsupported/Eigen/AutoDiff>
+
+/*
+ * In this file scalar derivatives are tested for correctness.
+ * TODO add more tests!
+ */
+
+template<typename Scalar> void check_atan2()
+{
+ typedef Matrix<Scalar, 1, 1> Deriv1;
+ typedef AutoDiffScalar<Deriv1> AD;
+
+ AD x(internal::random<Scalar>(-3.0, 3.0), Deriv1::UnitX());
+
+ using std::exp;
+ Scalar r = exp(internal::random<Scalar>(-10, 10));
+
+ AD s = sin(x), c = cos(x);
+ AD res = atan2(r*s, r*c);
+
+ VERIFY_IS_APPROX(res.value(), x.value());
+ VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
+
+ res = atan2(r*s+0, r*c+0);
+ VERIFY_IS_APPROX(res.value(), x.value());
+ VERIFY_IS_APPROX(res.derivatives(), x.derivatives());
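+
+  // The test relies on the identity atan2(r*sin(x), r*cos(x)) == x for any
+  // r > 0 and x in (-pi, pi); differentiating in x gives exactly 1, so both
+  // the value and the derivatives of res must match those of x.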
+}
+
+template<typename Scalar> void check_hyperbolic_functions()
+{
+ using std::sinh;
+ using std::cosh;
+ using std::tanh;
+ typedef Matrix<Scalar, 1, 1> Deriv1;
+ typedef AutoDiffScalar<Deriv1> AD;
+ Deriv1 p = Deriv1::Random();
+ AD val(p.x(),Deriv1::UnitX());
+
+ Scalar cosh_px = std::cosh(p.x());
+ AD res1 = tanh(val);
+ VERIFY_IS_APPROX(res1.value(), std::tanh(p.x()));
+ VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(1.0) / (cosh_px * cosh_px));
+
+ AD res2 = sinh(val);
+ VERIFY_IS_APPROX(res2.value(), std::sinh(p.x()));
+ VERIFY_IS_APPROX(res2.derivatives().x(), cosh_px);
+
+ AD res3 = cosh(val);
+ VERIFY_IS_APPROX(res3.value(), cosh_px);
+ VERIFY_IS_APPROX(res3.derivatives().x(), std::sinh(p.x()));
+
+ // Check constant values.
+ const Scalar sample_point = Scalar(1) / Scalar(3);
+ val = AD(sample_point,Deriv1::UnitX());
+ res1 = tanh(val);
+ VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(0.896629559604914));
+
+ res2 = sinh(val);
+ VERIFY_IS_APPROX(res2.derivatives().x(), Scalar(1.056071867829939));
+
+ res3 = cosh(val);
+ VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150));
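+
+  // These are the standard hyperbolic derivatives: d/dx tanh(x) = 1/cosh(x)^2,
+  // d/dx sinh(x) = cosh(x), d/dx cosh(x) = sinh(x); the constants above are
+  // these expressions evaluated at x = 1/3.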
+}
+
+void test_autodiff_scalar()
+{
+ for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_1( check_atan2<float>() );
+ CALL_SUBTEST_2( check_atan2<double>() );
+ CALL_SUBTEST_3( check_hyperbolic_functions<float>() );
+ CALL_SUBTEST_4( check_hyperbolic_functions<double>() );
+ }
+}
diff --git a/unsupported/test/bdcsvd.cpp b/unsupported/test/bdcsvd.cpp
deleted file mode 100644
index 115a649b0..000000000
--- a/unsupported/test/bdcsvd.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/
-
-#include "svd_common.h"
-#include <iostream>
-#include <Eigen/LU>
-
-// check if "svd" is the good image of "m"
-template<typename MatrixType>
-void bdcsvd_check_full(const MatrixType& m, const BDCSVD<MatrixType>& svd)
-{
- svd_check_full< MatrixType, BDCSVD< MatrixType > >(m, svd);
-}
-
-// Compare to a reference value
-template<typename MatrixType>
-void bdcsvd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const BDCSVD<MatrixType>& referenceSvd)
-{
- svd_compare_to_full< MatrixType, BDCSVD< MatrixType > >(m, computationOptions, referenceSvd);
-} // end bdcsvd_compare_to_full
-
-
-template<typename MatrixType>
-void bdcsvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- svd_solve< MatrixType, BDCSVD< MatrixType > >(m, computationOptions);
-} // end template bdcsvd_solve
-
-
-// test the computations options
-template<typename MatrixType>
-void bdcsvd_test_all_computation_options(const MatrixType& m)
-{
- BDCSVD<MatrixType> fullSvd(m, ComputeFullU|ComputeFullV);
- svd_test_computation_options_1< MatrixType, BDCSVD< MatrixType > >(m, fullSvd);
- svd_test_computation_options_2< MatrixType, BDCSVD< MatrixType > >(m, fullSvd);
-} // end bdcsvd_test_all_computation_options
-
-
-// Call a test with all the computations options
-template<typename MatrixType>
-void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
-{
- MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a;
- bdcsvd_test_all_computation_options<MatrixType>(m);
-} // end template bdcsvd
-
-
-// verify assert
-template<typename MatrixType>
-void bdcsvd_verify_assert(const MatrixType& m)
-{
- svd_verify_assert< MatrixType, BDCSVD< MatrixType > >(m);
-}// end template bdcsvd_verify_assert
-
-
-// test weird values
-template<typename MatrixType>
-void bdcsvd_inf_nan()
-{
- svd_inf_nan< MatrixType, BDCSVD< MatrixType > >();
-}// end template bdcsvd_inf_nan
-
-
-
-void bdcsvd_preallocate()
-{
- svd_preallocate< BDCSVD< MatrixXf > >();
-} // end bdcsvd_preallocate
-
-
-// compare the Singular values returned with Jacobi and Bdc
-template<typename MatrixType>
-void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0)
-{
- std::cout << "debut compare" << std::endl;
- MatrixType m = MatrixType::Random(a.rows(), a.cols());
- BDCSVD<MatrixType> bdc_svd(m);
- JacobiSVD<MatrixType> jacobi_svd(m);
- VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues());
- if(computationOptions & ComputeFullU)
- VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
- if(computationOptions & ComputeThinU)
- VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU());
- if(computationOptions & ComputeFullV)
- VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
- if(computationOptions & ComputeThinV)
- VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV());
- std::cout << "fin compare" << std::endl;
-} // end template compare_bdc_jacobi
-
-
-// call the tests
-void test_bdcsvd()
-{
- // test of Dynamic defined Matrix (42, 42) of float
- CALL_SUBTEST_11(( bdcsvd_verify_assert<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(42,42)) ));
- CALL_SUBTEST_11(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(42,42), 0) ));
- CALL_SUBTEST_11(( bdcsvd<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(42,42)) ));
-
- // test of Dynamic defined Matrix (50, 50) of double
- CALL_SUBTEST_13(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(50,50)) ));
- CALL_SUBTEST_13(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(50,50), 0) ));
- CALL_SUBTEST_13(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(50, 50)) ));
-
- // test of Dynamic defined Matrix (22, 22) of complex double
- CALL_SUBTEST_14(( bdcsvd_verify_assert<Matrix<std::complex<double>,Dynamic,Dynamic> >
- (Matrix<std::complex<double>,Dynamic,Dynamic>(22,22)) ));
- CALL_SUBTEST_14(( compare_bdc_jacobi<Matrix<std::complex<double>,Dynamic,Dynamic> >
- (Matrix<std::complex<double>, Dynamic, Dynamic> (22,22), 0) ));
- CALL_SUBTEST_14(( bdcsvd<Matrix<std::complex<double>,Dynamic,Dynamic> >
- (Matrix<std::complex<double>,Dynamic,Dynamic>(22, 22)) ));
-
- // test of Dynamic defined Matrix (10, 10) of int
- //CALL_SUBTEST_15(( bdcsvd_verify_assert<Matrix<int,Dynamic,Dynamic> >
- // (Matrix<int,Dynamic,Dynamic>(10,10)) ));
- //CALL_SUBTEST_15(( compare_bdc_jacobi<Matrix<int,Dynamic,Dynamic> >
- // (Matrix<int,Dynamic,Dynamic>(10,10), 0) ));
- //CALL_SUBTEST_15(( bdcsvd<Matrix<int,Dynamic,Dynamic> >
- // (Matrix<int,Dynamic,Dynamic>(10, 10)) ));
-
-
- // test of Dynamic defined Matrix (8, 6) of double
-
- CALL_SUBTEST_16(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(8,6)) ));
- CALL_SUBTEST_16(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(8, 6), 0) ));
- CALL_SUBTEST_16(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(8, 6)) ));
-
-
-
- // test of Dynamic defined Matrix (36, 12) of float
- CALL_SUBTEST_17(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(36, 12), 0) ));
- CALL_SUBTEST_17(( bdcsvd<Matrix<float,Dynamic,Dynamic> >
- (Matrix<float,Dynamic,Dynamic>(36, 12)) ));
-
- // test of Dynamic defined Matrix (5, 8) of double
- CALL_SUBTEST_18(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(5, 8), 0) ));
- CALL_SUBTEST_18(( bdcsvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(5, 8)) ));
-
-
- // non regression tests
- CALL_SUBTEST_3(( bdcsvd_verify_assert(Matrix3f()) ));
- CALL_SUBTEST_4(( bdcsvd_verify_assert(Matrix4d()) ));
- CALL_SUBTEST_7(( bdcsvd_verify_assert(MatrixXf(10,12)) ));
- CALL_SUBTEST_8(( bdcsvd_verify_assert(MatrixXcd(7,5)) ));
-
- // SUBTESTS 1 and 2 on specifics matrix
- for(int i = 0; i < g_repeat; i++) {
- Matrix2cd m;
- m << 0, 1,
- 0, 1;
- CALL_SUBTEST_1(( bdcsvd(m, false) ));
- m << 1, 0,
- 1, 0;
- CALL_SUBTEST_1(( bdcsvd(m, false) ));
-
- Matrix2d n;
- n << 0, 0,
- 0, 0;
- CALL_SUBTEST_2(( bdcsvd(n, false) ));
- n << 0, 0,
- 0, 1;
- CALL_SUBTEST_2(( bdcsvd(n, false) ));
-
- // Statics matrix don't work with BDSVD yet
- // bdc algo on a random 3x3 float matrix
- // CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
- // bdc algo on a random 4x4 double matrix
- // CALL_SUBTEST_4(( bdcsvd<Matrix4d>() ));
- // bdc algo on a random 3x5 float matrix
- // CALL_SUBTEST_5(( bdcsvd<Matrix<float,3,5> >() ));
-
- int r = internal::random<int>(1, 30),
- c = internal::random<int>(1, 30);
- CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(r,c)) ));
- CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(r,c)) ));
- (void) r;
- (void) c;
-
- // Test on inf/nan matrix
- CALL_SUBTEST_7( bdcsvd_inf_nan<MatrixXf>() );
- }
-
- CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
- CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) ));
-
- // Test problem size constructors
- CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) );
-
-} // end test_bdcsvd
diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp
new file mode 100644
index 000000000..3b598bf42
--- /dev/null
+++ b/unsupported/test/cxx11_eventcount.cpp
@@ -0,0 +1,142 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+// Visual Studio doesn't provide a rand_r() function, since its
+// implementation of rand() is already thread-safe.
+int rand_reentrant(unsigned int* s) {
+#if EIGEN_COMP_MSVC_STRICT // the macro is always defined (0 on non-MSVC), so #ifdef would always take this branch
+ EIGEN_UNUSED_VARIABLE(s);
+ return rand();
+#else
+ return rand_r(s);
+#endif
+}
+
+static void test_basic_eventcount()
+{
+ MaxSizeVector<EventCount::Waiter> waiters(1);
+ waiters.resize(1);
+ EventCount ec(waiters);
+ EventCount::Waiter& w = waiters[0];
+ ec.Notify(false);
+ ec.Prewait(&w);
+ ec.Notify(true);
+ ec.CommitWait(&w);
+ ec.Prewait(&w);
+ ec.CancelWait(&w);
+}
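+
+// A minimal sketch of the waiter-side protocol exercised above and in the
+// stress test below (work_available() is a hypothetical predicate; it must be
+// re-checked between Prewait and CommitWait to avoid a lost wakeup):
+//
+//   ec.Prewait(&w);
+//   if (work_available()) ec.CancelWait(&w);  // found work: abort the wait
+//   else                  ec.CommitWait(&w);  // block until a Notify arrives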
+
+// Fake bounded counter-based queue.
+struct TestQueue {
+ std::atomic<int> val_;
+ static const int kQueueSize = 10;
+
+ TestQueue() : val_() {}
+
+ ~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); }
+
+ bool Push() {
+ int val = val_.load(std::memory_order_relaxed);
+ for (;;) {
+ VERIFY_GE(val, 0);
+ VERIFY_LE(val, kQueueSize);
+ if (val == kQueueSize) return false;
+ if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed))
+ return true;
+ }
+ }
+
+ bool Pop() {
+ int val = val_.load(std::memory_order_relaxed);
+ for (;;) {
+ VERIFY_GE(val, 0);
+ VERIFY_LE(val, kQueueSize);
+ if (val == 0) return false;
+ if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed))
+ return true;
+ }
+ }
+
+ bool Empty() { return val_.load(std::memory_order_relaxed) == 0; }
+};
+
+const int TestQueue::kQueueSize;
+
+// A number of producers send messages to a set of consumers using a set of
+// fake queues. Ensure that this does not crash, that consumers don't deadlock,
+// and that the numbers of blocked and unblocked threads match.
+static void test_stress_eventcount()
+{
+ const int kThreads = std::thread::hardware_concurrency();
+ static const int kEvents = 1 << 16;
+ static const int kQueues = 10;
+
+ MaxSizeVector<EventCount::Waiter> waiters(kThreads);
+ waiters.resize(kThreads);
+ EventCount ec(waiters);
+ TestQueue queues[kQueues];
+
+ std::vector<std::unique_ptr<std::thread>> producers;
+ for (int i = 0; i < kThreads; i++) {
+ producers.emplace_back(new std::thread([&ec, &queues]() {
+ unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
+ for (int j = 0; j < kEvents; j++) {
+ unsigned idx = rand_reentrant(&rnd) % kQueues;
+ if (queues[idx].Push()) {
+ ec.Notify(false);
+ continue;
+ }
+ EIGEN_THREAD_YIELD();
+ j--;
+ }
+ }));
+ }
+
+ std::vector<std::unique_ptr<std::thread>> consumers;
+ for (int i = 0; i < kThreads; i++) {
+ consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() {
+ EventCount::Waiter& w = waiters[i];
+ unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id()));
+ for (int j = 0; j < kEvents; j++) {
+ unsigned idx = rand_reentrant(&rnd) % kQueues;
+ if (queues[idx].Pop()) continue;
+ j--;
+ ec.Prewait(&w);
+ bool empty = true;
+ for (int q = 0; q < kQueues; q++) {
+ if (!queues[q].Empty()) {
+ empty = false;
+ break;
+ }
+ }
+ if (!empty) {
+ ec.CancelWait(&w);
+ continue;
+ }
+ ec.CommitWait(&w);
+ }
+ }));
+ }
+
+ for (int i = 0; i < kThreads; i++) {
+ producers[i]->join();
+ consumers[i]->join();
+ }
+}
+
+void test_cxx11_eventcount()
+{
+ CALL_SUBTEST(test_basic_eventcount());
+ CALL_SUBTEST(test_stress_eventcount());
+}
diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp
new file mode 100644
index 000000000..8911c59d8
--- /dev/null
+++ b/unsupported/test/cxx11_meta.cpp
@@ -0,0 +1,357 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <array>
+#include <Eigen/CXX11/src/util/CXX11Meta.h>
+
+using Eigen::internal::is_same;
+using Eigen::internal::type_list;
+using Eigen::internal::numeric_list;
+using Eigen::internal::gen_numeric_list;
+using Eigen::internal::gen_numeric_list_reversed;
+using Eigen::internal::gen_numeric_list_swapped_pair;
+using Eigen::internal::gen_numeric_list_repeated;
+using Eigen::internal::concat;
+using Eigen::internal::mconcat;
+using Eigen::internal::take;
+using Eigen::internal::skip;
+using Eigen::internal::slice;
+using Eigen::internal::get;
+using Eigen::internal::id_numeric;
+using Eigen::internal::id_type;
+using Eigen::internal::is_same_gf;
+using Eigen::internal::apply_op_from_left;
+using Eigen::internal::apply_op_from_right;
+using Eigen::internal::contained_in_list;
+using Eigen::internal::contained_in_list_gf;
+using Eigen::internal::arg_prod;
+using Eigen::internal::arg_sum;
+using Eigen::internal::sum_op;
+using Eigen::internal::product_op;
+using Eigen::internal::array_reverse;
+using Eigen::internal::array_sum;
+using Eigen::internal::array_prod;
+using Eigen::internal::array_reduce;
+using Eigen::internal::array_zip;
+using Eigen::internal::array_zip_and_reduce;
+using Eigen::internal::array_apply;
+using Eigen::internal::array_apply_and_reduce;
+using Eigen::internal::repeat;
+using Eigen::internal::instantiate_by_c_array;
+
+struct dummy_a {};
+struct dummy_b {};
+struct dummy_c {};
+struct dummy_d {};
+struct dummy_e {};
+
+// dummy operation for testing apply
+template<typename A, typename B> struct dummy_op;
+template<> struct dummy_op<dummy_a, dummy_b> { typedef dummy_c type; };
+template<> struct dummy_op<dummy_b, dummy_a> { typedef dummy_d type; };
+template<> struct dummy_op<dummy_b, dummy_c> { typedef dummy_a type; };
+template<> struct dummy_op<dummy_c, dummy_b> { typedef dummy_d type; };
+template<> struct dummy_op<dummy_c, dummy_a> { typedef dummy_b type; };
+template<> struct dummy_op<dummy_a, dummy_c> { typedef dummy_d type; };
+template<> struct dummy_op<dummy_a, dummy_a> { typedef dummy_e type; };
+template<> struct dummy_op<dummy_b, dummy_b> { typedef dummy_e type; };
+template<> struct dummy_op<dummy_c, dummy_c> { typedef dummy_e type; };
+
+template<typename A, typename B> struct dummy_test { constexpr static bool value = false; constexpr static int global_flags = 0; };
+template<> struct dummy_test<dummy_a, dummy_a> { constexpr static bool value = true; constexpr static int global_flags = 1; };
+template<> struct dummy_test<dummy_b, dummy_b> { constexpr static bool value = true; constexpr static int global_flags = 2; };
+template<> struct dummy_test<dummy_c, dummy_c> { constexpr static bool value = true; constexpr static int global_flags = 4; };
+
+struct times2_op { template<typename A> static A run(A v) { return v * 2; } };
+
+struct dummy_inst
+{
+ int c;
+
+ dummy_inst() : c(0) {}
+ explicit dummy_inst(int) : c(1) {}
+ dummy_inst(int, int) : c(2) {}
+ dummy_inst(int, int, int) : c(3) {}
+ dummy_inst(int, int, int, int) : c(4) {}
+ dummy_inst(int, int, int, int, int) : c(5) {}
+};
+
+static void test_gen_numeric_list()
+{
+ VERIFY((is_same<typename gen_numeric_list<int, 0>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 1>::type, numeric_list<int, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 2>::type, numeric_list<int, 0, 1>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 5>::type, numeric_list<int, 0, 1, 2, 3, 4>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 10>::type, numeric_list<int, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9>>::value));
+
+ VERIFY((is_same<typename gen_numeric_list<int, 0, 42>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 1, 42>::type, numeric_list<int, 42>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 2, 42>::type, numeric_list<int, 42, 43>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 5, 42>::type, numeric_list<int, 42, 43, 44, 45, 46>>::value));
+ VERIFY((is_same<typename gen_numeric_list<int, 10, 42>::type, numeric_list<int, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51>>::value));
+
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 0>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 1>::type, numeric_list<int, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 2>::type, numeric_list<int, 1, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 5>::type, numeric_list<int, 4, 3, 2, 1, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 10>::type, numeric_list<int, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0>>::value));
+
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 0, 42>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 1, 42>::type, numeric_list<int, 42>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 2, 42>::type, numeric_list<int, 43, 42>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 5, 42>::type, numeric_list<int, 46, 45, 44, 43, 42>>::value));
+ VERIFY((is_same<typename gen_numeric_list_reversed<int, 10, 42>::type, numeric_list<int, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42>>::value));
+
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 0, 2, 3>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 1, 2, 3>::type, numeric_list<int, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 2, 2, 3>::type, numeric_list<int, 0, 1>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 5, 2, 3>::type, numeric_list<int, 0, 1, 3, 2, 4>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 10, 2, 3>::type, numeric_list<int, 0, 1, 3, 2, 4, 5, 6, 7, 8, 9>>::value));
+
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 0, 44, 45, 42>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 1, 44, 45, 42>::type, numeric_list<int, 42>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 2, 44, 45, 42>::type, numeric_list<int, 42, 43>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 5, 44, 45, 42>::type, numeric_list<int, 42, 43, 45, 44, 46>>::value));
+ VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 10, 44, 45, 42>::type, numeric_list<int, 42, 43, 45, 44, 46, 47, 48, 49, 50, 51>>::value));
+
+ VERIFY((is_same<typename gen_numeric_list_repeated<int, 0, 0>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename gen_numeric_list_repeated<int, 1, 0>::type, numeric_list<int, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_repeated<int, 2, 0>::type, numeric_list<int, 0, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_repeated<int, 5, 0>::type, numeric_list<int, 0, 0, 0, 0, 0>>::value));
+ VERIFY((is_same<typename gen_numeric_list_repeated<int, 10, 0>::type, numeric_list<int, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>>::value));
+}
+
+static void test_concat()
+{
+ VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<>>::type, type_list<dummy_a, dummy_a>>::value));
+ VERIFY((is_same<typename concat<type_list<>, type_list<dummy_a, dummy_a>>::type, type_list<dummy_a, dummy_a>>::value));
+ VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<dummy_a, dummy_a>>::type, type_list<dummy_a, dummy_a, dummy_a, dummy_a>>::value));
+ VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_c>>::value));
+ VERIFY((is_same<typename concat<type_list<dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
+
+ VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int>>::type, numeric_list<int, 0, 0>>::value));
+ VERIFY((is_same<typename concat<numeric_list<int>, numeric_list<int, 0, 0>>::type, numeric_list<int, 0, 0>>::value));
+ VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int, 0, 0>>::type, numeric_list<int, 0, 0, 0, 0>>::value));
+ VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 0, 1, 2>>::value));
+ VERIFY((is_same<typename concat<numeric_list<int, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
+
+ VERIFY((is_same<typename mconcat<type_list<dummy_a>>::type, type_list<dummy_a>>::value));
+ VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b>>::type, type_list<dummy_a, dummy_b>>::value));
+ VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b>, type_list<dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
+ VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
+ VERIFY((is_same<typename mconcat<type_list<dummy_a, dummy_b>, type_list<dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value));
+
+ VERIFY((is_same<typename mconcat<numeric_list<int, 0>>::type, numeric_list<int, 0>>::value));
+ VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1>>::type, numeric_list<int, 0, 1>>::value));
+ VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1>, numeric_list<int, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
+ VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
+ VERIFY((is_same<typename mconcat<numeric_list<int, 0, 1>, numeric_list<int, 2>>::type, numeric_list<int, 0, 1, 2>>::value));
+}
+
+static void test_slice()
+{
+ typedef type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c> tl;
+ typedef numeric_list<int, 0, 1, 2, 3, 4, 5> il;
+
+ VERIFY((is_same<typename take<0, tl>::type, type_list<>>::value));
+ VERIFY((is_same<typename take<1, tl>::type, type_list<dummy_a>>::value));
+ VERIFY((is_same<typename take<2, tl>::type, type_list<dummy_a, dummy_a>>::value));
+ VERIFY((is_same<typename take<3, tl>::type, type_list<dummy_a, dummy_a, dummy_b>>::value));
+ VERIFY((is_same<typename take<4, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b>>::value));
+ VERIFY((is_same<typename take<5, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c>>::value));
+ VERIFY((is_same<typename take<6, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value));
+
+ VERIFY((is_same<typename take<0, il>::type, numeric_list<int>>::value));
+ VERIFY((is_same<typename take<1, il>::type, numeric_list<int, 0>>::value));
+ VERIFY((is_same<typename take<2, il>::type, numeric_list<int, 0, 1>>::value));
+ VERIFY((is_same<typename take<3, il>::type, numeric_list<int, 0, 1, 2>>::value));
+ VERIFY((is_same<typename take<4, il>::type, numeric_list<int, 0, 1, 2, 3>>::value));
+ VERIFY((is_same<typename take<5, il>::type, numeric_list<int, 0, 1, 2, 3, 4>>::value));
+ VERIFY((is_same<typename take<6, il>::type, numeric_list<int, 0, 1, 2, 3, 4, 5>>::value));
+
+ VERIFY((is_same<typename skip<0, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value));
+ VERIFY((is_same<typename skip<1, tl>::type, type_list<dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value));
+ VERIFY((is_same<typename skip<2, tl>::type, type_list<dummy_b, dummy_b, dummy_c, dummy_c>>::value));
+ VERIFY((is_same<typename skip<3, tl>::type, type_list<dummy_b, dummy_c, dummy_c>>::value));
+ VERIFY((is_same<typename skip<4, tl>::type, type_list<dummy_c, dummy_c>>::value));
+ VERIFY((is_same<typename skip<5, tl>::type, type_list<dummy_c>>::value));
+ VERIFY((is_same<typename skip<6, tl>::type, type_list<>>::value));
+
+ VERIFY((is_same<typename skip<0, il>::type, numeric_list<int, 0, 1, 2, 3, 4, 5>>::value));
+ VERIFY((is_same<typename skip<1, il>::type, numeric_list<int, 1, 2, 3, 4, 5>>::value));
+ VERIFY((is_same<typename skip<2, il>::type, numeric_list<int, 2, 3, 4, 5>>::value));
+ VERIFY((is_same<typename skip<3, il>::type, numeric_list<int, 3, 4, 5>>::value));
+ VERIFY((is_same<typename skip<4, il>::type, numeric_list<int, 4, 5>>::value));
+ VERIFY((is_same<typename skip<5, il>::type, numeric_list<int, 5>>::value));
+ VERIFY((is_same<typename skip<6, il>::type, numeric_list<int>>::value));
+
+ VERIFY((is_same<typename slice<0, 3, tl>::type, typename take<3, tl>::type>::value));
+ VERIFY((is_same<typename slice<0, 3, il>::type, typename take<3, il>::type>::value));
+ VERIFY((is_same<typename slice<1, 3, tl>::type, type_list<dummy_a, dummy_b, dummy_b>>::value));
+ VERIFY((is_same<typename slice<1, 3, il>::type, numeric_list<int, 1, 2, 3>>::value));
+}
+
+static void test_get()
+{
+ typedef type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c> tl;
+ typedef numeric_list<int, 4, 8, 15, 16, 23, 42> il;
+
+ VERIFY((is_same<typename get<0, tl>::type, dummy_a>::value));
+ VERIFY((is_same<typename get<1, tl>::type, dummy_a>::value));
+ VERIFY((is_same<typename get<2, tl>::type, dummy_b>::value));
+ VERIFY((is_same<typename get<3, tl>::type, dummy_b>::value));
+ VERIFY((is_same<typename get<4, tl>::type, dummy_c>::value));
+ VERIFY((is_same<typename get<5, tl>::type, dummy_c>::value));
+
+ VERIFY_IS_EQUAL(((int)get<0, il>::value), 4);
+ VERIFY_IS_EQUAL(((int)get<1, il>::value), 8);
+ VERIFY_IS_EQUAL(((int)get<2, il>::value), 15);
+ VERIFY_IS_EQUAL(((int)get<3, il>::value), 16);
+ VERIFY_IS_EQUAL(((int)get<4, il>::value), 23);
+ VERIFY_IS_EQUAL(((int)get<5, il>::value), 42);
+}
+
+static void test_id_helper(dummy_a a, dummy_a b, dummy_a c)
+{
+ (void)a;
+ (void)b;
+ (void)c;
+}
+
+template<int... ii>
+static void test_id_numeric()
+{
+ test_id_helper(typename id_numeric<int, ii, dummy_a>::type()...);
+}
+
+template<typename... tt>
+static void test_id_type()
+{
+ test_id_helper(typename id_type<tt, dummy_a>::type()...);
+}
+
+static void test_id()
+{
+  // don't call VERIFY here, just assume it works if it compiles
+  // (otherwise the compiler will complain that it can't find test_id_helper)
+ test_id_numeric<1, 4, 6>();
+ test_id_type<dummy_a, dummy_b, dummy_c>();
+}
+
+static void test_is_same_gf()
+{
+ VERIFY((!is_same_gf<dummy_a, dummy_b>::value));
+ VERIFY((!!is_same_gf<dummy_a, dummy_a>::value));
+ VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_b>::global_flags), false);
+ VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_a>::global_flags), false);
+}
+
+static void test_apply_op()
+{
+ typedef type_list<dummy_a, dummy_b, dummy_c> tl;
+ VERIFY((!!is_same<typename apply_op_from_left<dummy_op, dummy_a, tl>::type, type_list<dummy_e, dummy_c, dummy_d>>::value));
+ VERIFY((!!is_same<typename apply_op_from_right<dummy_op, dummy_a, tl>::type, type_list<dummy_e, dummy_d, dummy_b>>::value));
+}
+
+static void test_contained_in_list()
+{
+ typedef type_list<dummy_a, dummy_b, dummy_c> tl;
+
+ VERIFY((!!contained_in_list<is_same, dummy_a, tl>::value));
+ VERIFY((!!contained_in_list<is_same, dummy_b, tl>::value));
+ VERIFY((!!contained_in_list<is_same, dummy_c, tl>::value));
+ VERIFY((!contained_in_list<is_same, dummy_d, tl>::value));
+ VERIFY((!contained_in_list<is_same, dummy_e, tl>::value));
+
+ VERIFY((!!contained_in_list_gf<dummy_test, dummy_a, tl>::value));
+ VERIFY((!!contained_in_list_gf<dummy_test, dummy_b, tl>::value));
+ VERIFY((!!contained_in_list_gf<dummy_test, dummy_c, tl>::value));
+ VERIFY((!contained_in_list_gf<dummy_test, dummy_d, tl>::value));
+ VERIFY((!contained_in_list_gf<dummy_test, dummy_e, tl>::value));
+
+ VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_a, tl>::global_flags), 1);
+ VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_b, tl>::global_flags), 2);
+ VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_c, tl>::global_flags), 4);
+ VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_d, tl>::global_flags), 0);
+ VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_e, tl>::global_flags), 0);
+}
+
+static void test_arg_reductions()
+{
+ VERIFY_IS_EQUAL(arg_sum(1,2,3,4), 10);
+ VERIFY_IS_EQUAL(arg_prod(1,2,3,4), 24);
+ VERIFY_IS_APPROX(arg_sum(0.5, 2, 5), 7.5);
+ VERIFY_IS_APPROX(arg_prod(0.5, 2, 5), 5.0);
+}
+
+static void test_array_reverse_and_reduce()
+{
+ array<int, 6> a{{4, 8, 15, 16, 23, 42}};
+ array<int, 6> b{{42, 23, 16, 15, 8, 4}};
+
+ // there is no operator<< for std::array, so VERIFY_IS_EQUAL will
+ // not compile
+ VERIFY((array_reverse(a) == b));
+ VERIFY((array_reverse(b) == a));
+ VERIFY_IS_EQUAL((array_sum(a)), 108);
+ VERIFY_IS_EQUAL((array_sum(b)), 108);
+ VERIFY_IS_EQUAL((array_prod(a)), 7418880);
+ VERIFY_IS_EQUAL((array_prod(b)), 7418880);
+}
+
+static void test_array_zip_and_apply()
+{
+ array<int, 6> a{{4, 8, 15, 16, 23, 42}};
+ array<int, 6> b{{0, 1, 2, 3, 4, 5}};
+ array<int, 6> c{{4, 9, 17, 19, 27, 47}};
+ array<int, 6> d{{0, 8, 30, 48, 92, 210}};
+ array<int, 6> e{{0, 2, 4, 6, 8, 10}};
+
+ VERIFY((array_zip<sum_op>(a, b) == c));
+ VERIFY((array_zip<product_op>(a, b) == d));
+ VERIFY((array_apply<times2_op>(b) == e));
+ VERIFY_IS_EQUAL((array_apply_and_reduce<sum_op, times2_op>(a)), 216);
+ VERIFY_IS_EQUAL((array_apply_and_reduce<sum_op, times2_op>(b)), 30);
+ VERIFY_IS_EQUAL((array_zip_and_reduce<product_op, sum_op>(a, b)), 14755932);
+ VERIFY_IS_EQUAL((array_zip_and_reduce<sum_op, product_op>(a, b)), 388);
+}
+
+static void test_array_misc()
+{
+ array<int, 3> a3{{1, 1, 1}};
+ array<int, 6> a6{{2, 2, 2, 2, 2, 2}};
+ VERIFY((repeat<3, int>(1) == a3));
+ VERIFY((repeat<6, int>(2) == a6));
+
+ int data[5] = { 0, 1, 2, 3, 4 };
+ VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 0>(data).c), 0);
+ VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 1>(data).c), 1);
+ VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 2>(data).c), 2);
+ VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 3>(data).c), 3);
+ VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 4>(data).c), 4);
+ VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 5>(data).c), 5);
+}
+
+void test_cxx11_meta()
+{
+ CALL_SUBTEST(test_gen_numeric_list());
+ CALL_SUBTEST(test_concat());
+ CALL_SUBTEST(test_slice());
+ CALL_SUBTEST(test_get());
+ CALL_SUBTEST(test_id());
+ CALL_SUBTEST(test_is_same_gf());
+ CALL_SUBTEST(test_apply_op());
+ CALL_SUBTEST(test_contained_in_list());
+ CALL_SUBTEST(test_arg_reductions());
+ CALL_SUBTEST(test_array_reverse_and_reduce());
+ CALL_SUBTEST(test_array_zip_and_apply());
+ CALL_SUBTEST(test_array_misc());
+}
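+
+// A minimal usage sketch of the array helpers verified above (values are
+// illustrative): the Tensor module composes them the same way, e.g. to get
+// the flat size of a tensor from its dimension array:
+//
+//   Eigen::array<int, 3> dims{{2, 3, 7}};
+//   int flat_size = Eigen::internal::array_prod(dims);  // 42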
diff --git a/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/unsupported/test/cxx11_non_blocking_thread_pool.cpp
new file mode 100644
index 000000000..5f9bb938b
--- /dev/null
+++ b/unsupported/test/cxx11_non_blocking_thread_pool.cpp
@@ -0,0 +1,107 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include "main.h"
+#include "Eigen/CXX11/ThreadPool"
+
+static void test_create_destroy_empty_pool()
+{
+  // Just create and destroy the pool. This will spin up and tear down worker
+ // threads. Ensure there are no issues in that logic.
+ for (int i = 0; i < 16; ++i) {
+ NonBlockingThreadPool tp(i);
+ }
+}
+
+
+static void test_parallelism()
+{
+  // Test that we never fail to match available tasks with idle threads.
+ const int kThreads = 16; // code below expects that this is a multiple of 4
+ NonBlockingThreadPool tp(kThreads);
+ VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
+ VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
+ for (int iter = 0; iter < 100; ++iter) {
+ std::atomic<int> running(0);
+ std::atomic<int> done(0);
+ std::atomic<int> phase(0);
+ // Schedule kThreads tasks and ensure that they all are running.
+ for (int i = 0; i < kThreads; ++i) {
+ tp.Schedule([&]() {
+ const int thread_id = tp.CurrentThreadId();
+ VERIFY_GE(thread_id, 0);
+ VERIFY_LE(thread_id, kThreads - 1);
+ running++;
+ while (phase < 1) {
+ }
+ done++;
+ });
+ }
+ while (running != kThreads) {
+ }
+ running = 0;
+ phase = 1;
+ // Now, while the previous tasks exit, schedule another kThreads tasks and
+ // ensure that they are running.
+ for (int i = 0; i < kThreads; ++i) {
+ tp.Schedule([&, i]() {
+ running++;
+ while (phase < 2) {
+ }
+      // When all tasks are running, half of the tasks exit, a quarter of the
+      // tasks continue running, and the remaining quarter each schedule two
+      // more tasks. Concurrently, the main thread schedules another quarter
+      // of the tasks. This gives us another kThreads tasks, and we ensure
+      // that they all are running.
+ if (i < kThreads / 2) {
+ } else if (i < 3 * kThreads / 4) {
+ running++;
+ while (phase < 3) {
+ }
+ done++;
+ } else {
+ for (int j = 0; j < 2; ++j) {
+ tp.Schedule([&]() {
+ running++;
+ while (phase < 3) {
+ }
+ done++;
+ });
+ }
+ }
+ done++;
+ });
+ }
+ while (running != kThreads) {
+ }
+ running = 0;
+ phase = 2;
+ for (int i = 0; i < kThreads / 4; ++i) {
+ tp.Schedule([&]() {
+ running++;
+ while (phase < 3) {
+ }
+ done++;
+ });
+ }
+ while (running != kThreads) {
+ }
+ phase = 3;
+ while (done != 3 * kThreads) {
+ }
+ }
+}
+
+void test_cxx11_non_blocking_thread_pool()
+{
+ CALL_SUBTEST(test_create_destroy_empty_pool());
+ CALL_SUBTEST(test_parallelism());
+}
diff --git a/unsupported/test/cxx11_runqueue.cpp b/unsupported/test/cxx11_runqueue.cpp
new file mode 100644
index 000000000..91f690114
--- /dev/null
+++ b/unsupported/test/cxx11_runqueue.cpp
@@ -0,0 +1,235 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include <cstdlib>
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+
+// Visual Studio doesn't implement a rand_r() function since its
+// implementation of rand() is already thread-safe.
+int rand_reentrant(unsigned int* s) {
+#ifdef EIGEN_COMP_MSVC_STRICT
+ EIGEN_UNUSED_VARIABLE(s);
+ return rand();
+#else
+ return rand_r(s);
+#endif
+}
+
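+// Convention of the RunQueue API exercised below, as the assertions show:
+// PushFront/PushBack return 0 on success and hand the element back when the
+// queue is full; PopFront returns 0 when the queue is empty, which is why 0
+// is never used as a queued value in these tests.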
+void test_basic_runqueue()
+{
+ RunQueue<int, 4> q;
+ // Check empty state.
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PopFront());
+ std::vector<int> stolen;
+ VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ // Push one front, pop one front.
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ // Push front to overflow.
+ VERIFY_IS_EQUAL(0, q.PushFront(2));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(3));
+ VERIFY_IS_EQUAL(2u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(4));
+ VERIFY_IS_EQUAL(3u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(5));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ VERIFY_IS_EQUAL(6, q.PushFront(6));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ VERIFY_IS_EQUAL(5, q.PopFront());
+ VERIFY_IS_EQUAL(3u, q.Size());
+ VERIFY_IS_EQUAL(4, q.PopFront());
+ VERIFY_IS_EQUAL(2u, q.Size());
+ VERIFY_IS_EQUAL(3, q.PopFront());
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(2, q.PopFront());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PopFront());
+ // Push one back, pop one back.
+ VERIFY_IS_EQUAL(0, q.PushBack(7));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
+ VERIFY_IS_EQUAL(7, stolen[0]);
+ VERIFY_IS_EQUAL(0u, q.Size());
+ stolen.clear();
+ // Push back to overflow.
+ VERIFY_IS_EQUAL(0, q.PushBack(8));
+ VERIFY_IS_EQUAL(1u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(9));
+ VERIFY_IS_EQUAL(2u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(10));
+ VERIFY_IS_EQUAL(3u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushBack(11));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ VERIFY_IS_EQUAL(12, q.PushBack(12));
+ VERIFY_IS_EQUAL(4u, q.Size());
+ // Pop back in halves.
+ VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(2u, stolen.size());
+ VERIFY_IS_EQUAL(10, stolen[0]);
+ VERIFY_IS_EQUAL(11, stolen[1]);
+ VERIFY_IS_EQUAL(2u, q.Size());
+ stolen.clear();
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
+ VERIFY_IS_EQUAL(9, stolen[0]);
+ VERIFY_IS_EQUAL(1u, q.Size());
+ stolen.clear();
+ VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(1u, stolen.size());
+ VERIFY_IS_EQUAL(8, stolen[0]);
+ stolen.clear();
+ VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ // Empty again.
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ VERIFY_IS_EQUAL(0, q.PushFront(2));
+ VERIFY_IS_EQUAL(0, q.PushFront(3));
+ VERIFY_IS_EQUAL(1, q.PopBack());
+ VERIFY_IS_EQUAL(2, q.PopBack());
+ VERIFY_IS_EQUAL(3, q.PopBack());
+ VERIFY(q.Empty());
+ VERIFY_IS_EQUAL(0u, q.Size());
+}
+
+// test_empty_runqueue checks that the queue is never claimed to be empty when
+// it is in fact not. The emptiness property is a crucial part of the thread
+// pool blocking scheme, so we go to great effort to ensure it. We create a
+// queue with 1 element, then repeatedly push 1 element (either front or back
+// at random) and pop 1 element (either front or back at random). The queue
+// therefore always contains at least 1 element, but otherwise changes
+// chaotically. Another thread constantly checks that the queue is not
+// claimed to be empty.
+void test_empty_runqueue()
+{
+ RunQueue<int, 4> q;
+ q.PushFront(1);
+ std::atomic<bool> done(false);
+ std::thread mutator([&q, &done]() {
+ unsigned rnd = 0;
+ std::vector<int> stolen;
+ for (int i = 0; i < 1 << 18; i++) {
+ if (rand_reentrant(&rnd) % 2)
+ VERIFY_IS_EQUAL(0, q.PushFront(1));
+ else
+ VERIFY_IS_EQUAL(0, q.PushBack(1));
+ if (rand_reentrant(&rnd) % 2)
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ else {
+ for (;;) {
+ if (q.PopBackHalf(&stolen) == 1) {
+ stolen.clear();
+ break;
+ }
+ VERIFY_IS_EQUAL(0u, stolen.size());
+ }
+ }
+ }
+ done = true;
+ });
+ while (!done) {
+ VERIFY(!q.Empty());
+ int size = q.Size();
+ VERIFY_GE(size, 1);
+ VERIFY_LE(size, 2);
+ }
+ VERIFY_IS_EQUAL(1, q.PopFront());
+ mutator.join();
+}
+
+// test_stress_runqueue is a chaotic random test.
+// One thread (the owner) calls PushFront/PopFront; the other threads call
+// PushBack/PopBack. Ensure that we don't crash or deadlock and that all
+// sanity checks pass.
+void test_stress_runqueue()
+{
+ static const int kEvents = 1 << 18;
+ RunQueue<int, 8> q;
+ std::atomic<int> total(0);
+ std::vector<std::unique_ptr<std::thread>> threads;
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ int pushed = 1;
+ int popped = 1;
+ while (pushed < kEvents || popped < kEvents) {
+ if (pushed < kEvents) {
+ if (q.PushFront(pushed) == 0) {
+ sum += pushed;
+ pushed++;
+ }
+ }
+ if (popped < kEvents) {
+ int v = q.PopFront();
+ if (v != 0) {
+ sum -= v;
+ popped++;
+ }
+ }
+ }
+ total += sum;
+ }));
+ for (int i = 0; i < 2; i++) {
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ for (int j = 1; j < kEvents; j++) {
+ if (q.PushBack(j) == 0) {
+ sum += j;
+ continue;
+ }
+ EIGEN_THREAD_YIELD();
+ j--;
+ }
+ total += sum;
+ }));
+ threads.emplace_back(new std::thread([&q, &total]() {
+ int sum = 0;
+ std::vector<int> stolen;
+ for (int j = 1; j < kEvents;) {
+ if (q.PopBackHalf(&stolen) == 0) {
+ EIGEN_THREAD_YIELD();
+ continue;
+ }
+ while (stolen.size() && j < kEvents) {
+ int v = stolen.back();
+ stolen.pop_back();
+ VERIFY_IS_NOT_EQUAL(v, 0);
+ sum += v;
+ j++;
+ }
+ }
+ while (stolen.size()) {
+ int v = stolen.back();
+ stolen.pop_back();
+ VERIFY_IS_NOT_EQUAL(v, 0);
+ while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD();
+ }
+ total -= sum;
+ }));
+ }
+ for (size_t i = 0; i < threads.size(); i++) threads[i]->join();
+ VERIFY(q.Empty());
+ VERIFY(total.load() == 0);
+}
+
+void test_cxx11_runqueue()
+{
+ CALL_SUBTEST_1(test_basic_runqueue());
+ CALL_SUBTEST_2(test_empty_runqueue());
+ CALL_SUBTEST_3(test_stress_runqueue());
+}
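+
+// A minimal owner/thief sketch, assuming only the RunQueue API exercised
+// above (the victim queue is hypothetical):
+//
+//   RunQueue<int, 64> q;
+//   // owner thread: cheap LIFO access to its own queue
+//   if (int w = q.PopFront()) { /* run w */ }
+//   // thief thread: grab half of a victim's queue in one operation
+//   std::vector<int> stolen;
+//   victim.PopBackHalf(&stolen);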
diff --git a/unsupported/test/cxx11_tensor_argmax.cpp b/unsupported/test/cxx11_tensor_argmax.cpp
new file mode 100644
index 000000000..037767270
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_argmax.cpp
@@ -0,0 +1,294 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Eugene Brevdo <ebrevdo@google.com>
+// Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::array;
+using Eigen::Tuple;
+
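+// index_tuples() pairs every coefficient with its flat index; reducing these
+// (index, value) tuples with ArgMax/ArgMinTupleReducer is what argmax() and
+// argmin() are built on, and the tests below verify each piece in turn.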
+template <int DataLayout>
+static void test_simple_index_tuples()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
+ index_tuples = tensor.index_tuples();
+
+ for (DenseIndex n = 0; n < 2*3*5*7; ++n) {
+ const Tuple<DenseIndex, float>& v = index_tuples.coeff(n);
+ VERIFY_IS_EQUAL(v.first, n);
+ VERIFY_IS_EQUAL(v.second, tensor.coeff(n));
+ }
+}
+
+template <int DataLayout>
+static void test_index_tuples_dim()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
+
+ index_tuples = tensor.index_tuples();
+
+ for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) {
+    const Tuple<DenseIndex, float>& v = index_tuples(n);
+ VERIFY_IS_EQUAL(v.first, n);
+ VERIFY_IS_EQUAL(v.second, tensor(n));
+ }
+}
+
+template <int DataLayout>
+static void test_argmax_tuple_reducer()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
+ index_tuples = tensor.index_tuples();
+
+ Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
+ DimensionList<DenseIndex, 4> dims;
+ reduced = index_tuples.reduce(
+ dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
+
+ Tensor<float, 0, DataLayout> maxi = tensor.maximum();
+
+ VERIFY_IS_EQUAL(maxi(), reduced(0).second);
+
+ array<DenseIndex, 3> reduce_dims;
+ for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
+ Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
+ reduced_by_dims = index_tuples.reduce(
+ reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
+
+ Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
+
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(max_by_dims(l), reduced_by_dims(l).second);
+ }
+}
+
+template <int DataLayout>
+static void test_argmin_tuple_reducer()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
+ index_tuples = tensor.index_tuples();
+
+ Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
+ DimensionList<DenseIndex, 4> dims;
+ reduced = index_tuples.reduce(
+ dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
+
+ Tensor<float, 0, DataLayout> mini = tensor.minimum();
+
+ VERIFY_IS_EQUAL(mini(), reduced(0).second);
+
+ array<DenseIndex, 3> reduce_dims;
+ for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
+ Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
+ reduced_by_dims = index_tuples.reduce(
+ reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
+
+ Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);
+
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(min_by_dims(l), reduced_by_dims(l).second);
+ }
+}
+
+template <int DataLayout>
+static void test_simple_argmax()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+ tensor(0,0,0,0) = 10.0;
+
+ Tensor<DenseIndex, 0, DataLayout> tensor_argmax;
+
+ tensor_argmax = tensor.argmax();
+
+ VERIFY_IS_EQUAL(tensor_argmax(0), 0);
+
+ tensor(1,2,4,6) = 20.0;
+
+ tensor_argmax = tensor.argmax();
+
+ VERIFY_IS_EQUAL(tensor_argmax(0), 2*3*5*7 - 1);
+}
+
+template <int DataLayout>
+static void test_simple_argmin()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+ tensor(0,0,0,0) = -10.0;
+
+ Tensor<DenseIndex, 0, DataLayout> tensor_argmin;
+
+ tensor_argmin = tensor.argmin();
+
+ VERIFY_IS_EQUAL(tensor_argmin(0), 0);
+
+ tensor(1,2,4,6) = -20.0;
+
+ tensor_argmin = tensor.argmin();
+
+ VERIFY_IS_EQUAL(tensor_argmin(0), 2*3*5*7 - 1);
+}
+
+template <int DataLayout>
+static void test_argmax_dim()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+
+ for (int dim = 0; dim < 4; ++dim) {
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ Tensor<DenseIndex, 3, DataLayout> tensor_argmax;
+ array<DenseIndex, 4> ix;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != 0) continue;
+ // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0
+ tensor(ix) = 10.0;
+ }
+ }
+ }
+ }
+
+ tensor_argmax = tensor.argmax(dim);
+
+ VERIFY_IS_EQUAL(tensor_argmax.size(),
+ ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
+ for (ptrdiff_t n = 0; n < tensor_argmax.size(); ++n) {
+ // Expect max to be in the first index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_argmax.data()[n], 0);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != tensor.dimension(dim) - 1) continue;
+ // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
+ tensor(ix) = 20.0;
+ }
+ }
+ }
+ }
+
+ tensor_argmax = tensor.argmax(dim);
+
+ VERIFY_IS_EQUAL(tensor_argmax.size(),
+ ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
+ for (ptrdiff_t n = 0; n < tensor_argmax.size(); ++n) {
+ // Expect max to be in the last index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_argmax.data()[n], tensor.dimension(dim) - 1);
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_argmin_dim()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+
+ for (int dim = 0; dim < 4; ++dim) {
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ Tensor<DenseIndex, 3, DataLayout> tensor_argmin;
+ array<DenseIndex, 4> ix;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != 0) continue;
+ // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
+ tensor(ix) = -10.0;
+ }
+ }
+ }
+ }
+
+ tensor_argmin = tensor.argmin(dim);
+
+ VERIFY_IS_EQUAL(tensor_argmin.size(),
+ ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
+ for (ptrdiff_t n = 0; n < tensor_argmin.size(); ++n) {
+ // Expect min to be in the first index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_argmin.data()[n], 0);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != tensor.dimension(dim) - 1) continue;
+ // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
+ tensor(ix) = -20.0;
+ }
+ }
+ }
+ }
+
+ tensor_argmin = tensor.argmin(dim);
+
+ VERIFY_IS_EQUAL(tensor_argmin.size(),
+ ptrdiff_t(2*3*5*7 / tensor.dimension(dim)));
+ for (ptrdiff_t n = 0; n < tensor_argmin.size(); ++n) {
+ // Expect min to be in the last index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_argmin.data()[n], tensor.dimension(dim) - 1);
+ }
+ }
+}
+
+void test_cxx11_tensor_argmax()
+{
+ CALL_SUBTEST(test_simple_index_tuples<RowMajor>());
+ CALL_SUBTEST(test_simple_index_tuples<ColMajor>());
+ CALL_SUBTEST(test_index_tuples_dim<RowMajor>());
+ CALL_SUBTEST(test_index_tuples_dim<ColMajor>());
+ CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>());
+ CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>());
+ CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>());
+ CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>());
+ CALL_SUBTEST(test_simple_argmax<RowMajor>());
+ CALL_SUBTEST(test_simple_argmax<ColMajor>());
+ CALL_SUBTEST(test_simple_argmin<RowMajor>());
+ CALL_SUBTEST(test_simple_argmin<ColMajor>());
+ CALL_SUBTEST(test_argmax_dim<RowMajor>());
+ CALL_SUBTEST(test_argmax_dim<ColMajor>());
+ CALL_SUBTEST(test_argmin_dim<RowMajor>());
+ CALL_SUBTEST(test_argmin_dim<ColMajor>());
+}
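+
+// A minimal usage sketch of the reductions verified above (shapes are
+// illustrative): argmax(d) reduces dimension d, returning for each remaining
+// position the winning index along d; argmax() returns the flat index of the
+// overall maximum:
+//
+//   Eigen::Tensor<float, 2> t(3, 4);
+//   t.setRandom();
+//   Eigen::Tensor<Eigen::DenseIndex, 1> per_col = t.argmax(0);  // 4 indices
+//   Eigen::Tensor<Eigen::DenseIndex, 0> flat = t.argmax();      // rank 0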
diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu
new file mode 100644
index 000000000..653443dc5
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu
@@ -0,0 +1,254 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_argmax_cuda
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int Layout>
+void test_cuda_simple_argmax()
+{
+ Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97));
+ Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1));
+ Tensor<DenseIndex, 1, Layout> out_min(Eigen::array<DenseIndex, 1>(1));
+ in.setRandom();
+ in *= in.constant(100.0);
+ in(0, 0, 0) = -1000.0;
+ in(71, 52, 96) = 1000.0;
+
+ std::size_t in_bytes = in.size() * sizeof(double);
+ std::size_t out_bytes = out_max.size() * sizeof(DenseIndex);
+
+ double* d_in;
+ DenseIndex* d_out_max;
+ DenseIndex* d_out_min;
+ cudaMalloc((void**)(&d_in), in_bytes);
+ cudaMalloc((void**)(&d_out_max), out_bytes);
+ cudaMalloc((void**)(&d_out_min), out_bytes);
+
+ cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_max(d_out_max, Eigen::array<DenseIndex, 1>(1));
+ Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_min(d_out_min, Eigen::array<DenseIndex, 1>(1));
+
+ gpu_out_max.device(gpu_device) = gpu_in.argmax();
+ gpu_out_min.device(gpu_device) = gpu_in.argmin();
+
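+  // Note: the async copies and stream synchronization below are wrapped in
+  // assert(), so they rely on the test build keeping assertions enabled
+  // (NDEBUG not defined); otherwise the calls would be compiled out.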
+ assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1);
+ VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0);
+
+ cudaFree(d_in);
+ cudaFree(d_out_max);
+ cudaFree(d_out_min);
+}
+
+template <int DataLayout>
+void test_cuda_argmax_dim()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ std::vector<int> dims;
+ dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7);
+
+ for (int dim = 0; dim < 4; ++dim) {
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ array<DenseIndex, 3> out_shape;
+ for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d+1];
+
+ Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape);
+
+ array<DenseIndex, 4> ix;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != 0) continue;
+ // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0
+ tensor(ix) = 10.0;
+ }
+ }
+ }
+ }
+
+ std::size_t in_bytes = tensor.size() * sizeof(float);
+ std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
+
+ float* d_in;
+ DenseIndex* d_out;
+ cudaMalloc((void**)(&d_in), in_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
+ Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape);
+
+ gpu_out.device(gpu_device) = gpu_in.argmax(dim);
+
+ assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ VERIFY_IS_EQUAL(tensor_arg.size(),
+ size_t(2*3*5*7 / tensor.dimension(dim)));
+
+ for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
+ // Expect max to be in the first index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != tensor.dimension(dim) - 1) continue;
+ // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
+ tensor(ix) = 20.0;
+ }
+ }
+ }
+ }
+
+ cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+
+ gpu_out.device(gpu_device) = gpu_in.argmax(dim);
+
+ assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
+ // Expect max to be in the last index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+ }
+}
+
+template <int DataLayout>
+void test_cuda_argmin_dim()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ std::vector<int> dims;
+ dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7);
+
+ for (int dim = 0; dim < 4; ++dim) {
+ tensor.setRandom();
+ tensor = (tensor + tensor.constant(0.5)).log();
+
+ array<DenseIndex, 3> out_shape;
+ for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d+1];
+
+ Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape);
+
+ array<DenseIndex, 4> ix;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != 0) continue;
+          // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
+ tensor(ix) = -10.0;
+ }
+ }
+ }
+ }
+
+ std::size_t in_bytes = tensor.size() * sizeof(float);
+ std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
+
+ float* d_in;
+ DenseIndex* d_out;
+ cudaMalloc((void**)(&d_in), in_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
+ Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape);
+
+ gpu_out.device(gpu_device) = gpu_in.argmin(dim);
+
+ assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+    VERIFY_IS_EQUAL(tensor_arg.size(),
+                    size_t(2*3*5*7 / tensor.dimension(dim)));
+
+ for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
+ // Expect min to be in the first index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+ if (ix[dim] != tensor.dimension(dim) - 1) continue;
+          // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
+ tensor(ix) = -20.0;
+ }
+ }
+ }
+ }
+
+ cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+
+ gpu_out.device(gpu_device) = gpu_in.argmin(dim);
+
+ assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
+    // Expect min to be in the last index of the reduced dimension
+ VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+ }
+}
+
+void test_cxx11_tensor_argmax_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_simple_argmax<RowMajor>());
+ CALL_SUBTEST_1(test_cuda_simple_argmax<ColMajor>());
+ CALL_SUBTEST_2(test_cuda_argmax_dim<RowMajor>());
+ CALL_SUBTEST_2(test_cuda_argmax_dim<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_argmin_dim<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_argmin_dim<ColMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_assign.cpp b/unsupported/test/cxx11_tensor_assign.cpp
new file mode 100644
index 000000000..8fe85d83c
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_assign.cpp
@@ -0,0 +1,370 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_1d()
+{
+ Tensor<int, 1> vec1(6);
+ Tensor<int, 1, RowMajor> vec2(6);
+ vec1(0) = 4; vec2(0) = 0;
+ vec1(1) = 8; vec2(1) = 1;
+ vec1(2) = 15; vec2(2) = 2;
+ vec1(3) = 16; vec2(3) = 3;
+ vec1(4) = 23; vec2(4) = 4;
+ vec1(5) = 42; vec2(5) = 5;
+
+ int col_major[6];
+ int row_major[6];
+ memset(col_major, 0, 6*sizeof(int));
+ memset(row_major, 0, 6*sizeof(int));
+ TensorMap<Tensor<int, 1> > vec3(col_major, 6);
+ TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
+
+ vec3 = vec1;
+ vec4 = vec2;
+
+ VERIFY_IS_EQUAL(vec3(0), 4);
+ VERIFY_IS_EQUAL(vec3(1), 8);
+ VERIFY_IS_EQUAL(vec3(2), 15);
+ VERIFY_IS_EQUAL(vec3(3), 16);
+ VERIFY_IS_EQUAL(vec3(4), 23);
+ VERIFY_IS_EQUAL(vec3(5), 42);
+
+ VERIFY_IS_EQUAL(vec4(0), 0);
+ VERIFY_IS_EQUAL(vec4(1), 1);
+ VERIFY_IS_EQUAL(vec4(2), 2);
+ VERIFY_IS_EQUAL(vec4(3), 3);
+ VERIFY_IS_EQUAL(vec4(4), 4);
+ VERIFY_IS_EQUAL(vec4(5), 5);
+
+ vec1.setZero();
+ vec2.setZero();
+ vec1 = vec3;
+ vec2 = vec4;
+
+ VERIFY_IS_EQUAL(vec1(0), 4);
+ VERIFY_IS_EQUAL(vec1(1), 8);
+ VERIFY_IS_EQUAL(vec1(2), 15);
+ VERIFY_IS_EQUAL(vec1(3), 16);
+ VERIFY_IS_EQUAL(vec1(4), 23);
+ VERIFY_IS_EQUAL(vec1(5), 42);
+
+ VERIFY_IS_EQUAL(vec2(0), 0);
+ VERIFY_IS_EQUAL(vec2(1), 1);
+ VERIFY_IS_EQUAL(vec2(2), 2);
+ VERIFY_IS_EQUAL(vec2(3), 3);
+ VERIFY_IS_EQUAL(vec2(4), 4);
+ VERIFY_IS_EQUAL(vec2(5), 5);
+}
+
+static void test_2d()
+{
+ Tensor<int, 2> mat1(2,3);
+ Tensor<int, 2, RowMajor> mat2(2,3);
+
+ mat1(0,0) = 0;
+ mat1(0,1) = 1;
+ mat1(0,2) = 2;
+ mat1(1,0) = 3;
+ mat1(1,1) = 4;
+ mat1(1,2) = 5;
+
+ mat2(0,0) = 0;
+ mat2(0,1) = 1;
+ mat2(0,2) = 2;
+ mat2(1,0) = 3;
+ mat2(1,1) = 4;
+ mat2(1,2) = 5;
+
+ int col_major[6];
+ int row_major[6];
+ memset(col_major, 0, 6*sizeof(int));
+ memset(row_major, 0, 6*sizeof(int));
+  TensorMap<Tensor<int, 2> > mat3(col_major, 2, 3);
+  TensorMap<Tensor<int, 2, RowMajor> > mat4(row_major, 2, 3);
+
+ mat3 = mat1;
+ mat4 = mat2;
+
+ VERIFY_IS_EQUAL(mat3(0,0), 0);
+ VERIFY_IS_EQUAL(mat3(0,1), 1);
+ VERIFY_IS_EQUAL(mat3(0,2), 2);
+ VERIFY_IS_EQUAL(mat3(1,0), 3);
+ VERIFY_IS_EQUAL(mat3(1,1), 4);
+ VERIFY_IS_EQUAL(mat3(1,2), 5);
+
+ VERIFY_IS_EQUAL(mat4(0,0), 0);
+ VERIFY_IS_EQUAL(mat4(0,1), 1);
+ VERIFY_IS_EQUAL(mat4(0,2), 2);
+ VERIFY_IS_EQUAL(mat4(1,0), 3);
+ VERIFY_IS_EQUAL(mat4(1,1), 4);
+ VERIFY_IS_EQUAL(mat4(1,2), 5);
+
+ mat1.setZero();
+ mat2.setZero();
+ mat1 = mat3;
+ mat2 = mat4;
+
+ VERIFY_IS_EQUAL(mat1(0,0), 0);
+ VERIFY_IS_EQUAL(mat1(0,1), 1);
+ VERIFY_IS_EQUAL(mat1(0,2), 2);
+ VERIFY_IS_EQUAL(mat1(1,0), 3);
+ VERIFY_IS_EQUAL(mat1(1,1), 4);
+ VERIFY_IS_EQUAL(mat1(1,2), 5);
+
+ VERIFY_IS_EQUAL(mat2(0,0), 0);
+ VERIFY_IS_EQUAL(mat2(0,1), 1);
+ VERIFY_IS_EQUAL(mat2(0,2), 2);
+ VERIFY_IS_EQUAL(mat2(1,0), 3);
+ VERIFY_IS_EQUAL(mat2(1,1), 4);
+ VERIFY_IS_EQUAL(mat2(1,2), 5);
+}
+
+static void test_3d()
+{
+ Tensor<int, 3> mat1(2,3,7);
+ Tensor<int, 3, RowMajor> mat2(2,3,7);
+
+ int val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ mat2(i,j,k) = val;
+ val++;
+ }
+ }
+ }
+
+ int col_major[2*3*7];
+ int row_major[2*3*7];
+ memset(col_major, 0, 2*3*7*sizeof(int));
+ memset(row_major, 0, 2*3*7*sizeof(int));
+ TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
+ TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
+
+ mat3 = mat1;
+ mat4 = mat2;
+
+ val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(mat3(i,j,k), val);
+ VERIFY_IS_EQUAL(mat4(i,j,k), val);
+ val++;
+ }
+ }
+ }
+
+ mat1.setZero();
+ mat2.setZero();
+ mat1 = mat3;
+ mat2 = mat4;
+
+ val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(mat1(i,j,k), val);
+ VERIFY_IS_EQUAL(mat2(i,j,k), val);
+ val++;
+ }
+ }
+ }
+}
+
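+// Assigning between tensors of identical type must reuse the destination's
+// existing storage: the data() pointers are captured before the assignment
+// and checked afterwards to verify that no reallocation took place.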
+static void test_same_type()
+{
+ Tensor<int, 1> orig_tensor(5);
+ Tensor<int, 1> dest_tensor(5);
+ orig_tensor.setRandom();
+ dest_tensor.setRandom();
+ int* orig_data = orig_tensor.data();
+ int* dest_data = dest_tensor.data();
+ dest_tensor = orig_tensor;
+ VERIFY_IS_EQUAL(orig_tensor.data(), orig_data);
+ VERIFY_IS_EQUAL(dest_tensor.data(), dest_data);
+ for (int i = 0; i < 5; ++i) {
+ VERIFY_IS_EQUAL(dest_tensor(i), orig_tensor(i));
+ }
+
+ TensorFixedSize<int, Sizes<5> > orig_array;
+ TensorFixedSize<int, Sizes<5> > dest_array;
+ orig_array.setRandom();
+ dest_array.setRandom();
+ orig_data = orig_array.data();
+ dest_data = dest_array.data();
+ dest_array = orig_array;
+ VERIFY_IS_EQUAL(orig_array.data(), orig_data);
+ VERIFY_IS_EQUAL(dest_array.data(), dest_data);
+ for (int i = 0; i < 5; ++i) {
+ VERIFY_IS_EQUAL(dest_array(i), orig_array(i));
+ }
+
+ int orig[5] = {1, 2, 3, 4, 5};
+ int dest[5] = {6, 7, 8, 9, 10};
+ TensorMap<Tensor<int, 1> > orig_map(orig, 5);
+ TensorMap<Tensor<int, 1> > dest_map(dest, 5);
+ orig_data = orig_map.data();
+ dest_data = dest_map.data();
+ dest_map = orig_map;
+ VERIFY_IS_EQUAL(orig_map.data(), orig_data);
+ VERIFY_IS_EQUAL(dest_map.data(), dest_data);
+ for (int i = 0; i < 5; ++i) {
+ VERIFY_IS_EQUAL(dest[i], i+1);
+ }
+}
+
+static void test_auto_resize()
+{
+ Tensor<int, 1> tensor1;
+ Tensor<int, 1> tensor2(3);
+ Tensor<int, 1> tensor3(5);
+ Tensor<int, 1> tensor4(7);
+
+ Tensor<int, 1> new_tensor(5);
+ new_tensor.setRandom();
+
+ tensor1 = tensor2 = tensor3 = tensor4 = new_tensor;
+
+ VERIFY_IS_EQUAL(tensor1.dimension(0), new_tensor.dimension(0));
+ VERIFY_IS_EQUAL(tensor2.dimension(0), new_tensor.dimension(0));
+ VERIFY_IS_EQUAL(tensor3.dimension(0), new_tensor.dimension(0));
+ VERIFY_IS_EQUAL(tensor4.dimension(0), new_tensor.dimension(0));
+ for (int i = 0; i < new_tensor.dimension(0); ++i) {
+ VERIFY_IS_EQUAL(tensor1(i), new_tensor(i));
+ VERIFY_IS_EQUAL(tensor2(i), new_tensor(i));
+ VERIFY_IS_EQUAL(tensor3(i), new_tensor(i));
+ VERIFY_IS_EQUAL(tensor4(i), new_tensor(i));
+ }
+}
+
+
+static void test_compound_assign()
+{
+ Tensor<int, 1> start_tensor(10);
+ Tensor<int, 1> offset_tensor(10);
+ start_tensor.setRandom();
+ offset_tensor.setRandom();
+
+ Tensor<int, 1> tensor = start_tensor;
+ tensor += offset_tensor;
+ for (int i = 0; i < 10; ++i) {
+ VERIFY_IS_EQUAL(tensor(i), start_tensor(i) + offset_tensor(i));
+ }
+
+ tensor = start_tensor;
+ tensor -= offset_tensor;
+ for (int i = 0; i < 10; ++i) {
+ VERIFY_IS_EQUAL(tensor(i), start_tensor(i) - offset_tensor(i));
+ }
+
+ tensor = start_tensor;
+ tensor *= offset_tensor;
+ for (int i = 0; i < 10; ++i) {
+ VERIFY_IS_EQUAL(tensor(i), start_tensor(i) * offset_tensor(i));
+ }
+
+ tensor = start_tensor;
+ tensor /= offset_tensor;
+ for (int i = 0; i < 10; ++i) {
+ VERIFY_IS_EQUAL(tensor(i), start_tensor(i) / offset_tensor(i));
+ }
+}
+
+static void test_std_initializers_tensor() {
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+ Tensor<int, 1> a(3);
+ a.setValues({0, 1, 2});
+ VERIFY_IS_EQUAL(a(0), 0);
+ VERIFY_IS_EQUAL(a(1), 1);
+ VERIFY_IS_EQUAL(a(2), 2);
+
+ // It fills the top-left slice.
+ a.setValues({10, 20});
+ VERIFY_IS_EQUAL(a(0), 10);
+ VERIFY_IS_EQUAL(a(1), 20);
+ VERIFY_IS_EQUAL(a(2), 2);
+
+ // Chaining.
+ Tensor<int, 1> a2(3);
+ a2 = a.setValues({100, 200, 300});
+ VERIFY_IS_EQUAL(a(0), 100);
+ VERIFY_IS_EQUAL(a(1), 200);
+ VERIFY_IS_EQUAL(a(2), 300);
+ VERIFY_IS_EQUAL(a2(0), 100);
+ VERIFY_IS_EQUAL(a2(1), 200);
+ VERIFY_IS_EQUAL(a2(2), 300);
+
+ Tensor<int, 2> b(2, 3);
+ b.setValues({{0, 1, 2}, {3, 4, 5}});
+ VERIFY_IS_EQUAL(b(0, 0), 0);
+ VERIFY_IS_EQUAL(b(0, 1), 1);
+ VERIFY_IS_EQUAL(b(0, 2), 2);
+ VERIFY_IS_EQUAL(b(1, 0), 3);
+ VERIFY_IS_EQUAL(b(1, 1), 4);
+ VERIFY_IS_EQUAL(b(1, 2), 5);
+
+ // It fills the top-left slice.
+ b.setValues({{10, 20}, {30}});
+ VERIFY_IS_EQUAL(b(0, 0), 10);
+ VERIFY_IS_EQUAL(b(0, 1), 20);
+ VERIFY_IS_EQUAL(b(0, 2), 2);
+ VERIFY_IS_EQUAL(b(1, 0), 30);
+ VERIFY_IS_EQUAL(b(1, 1), 4);
+ VERIFY_IS_EQUAL(b(1, 2), 5);
+
+ Eigen::Tensor<int, 3> c(3, 2, 4);
+ c.setValues({{{0, 1, 2, 3}, {4, 5, 6, 7}},
+ {{10, 11, 12, 13}, {14, 15, 16, 17}},
+ {{20, 21, 22, 23}, {24, 25, 26, 27}}});
+ VERIFY_IS_EQUAL(c(0, 0, 0), 0);
+ VERIFY_IS_EQUAL(c(0, 0, 1), 1);
+ VERIFY_IS_EQUAL(c(0, 0, 2), 2);
+ VERIFY_IS_EQUAL(c(0, 0, 3), 3);
+ VERIFY_IS_EQUAL(c(0, 1, 0), 4);
+ VERIFY_IS_EQUAL(c(0, 1, 1), 5);
+ VERIFY_IS_EQUAL(c(0, 1, 2), 6);
+ VERIFY_IS_EQUAL(c(0, 1, 3), 7);
+ VERIFY_IS_EQUAL(c(1, 0, 0), 10);
+ VERIFY_IS_EQUAL(c(1, 0, 1), 11);
+ VERIFY_IS_EQUAL(c(1, 0, 2), 12);
+ VERIFY_IS_EQUAL(c(1, 0, 3), 13);
+ VERIFY_IS_EQUAL(c(1, 1, 0), 14);
+ VERIFY_IS_EQUAL(c(1, 1, 1), 15);
+ VERIFY_IS_EQUAL(c(1, 1, 2), 16);
+ VERIFY_IS_EQUAL(c(1, 1, 3), 17);
+ VERIFY_IS_EQUAL(c(2, 0, 0), 20);
+ VERIFY_IS_EQUAL(c(2, 0, 1), 21);
+ VERIFY_IS_EQUAL(c(2, 0, 2), 22);
+ VERIFY_IS_EQUAL(c(2, 0, 3), 23);
+ VERIFY_IS_EQUAL(c(2, 1, 0), 24);
+ VERIFY_IS_EQUAL(c(2, 1, 1), 25);
+ VERIFY_IS_EQUAL(c(2, 1, 2), 26);
+ VERIFY_IS_EQUAL(c(2, 1, 3), 27);
+#endif // EIGEN_HAS_VARIADIC_TEMPLATES
+}
+
+void test_cxx11_tensor_assign()
+{
+ CALL_SUBTEST(test_1d());
+ CALL_SUBTEST(test_2d());
+ CALL_SUBTEST(test_3d());
+ CALL_SUBTEST(test_same_type());
+ CALL_SUBTEST(test_auto_resize());
+ CALL_SUBTEST(test_compound_assign());
+ CALL_SUBTEST(test_std_initializers_tensor());
+}
diff --git a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
new file mode 100644
index 000000000..7201bfe37
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
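+// The SYCL path mirrors the CPU broadcast test: copy the input to the device,
+// evaluate the broadcast expression there, copy the result back, and compare
+// it element-wise against the host input.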
+static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
+
+ // BROADCAST test:
+ array<int, 4> in_range = {{2, 3, 5, 7}};
+ array<int, 4> broadcasts = {{2, 3, 1, 4}};
+ array<int, 4> out_range; // = in_range * broadcasts
+ for (size_t i = 0; i < out_range.size(); ++i)
+ out_range[i] = in_range[i] * broadcasts[i];
+
+ Tensor<float, 4> input(in_range);
+ Tensor<float, 4> out(out_range);
+
+ for (size_t i = 0; i < in_range.size(); ++i)
+ VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
+
+
+ for (int i = 0; i < input.size(); ++i)
+ input(i) = static_cast<float>(i);
+
+ float * gpu_in_data = static_cast<float*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
+
+ TensorMap<Tensor<float, 4>> gpu_in(gpu_in_data, in_range);
+ TensorMap<Tensor<float, 4>> gpu_out(gpu_out_data, out_range);
+ sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(float));
+ gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 28; ++l) {
+ VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
+ }
+ }
+ }
+ }
+ printf("Broadcast Test Passed\n");
+ sycl_device.deallocate(gpu_in_data);
+ sycl_device.deallocate(gpu_out_data);
+}
+
+void test_cxx11_tensor_broadcast_sycl() {
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_broadcast_sycl(sycl_device));
+}
diff --git a/unsupported/test/cxx11_tensor_broadcasting.cpp b/unsupported/test/cxx11_tensor_broadcasting.cpp
new file mode 100644
index 000000000..5c0ea5889
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_broadcasting.cpp
@@ -0,0 +1,194 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
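+// broadcast(b) tiles the tensor b[d] times along each dimension d: an input
+// of shape (2,3,5,7) with factors (2,3,1,4) yields shape (4,9,5,28), and
+// output(i,j,k,l) == input(i%2, j%3, k%5, l%7).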
+template <int DataLayout>
+static void test_simple_broadcasting()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> broadcasts;
+ broadcasts[0] = 1;
+ broadcasts[1] = 1;
+ broadcasts[2] = 1;
+ broadcasts[3] = 1;
+
+ Tensor<float, 4, DataLayout> no_broadcast;
+ no_broadcast = tensor.broadcast(broadcasts);
+
+ VERIFY_IS_EQUAL(no_broadcast.dimension(0), 2);
+ VERIFY_IS_EQUAL(no_broadcast.dimension(1), 3);
+ VERIFY_IS_EQUAL(no_broadcast.dimension(2), 5);
+ VERIFY_IS_EQUAL(no_broadcast.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), no_broadcast(i,j,k,l));
+ }
+ }
+ }
+ }
+
+ broadcasts[0] = 2;
+ broadcasts[1] = 3;
+ broadcasts[2] = 1;
+ broadcasts[3] = 4;
+ Tensor<float, 4, DataLayout> broadcast;
+ broadcast = tensor.broadcast(broadcasts);
+
+ VERIFY_IS_EQUAL(broadcast.dimension(0), 4);
+ VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
+ VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
+ VERIFY_IS_EQUAL(broadcast.dimension(3), 28);
+
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 28; ++l) {
+ VERIFY_IS_EQUAL(tensor(i%2,j%3,k%5,l%7), broadcast(i,j,k,l));
+ }
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_vectorized_broadcasting()
+{
+ Tensor<float, 3, DataLayout> tensor(8,3,5);
+ tensor.setRandom();
+ array<ptrdiff_t, 3> broadcasts;
+ broadcasts[0] = 2;
+ broadcasts[1] = 3;
+ broadcasts[2] = 4;
+
+ Tensor<float, 3, DataLayout> broadcast;
+ broadcast = tensor.broadcast(broadcasts);
+
+ VERIFY_IS_EQUAL(broadcast.dimension(0), 16);
+ VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
+ VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
+
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 20; ++k) {
+ VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k));
+ }
+ }
+ }
+
+ tensor.resize(11,3,5);
+ tensor.setRandom();
+ broadcast = tensor.broadcast(broadcasts);
+
+ VERIFY_IS_EQUAL(broadcast.dimension(0), 22);
+ VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
+ VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
+
+ for (int i = 0; i < 22; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 20; ++k) {
+ VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k));
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_static_broadcasting()
+{
+ Tensor<float, 3, DataLayout> tensor(8,3,5);
+ tensor.setRandom();
+
+#if EIGEN_HAS_CONSTEXPR
+ Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts;
+#else
+ Eigen::array<int, 3> broadcasts;
+ broadcasts[0] = 2;
+ broadcasts[1] = 3;
+ broadcasts[2] = 4;
+#endif
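+  // With constexpr support the factors are encoded in the type via
+  // Eigen::type2index, so the evaluator can resolve them at compile time;
+  // the array fallback stores the same factors at runtime.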
+
+ Tensor<float, 3, DataLayout> broadcast;
+ broadcast = tensor.broadcast(broadcasts);
+
+ VERIFY_IS_EQUAL(broadcast.dimension(0), 16);
+ VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
+ VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
+
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 20; ++k) {
+ VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k));
+ }
+ }
+ }
+
+ tensor.resize(11,3,5);
+ tensor.setRandom();
+ broadcast = tensor.broadcast(broadcasts);
+
+ VERIFY_IS_EQUAL(broadcast.dimension(0), 22);
+ VERIFY_IS_EQUAL(broadcast.dimension(1), 9);
+ VERIFY_IS_EQUAL(broadcast.dimension(2), 20);
+
+ for (int i = 0; i < 22; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 20; ++k) {
+ VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k));
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_fixed_size_broadcasting()
+{
+ // Need to add a [] operator to the Size class for this to work
+#if 0
+ Tensor<float, 1, DataLayout> t1(10);
+ t1.setRandom();
+ TensorFixedSize<float, Sizes<1>, DataLayout> t2;
+ t2 = t2.constant(20.0f);
+
+ Tensor<float, 1, DataLayout> t3 = t1 + t2.broadcast(Eigen::array<int, 1>{{10}});
+ for (int i = 0; i < 10; ++i) {
+ VERIFY_IS_APPROX(t3(i), t1(i) + t2(0));
+ }
+
+ TensorMap<TensorFixedSize<float, Sizes<1>, DataLayout> > t4(t2.data(), {{1}});
+ Tensor<float, 1, DataLayout> t5 = t1 + t4.broadcast(Eigen::array<int, 1>{{10}});
+ for (int i = 0; i < 10; ++i) {
+ VERIFY_IS_APPROX(t5(i), t1(i) + t2(0));
+ }
+#endif
+}
+
+
+void test_cxx11_tensor_broadcasting()
+{
+ CALL_SUBTEST(test_simple_broadcasting<ColMajor>());
+ CALL_SUBTEST(test_simple_broadcasting<RowMajor>());
+ CALL_SUBTEST(test_vectorized_broadcasting<ColMajor>());
+ CALL_SUBTEST(test_vectorized_broadcasting<RowMajor>());
+ CALL_SUBTEST(test_static_broadcasting<ColMajor>());
+ CALL_SUBTEST(test_static_broadcasting<RowMajor>());
+ CALL_SUBTEST(test_fixed_size_broadcasting<ColMajor>());
+ CALL_SUBTEST(test_fixed_size_broadcasting<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
new file mode 100644
index 000000000..88c233994
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -0,0 +1,82 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_cast_float16_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cuda_conversion() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ Tensor<float, 1> floats(num_elem);
+ floats.setRandom();
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half(
+ d_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
+ d_conv, num_elem);
+
+ gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));
+
+ gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();
+ gpu_conv.device(gpu_device) = gpu_half.cast<float>();
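+  // Round-trip on the device: cast float to half, then back to float.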
+
+ Tensor<float, 1> initial(num_elem);
+ Tensor<float, 1> final(num_elem);
+ gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(initial(i), final(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_half);
+ gpu_device.deallocate(d_conv);
+}
+
+
+void test_fallback_conversion() {
+ int num_elem = 101;
+ Tensor<float, 1> floats(num_elem);
+ floats.setRandom();
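+  // The same round-trip on the host, exercising the non-GPU cast path.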
+
+ Eigen::Tensor<Eigen::half, 1> halfs = floats.cast<Eigen::half>();
+ Eigen::Tensor<float, 1> conv = halfs.cast<float>();
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(floats(i), conv(i));
+ }
+}
+
+
+void test_cxx11_tensor_cast_float16_cuda()
+{
+ CALL_SUBTEST(test_cuda_conversion());
+ CALL_SUBTEST(test_fallback_conversion());
+}
diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp
new file mode 100644
index 000000000..3c6d0d2ff
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_casts.cpp
@@ -0,0 +1,115 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::array;
+
+static void test_simple_cast()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
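+  // random() yields values in [-1, 1], so scaling by 100 keeps them
+  // representable as char.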
+ Tensor<char, 2> chartensor(20,30);
+ chartensor.setRandom();
+ Tensor<std::complex<float>, 2> cplextensor(20,30);
+ cplextensor.setRandom();
+
+ chartensor = ftensor.cast<char>();
+ cplextensor = ftensor.cast<std::complex<float> >();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j)));
+ VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j)));
+ }
+ }
+}
+
+
+static void test_vectorized_cast()
+{
+ Tensor<int, 2> itensor(20,30);
+ itensor = itensor.random() / 1000;
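+  // Dividing keeps the magnitudes below 2^24, so every value is exactly
+  // representable as float.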
+ Tensor<float, 2> ftensor(20,30);
+ ftensor.setRandom();
+ Tensor<double, 2> dtensor(20,30);
+ dtensor.setRandom();
+
+ ftensor = itensor.cast<float>();
+ dtensor = itensor.cast<double>();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j)));
+ VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j)));
+ }
+ }
+}
+
+
+static void test_float_to_int_cast()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 1000.0f;
+ Tensor<double, 2> dtensor(20,30);
+ dtensor = dtensor.random() * 1000.0;
+
+ Tensor<int, 2> i1tensor = ftensor.cast<int>();
+ Tensor<int, 2> i2tensor = dtensor.cast<int>();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(i1tensor(i,j), static_cast<int>(ftensor(i,j)));
+ VERIFY_IS_EQUAL(i2tensor(i,j), static_cast<int>(dtensor(i,j)));
+ }
+ }
+}
+
+
+static void test_big_to_small_type_cast()
+{
+ Tensor<double, 2> dtensor(20, 30);
+ dtensor.setRandom();
+ Tensor<float, 2> ftensor(20, 30);
+ ftensor = dtensor.cast<float>();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
+ }
+ }
+}
+
+
+static void test_small_to_big_type_cast()
+{
+ Tensor<float, 2> ftensor(20, 30);
+ ftensor.setRandom();
+ Tensor<double, 2> dtensor(20, 30);
+ dtensor = ftensor.cast<double>();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j)));
+ }
+ }
+}
+
+
+void test_cxx11_tensor_casts()
+{
+ CALL_SUBTEST(test_simple_cast());
+ CALL_SUBTEST(test_vectorized_cast());
+ CALL_SUBTEST(test_float_to_int_cast());
+ CALL_SUBTEST(test_big_to_small_type_cast());
+ CALL_SUBTEST(test_small_to_big_type_cast());
+}
diff --git a/unsupported/test/cxx11_tensor_chipping.cpp b/unsupported/test/cxx11_tensor_chipping.cpp
new file mode 100644
index 000000000..1832dec8b
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_chipping.cpp
@@ -0,0 +1,425 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<int DataLayout>
+static void test_simple_chip()
+{
+ Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+ tensor.setRandom();
+
+ Tensor<float, 4, DataLayout> chip1;
+ chip1 = tensor.template chip<0>(1);
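+  // chip<Dim>(offset) fixes index `offset` along dimension `Dim`, yielding
+  // a rank-(N-1) slice; e.g. for a (2,3,5,7,11) tensor, chip<0>(1) has
+  // shape (3,5,7,11) with chip1(i,j,k,l) == tensor(1,i,j,k,l).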
+
+ VERIFY_IS_EQUAL(chip1.dimension(0), 3);
+ VERIFY_IS_EQUAL(chip1.dimension(1), 5);
+ VERIFY_IS_EQUAL(chip1.dimension(2), 7);
+ VERIFY_IS_EQUAL(chip1.dimension(3), 11);
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip2 = tensor.template chip<1>(1);
+ VERIFY_IS_EQUAL(chip2.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip2.dimension(1), 5);
+ VERIFY_IS_EQUAL(chip2.dimension(2), 7);
+ VERIFY_IS_EQUAL(chip2.dimension(3), 11);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip3 = tensor.template chip<2>(2);
+ VERIFY_IS_EQUAL(chip3.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip3.dimension(1), 3);
+ VERIFY_IS_EQUAL(chip3.dimension(2), 7);
+ VERIFY_IS_EQUAL(chip3.dimension(3), 11);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip4(tensor.template chip<3>(5));
+ VERIFY_IS_EQUAL(chip4.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip4.dimension(1), 3);
+ VERIFY_IS_EQUAL(chip4.dimension(2), 5);
+ VERIFY_IS_EQUAL(chip4.dimension(3), 11);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip5(tensor.template chip<4>(7));
+ VERIFY_IS_EQUAL(chip5.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip5.dimension(1), 3);
+ VERIFY_IS_EQUAL(chip5.dimension(2), 5);
+ VERIFY_IS_EQUAL(chip5.dimension(3), 7);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7));
+ }
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_dynamic_chip()
+{
+ Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+ tensor.setRandom();
+
+ Tensor<float, 4, DataLayout> chip1;
+ chip1 = tensor.chip(1, 0);
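+  // chip(offset, dim) is the runtime-dimension counterpart of
+  // chip<dim>(offset) and must produce the same slices.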
+ VERIFY_IS_EQUAL(chip1.dimension(0), 3);
+ VERIFY_IS_EQUAL(chip1.dimension(1), 5);
+ VERIFY_IS_EQUAL(chip1.dimension(2), 7);
+ VERIFY_IS_EQUAL(chip1.dimension(3), 11);
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip2 = tensor.chip(1, 1);
+ VERIFY_IS_EQUAL(chip2.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip2.dimension(1), 5);
+ VERIFY_IS_EQUAL(chip2.dimension(2), 7);
+ VERIFY_IS_EQUAL(chip2.dimension(3), 11);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip3 = tensor.chip(2, 2);
+ VERIFY_IS_EQUAL(chip3.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip3.dimension(1), 3);
+ VERIFY_IS_EQUAL(chip3.dimension(2), 7);
+ VERIFY_IS_EQUAL(chip3.dimension(3), 11);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip4(tensor.chip(5, 3));
+ VERIFY_IS_EQUAL(chip4.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip4.dimension(1), 3);
+ VERIFY_IS_EQUAL(chip4.dimension(2), 5);
+ VERIFY_IS_EQUAL(chip4.dimension(3), 11);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> chip5(tensor.chip(7, 4));
+ VERIFY_IS_EQUAL(chip5.dimension(0), 2);
+ VERIFY_IS_EQUAL(chip5.dimension(1), 3);
+ VERIFY_IS_EQUAL(chip5.dimension(2), 5);
+ VERIFY_IS_EQUAL(chip5.dimension(3), 7);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7));
+ }
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_chip_in_expr() {
+ Tensor<float, 5, DataLayout> input1(2,3,5,7,11);
+ input1.setRandom();
+ Tensor<float, 4, DataLayout> input2(3,5,7,11);
+ input2.setRandom();
+
+ Tensor<float, 4, DataLayout> result = input1.template chip<0>(0) + input2;
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ float expected = input1(0,i,j,k,l) + input2(i,j,k,l);
+ VERIFY_IS_EQUAL(result(i,j,k,l), expected);
+ }
+ }
+ }
+ }
+
+ Tensor<float, 3, DataLayout> input3(3,7,11);
+ input3.setRandom();
+ Tensor<float, 3, DataLayout> result2 = input1.template chip<0>(0).template chip<1>(2) + input3;
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ for (int k = 0; k < 11; ++k) {
+ float expected = input1(0,i,2,j,k) + input3(i,j,k);
+ VERIFY_IS_EQUAL(result2(i,j,k), expected);
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_chip_as_lvalue()
+{
+ Tensor<float, 5, DataLayout> input1(2,3,5,7,11);
+ input1.setRandom();
+
+ Tensor<float, 4, DataLayout> input2(3,5,7,11);
+ input2.setRandom();
+ Tensor<float, 5, DataLayout> tensor = input1;
+ tensor.template chip<0>(1) = input2;
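+  // Assigning through a chip overwrites only the selected slice; every
+  // other element must keep its original value, as verified below.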
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ for (int m = 0; m < 11; ++m) {
+ if (i != 1) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> input3(2,5,7,11);
+ input3.setRandom();
+ tensor = input1;
+ tensor.template chip<1>(1) = input3;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ for (int m = 0; m < 11; ++m) {
+ if (j != 1) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> input4(2,3,7,11);
+ input4.setRandom();
+ tensor = input1;
+ tensor.template chip<2>(3) = input4;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ for (int m = 0; m < 11; ++m) {
+ if (k != 3) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> input5(2,3,5,11);
+ input5.setRandom();
+ tensor = input1;
+ tensor.template chip<3>(4) = input5;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ for (int m = 0; m < 11; ++m) {
+ if (l != 4) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Tensor<float, 4, DataLayout> input6(2,3,5,7);
+ input6.setRandom();
+ tensor = input1;
+ tensor.template chip<4>(5) = input6;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ for (int m = 0; m < 11; ++m) {
+ if (m != 5) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ Tensor<float, 5, DataLayout> input7(2,3,5,7,11);
+ input7.setRandom();
+ tensor = input1;
+ tensor.chip(0, 0) = input7.chip(0, 0);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ for (int m = 0; m < 11; ++m) {
+ if (i != 0) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void test_chip_raw_data_col_major()
+{
+ Tensor<float, 5, ColMajor> tensor(2,3,5,7,11);
+ tensor.setRandom();
+
+ typedef TensorEvaluator<decltype(tensor.chip<4>(3)), DefaultDevice> Evaluator4;
+ auto chip = Evaluator4(tensor.chip<4>(3), DefaultDevice());
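+  // Chipping the last dimension of a col-major tensor selects a contiguous
+  // block, so the evaluator can expose the raw buffer via data().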
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ int chip_index = i + 2 * (j + 3 * (k + 5 * l));
+ VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3));
+ }
+ }
+ }
+ }
+
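+  // Chipping any other dimension breaks contiguity in col-major order, so
+  // the evaluator cannot expose a raw pointer and data() returns null.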
+ typedef TensorEvaluator<decltype(tensor.chip<0>(0)), DefaultDevice> Evaluator0;
+ auto chip0 = Evaluator0(tensor.chip<0>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip0.data(), static_cast<float*>(0));
+
+ typedef TensorEvaluator<decltype(tensor.chip<1>(0)), DefaultDevice> Evaluator1;
+ auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));
+
+ typedef TensorEvaluator<decltype(tensor.chip<2>(0)), DefaultDevice> Evaluator2;
+ auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));
+
+ typedef TensorEvaluator<decltype(tensor.chip<3>(0)), DefaultDevice> Evaluator3;
+ auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
+}
+
+static void test_chip_raw_data_row_major()
+{
+ Tensor<float, 5, RowMajor> tensor(11,7,5,3,2);
+ tensor.setRandom();
+
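+  // In row-major order the first dimension is outermost, so chip<0> yields
+  // a contiguous block; chipping the remaining dimensions does not.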
+ typedef TensorEvaluator<decltype(tensor.chip<0>(3)), DefaultDevice> Evaluator0;
+ auto chip = Evaluator0(tensor.chip<0>(3), DefaultDevice());
+ for (int i = 0; i < 7; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 2; ++l) {
+ int chip_index = l + 2 * (k + 3 * (j + 5 * i));
+ VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l));
+ }
+ }
+ }
+ }
+
+ typedef TensorEvaluator<decltype(tensor.chip<1>(0)), DefaultDevice> Evaluator1;
+ auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0));
+
+ typedef TensorEvaluator<decltype(tensor.chip<2>(0)), DefaultDevice> Evaluator2;
+ auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0));
+
+ typedef TensorEvaluator<decltype(tensor.chip<3>(0)), DefaultDevice> Evaluator3;
+ auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0));
+
+ typedef TensorEvaluator<decltype(tensor.chip<4>(0)), DefaultDevice> Evaluator4;
+ auto chip4 = Evaluator4(tensor.chip<4>(0), DefaultDevice());
+ VERIFY_IS_EQUAL(chip4.data(), static_cast<float*>(0));
+}
+
+void test_cxx11_tensor_chipping()
+{
+ CALL_SUBTEST(test_simple_chip<ColMajor>());
+ CALL_SUBTEST(test_simple_chip<RowMajor>());
+ CALL_SUBTEST(test_dynamic_chip<ColMajor>());
+ CALL_SUBTEST(test_dynamic_chip<RowMajor>());
+ CALL_SUBTEST(test_chip_in_expr<ColMajor>());
+ CALL_SUBTEST(test_chip_in_expr<RowMajor>());
+ CALL_SUBTEST(test_chip_as_lvalue<ColMajor>());
+ CALL_SUBTEST(test_chip_as_lvalue<RowMajor>());
+ CALL_SUBTEST(test_chip_raw_data_col_major());
+ CALL_SUBTEST(test_chip_raw_data_row_major());
+}
diff --git a/unsupported/test/cxx11_tensor_comparisons.cpp b/unsupported/test/cxx11_tensor_comparisons.cpp
new file mode 100644
index 000000000..b1ff8aecb
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_comparisons.cpp
@@ -0,0 +1,84 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_orderings()
+{
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+ Tensor<bool, 3> lt(2,3,7);
+ Tensor<bool, 3> le(2,3,7);
+ Tensor<bool, 3> gt(2,3,7);
+ Tensor<bool, 3> ge(2,3,7);
+
+ mat1.setRandom();
+ mat2.setRandom();
+
+ lt = mat1 < mat2;
+ le = mat1 <= mat2;
+ gt = mat1 > mat2;
+ ge = mat1 >= mat2;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(lt(i,j,k), mat1(i,j,k) < mat2(i,j,k));
+ VERIFY_IS_EQUAL(le(i,j,k), mat1(i,j,k) <= mat2(i,j,k));
+ VERIFY_IS_EQUAL(gt(i,j,k), mat1(i,j,k) > mat2(i,j,k));
+ VERIFY_IS_EQUAL(ge(i,j,k), mat1(i,j,k) >= mat2(i,j,k));
+ }
+ }
+ }
+}
+
+
+static void test_equality()
+{
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+
+ mat1.setRandom();
+ mat2.setRandom();
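+  // Copy a random subset of mat1 into mat2 so that both equal and unequal
+  // element pairs occur.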
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ if (internal::random<bool>()) {
+ mat2(i,j,k) = mat1(i,j,k);
+ }
+ }
+ }
+ }
+
+ Tensor<bool, 3> eq(2,3,7);
+ Tensor<bool, 3> ne(2,3,7);
+ eq = (mat1 == mat2);
+ ne = (mat1 != mat2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(eq(i,j,k), mat1(i,j,k) == mat2(i,j,k));
+ VERIFY_IS_EQUAL(ne(i,j,k), mat1(i,j,k) != mat2(i,j,k));
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_comparisons()
+{
+ CALL_SUBTEST(test_orderings());
+ CALL_SUBTEST(test_equality());
+}
diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu
new file mode 100644
index 000000000..d4e111f5d
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cuda.cu
@@ -0,0 +1,153 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cuda_nullary() {
+ Tensor<std::complex<float>, 1, 0, int> in1(2);
+ Tensor<std::complex<float>, 1, 0, int> in2(2);
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t float_bytes = in1.size() * sizeof(float);
+ std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>);
+
+ std::complex<float>* d_in1;
+ std::complex<float>* d_in2;
+ float* d_out2;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out2), float_bytes);
+ cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, 2);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, 2);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2(
+ d_out2, 2);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f));
+ gpu_out2.device(gpu_device) = gpu_in2.abs();
+
+ Tensor<std::complex<float>, 1, 0, int> new1(2);
+ Tensor<float, 1, 0, int> new2(2);
+
+ assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f));
+ VERIFY_IS_APPROX(new2(i), std::abs(in2(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out2);
+}
+
+
+static void test_cuda_sum_reductions() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ const int num_rows = internal::random<int>(1024, 5*1024);
+ const int num_cols = internal::random<int>(1024, 5*1024);
+
+ Tensor<std::complex<float>, 2> in(num_rows, num_cols);
+ in.setRandom();
+
+ Tensor<std::complex<float>, 0> full_redux;
+ full_redux = in.sum();
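+  // Compute the reference reduction on the CPU first; the device result is
+  // compared against it below.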
+
+ std::size_t in_bytes = in.size() * sizeof(std::complex<float>);
+ std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>);
+ std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));
+ std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes));
+ gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
+
+ TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
+ TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr);
+
+ out_gpu.device(gpu_device) = in_gpu.sum();
+
+ Tensor<std::complex<float>, 0> full_redux_gpu;
+ gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
+ gpu_device.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
+
+ gpu_device.deallocate(gpu_in_ptr);
+ gpu_device.deallocate(gpu_out_ptr);
+}
+
+
+static void test_cuda_product_reductions() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ const int num_rows = internal::random<int>(1024, 5*1024);
+ const int num_cols = internal::random<int>(1024, 5*1024);
+
+ Tensor<std::complex<float>, 2> in(num_rows, num_cols);
+ in.setRandom();
+
+ Tensor<std::complex<float>, 0> full_redux;
+ full_redux = in.prod();
+
+ std::size_t in_bytes = in.size() * sizeof(std::complex<float>);
+ std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>);
+ std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes));
+ std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes));
+ gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
+
+ TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols);
+ TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr);
+
+ out_gpu.device(gpu_device) = in_gpu.prod();
+
+ Tensor<std::complex<float>, 0> full_redux_gpu;
+ gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
+ gpu_device.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
+
+ gpu_device.deallocate(gpu_in_ptr);
+ gpu_device.deallocate(gpu_out_ptr);
+}
+
+
+void test_cxx11_tensor_complex()
+{
+ CALL_SUBTEST(test_cuda_nullary());
+ CALL_SUBTEST(test_cuda_sum_reductions());
+ CALL_SUBTEST(test_cuda_product_reductions());
+}
diff --git a/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
new file mode 100644
index 000000000..2baf5eaad
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
@@ -0,0 +1,97 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<typename T>
+void test_cuda_complex_cwise_ops() {
+ const int kNumItems = 2;
+ std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
+
+ std::complex<T>* d_in1;
+ std::complex<T>* d_in2;
+ std::complex<T>* d_out;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out), complex_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, kNumItems);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
+ d_out, kNumItems);
+
+ const std::complex<T> a(3.14f, 2.7f);
+ const std::complex<T> b(-10.6f, 1.4f);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(a);
+ gpu_in2.device(gpu_device) = gpu_in2.constant(b);
+
+ enum CwiseOp {
+ Add = 0,
+ Sub,
+ Mul,
+ Div
+ };
+
+ Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
+ for (int op = Add; op <= Div; op++) {
+ std::complex<T> expected;
+ switch (static_cast<CwiseOp>(op)) {
+ case Add:
+ gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
+ expected = a + b;
+ break;
+ case Sub:
+ gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
+ expected = a - b;
+ break;
+ case Mul:
+ gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
+ expected = a * b;
+ break;
+ case Div:
+ gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
+ expected = a / b;
+ break;
+ }
+ assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < kNumItems; ++i) {
+ VERIFY_IS_APPROX(actual(i), expected);
+ }
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out);
+}
+
+
+void test_cxx11_tensor_complex_cwise_ops()
+{
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
+ CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
+}
diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp
new file mode 100644
index 000000000..03ef12e63
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_concatenation.cpp
@@ -0,0 +1,137 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<int DataLayout>
+static void test_dimension_failures()
+{
+ Tensor<int, 3, DataLayout> left(2, 3, 1);
+ Tensor<int, 3, DataLayout> right(3, 3, 1);
+ left.setRandom();
+ right.setRandom();
+
+ // Okay; other dimensions are equal.
+ Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0);
+
+ // Dimension mismatches.
+ VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 1));
+ VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 2));
+
+ // Axis > NumDims or < 0.
+ VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 3));
+ VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, -1));
+}
+
+template<int DataLayout>
+static void test_static_dimension_failure()
+{
+ Tensor<int, 2, DataLayout> left(2, 3);
+ Tensor<int, 3, DataLayout> right(2, 3, 1);
+
+#ifdef CXX11_TENSOR_CONCATENATION_STATIC_DIMENSION_FAILURE
+  // Technically compatible, but we static-assert that the inputs have the
+  // same NumDims.
+ Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0);
+#endif
+
+  // The rank mismatch can be worked around by reshaping one operand.
+ Tensor<int, 3, DataLayout> concatenation = left
+ .reshape(Tensor<int, 3>::Dimensions(2, 3, 1))
+ .concatenate(right, 0);
+ Tensor<int, 2, DataLayout> alternative = left
+ .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{{2, 3}}}), 0);
+}
+
+template<int DataLayout>
+static void test_simple_concatenation()
+{
+ Tensor<int, 3, DataLayout> left(2, 3, 1);
+ Tensor<int, 3, DataLayout> right(2, 3, 1);
+ left.setRandom();
+ right.setRandom();
+
+ Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0);
+ VERIFY_IS_EQUAL(concatenation.dimension(0), 4);
+ VERIFY_IS_EQUAL(concatenation.dimension(1), 3);
+ VERIFY_IS_EQUAL(concatenation.dimension(2), 1);
+ for (int j = 0; j < 3; ++j) {
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
+ }
+ for (int i = 2; i < 4; ++i) {
+ VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i - 2, j, 0));
+ }
+ }
+
+ concatenation = left.concatenate(right, 1);
+ VERIFY_IS_EQUAL(concatenation.dimension(0), 2);
+ VERIFY_IS_EQUAL(concatenation.dimension(1), 6);
+ VERIFY_IS_EQUAL(concatenation.dimension(2), 1);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
+ }
+ for (int j = 3; j < 6; ++j) {
+ VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i, j - 3, 0));
+ }
+ }
+
+ concatenation = left.concatenate(right, 2);
+ VERIFY_IS_EQUAL(concatenation.dimension(0), 2);
+ VERIFY_IS_EQUAL(concatenation.dimension(1), 3);
+ VERIFY_IS_EQUAL(concatenation.dimension(2), 2);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
+ VERIFY_IS_EQUAL(concatenation(i, j, 1), right(i, j, 0));
+ }
+ }
+}
+
+
+// TODO(phli): Add test once we have a real vectorized implementation.
+// static void test_vectorized_concatenation() {}
+
+static void test_concatenation_as_lvalue()
+{
+ Tensor<int, 2> t1(2, 3);
+ Tensor<int, 2> t2(2, 3);
+ t1.setRandom();
+ t2.setRandom();
+
+ Tensor<int, 2> result(4, 3);
+ result.setRandom();
+ t1.concatenate(t2, 0) = result;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(t1(i, j), result(i, j));
+ VERIFY_IS_EQUAL(t2(i, j), result(i+2, j));
+ }
+ }
+}
+
+
+void test_cxx11_tensor_concatenation()
+{
+ CALL_SUBTEST(test_dimension_failures<ColMajor>());
+ CALL_SUBTEST(test_dimension_failures<RowMajor>());
+ CALL_SUBTEST(test_static_dimension_failure<ColMajor>());
+ CALL_SUBTEST(test_static_dimension_failure<RowMajor>());
+ CALL_SUBTEST(test_simple_concatenation<ColMajor>());
+ CALL_SUBTEST(test_simple_concatenation<RowMajor>());
+ // CALL_SUBTEST(test_vectorized_concatenation());
+ CALL_SUBTEST(test_concatenation_as_lvalue());
+}
diff --git a/unsupported/test/cxx11_tensor_const.cpp b/unsupported/test/cxx11_tensor_const.cpp
new file mode 100644
index 000000000..ad9c9da39
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_const.cpp
@@ -0,0 +1,62 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+using Eigen::Tensor;
+
+
+static void test_simple_assign()
+{
+ Tensor<int, 3> random(2,3,7);
+ random.setRandom();
+
+ TensorMap<Tensor<const int, 3> > constant(random.data(), 2, 3, 7);
+ Tensor<int, 3> result(2,3,7);
+ result = constant;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL((result(i,j,k)), random(i,j,k));
+ }
+ }
+ }
+}
+
+
+static void test_assign_of_const_tensor()
+{
+ Tensor<int, 3> random(2,3,7);
+ random.setRandom();
+
+ TensorMap<Tensor<const int, 3> > constant1(random.data(), 2, 3, 7);
+ TensorMap<const Tensor<int, 3> > constant2(random.data(), 2, 3, 7);
+ const TensorMap<Tensor<int, 3> > constant3(random.data(), 2, 3, 7);
+
+ Tensor<int, 2> result1 = constant1.chip(0, 2);
+ Tensor<int, 2> result2 = constant2.chip(0, 2);
+ Tensor<int, 2> result3 = constant3.chip(0, 2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0));
+ VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0));
+ VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0));
+ }
+ }
+}
+
+
+void test_cxx11_tensor_const()
+{
+ CALL_SUBTEST(test_simple_assign());
+ CALL_SUBTEST(test_assign_of_const_tensor());
+}
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu
new file mode 100644
index 000000000..dd68430ce
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -0,0 +1,216 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_contract_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+typedef Tensor<float, 1>::DimensionPair DimPair;
+
+template<int DataLayout>
+void test_cuda_contraction(int m_size, int k_size, int n_size)
+{
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
+  // The sizes are chosen so that the output holds far more elements than a
+  // single block of threads, forcing the contraction kernel to be spread
+  // across multiple blocks (e.g. on a 15-SM GK110 GPU).
+ Tensor<float, 2, DataLayout> t_left(m_size, k_size);
+ Tensor<float, 2, DataLayout> t_right(k_size, n_size);
+ Tensor<float, 2, DataLayout> t_result(m_size, n_size);
+ Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
+ Eigen::array<DimPair, 1> dims(DimPair(1, 0));
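+  // DimPair(1, 0): contract dimension 1 of the left operand with dimension
+  // 0 of the right, i.e. an ordinary matrix product.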
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ std::size_t t_left_bytes = t_left.size() * sizeof(float);
+ std::size_t t_right_bytes = t_right.size() * sizeof(float);
+ std::size_t t_result_bytes = t_result.size() * sizeof(float);
+
+ float* d_t_left;
+ float* d_t_right;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_left), t_left_bytes);
+ cudaMalloc((void**)(&d_t_right), t_right_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_left(d_t_left, Eigen::array<int, 2>(m_size, k_size));
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_right(d_t_right, Eigen::array<int, 2>(k_size, n_size));
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_result(d_t_result, Eigen::array<int, 2>(m_size, n_size));
+
+
+ gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
+ t_result = t_left.contract(t_right, dims);
+
+ cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
+ for (DenseIndex i = 0; i < t_result.size(); i++) {
+ if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
+ continue;
+ }
+ if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) {
+ continue;
+ }
+ std::cout << "mismatch detected at index " << i << ": " << t_result(i)
+ << " vs " << t_result_gpu(i) << std::endl;
+ assert(false);
+ }
+
+ cudaFree((void*)d_t_left);
+ cudaFree((void*)d_t_right);
+ cudaFree((void*)d_t_result);
+}
+
+
+template<int DataLayout>
+void test_scalar(int m_size, int k_size, int n_size)
+{
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
+  // The inputs are large enough that the full reduction to a single scalar
+  // must be spread across multiple blocks of threads.
+ Tensor<float, 2, DataLayout> t_left(m_size, k_size);
+ Tensor<float, 2, DataLayout> t_right(k_size, n_size);
+ Tensor<float, 0, DataLayout> t_result;
+ Tensor<float, 0, DataLayout> t_result_gpu;
+ Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1));
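+  // Contracting both dimensions of equally-sized operands yields a rank-0
+  // result: the sum over all (i,j) of t_left(i,j) * t_right(i,j).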
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ std::size_t t_left_bytes = t_left.size() * sizeof(float);
+ std::size_t t_right_bytes = t_right.size() * sizeof(float);
+ std::size_t t_result_bytes = sizeof(float);
+
+ float* d_t_left;
+ float* d_t_right;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_left), t_left_bytes);
+ cudaMalloc((void**)(&d_t_right), t_right_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_left(d_t_left, m_size, k_size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
+ gpu_t_right(d_t_right, k_size, n_size);
+ Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> >
+ gpu_t_result(d_t_result);
+
+ gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
+ t_result = t_left.contract(t_right, dims);
+
+ cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
+ if (fabs(t_result() - t_result_gpu()) > 1e-4f &&
+ !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) {
+ std::cout << "mismatch detected: " << t_result()
+ << " vs " << t_result_gpu() << std::endl;
+ assert(false);
+ }
+
+ cudaFree((void*)d_t_left);
+ cudaFree((void*)d_t_right);
+ cudaFree((void*)d_t_result);
+}
+
+
+template<int DataLayout>
+void test_cuda_contraction_m() {
+ for (int k = 32; k < 256; k++) {
+ test_cuda_contraction<ColMajor>(k, 128, 128);
+ test_cuda_contraction<RowMajor>(k, 128, 128);
+ }
+}
+
+template<int DataLayout>
+void test_cuda_contraction_k() {
+ for (int k = 32; k < 256; k++) {
+ test_cuda_contraction<ColMajor>(128, k, 128);
+ test_cuda_contraction<RowMajor>(128, k, 128);
+ }
+}
+
+template<int DataLayout>
+void test_cuda_contraction_n() {
+ for (int k = 32; k < 256; k++) {
+ test_cuda_contraction<ColMajor>(128, 128, k);
+ test_cuda_contraction<RowMajor>(128, 128, k);
+ }
+}
+
+
+template<int DataLayout>
+void test_cuda_contraction_sizes() {
+ int m_sizes[] = { 31, 39, 63, 64, 65,
+ 127, 129, 255, 257 , 511,
+ 512, 513, 1023, 1024, 1025};
+
+ int n_sizes[] = { 31, 39, 63, 64, 65,
+ 127, 129, 255, 257, 511,
+ 512, 513, 1023, 1024, 1025};
+
+ int k_sizes[] = { 31, 39, 63, 64, 65,
+ 95, 96, 127, 129, 255,
+ 257, 511, 512, 513, 1023,
+ 1024, 1025};
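+  // The loop bounds below match the array lengths: 15 m sizes, 15 n sizes
+  // and 17 k sizes.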
+
+ for (int i = 0; i < 15; i++) {
+ for (int j = 0; j < 15; j++) {
+ for (int k = 0; k < 17; k++) {
+        test_cuda_contraction<DataLayout>(m_sizes[i], k_sizes[k], n_sizes[j]);
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_contract_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
+
+ CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128));
+
+ CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
+
+ CALL_SUBTEST_4(test_cuda_contraction_k<ColMajor>());
+ CALL_SUBTEST_5(test_cuda_contraction_k<RowMajor>());
+
+ CALL_SUBTEST_6(test_cuda_contraction_n<ColMajor>());
+ CALL_SUBTEST_7(test_cuda_contraction_n<RowMajor>());
+
+ CALL_SUBTEST_8(test_cuda_contraction_sizes<ColMajor>());
+ CALL_SUBTEST_9(test_cuda_contraction_sizes<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
new file mode 100644
index 000000000..ace97057f
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -0,0 +1,545 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::DefaultDevice;
+using Eigen::Tensor;
+
+typedef Tensor<float, 1>::DimensionPair DimPair;
+
+template<int DataLayout>
+static void test_evals()
+{
+ Tensor<float, 2, DataLayout> mat1(2, 3);
+ Tensor<float, 2, DataLayout> mat2(2, 3);
+ Tensor<float, 2, DataLayout> mat3(3, 2);
+
+ mat1.setRandom();
+ mat2.setRandom();
+ mat3.setRandom();
+
+ Tensor<float, 2, DataLayout> mat4(3,3);
+ mat4.setZero();
+ Eigen::array<DimPair, 1> dims3 = {{DimPair(0, 0)}};
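+  // Contracting dimension 0 of both 2x3 operands computes mat1^T * mat2,
+  // a 3x3 result.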
+ typedef TensorEvaluator<decltype(mat1.contract(mat2, dims3)), DefaultDevice> Evaluator;
+ Evaluator eval(mat1.contract(mat2, dims3), DefaultDevice());
+ eval.evalTo(mat4.data());
+ EIGEN_STATIC_ASSERT(Evaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval.dimensions()[0], 3);
+ VERIFY_IS_EQUAL(eval.dimensions()[1], 3);
+
+ VERIFY_IS_APPROX(mat4(0,0), mat1(0,0)*mat2(0,0) + mat1(1,0)*mat2(1,0));
+ VERIFY_IS_APPROX(mat4(0,1), mat1(0,0)*mat2(0,1) + mat1(1,0)*mat2(1,1));
+ VERIFY_IS_APPROX(mat4(0,2), mat1(0,0)*mat2(0,2) + mat1(1,0)*mat2(1,2));
+ VERIFY_IS_APPROX(mat4(1,0), mat1(0,1)*mat2(0,0) + mat1(1,1)*mat2(1,0));
+ VERIFY_IS_APPROX(mat4(1,1), mat1(0,1)*mat2(0,1) + mat1(1,1)*mat2(1,1));
+ VERIFY_IS_APPROX(mat4(1,2), mat1(0,1)*mat2(0,2) + mat1(1,1)*mat2(1,2));
+ VERIFY_IS_APPROX(mat4(2,0), mat1(0,2)*mat2(0,0) + mat1(1,2)*mat2(1,0));
+ VERIFY_IS_APPROX(mat4(2,1), mat1(0,2)*mat2(0,1) + mat1(1,2)*mat2(1,1));
+ VERIFY_IS_APPROX(mat4(2,2), mat1(0,2)*mat2(0,2) + mat1(1,2)*mat2(1,2));
+
+ Tensor<float, 2, DataLayout> mat5(2,2);
+ mat5.setZero();
+ Eigen::array<DimPair, 1> dims4 = {{DimPair(1, 1)}};
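+  // Contracting dimension 1 of both operands computes mat1 * mat2^T, a 2x2
+  // result.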
+ typedef TensorEvaluator<decltype(mat1.contract(mat2, dims4)), DefaultDevice> Evaluator2;
+ Evaluator2 eval2(mat1.contract(mat2, dims4), DefaultDevice());
+ eval2.evalTo(mat5.data());
+ EIGEN_STATIC_ASSERT(Evaluator2::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval2.dimensions()[0], 2);
+ VERIFY_IS_EQUAL(eval2.dimensions()[1], 2);
+
+ VERIFY_IS_APPROX(mat5(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(0,1) + mat1(0,2)*mat2(0,2));
+ VERIFY_IS_APPROX(mat5(0,1), mat1(0,0)*mat2(1,0) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(1,2));
+ VERIFY_IS_APPROX(mat5(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(0,1) + mat1(1,2)*mat2(0,2));
+ VERIFY_IS_APPROX(mat5(1,1), mat1(1,0)*mat2(1,0) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(1,2));
+
+ Tensor<float, 2, DataLayout> mat6(2,2);
+ mat6.setZero();
+ Eigen::array<DimPair, 1> dims6 = {{DimPair(1, 0)}};
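+  // DimPair(1, 0) is the ordinary matrix product mat1 * mat3.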
+ typedef TensorEvaluator<decltype(mat1.contract(mat3, dims6)), DefaultDevice> Evaluator3;
+ Evaluator3 eval3(mat1.contract(mat3, dims6), DefaultDevice());
+ eval3.evalTo(mat6.data());
+ EIGEN_STATIC_ASSERT(Evaluator3::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval3.dimensions()[0], 2);
+ VERIFY_IS_EQUAL(eval3.dimensions()[1], 2);
+
+ VERIFY_IS_APPROX(mat6(0,0), mat1(0,0)*mat3(0,0) + mat1(0,1)*mat3(1,0) + mat1(0,2)*mat3(2,0));
+ VERIFY_IS_APPROX(mat6(0,1), mat1(0,0)*mat3(0,1) + mat1(0,1)*mat3(1,1) + mat1(0,2)*mat3(2,1));
+ VERIFY_IS_APPROX(mat6(1,0), mat1(1,0)*mat3(0,0) + mat1(1,1)*mat3(1,0) + mat1(1,2)*mat3(2,0));
+ VERIFY_IS_APPROX(mat6(1,1), mat1(1,0)*mat3(0,1) + mat1(1,1)*mat3(1,1) + mat1(1,2)*mat3(2,1));
+}
+
+template<int DataLayout>
+static void test_scalar()
+{
+ Tensor<float, 1, DataLayout> vec1({6});
+ Tensor<float, 1, DataLayout> vec2({6});
+
+ vec1.setRandom();
+ vec2.setRandom();
+
+ Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}};
+ Tensor<float, 0, DataLayout> scalar = vec1.contract(vec2, dims);
+
+ float expected = 0.0f;
+ for (int i = 0; i < 6; ++i) {
+ expected += vec1(i) * vec2(i);
+ }
+ VERIFY_IS_APPROX(scalar(), expected);
+}
+
+template<int DataLayout>
+static void test_multidims()
+{
+ Tensor<float, 3, DataLayout> mat1(2, 2, 2);
+ Tensor<float, 4, DataLayout> mat2(2, 2, 2, 2);
+
+ mat1.setRandom();
+ mat2.setRandom();
+
+ Tensor<float, 3, DataLayout> mat3(2, 2, 2);
+ mat3.setZero();
+ Eigen::array<DimPair, 2> dims = {{DimPair(1, 2), DimPair(2, 3)}};
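+  // Contract dims 1,2 of mat1 with dims 2,3 of mat2:
+  // mat3(i,j,k) = sum over (b,c) of mat1(i,b,c) * mat2(j,k,b,c).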
+ typedef TensorEvaluator<decltype(mat1.contract(mat2, dims)), DefaultDevice> Evaluator;
+ Evaluator eval(mat1.contract(mat2, dims), DefaultDevice());
+ eval.evalTo(mat3.data());
+ EIGEN_STATIC_ASSERT(Evaluator::NumDims==3ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval.dimensions()[0], 2);
+ VERIFY_IS_EQUAL(eval.dimensions()[1], 2);
+ VERIFY_IS_EQUAL(eval.dimensions()[2], 2);
+
+ VERIFY_IS_APPROX(mat3(0,0,0), mat1(0,0,0)*mat2(0,0,0,0) + mat1(0,1,0)*mat2(0,0,1,0) +
+ mat1(0,0,1)*mat2(0,0,0,1) + mat1(0,1,1)*mat2(0,0,1,1));
+ VERIFY_IS_APPROX(mat3(0,0,1), mat1(0,0,0)*mat2(0,1,0,0) + mat1(0,1,0)*mat2(0,1,1,0) +
+ mat1(0,0,1)*mat2(0,1,0,1) + mat1(0,1,1)*mat2(0,1,1,1));
+ VERIFY_IS_APPROX(mat3(0,1,0), mat1(0,0,0)*mat2(1,0,0,0) + mat1(0,1,0)*mat2(1,0,1,0) +
+ mat1(0,0,1)*mat2(1,0,0,1) + mat1(0,1,1)*mat2(1,0,1,1));
+ VERIFY_IS_APPROX(mat3(0,1,1), mat1(0,0,0)*mat2(1,1,0,0) + mat1(0,1,0)*mat2(1,1,1,0) +
+ mat1(0,0,1)*mat2(1,1,0,1) + mat1(0,1,1)*mat2(1,1,1,1));
+ VERIFY_IS_APPROX(mat3(1,0,0), mat1(1,0,0)*mat2(0,0,0,0) + mat1(1,1,0)*mat2(0,0,1,0) +
+ mat1(1,0,1)*mat2(0,0,0,1) + mat1(1,1,1)*mat2(0,0,1,1));
+ VERIFY_IS_APPROX(mat3(1,0,1), mat1(1,0,0)*mat2(0,1,0,0) + mat1(1,1,0)*mat2(0,1,1,0) +
+ mat1(1,0,1)*mat2(0,1,0,1) + mat1(1,1,1)*mat2(0,1,1,1));
+ VERIFY_IS_APPROX(mat3(1,1,0), mat1(1,0,0)*mat2(1,0,0,0) + mat1(1,1,0)*mat2(1,0,1,0) +
+ mat1(1,0,1)*mat2(1,0,0,1) + mat1(1,1,1)*mat2(1,0,1,1));
+ VERIFY_IS_APPROX(mat3(1,1,1), mat1(1,0,0)*mat2(1,1,0,0) + mat1(1,1,0)*mat2(1,1,1,0) +
+ mat1(1,0,1)*mat2(1,1,0,1) + mat1(1,1,1)*mat2(1,1,1,1));
+
+ Tensor<float, 2, DataLayout> mat4(2, 2);
+ Tensor<float, 3, DataLayout> mat5(2, 2, 2);
+
+ mat4.setRandom();
+ mat5.setRandom();
+
+ Tensor<float, 1, DataLayout> mat6(2);
+ mat6.setZero();
+ Eigen::array<DimPair, 2> dims2({{DimPair(0, 1), DimPair(1, 0)}});
+ typedef TensorEvaluator<decltype(mat4.contract(mat5, dims2)), DefaultDevice> Evaluator2;
+ Evaluator2 eval2(mat4.contract(mat5, dims2), DefaultDevice());
+ eval2.evalTo(mat6.data());
+ EIGEN_STATIC_ASSERT(Evaluator2::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval2.dimensions()[0], 2);
+
+ VERIFY_IS_APPROX(mat6(0), mat4(0,0)*mat5(0,0,0) + mat4(1,0)*mat5(0,1,0) +
+ mat4(0,1)*mat5(1,0,0) + mat4(1,1)*mat5(1,1,0));
+ VERIFY_IS_APPROX(mat6(1), mat4(0,0)*mat5(0,0,1) + mat4(1,0)*mat5(0,1,1) +
+ mat4(0,1)*mat5(1,0,1) + mat4(1,1)*mat5(1,1,1));
+}
+
+template<int DataLayout>
+static void test_holes() {
+ Tensor<float, 4, DataLayout> t1(2, 5, 7, 3);
+ Tensor<float, 5, DataLayout> t2(2, 7, 11, 13, 3);
+ t1.setRandom();
+ t2.setRandom();
+
+ Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(3, 4)}};
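+  // Contract dims 0,3 of t1 with dims 0,4 of t2; the free dimensions that
+  // remain are (5,7) from t1 followed by (7,11,13) from t2.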
+ Tensor<float, 5, DataLayout> result = t1.contract(t2, dims);
+ VERIFY_IS_EQUAL(result.dimension(0), 5);
+ VERIFY_IS_EQUAL(result.dimension(1), 7);
+ VERIFY_IS_EQUAL(result.dimension(2), 7);
+ VERIFY_IS_EQUAL(result.dimension(3), 11);
+ VERIFY_IS_EQUAL(result.dimension(4), 13);
+
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 5; ++l) {
+ for (int m = 0; m < 5; ++m) {
+ VERIFY_IS_APPROX(result(i, j, k, l, m),
+ t1(0, i, j, 0) * t2(0, k, l, m, 0) +
+ t1(1, i, j, 0) * t2(1, k, l, m, 0) +
+ t1(0, i, j, 1) * t2(0, k, l, m, 1) +
+ t1(1, i, j, 1) * t2(1, k, l, m, 1) +
+ t1(0, i, j, 2) * t2(0, k, l, m, 2) +
+ t1(1, i, j, 2) * t2(1, k, l, m, 2));
+ }
+ }
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_full_redux()
+{
+ Tensor<float, 2, DataLayout> t1(2, 2);
+ Tensor<float, 3, DataLayout> t2(2, 2, 2);
+ t1.setRandom();
+ t2.setRandom();
+
+ Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}};
+ Tensor<float, 1, DataLayout> result = t1.contract(t2, dims);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(1, 0, 0)
+ + t1(0, 1) * t2(0, 1, 0) + t1(1, 1) * t2(1, 1, 0));
+ VERIFY_IS_APPROX(result(1), t1(0, 0) * t2(0, 0, 1) + t1(1, 0) * t2(1, 0, 1)
+ + t1(0, 1) * t2(0, 1, 1) + t1(1, 1) * t2(1, 1, 1));
+
+ dims[0] = DimPair(1, 0);
+ dims[1] = DimPair(2, 1);
+ result = t2.contract(t1, dims);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(0, 1, 0)
+ + t1(0, 1) * t2(0, 0, 1) + t1(1, 1) * t2(0, 1, 1));
+ VERIFY_IS_APPROX(result(1), t1(0, 0) * t2(1, 0, 0) + t1(1, 0) * t2(1, 1, 0)
+ + t1(0, 1) * t2(1, 0, 1) + t1(1, 1) * t2(1, 1, 1));
+}
+
+template<int DataLayout>
+static void test_contraction_of_contraction()
+{
+ Tensor<float, 2, DataLayout> t1(2, 2);
+ Tensor<float, 2, DataLayout> t2(2, 2);
+ Tensor<float, 2, DataLayout> t3(2, 2);
+ Tensor<float, 2, DataLayout> t4(2, 2);
+ t1.setRandom();
+ t2.setRandom();
+ t3.setRandom();
+ t4.setRandom();
+
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+ auto contract1 = t1.contract(t2, dims);
+ auto diff = t3 - contract1;
+ auto contract2 = t1.contract(t4, dims);
+ Tensor<float, 2, DataLayout> result = contract2.contract(diff, dims);
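+  // The above computes (t1*t4) * (t3 - t1*t2); it is checked against the
+  // equivalent Eigen::Matrix expression below.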
+
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 2);
+
+ Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>>
+ m1(t1.data(), 2, 2), m2(t2.data(), 2, 2), m3(t3.data(), 2, 2),
+ m4(t4.data(), 2, 2);
+ Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>
+ expected = (m1 * m4) * (m3 - m1 * m2);
+
+ VERIFY_IS_APPROX(result(0, 0), expected(0, 0));
+ VERIFY_IS_APPROX(result(0, 1), expected(0, 1));
+ VERIFY_IS_APPROX(result(1, 0), expected(1, 0));
+ VERIFY_IS_APPROX(result(1, 1), expected(1, 1));
+}
+
+template<int DataLayout>
+static void test_expr()
+{
+ Tensor<float, 2, DataLayout> mat1(2, 3);
+ Tensor<float, 2, DataLayout> mat2(3, 2);
+ mat1.setRandom();
+ mat2.setRandom();
+
+ Tensor<float, 2, DataLayout> mat3(2,2);
+
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+ mat3 = mat1.contract(mat2, dims);
+
+ VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1));
+ VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1));
+}
+
+template<int DataLayout>
+static void test_out_of_order_contraction()
+{
+ Tensor<float, 3, DataLayout> mat1(2, 2, 2);
+ Tensor<float, 3, DataLayout> mat2(2, 2, 2);
+
+ mat1.setRandom();
+ mat2.setRandom();
+
+ Tensor<float, 2, DataLayout> mat3(2, 2);
+
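+  // The same contraction pairs listed in either order must give the same result.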
+ Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(0, 2)}};
+ mat3 = mat1.contract(mat2, dims);
+
+ VERIFY_IS_APPROX(mat3(0, 0),
+ mat1(0,0,0)*mat2(0,0,0) + mat1(1,0,0)*mat2(0,0,1) +
+ mat1(0,0,1)*mat2(1,0,0) + mat1(1,0,1)*mat2(1,0,1));
+ VERIFY_IS_APPROX(mat3(1, 0),
+ mat1(0,1,0)*mat2(0,0,0) + mat1(1,1,0)*mat2(0,0,1) +
+ mat1(0,1,1)*mat2(1,0,0) + mat1(1,1,1)*mat2(1,0,1));
+ VERIFY_IS_APPROX(mat3(0, 1),
+ mat1(0,0,0)*mat2(0,1,0) + mat1(1,0,0)*mat2(0,1,1) +
+ mat1(0,0,1)*mat2(1,1,0) + mat1(1,0,1)*mat2(1,1,1));
+ VERIFY_IS_APPROX(mat3(1, 1),
+ mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) +
+ mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1));
+
+ Eigen::array<DimPair, 2> dims2 = {{DimPair(0, 2), DimPair(2, 0)}};
+ mat3 = mat1.contract(mat2, dims2);
+
+ VERIFY_IS_APPROX(mat3(0, 0),
+ mat1(0,0,0)*mat2(0,0,0) + mat1(1,0,0)*mat2(0,0,1) +
+ mat1(0,0,1)*mat2(1,0,0) + mat1(1,0,1)*mat2(1,0,1));
+ VERIFY_IS_APPROX(mat3(1, 0),
+ mat1(0,1,0)*mat2(0,0,0) + mat1(1,1,0)*mat2(0,0,1) +
+ mat1(0,1,1)*mat2(1,0,0) + mat1(1,1,1)*mat2(1,0,1));
+ VERIFY_IS_APPROX(mat3(0, 1),
+ mat1(0,0,0)*mat2(0,1,0) + mat1(1,0,0)*mat2(0,1,1) +
+ mat1(0,0,1)*mat2(1,1,0) + mat1(1,0,1)*mat2(1,1,1));
+ VERIFY_IS_APPROX(mat3(1, 1),
+ mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) +
+ mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1));
+}
+
+template<int DataLayout>
+static void test_consistency()
+{
+  // check that contracting in either order gives transposed results,
+  // analogous to testing (A*B)^T = B^T * A^T
+
+ Tensor<float, 3, DataLayout> mat1(4, 3, 5);
+ Tensor<float, 5, DataLayout> mat2(3, 2, 1, 5, 4);
+ mat1.setRandom();
+ mat2.setRandom();
+
+ Tensor<float, 4, DataLayout> mat3(5, 2, 1, 5);
+ Tensor<float, 4, DataLayout> mat4(2, 1, 5, 5);
+
+ // contract on dimensions of size 4 and 3
+ Eigen::array<DimPair, 2> dims1 = {{DimPair(0, 4), DimPair(1, 0)}};
+ Eigen::array<DimPair, 2> dims2 = {{DimPair(4, 0), DimPair(0, 1)}};
+
+ mat3 = mat1.contract(mat2, dims1);
+ mat4 = mat2.contract(mat1, dims2);
+
+ // check that these are equal except for ordering of dimensions
+ if (DataLayout == ColMajor) {
+ for (size_t i = 0; i < 5; i++) {
+ for (size_t j = 0; j < 10; j++) {
+ VERIFY_IS_APPROX(mat3.data()[i + 5 * j], mat4.data()[j + 10 * i]);
+ }
+ }
+ } else {
+ // Row major
+ for (size_t i = 0; i < 5; i++) {
+ for (size_t j = 0; j < 10; j++) {
+ VERIFY_IS_APPROX(mat3.data()[10 * i + j], mat4.data()[i + 5 * j]);
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_large_contraction()
+{
+ Tensor<float, 4, DataLayout> t_left(30, 50, 8, 31);
+ Tensor<float, 5, DataLayout> t_right(8, 31, 7, 20, 10);
+ Tensor<float, 5, DataLayout> t_result(30, 50, 7, 20, 10);
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ // Add a little offset so that the results won't be close to zero.
+ t_left += t_left.constant(1.0f);
+ t_right += t_right.constant(1.0f);
+
+ typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+ MapXf m_left(t_left.data(), 1500, 248);
+ MapXf m_right(t_right.data(), 248, 1400);
+ Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
+
+ // this contraction should be equivalent to a single matrix multiplication
+ Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
+
+ // compute results by separate methods
+ t_result = t_left.contract(t_right, dims);
+ m_result = m_left * m_right;
+
+ for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
+ VERIFY(&t_result.data()[i] != &m_result.data()[i]);
+ VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
+ }
+}
+
+template<int DataLayout>
+static void test_matrix_vector()
+{
+ Tensor<float, 2, DataLayout> t_left(30, 50);
+ Tensor<float, 1, DataLayout> t_right(50);
+ Tensor<float, 1, DataLayout> t_result(30);
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+ MapXf m_left(t_left.data(), 30, 50);
+ MapXf m_right(t_right.data(), 50, 1);
+ Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(30, 1);
+
+ // this contraction should be equivalent to a single matrix multiplication
+ Eigen::array<DimPair, 1> dims{{DimPair(1, 0)}};
+
+ // compute results by separate methods
+ t_result = t_left.contract(t_right, dims);
+ m_result = m_left * m_right;
+
+ for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
+ VERIFY(internal::isApprox(t_result(i), m_result(i, 0), 1));
+ }
+}
+
+
+template<int DataLayout>
+static void test_tensor_vector()
+{
+ Tensor<float, 3, DataLayout> t_left(7, 13, 17);
+ Tensor<float, 2, DataLayout> t_right(1, 7);
+
+ t_left.setRandom();
+ t_right.setRandom();
+
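+  // contract dim 0 of t_left with dim 1 of t_right, both of size 7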
+ typedef typename Tensor<float, 1, DataLayout>::DimensionPair DimensionPair;
+ Eigen::array<DimensionPair, 1> dim_pair01{{{0, 1}}};
+ Tensor<float, 3, DataLayout> t_result = t_left.contract(t_right, dim_pair01);
+
+ typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+ MapXf m_left(t_left.data(), 7, 13*17);
+ MapXf m_right(t_right.data(), 1, 7);
+ Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result = m_left.transpose() * m_right.transpose();
+
+ for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
+ VERIFY(internal::isApprox(t_result(i), m_result(i, 0), 1));
+ }
+}
+
+
+template<int DataLayout>
+static void test_small_blocking_factors()
+{
+ Tensor<float, 4, DataLayout> t_left(30, 5, 3, 31);
+ Tensor<float, 5, DataLayout> t_right(3, 31, 7, 20, 1);
+ t_left.setRandom();
+ t_right.setRandom();
+
+ // Add a little offset so that the results won't be close to zero.
+ t_left += t_left.constant(1.0f);
+ t_right += t_right.constant(1.0f);
+
+ // Force the cache sizes, which results in smaller blocking factors.
+ Eigen::setCpuCacheSizes(896, 1920, 2944);
+
+ // this contraction should be equivalent to a single matrix multiplication
+ Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}};
+ Tensor<float, 5, DataLayout> t_result;
+ t_result = t_left.contract(t_right, dims);
+
+  // compute the reference result using a simple Eigen matrix product
+ Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> m_left(t_left.data(), 150, 93);
+ Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> m_right(t_right.data(), 93, 140);
+ Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result = m_left * m_right;
+
+ for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
+ VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
+ }
+}
+
+template<int DataLayout>
+static void test_tensor_product()
+{
+ Tensor<float, 2, DataLayout> mat1(2, 3);
+ Tensor<float, 2, DataLayout> mat2(4, 1);
+ mat1.setRandom();
+ mat2.setRandom();
+
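+  // contracting over an empty set of dims computes the outer (tensor) product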
+ Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}});
+
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 3);
+ VERIFY_IS_EQUAL(result.dimension(2), 4);
+ VERIFY_IS_EQUAL(result.dimension(3), 1);
+ for (int i = 0; i < result.dimension(0); ++i) {
+ for (int j = 0; j < result.dimension(1); ++j) {
+ for (int k = 0; k < result.dimension(2); ++k) {
+ for (int l = 0; l < result.dimension(3); ++l) {
+ VERIFY_IS_APPROX(result(i, j, k, l), mat1(i, j) * mat2(k, l) );
+ }
+ }
+ }
+ }
+}
+
+
+template<int DataLayout>
+static void test_const_inputs()
+{
+ Tensor<float, 2, DataLayout> in1(2, 3);
+ Tensor<float, 2, DataLayout> in2(3, 2);
+ in1.setRandom();
+ in2.setRandom();
+
+ TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3);
+ TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2);
+ Tensor<float, 2, DataLayout> mat3(2,2);
+
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+ mat3 = mat1.contract(mat2, dims);
+
+ VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1));
+ VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1));
+}
+
+void test_cxx11_tensor_contraction()
+{
+ CALL_SUBTEST(test_evals<ColMajor>());
+ CALL_SUBTEST(test_evals<RowMajor>());
+ CALL_SUBTEST(test_scalar<ColMajor>());
+ CALL_SUBTEST(test_scalar<RowMajor>());
+ CALL_SUBTEST(test_multidims<ColMajor>());
+ CALL_SUBTEST(test_multidims<RowMajor>());
+ CALL_SUBTEST(test_holes<ColMajor>());
+ CALL_SUBTEST(test_holes<RowMajor>());
+ CALL_SUBTEST(test_full_redux<ColMajor>());
+ CALL_SUBTEST(test_full_redux<RowMajor>());
+ CALL_SUBTEST(test_contraction_of_contraction<ColMajor>());
+ CALL_SUBTEST(test_contraction_of_contraction<RowMajor>());
+ CALL_SUBTEST(test_expr<ColMajor>());
+ CALL_SUBTEST(test_expr<RowMajor>());
+ CALL_SUBTEST(test_out_of_order_contraction<ColMajor>());
+ CALL_SUBTEST(test_out_of_order_contraction<RowMajor>());
+ CALL_SUBTEST(test_consistency<ColMajor>());
+ CALL_SUBTEST(test_consistency<RowMajor>());
+ CALL_SUBTEST(test_large_contraction<ColMajor>());
+ CALL_SUBTEST(test_large_contraction<RowMajor>());
+ CALL_SUBTEST(test_matrix_vector<ColMajor>());
+ CALL_SUBTEST(test_matrix_vector<RowMajor>());
+ CALL_SUBTEST(test_tensor_vector<ColMajor>());
+ CALL_SUBTEST(test_tensor_vector<RowMajor>());
+ CALL_SUBTEST(test_small_blocking_factors<ColMajor>());
+ CALL_SUBTEST(test_small_blocking_factors<RowMajor>());
+ CALL_SUBTEST(test_tensor_product<ColMajor>());
+ CALL_SUBTEST(test_tensor_product<RowMajor>());
+ CALL_SUBTEST(test_const_inputs<ColMajor>());
+ CALL_SUBTEST(test_const_inputs<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_convolution.cpp b/unsupported/test/cxx11_tensor_convolution.cpp
new file mode 100644
index 000000000..e3d4675eb
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_convolution.cpp
@@ -0,0 +1,149 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::DefaultDevice;
+
+template <int DataLayout>
+static void test_evals()
+{
+ Tensor<float, 2, DataLayout> input(3, 3);
+ Tensor<float, 1, DataLayout> kernel(2);
+
+ input.setRandom();
+ kernel.setRandom();
+
+ Tensor<float, 2, DataLayout> result(2,3);
+ result.setZero();
+ Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}};
+
+ typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator;
+ Evaluator eval(input.convolve(kernel, dims3), DefaultDevice());
+ eval.evalTo(result.data());
+ EIGEN_STATIC_ASSERT(Evaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ VERIFY_IS_EQUAL(eval.dimensions()[0], 2);
+ VERIFY_IS_EQUAL(eval.dimensions()[1], 3);
+
+ VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0
+ VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2
+ VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4
+ VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1
+ VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3
+ VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
+}
+
+template <int DataLayout>
+static void test_expr()
+{
+ Tensor<float, 2, DataLayout> input(3, 3);
+ Tensor<float, 2, DataLayout> kernel(2, 2);
+ input.setRandom();
+ kernel.setRandom();
+
+ Tensor<float, 2, DataLayout> result(2,2);
+ Eigen::array<ptrdiff_t, 2> dims;
+ dims[0] = 0;
+ dims[1] = 1;
+ result = input.convolve(kernel, dims);
+
+ VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
+ input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
+ VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
+ input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
+ VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
+ input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
+ VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
+ input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
+}
+
+template <int DataLayout>
+static void test_modes() {
+ Tensor<float, 1, DataLayout> input(3);
+ Tensor<float, 1, DataLayout> kernel(3);
+ input(0) = 1.0f;
+ input(1) = 2.0f;
+ input(2) = 3.0f;
+ kernel(0) = 0.5f;
+ kernel(1) = 1.0f;
+ kernel(2) = 0.0f;
+
+ Eigen::array<ptrdiff_t, 1> dims;
+ dims[0] = 0;
+ Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding;
+
+ // Emulate VALID mode (as defined in
+ // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
+ padding[0] = std::make_pair(0, 0);
+ Tensor<float, 1, DataLayout> valid(1);
+ valid = input.pad(padding).convolve(kernel, dims);
+ VERIFY_IS_EQUAL(valid.dimension(0), 1);
+ VERIFY_IS_APPROX(valid(0), 2.5f);
+
+ // Emulate SAME mode (as defined in
+ // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
+ padding[0] = std::make_pair(1, 1);
+ Tensor<float, 1, DataLayout> same(3);
+ same = input.pad(padding).convolve(kernel, dims);
+ VERIFY_IS_EQUAL(same.dimension(0), 3);
+ VERIFY_IS_APPROX(same(0), 1.0f);
+ VERIFY_IS_APPROX(same(1), 2.5f);
+ VERIFY_IS_APPROX(same(2), 4.0f);
+
+ // Emulate FULL mode (as defined in
+ // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
+ padding[0] = std::make_pair(2, 2);
+ Tensor<float, 1, DataLayout> full(5);
+ full = input.pad(padding).convolve(kernel, dims);
+ VERIFY_IS_EQUAL(full.dimension(0), 5);
+ VERIFY_IS_APPROX(full(0), 0.0f);
+ VERIFY_IS_APPROX(full(1), 1.0f);
+ VERIFY_IS_APPROX(full(2), 2.5f);
+ VERIFY_IS_APPROX(full(3), 4.0f);
+ VERIFY_IS_APPROX(full(4), 1.5f);
+}
+
+template <int DataLayout>
+static void test_strides() {
+ Tensor<float, 1, DataLayout> input(13);
+ Tensor<float, 1, DataLayout> kernel(3);
+ input.setRandom();
+ kernel.setRandom();
+
+ Eigen::array<ptrdiff_t, 1> dims;
+ dims[0] = 0;
+ Eigen::array<ptrdiff_t, 1> stride_of_3;
+ stride_of_3[0] = 3;
+ Eigen::array<ptrdiff_t, 1> stride_of_2;
+ stride_of_2[0] = 2;
+
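+  // stride the input by 3 before convolving, then stride the output by 2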
+ Tensor<float, 1, DataLayout> result;
+ result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2);
+
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
+ input(6)*kernel(2)));
+ VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
+ input(12)*kernel(2)));
+}
+
+void test_cxx11_tensor_convolution()
+{
+ CALL_SUBTEST(test_evals<ColMajor>());
+ CALL_SUBTEST(test_evals<RowMajor>());
+ CALL_SUBTEST(test_expr<ColMajor>());
+ CALL_SUBTEST(test_expr<RowMajor>());
+ CALL_SUBTEST(test_modes<ColMajor>());
+ CALL_SUBTEST(test_modes<RowMajor>());
+ CALL_SUBTEST(test_strides<ColMajor>());
+ CALL_SUBTEST(test_strides<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
new file mode 100644
index 000000000..0ba9d52e9
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -0,0 +1,1287 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_cuda
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cuda_nullary() {
+ Tensor<float, 1, 0, int> in1(2);
+ Tensor<float, 1, 0, int> in2(2);
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t tensor_bytes = in1.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ cudaMalloc((void**)(&d_in1), tensor_bytes);
+ cudaMalloc((void**)(&d_in2), tensor_bytes);
+ cudaMemcpy(d_in1, in1.data(), tensor_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), tensor_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, 2);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, 2);
+
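+  // fill both tensors on the device: a constant and device-generated random
+  // values, which should differ from the host-side contents of in2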
+ gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f);
+ gpu_in2.device(gpu_device) = gpu_in2.random();
+
+ Tensor<float, 1, 0, int> new1(2);
+ Tensor<float, 1, 0, int> new2(2);
+
+ assert(cudaMemcpyAsync(new1.data(), d_in1, tensor_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(new2.data(), d_in2, tensor_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(new1(i), 3.14f);
+ VERIFY_IS_NOT_EQUAL(new2(i), in2(i));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+}
+
+void test_cuda_elementwise_small() {
+ Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
+ Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
+ Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2));
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t in2_bytes = in2.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ float* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_in2), in2_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(
+ d_in1, Eigen::array<Eigen::DenseIndex, 1>(2));
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2(
+ d_in2, Eigen::array<Eigen::DenseIndex, 1>(2));
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out(
+ d_out, Eigen::array<Eigen::DenseIndex, 1>(2));
+
+ gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(
+ out(Eigen::array<Eigen::DenseIndex, 1>(i)),
+ in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out);
+}
+
+void test_cuda_elementwise()
+{
+ Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ in1.setRandom();
+ in2.setRandom();
+ in3.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t in2_bytes = in2.size() * sizeof(float);
+ std::size_t in3_bytes = in3.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ float* d_in3;
+ float* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_in2), in2_bytes);
+ cudaMalloc((void**)(&d_in3), in3_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in3, in3.data(), in3_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+
+ gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3;
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 53; ++j) {
+ for (int k = 0; k < 97; ++k) {
+ VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)), in1(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) + in2(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)));
+ }
+ }
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_in3);
+ cudaFree(d_out);
+}
+
+void test_cuda_props() {
+ Tensor<float, 1> in1(200);
+ Tensor<bool, 1> out(200);
+ in1.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(bool);
+
+ float* d_in1;
+ bool* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(
+ d_in1, 200);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_out(
+ d_out, 200);
+
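+  // the extra parentheses around isnan prevent expansion of the isnan macro
+  // that some platforms define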
+ gpu_out.device(gpu_device) = (gpu_in1.isnan)();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 200; ++i) {
+ VERIFY_IS_EQUAL(out(i), (std::isnan)(in1(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_out);
+}
+
+void test_cuda_reduction()
+{
+ Tensor<float, 4> in1(72,53,97,113);
+ Tensor<float, 2> out(72,97);
+ in1.setRandom();
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
+ Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+ array<Eigen::DenseIndex, 2> reduction_axis;
+ reduction_axis[0] = 1;
+ reduction_axis[1] = 3;
+
+ gpu_out.device(gpu_device) = gpu_in1.maximum(reduction_axis);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
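+  // host-side reference: brute-force maximum over dims 1 and 3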
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ float expected = 0;
+ for (int k = 0; k < 53; ++k) {
+ for (int l = 0; l < 113; ++l) {
+ expected =
+ std::max<float>(expected, in1(i, k, j, l));
+ }
+ }
+ VERIFY_IS_APPROX(out(i,j), expected);
+ }
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_out);
+}
+
+template<int DataLayout>
+void test_cuda_contraction()
+{
+  // with these dimensions the output has 300 * 140 elements, which exceeds
+  // 30 * 1024, the number of threads a 15-SM GK110 GPU can keep resident
+  // across its blocks
+ Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
+ Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1));
+ Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1));
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ std::size_t t_left_bytes = t_left.size() * sizeof(float);
+ std::size_t t_right_bytes = t_right.size() * sizeof(float);
+ std::size_t t_result_bytes = t_result.size() * sizeof(float);
+
+ float* d_t_left;
+ float* d_t_right;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_left), t_left_bytes);
+ cudaMalloc((void**)(&d_t_right), t_right_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31);
+ Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_right(d_t_right, 3, 31, 7, 20, 1);
+ Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_result(d_t_result, 6, 50, 7, 20, 1);
+
+ typedef Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> > MapXf;
+ MapXf m_left(t_left.data(), 300, 93);
+ MapXf m_right(t_right.data(), 93, 140);
+ Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(300, 140);
+
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ Eigen::array<DimPair, 2> dims;
+ dims[0] = DimPair(2, 0);
+ dims[1] = DimPair(3, 1);
+
+ m_result = m_left * m_right;
+ gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims);
+
+ cudaMemcpy(t_result.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
+
+ for (DenseIndex i = 0; i < t_result.size(); i++) {
+ if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
+ std::cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
+ assert(false);
+ }
+ }
+
+ cudaFree(d_t_left);
+ cudaFree(d_t_right);
+ cudaFree(d_t_result);
+}
+
+template<int DataLayout>
+void test_cuda_convolution_1d()
+{
+ Tensor<float, 4, DataLayout> input(74,37,11,137);
+ Tensor<float, 1, DataLayout> kernel(4);
+ Tensor<float, 4, DataLayout> out(74,34,11,137);
+ input = input.constant(10.0f) + input.random();
+ kernel = kernel.constant(7.0f) + kernel.random();
+
+ std::size_t input_bytes = input.size() * sizeof(float);
+ std::size_t kernel_bytes = kernel.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_input;
+ float* d_kernel;
+ float* d_out;
+ cudaMalloc((void**)(&d_input), input_bytes);
+ cudaMalloc((void**)(&d_kernel), kernel_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137);
+
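+  // convolve along dimension 1: the output extent shrinks from 37 to 34
+  // (37 - 4 + 1)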
+ Eigen::array<Eigen::DenseIndex, 1> dims(1);
+ gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 74; ++i) {
+ for (int j = 0; j < 34; ++j) {
+ for (int k = 0; k < 11; ++k) {
+ for (int l = 0; l < 137; ++l) {
+ const float result = out(i,j,k,l);
+ const float expected = input(i,j+0,k,l) * kernel(0) + input(i,j+1,k,l) * kernel(1) +
+ input(i,j+2,k,l) * kernel(2) + input(i,j+3,k,l) * kernel(3);
+ VERIFY_IS_APPROX(result, expected);
+ }
+ }
+ }
+ }
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
+}
+
+void test_cuda_convolution_inner_dim_col_major_1d()
+{
+ Tensor<float, 4, ColMajor> input(74,9,11,7);
+ Tensor<float, 1, ColMajor> kernel(4);
+ Tensor<float, 4, ColMajor> out(71,9,11,7);
+ input = input.constant(10.0f) + input.random();
+ kernel = kernel.constant(7.0f) + kernel.random();
+
+ std::size_t input_bytes = input.size() * sizeof(float);
+ std::size_t kernel_bytes = kernel.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_input;
+ float* d_kernel;
+ float* d_out;
+ cudaMalloc((void**)(&d_input), input_bytes);
+ cudaMalloc((void**)(&d_kernel), kernel_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_input(d_input,74,9,11,7);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4);
+ Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7);
+
+ Eigen::array<Eigen::DenseIndex, 1> dims(0);
+ gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 71; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 11; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ const float result = out(i,j,k,l);
+ const float expected = input(i+0,j,k,l) * kernel(0) + input(i+1,j,k,l) * kernel(1) +
+ input(i+2,j,k,l) * kernel(2) + input(i+3,j,k,l) * kernel(3);
+ VERIFY_IS_APPROX(result, expected);
+ }
+ }
+ }
+ }
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
+}
+
+void test_cuda_convolution_inner_dim_row_major_1d()
+{
+ Tensor<float, 4, RowMajor> input(7,9,11,74);
+ Tensor<float, 1, RowMajor> kernel(4);
+ Tensor<float, 4, RowMajor> out(7,9,11,71);
+ input = input.constant(10.0f) + input.random();
+ kernel = kernel.constant(7.0f) + kernel.random();
+
+ std::size_t input_bytes = input.size() * sizeof(float);
+ std::size_t kernel_bytes = kernel.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_input;
+ float* d_kernel;
+ float* d_out;
+ cudaMalloc((void**)(&d_input), input_bytes);
+ cudaMalloc((void**)(&d_kernel), kernel_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
+ Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71);
+
+ Eigen::array<Eigen::DenseIndex, 1> dims(3);
+ gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 7; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 11; ++k) {
+ for (int l = 0; l < 71; ++l) {
+ const float result = out(i,j,k,l);
+ const float expected = input(i,j,k,l+0) * kernel(0) + input(i,j,k,l+1) * kernel(1) +
+ input(i,j,k,l+2) * kernel(2) + input(i,j,k,l+3) * kernel(3);
+ VERIFY_IS_APPROX(result, expected);
+ }
+ }
+ }
+ }
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
+}
+
+template<int DataLayout>
+void test_cuda_convolution_2d()
+{
+ Tensor<float, 4, DataLayout> input(74,37,11,137);
+ Tensor<float, 2, DataLayout> kernel(3,4);
+ Tensor<float, 4, DataLayout> out(74,35,8,137);
+ input = input.constant(10.0f) + input.random();
+ kernel = kernel.constant(7.0f) + kernel.random();
+
+ std::size_t input_bytes = input.size() * sizeof(float);
+ std::size_t kernel_bytes = kernel.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_input;
+ float* d_kernel;
+ float* d_out;
+ cudaMalloc((void**)(&d_input), input_bytes);
+ cudaMalloc((void**)(&d_kernel), kernel_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137);
+ Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4);
+ Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137);
+
+ Eigen::array<Eigen::DenseIndex, 2> dims(1,2);
+ gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 74; ++i) {
+ for (int j = 0; j < 35; ++j) {
+ for (int k = 0; k < 8; ++k) {
+ for (int l = 0; l < 137; ++l) {
+ const float result = out(i,j,k,l);
+ const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
+ input(i,j+1,k+0,l) * kernel(1,0) +
+ input(i,j+2,k+0,l) * kernel(2,0) +
+ input(i,j+0,k+1,l) * kernel(0,1) +
+ input(i,j+1,k+1,l) * kernel(1,1) +
+ input(i,j+2,k+1,l) * kernel(2,1) +
+ input(i,j+0,k+2,l) * kernel(0,2) +
+ input(i,j+1,k+2,l) * kernel(1,2) +
+ input(i,j+2,k+2,l) * kernel(2,2) +
+ input(i,j+0,k+3,l) * kernel(0,3) +
+ input(i,j+1,k+3,l) * kernel(1,3) +
+ input(i,j+2,k+3,l) * kernel(2,3);
+ VERIFY_IS_APPROX(result, expected);
+ }
+ }
+ }
+ }
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
+}
+
+template<int DataLayout>
+void test_cuda_convolution_3d()
+{
+ Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74,37,11,137,17));
+ Tensor<float, 3, DataLayout> kernel(3,4,2);
+ Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74,35,8,136,17));
+ input = input.constant(10.0f) + input.random();
+ kernel = kernel.constant(7.0f) + kernel.random();
+
+ std::size_t input_bytes = input.size() * sizeof(float);
+ std::size_t kernel_bytes = kernel.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_input;
+ float* d_kernel;
+ float* d_out;
+ cudaMalloc((void**)(&d_input), input_bytes);
+ cudaMalloc((void**)(&d_kernel), kernel_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17);
+ Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2);
+ Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17);
+
+ Eigen::array<Eigen::DenseIndex, 3> dims(1,2,3);
+ gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 74; ++i) {
+ for (int j = 0; j < 35; ++j) {
+ for (int k = 0; k < 8; ++k) {
+ for (int l = 0; l < 136; ++l) {
+ for (int m = 0; m < 17; ++m) {
+ const float result = out(i,j,k,l,m);
+ const float expected = input(i,j+0,k+0,l+0,m) * kernel(0,0,0) +
+ input(i,j+1,k+0,l+0,m) * kernel(1,0,0) +
+ input(i,j+2,k+0,l+0,m) * kernel(2,0,0) +
+ input(i,j+0,k+1,l+0,m) * kernel(0,1,0) +
+ input(i,j+1,k+1,l+0,m) * kernel(1,1,0) +
+ input(i,j+2,k+1,l+0,m) * kernel(2,1,0) +
+ input(i,j+0,k+2,l+0,m) * kernel(0,2,0) +
+ input(i,j+1,k+2,l+0,m) * kernel(1,2,0) +
+ input(i,j+2,k+2,l+0,m) * kernel(2,2,0) +
+ input(i,j+0,k+3,l+0,m) * kernel(0,3,0) +
+ input(i,j+1,k+3,l+0,m) * kernel(1,3,0) +
+ input(i,j+2,k+3,l+0,m) * kernel(2,3,0) +
+ input(i,j+0,k+0,l+1,m) * kernel(0,0,1) +
+ input(i,j+1,k+0,l+1,m) * kernel(1,0,1) +
+ input(i,j+2,k+0,l+1,m) * kernel(2,0,1) +
+ input(i,j+0,k+1,l+1,m) * kernel(0,1,1) +
+ input(i,j+1,k+1,l+1,m) * kernel(1,1,1) +
+ input(i,j+2,k+1,l+1,m) * kernel(2,1,1) +
+ input(i,j+0,k+2,l+1,m) * kernel(0,2,1) +
+ input(i,j+1,k+2,l+1,m) * kernel(1,2,1) +
+ input(i,j+2,k+2,l+1,m) * kernel(2,2,1) +
+ input(i,j+0,k+3,l+1,m) * kernel(0,3,1) +
+ input(i,j+1,k+3,l+1,m) * kernel(1,3,1) +
+ input(i,j+2,k+3,l+1,m) * kernel(2,3,1);
+ VERIFY_IS_APPROX(result, expected);
+ }
+ }
+ }
+ }
+ }
+
+ cudaFree(d_input);
+ cudaFree(d_kernel);
+ cudaFree(d_out);
+}
+
+
+template <typename Scalar>
+void test_cuda_lgamma(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.lgamma();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
+ }
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_digamma()
+{
+ Tensor<Scalar, 1> in(7);
+ Tensor<Scalar, 1> out(7);
+ Tensor<Scalar, 1> expected_out(7);
+ out.setZero();
+
+ in(0) = Scalar(1);
+ in(1) = Scalar(1.5);
+ in(2) = Scalar(4);
+ in(3) = Scalar(-10.5);
+ in(4) = Scalar(10000.5);
+ in(5) = Scalar(0);
+ in(6) = Scalar(-1);
+
+ expected_out(0) = Scalar(-0.5772156649015329);
+ expected_out(1) = Scalar(0.03648997397857645);
+ expected_out(2) = Scalar(1.2561176684318);
+ expected_out(3) = Scalar(2.398239129535781);
+ expected_out(4) = Scalar(9.210340372392849);
+ expected_out(5) = std::numeric_limits<Scalar>::infinity();
+ expected_out(6) = std::numeric_limits<Scalar>::infinity();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);
+
+ gpu_out.device(gpu_device) = gpu_in.digamma();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 5; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ for (int i = 5; i < 7; ++i) {
+ VERIFY_IS_EQUAL(out(i), expected_out(i));
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_zeta()
+{
+ Tensor<Scalar, 1> in_x(6);
+ Tensor<Scalar, 1> in_q(6);
+ Tensor<Scalar, 1> out(6);
+ Tensor<Scalar, 1> expected_out(6);
+ out.setZero();
+
+ in_x(0) = Scalar(1);
+ in_x(1) = Scalar(1.5);
+ in_x(2) = Scalar(4);
+ in_x(3) = Scalar(-10.5);
+ in_x(4) = Scalar(10000.5);
+ in_x(5) = Scalar(3);
+
+ in_q(0) = Scalar(1.2345);
+ in_q(1) = Scalar(2);
+ in_q(2) = Scalar(1.5);
+ in_q(3) = Scalar(3);
+ in_q(4) = Scalar(1.0001);
+ in_q(5) = Scalar(-2.5);
+
+ expected_out(0) = std::numeric_limits<Scalar>::infinity();
+ expected_out(1) = Scalar(1.61237534869);
+ expected_out(2) = Scalar(0.234848505667);
+ expected_out(3) = Scalar(1.03086757337e-5);
+ expected_out(4) = Scalar(0.367879440865);
+ expected_out(5) = Scalar(0.054102025820864097);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_q;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_q), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_q(d_in_q, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
+
+ gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
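+  // out(0) is +inf (x == 1) and out(3) is NaN (negative x), so both are
+  // checked separately from the approximate comparisons below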
+ VERIFY_IS_EQUAL(out(0), expected_out(0));
+ VERIFY((std::isnan)(out(3)));
+
+ for (int i = 1; i < 6; ++i) {
+ if (i != 3) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ }
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_q);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_polygamma()
+{
+ Tensor<Scalar, 1> in_x(7);
+ Tensor<Scalar, 1> in_n(7);
+ Tensor<Scalar, 1> out(7);
+ Tensor<Scalar, 1> expected_out(7);
+ out.setZero();
+
+ in_n(0) = Scalar(1);
+ in_n(1) = Scalar(1);
+ in_n(2) = Scalar(1);
+ in_n(3) = Scalar(17);
+ in_n(4) = Scalar(31);
+ in_n(5) = Scalar(28);
+ in_n(6) = Scalar(8);
+
+ in_x(0) = Scalar(2);
+ in_x(1) = Scalar(3);
+ in_x(2) = Scalar(25.5);
+ in_x(3) = Scalar(4.7);
+ in_x(4) = Scalar(11.8);
+ in_x(5) = Scalar(17.7);
+ in_x(6) = Scalar(30.2);
+
+ expected_out(0) = Scalar(0.644934066848);
+ expected_out(1) = Scalar(0.394934066848);
+ expected_out(2) = Scalar(0.0399946696496);
+ expected_out(3) = Scalar(293.334565435);
+ expected_out(4) = Scalar(0.445487887616);
+ expected_out(5) = Scalar(-2.47810300902e-07);
+ expected_out(6) = Scalar(-8.29668781082e-09);
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_n;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_n), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_n(d_in_n, 7);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);
+
+ gpu_out.device(gpu_device) = gpu_in_n.polygamma(gpu_in_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 7; ++i) {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_n);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_igamma()
+{
+ Tensor<Scalar, 2> a(6, 6);
+ Tensor<Scalar, 2> x(6, 6);
+ Tensor<Scalar, 2> out(6, 6);
+ out.setZero();
+
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
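+  // build a 6x6 grid of arguments: a varies along rows, x along columns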
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ a(i, j) = a_s[i];
+ x(i, j) = x_s[j];
+ }
+ }
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+ Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+ {0.0, 0.6321205588285578, 0.7768698398515702,
+ 0.9816843611112658, 9.999500016666262e-05, 1.0},
+ {0.0, 0.4275932955291202, 0.608374823728911,
+ 0.9539882943107686, 7.522076445089201e-07, 1.0},
+ {0.0, 0.01898815687615381, 0.06564245437845008,
+ 0.5665298796332909, 4.166333347221828e-18, 1.0},
+ {0.0, 0.9999780593618628, 0.9999899967080838,
+ 0.9999996219837988, 0.9991370418689945, 1.0},
+ {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+
+ std::size_t bytes = a.size() * sizeof(Scalar);
+
+ Scalar* d_a;
+ Scalar* d_x;
+ Scalar* d_out;
+ assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
+
+ cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6);
+
+ gpu_out.device(gpu_device) = gpu_a.igamma(gpu_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igamma_s[i][j])) {
+ VERIFY((std::isnan)(out(i, j)));
+ } else {
+ VERIFY_IS_APPROX(out(i, j), igamma_s[i][j]);
+ }
+ }
+ }
+
+ cudaFree(d_a);
+ cudaFree(d_x);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_igammac()
+{
+ Tensor<Scalar, 2> a(6, 6);
+ Tensor<Scalar, 2> x(6, 6);
+ Tensor<Scalar, 2> out(6, 6);
+ out.setZero();
+
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ a(i, j) = a_s[i];
+ x(i, j) = x_s[j];
+ }
+ }
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+ Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+ {1.0, 0.36787944117144233, 0.22313016014842982,
+ 0.018315638888734182, 0.9999000049998333, 0.0},
+ {1.0, 0.5724067044708798, 0.3916251762710878,
+ 0.04601170568923136, 0.9999992477923555, 0.0},
+ {1.0, 0.9810118431238462, 0.9343575456215499,
+ 0.4334701203667089, 1.0, 0.0},
+ {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+ 3.7801620118431334e-07, 0.0008629581310054535,
+ 0.0},
+ {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+
+ std::size_t bytes = a.size() * sizeof(Scalar);
+
+ Scalar* d_a;
+ Scalar* d_x;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_a), bytes);
+ cudaMalloc((void**)(&d_x), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6);
+
+ gpu_out.device(gpu_device) = gpu_a.igammac(gpu_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igammac_s[i][j])) {
+ VERIFY((std::isnan)(out(i, j)));
+ } else {
+ VERIFY_IS_APPROX(out(i, j), igammac_s[i][j]);
+ }
+ }
+ }
+
+ cudaFree(d_a);
+ cudaFree(d_x);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_erf(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess);
+ assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.erf();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
+ }
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_erfc(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.erfc();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
+ }
+ }
+
+ cudaFree(d_in);
+ cudaFree(d_out);
+}
+
+template <typename Scalar>
+void test_cuda_betainc()
+{
+ Tensor<Scalar, 1> in_x(125);
+ Tensor<Scalar, 1> in_a(125);
+ Tensor<Scalar, 1> in_b(125);
+ Tensor<Scalar, 1> out(125);
+ Tensor<Scalar, 1> expected_out(125);
+ out.setZero();
+
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+
+ Array<Scalar, 1, Dynamic> x(125);
+ Array<Scalar, 1, Dynamic> a(125);
+ Array<Scalar, 1, Dynamic> b(125);
+ Array<Scalar, 1, Dynamic> v(125);
+
+ a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999;
+
+ b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999,
+ 999.999, 999.999, 999.999;
+
+ x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1,
+ -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1;
+
+ v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan,
+ 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan,
+ 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan,
+ 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, nan,
+ nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256,
+ 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001,
+ 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403,
+ 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999,
+ 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan,
+ 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, nan,
+ nan, 7.864342668429763e-23, 3.015969667594166e-10, 0.0008598571564165444,
+ nan, nan, 6.031987710123844e-08, 0.5000000000000007, 0.9999999396801229,
+ nan, nan, 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan,
+ nan, nan, nan, nan, nan, nan, 0.0, 7.029920380986636e-306,
+ 2.2450728208591345e-101, nan, nan, 0.0, 9.275871147869727e-302,
+ 1.2232913026152827e-97, nan, nan, 0.0, 3.0891393081932924e-252,
+ 2.9303043666183996e-60, nan, nan, 2.248913486879199e-196,
+ 0.5000000000004947, 0.9999999999999999, nan;
+
+ for (int i = 0; i < 125; ++i) {
+ in_x(i) = x(i);
+ in_a(i) = a(i);
+ in_b(i) = b(i);
+ expected_out(i) = v(i);
+ }
+
+ std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+ Scalar* d_in_x;
+ Scalar* d_in_a;
+ Scalar* d_in_b;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in_x), bytes);
+ cudaMalloc((void**)(&d_in_a), bytes);
+ cudaMalloc((void**)(&d_in_b), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_a, in_a.data(), bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in_b, in_b.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 125);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_a(d_in_a, 125);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_b(d_in_b, 125);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 125);
+
+ gpu_out.device(gpu_device) = betainc(gpu_in_a, gpu_in_b, gpu_in_x);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
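+ // Note: element 0 (a NaN entry in the golden data) is skipped by this loop,
+ // so no meaningful coverage is lost.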
+ for (int i = 1; i < 125; ++i) {
+ if ((std::isnan)(expected_out(i))) {
+ VERIFY((std::isnan)(out(i)));
+ } else {
+ VERIFY_IS_APPROX(out(i), expected_out(i));
+ }
+ }
+
+ cudaFree(d_in_x);
+ cudaFree(d_in_a);
+ cudaFree(d_in_b);
+ cudaFree(d_out);
+}
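+
+// For reference: betainc(a, b, x) evaluates the regularized incomplete beta
+// function I_x(a, b). The golden values above can be reproduced on the host
+// with a default-device evaluation (a sketch, not part of the test):
+//
+//   Eigen::Tensor<Scalar, 1> r(125);
+//   r = betainc(in_a, in_b, in_x);  // runs on the DefaultDevice
+//
+// A cheap consistency check when extending the table is the reflection
+// identity I_x(a, b) = 1 - I_{1-x}(b, a).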
+
+
+void test_cxx11_tensor_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_nullary());
+ CALL_SUBTEST_1(test_cuda_elementwise_small());
+ CALL_SUBTEST_1(test_cuda_elementwise());
+ CALL_SUBTEST_1(test_cuda_props());
+ CALL_SUBTEST_1(test_cuda_reduction());
+ CALL_SUBTEST_2(test_cuda_contraction<ColMajor>());
+ CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_1d<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_1d<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d());
+ CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d());
+ CALL_SUBTEST_3(test_cuda_convolution_2d<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
+ CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
+
+#if __cplusplus > 199711L
+ // std::erf, std::erfc, and so on were only added in C++11. We use them
+ // as a golden reference to validate the results produced by Eigen. Therefore
+ // we can only run these tests when compiling with a C++11 compiler.
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
+ CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
+
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
+
+ CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
+ CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
+ CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
+ CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
+
+ CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
+ // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
+ CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
+ CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
+ CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
+
+ CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
+ CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
+
+ CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
+ // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
+ CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
+ CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
+ CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
+
+ CALL_SUBTEST_5(test_cuda_digamma<float>());
+ CALL_SUBTEST_5(test_cuda_digamma<double>());
+
+ CALL_SUBTEST_5(test_cuda_polygamma<float>());
+ CALL_SUBTEST_5(test_cuda_polygamma<double>());
+
+ CALL_SUBTEST_5(test_cuda_zeta<float>());
+ CALL_SUBTEST_5(test_cuda_zeta<double>());
+
+ CALL_SUBTEST_5(test_cuda_igamma<float>());
+ CALL_SUBTEST_5(test_cuda_igammac<float>());
+
+ CALL_SUBTEST_5(test_cuda_igamma<double>());
+ CALL_SUBTEST_5(test_cuda_igammac<double>());
+
+ CALL_SUBTEST_6(test_cuda_betainc<float>());
+ CALL_SUBTEST_6(test_cuda_betainc<double>());
+#endif
+}
diff --git a/unsupported/test/cxx11_tensor_custom_index.cpp b/unsupported/test/cxx11_tensor_custom_index.cpp
new file mode 100644
index 000000000..4528cc176
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_custom_index.cpp
@@ -0,0 +1,100 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <limits>
+#include <map>
+
+#include <Eigen/Dense>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+
+template <int DataLayout>
+static void test_map_as_index()
+{
+#ifdef EIGEN_HAS_SFINAE
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+
+ using NormalIndex = DSizes<ptrdiff_t, 4>;
+ using CustomIndex = std::map<ptrdiff_t, ptrdiff_t>;
+ CustomIndex coeffC;
+ coeffC[0] = 1;
+ coeffC[1] = 2;
+ coeffC[2] = 4;
+ coeffC[3] = 1;
+ NormalIndex coeff(1,2,4,1);
+
+ VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
+ VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
+#endif
+}
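+
+// The custom-index overloads of coeff()/coeffRef() are SFINAE-gated on the
+// index type: anything that is not a normal index array but supports
+// array-style element access can be used, as the map, matrix, initializer
+// list and Sizes<> variants in this file illustrate.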
+
+
+template <int DataLayout>
+static void test_matrix_as_index()
+{
+#ifdef EIGEN_HAS_SFINAE
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+
+ using NormalIndex = DSizes<ptrdiff_t, 4>;
+ using CustomIndex = Matrix<unsigned int, 4, 1>;
+ CustomIndex coeffC(1,2,4,1);
+ NormalIndex coeff(1,2,4,1);
+
+ VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
+ VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
+#endif
+}
+
+
+template <int DataLayout>
+static void test_varlist_as_index()
+{
+#ifdef EIGEN_HAS_SFINAE
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+
+ DSizes<ptrdiff_t, 4> coeff(1,2,4,1);
+
+ VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff));
+ VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff));
+#endif
+}
+
+
+template <int DataLayout>
+static void test_sizes_as_index()
+{
+#ifdef EIGEN_HAS_SFINAE
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+
+ DSizes<ptrdiff_t, 4> coeff(1,2,4,1);
+ Sizes<1,2,4,1> coeffC;
+
+ VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
+ VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
+#endif
+}
+
+
+void test_cxx11_tensor_custom_index() {
+ test_map_as_index<ColMajor>();
+ test_map_as_index<RowMajor>();
+ test_matrix_as_index<ColMajor>();
+ test_matrix_as_index<RowMajor>();
+ test_varlist_as_index<ColMajor>();
+ test_varlist_as_index<RowMajor>();
+ test_sizes_as_index<ColMajor>();
+ test_sizes_as_index<RowMajor>();
+}
diff --git a/unsupported/test/cxx11_tensor_custom_op.cpp b/unsupported/test/cxx11_tensor_custom_op.cpp
new file mode 100644
index 000000000..8baa477cc
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_custom_op.cpp
@@ -0,0 +1,111 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+
+struct InsertZeros {
+ DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
+ DSizes<DenseIndex, 2> result;
+ result[0] = input.dimension(0) * 2;
+ result[1] = input.dimension(1) * 2;
+ return result;
+ }
+
+ template <typename Output, typename Device>
+ void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const
+ {
+ array<DenseIndex, 2> strides;
+ strides[0] = 2;
+ strides[1] = 2;
+ output.stride(strides).device(device) = input;
+
+ Eigen::DSizes<DenseIndex, 2> offsets(1,1);
+ Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
+ output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
+ }
+};
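+
+// InsertZeros illustrates the custom unary op protocol: dimensions() maps the
+// input shape to the output shape (here doubling both dimensions), and eval()
+// writes the result through the supplied device -- the input lands on the
+// even-even entries and the odd-odd entries are explicitly zeroed (which is
+// all the test below verifies).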
+
+static void test_custom_unary_op()
+{
+ Tensor<float, 2> tensor(3,5);
+ tensor.setRandom();
+
+ Tensor<float, 2> result = tensor.customOp(InsertZeros());
+ VERIFY_IS_EQUAL(result.dimension(0), 6);
+ VERIFY_IS_EQUAL(result.dimension(1), 10);
+
+ for (int i = 0; i < 6; i+=2) {
+ for (int j = 0; j < 10; j+=2) {
+ VERIFY_IS_EQUAL(result(i, j), tensor(i/2, j/2));
+ }
+ }
+ for (int i = 1; i < 6; i+=2) {
+ for (int j = 1; j < 10; j+=2) {
+ VERIFY_IS_EQUAL(result(i, j), 0);
+ }
+ }
+}
+
+
+struct BatchMatMul {
+ DSizes<DenseIndex, 3> dimensions(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2) const {
+ DSizes<DenseIndex, 3> result;
+ result[0] = input1.dimension(0);
+ result[1] = input2.dimension(1);
+ result[2] = input2.dimension(2);
+ return result;
+ }
+
+ template <typename Output, typename Device>
+ void eval(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2,
+ Output& output, const Device& device) const
+ {
+ typedef Tensor<float, 3>::DimensionPair DimPair;
+ array<DimPair, 1> dims;
+ dims[0] = DimPair(1, 0);
+ for (int i = 0; i < output.dimension(2); ++i) {
+ output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims);
+ }
+ }
+};
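+
+// The binary custom op follows the same dimensions()/eval() protocol with two
+// inputs. BatchMatMul treats the last dimension as the batch index and, for
+// each slice, contracts dimension 1 of the first chip against dimension 0 of
+// the second, i.e. a per-slice matrix product.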
+
+
+static void test_custom_binary_op()
+{
+ Tensor<float, 3> tensor1(2,3,5);
+ tensor1.setRandom();
+ Tensor<float, 3> tensor2(3,7,5);
+ tensor2.setRandom();
+
+ Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul());
+ for (int i = 0; i < 5; ++i) {
+ typedef Tensor<float, 3>::DimensionPair DimPair;
+ array<DimPair, 1> dims;
+ dims[0] = DimPair(1, 0);
+ Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims);
+ TensorRef<Tensor<float, 2> > val = result.chip<2>(i);
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(val(j, k), reference(j, k));
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_custom_op()
+{
+ CALL_SUBTEST(test_custom_unary_op());
+ CALL_SUBTEST(test_custom_binary_op());
+}
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
new file mode 100644
index 000000000..fde20ddf2
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -0,0 +1,390 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_device
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+// Context for evaluation on CPU
+struct CPUContext {
+ CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out), kernel_1d_(2), kernel_2d_(2,2), kernel_3d_(2,2,2) {
+ kernel_1d_(0) = 3.14f;
+ kernel_1d_(1) = 2.7f;
+
+ kernel_2d_(0,0) = 3.14f;
+ kernel_2d_(1,0) = 2.7f;
+ kernel_2d_(0,1) = 0.2f;
+ kernel_2d_(1,1) = 7.0f;
+
+ kernel_3d_(0,0,0) = 3.14f;
+ kernel_3d_(0,1,0) = 2.7f;
+ kernel_3d_(0,0,1) = 0.2f;
+ kernel_3d_(0,1,1) = 7.0f;
+ kernel_3d_(1,0,0) = -1.0f;
+ kernel_3d_(1,1,0) = -0.3f;
+ kernel_3d_(1,0,1) = -0.7f;
+ kernel_3d_(1,1,1) = -0.5f;
+ }
+
+ const Eigen::DefaultDevice& device() const { return cpu_device_; }
+
+ const Eigen::Tensor<float, 3>& in1() const { return in1_; }
+ const Eigen::Tensor<float, 3>& in2() const { return in2_; }
+ Eigen::Tensor<float, 3>& out() { return out_; }
+ const Eigen::Tensor<float, 1>& kernel1d() const { return kernel_1d_; }
+ const Eigen::Tensor<float, 2>& kernel2d() const { return kernel_2d_; }
+ const Eigen::Tensor<float, 3>& kernel3d() const { return kernel_3d_; }
+
+ private:
+ const Eigen::Tensor<float, 3>& in1_;
+ const Eigen::Tensor<float, 3>& in2_;
+ Eigen::Tensor<float, 3>& out_;
+
+ Eigen::Tensor<float, 1> kernel_1d_;
+ Eigen::Tensor<float, 2> kernel_2d_;
+ Eigen::Tensor<float, 3> kernel_3d_;
+
+ Eigen::DefaultDevice cpu_device_;
+};
+
+
+// Context for evaluation on GPU
+struct GPUContext {
+ GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out), gpu_device_(&stream_) {
+ assert(cudaMalloc((void**)(&kernel_1d_), 2*sizeof(float)) == cudaSuccess);
+ float kernel_1d_val[] = {3.14f, 2.7f};
+ assert(cudaMemcpy(kernel_1d_, kernel_1d_val, 2*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
+
+ assert(cudaMalloc((void**)(&kernel_2d_), 4*sizeof(float)) == cudaSuccess);
+ float kernel_2d_val[] = {3.14f, 2.7f, 0.2f, 7.0f};
+ assert(cudaMemcpy(kernel_2d_, kernel_2d_val, 4*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
+
+ assert(cudaMalloc((void**)(&kernel_3d_), 8*sizeof(float)) == cudaSuccess);
+ float kernel_3d_val[] = {3.14f, -1.0f, 2.7f, -0.3f, 0.2f, -0.7f, 7.0f, -0.5f};
+ assert(cudaMemcpy(kernel_3d_, kernel_3d_val, 8*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
+ }
+ ~GPUContext() {
+ assert(cudaFree(kernel_1d_) == cudaSuccess);
+ assert(cudaFree(kernel_2d_) == cudaSuccess);
+ assert(cudaFree(kernel_3d_) == cudaSuccess);
+ }
+
+ const Eigen::GpuDevice& device() const { return gpu_device_; }
+
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; }
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; }
+ Eigen::TensorMap<Eigen::Tensor<float, 3> >& out() { return out_; }
+ Eigen::TensorMap<Eigen::Tensor<float, 1> > kernel1d() const { return Eigen::TensorMap<Eigen::Tensor<float, 1> >(kernel_1d_, 2); }
+ Eigen::TensorMap<Eigen::Tensor<float, 2> > kernel2d() const { return Eigen::TensorMap<Eigen::Tensor<float, 2> >(kernel_2d_, 2, 2); }
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > kernel3d() const { return Eigen::TensorMap<Eigen::Tensor<float, 3> >(kernel_3d_, 2, 2, 2); }
+
+ private:
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_;
+ const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_;
+ Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_;
+
+ float* kernel_1d_;
+ float* kernel_2d_;
+ float* kernel_3d_;
+
+ Eigen::CudaStreamDevice stream_;
+ Eigen::GpuDevice gpu_device_;
+};
+
+
+// The actual expression to evaluate
+template <typename Context>
+void test_contextual_eval(Context* context)
+{
+ context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f);
+}
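+
+// The expression bodies here are device-agnostic: .device(...) only selects
+// where the assignment is evaluated, so the same templates run unchanged on
+// CPUContext (Eigen::DefaultDevice) and GPUContext (Eigen::GpuDevice).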
+
+template <typename Context>
+void test_forced_contextual_eval(Context* context)
+{
+ context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
+}
+
+template <typename Context>
+void test_compound_assignment(Context* context)
+{
+ context->out().device(context->device()) = context->in1().constant(2.718f);
+ context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
+}
+
+
+template <typename Context>
+void test_contraction(Context* context)
+{
+ Eigen::array<std::pair<int, int>, 2> dims;
+ dims[0] = std::make_pair(1, 1);
+ dims[1] = std::make_pair(2, 2);
+
+ Eigen::array<int, 2> shape(40, 50*70);
+
+ Eigen::DSizes<int, 2> indices(0,0);
+ Eigen::DSizes<int, 2> sizes(40,40);
+
+ context->out().reshape(shape).slice(indices, sizes).device(context->device()) = context->in1().contract(context->in2(), dims);
+}
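+
+// With contraction pairs (1,1) and (2,2) this computes
+//   out(i, j) = sum_{k,l} in1(i, k, l) * in2(j, k, l)
+// yielding a 40x40 block, which is exactly what the verification loops in
+// test_cpu() and test_gpu() recompute by hand.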
+
+
+template <typename Context>
+void test_1d_convolution(Context* context)
+{
+ Eigen::DSizes<int, 3> indices(0,0,0);
+ Eigen::DSizes<int, 3> sizes(40,49,70);
+
+ Eigen::array<int, 1> dims(1);
+ context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims);
+}
+
+template <typename Context>
+void test_2d_convolution(Context* context)
+{
+ Eigen::DSizes<int, 3> indices(0,0,0);
+ Eigen::DSizes<int, 3> sizes(40,49,69);
+
+ Eigen::array<int, 2> dims(1,2);
+ context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims);
+}
+
+template <typename Context>
+void test_3d_convolution(Context* context)
+{
+ Eigen::DSizes<int, 3> indices(0,0,0);
+ Eigen::DSizes<int, 3> sizes(39,49,69);
+
+ Eigen::array<int, 3> dims(0,1,2);
+ context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
+}
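+
+// The slice extents above encode "valid" convolution semantics: each
+// convolved dimension shrinks by kernel_size - 1, so the 40x50x70 input
+// yields 40x49x70 (2-tap kernel along dim 1), 40x49x69 (2x2 kernel along
+// dims 1 and 2) and 39x49x69 (2x2x2 kernel along all three dims).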
+
+
+void test_cpu() {
+ Eigen::Tensor<float, 3> in1(40,50,70);
+ Eigen::Tensor<float, 3> in2(40,50,70);
+ Eigen::Tensor<float, 3> out(40,50,70);
+
+ in1 = in1.random() + in1.constant(10.0f);
+ in2 = in2.random() + in2.constant(10.0f);
+
+ CPUContext context(in1, in2, out);
+ test_contextual_eval(&context);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 50; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
+ }
+ }
+ }
+
+ test_forced_contextual_eval(&context);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 50; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f);
+ }
+ }
+ }
+
+ test_compound_assignment(&context);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 50; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
+ }
+ }
+ }
+
+ test_contraction(&context);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 40; ++j) {
+ const float result = out(i,j,0);
+ float expected = 0;
+ for (int k = 0; k < 50; ++k) {
+ for (int l = 0; l < 70; ++l) {
+ expected += in1(i, k, l) * in2(j, k, l);
+ }
+ }
+ VERIFY_IS_APPROX(expected, result);
+ }
+ }
+
+ test_1d_convolution(&context);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 49; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f));
+ }
+ }
+ }
+
+ test_2d_convolution(&context);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 49; ++j) {
+ for (int k = 0; k < 69; ++k) {
+ const float result = out(i,j,k);
+ const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) +
+ (in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f);
+ if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
+ continue;
+ }
+ VERIFY_IS_APPROX(expected, result);
+ }
+ }
+ }
+
+ test_3d_convolution(&context);
+ for (int i = 0; i < 39; ++i) {
+ for (int j = 0; j < 49; ++j) {
+ for (int k = 0; k < 69; ++k) {
+ const float result = out(i,j,k);
+ const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f +
+ in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) +
+ (in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f +
+ in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f);
+ if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) {
+ continue;
+ }
+ VERIFY_IS_APPROX(expected, result);
+ }
+ }
+ }
+}
+
+void test_gpu() {
+ Eigen::Tensor<float, 3> in1(40,50,70);
+ Eigen::Tensor<float, 3> in2(40,50,70);
+ Eigen::Tensor<float, 3> out(40,50,70);
+ in1 = in1.random() + in1.constant(10.0f);
+ in2 = in2.random() + in2.constant(10.0f);
+
+ std::size_t in1_bytes = in1.size() * sizeof(float);
+ std::size_t in2_bytes = in2.size() * sizeof(float);
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ float* d_out;
+ cudaMalloc((void**)(&d_in1), in1_bytes);
+ cudaMalloc((void**)(&d_in2), in2_bytes);
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, 40,50,70);
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, 40,50,70);
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, 40,50,70);
+
+ GPUContext context(gpu_in1, gpu_in2, gpu_out);
+ test_contextual_eval(&context);
+ assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 50; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
+ }
+ }
+ }
+
+ test_forced_contextual_eval(&context);
+ assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 50; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f);
+ }
+ }
+ }
+
+ test_compound_assignment(&context);
+ assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 50; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f);
+ }
+ }
+ }
+
+ test_contraction(&context);
+ assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 40; ++j) {
+ const float result = out(i,j,0);
+ float expected = 0;
+ for (int k = 0; k < 50; ++k) {
+ for (int l = 0; l < 70; ++l) {
+ expected += in1(i, k, l) * in2(j, k, l);
+ }
+ }
+ VERIFY_IS_APPROX(expected, result);
+ }
+ }
+
+ test_1d_convolution(&context);
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 49; ++j) {
+ for (int k = 0; k < 70; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f));
+ }
+ }
+ }
+
+ test_2d_convolution(&context);
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess);
+ for (int i = 0; i < 40; ++i) {
+ for (int j = 0; j < 49; ++j) {
+ for (int k = 0; k < 69; ++k) {
+ const float result = out(i,j,k);
+ const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f +
+ in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f);
+ VERIFY_IS_APPROX(expected, result);
+ }
+ }
+ }
+
+ test_3d_convolution(&context);
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess);
+ for (int i = 0; i < 39; ++i) {
+ for (int j = 0; j < 49; ++j) {
+ for (int k = 0; k < 69; ++k) {
+ const float result = out(i,j,k);
+ const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f +
+ in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f +
+ in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f +
+ in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f);
+ VERIFY_IS_APPROX(expected, result);
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_device()
+{
+ CALL_SUBTEST_1(test_cpu());
+ CALL_SUBTEST_2(test_gpu());
+}
diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp
new file mode 100644
index 000000000..7f79753c5
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -0,0 +1,31 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_device_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+void test_device_sycl(const Eigen::SyclDevice &sycl_device) {
+ std::cout <<"Helo from ComputeCpp: the requested device exists and the device name is : "
+ << sycl_device.m_queue.get_device(). template get_info<cl::sycl::info::device::name>() <<std::endl;;
+}
+void test_cxx11_tensor_device_sycl() {
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_device_sycl(sycl_device));
+}
diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp
new file mode 100644
index 000000000..16f168ed4
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_dimension.cpp
@@ -0,0 +1,69 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+
+static void test_dynamic_size()
+{
+ Eigen::DSizes<int, 3> dimensions(2,3,7);
+
+ VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
+ VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
+ VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
+ VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
+ VERIFY_IS_EQUAL((int)dimensions[0], 2);
+ VERIFY_IS_EQUAL((int)dimensions[1], 3);
+ VERIFY_IS_EQUAL((int)dimensions[2], 7);
+}
+
+static void test_fixed_size()
+{
+ Eigen::Sizes<2,3,7> dimensions;
+
+ VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
+ VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
+ VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
+ VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
+}
+
+static void test_match()
+{
+ Eigen::DSizes<unsigned int, 3> dyn((unsigned int)2,(unsigned int)3,(unsigned int)7);
+ Eigen::Sizes<2,3,7> stat;
+ VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true);
+
+ Eigen::DSizes<int, 3> dyn1(2,3,7);
+ Eigen::DSizes<int, 2> dyn2(2,3);
+ VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false);
+}
+
+static void test_rank_zero()
+{
+ Eigen::Sizes<> scalar;
+ VERIFY_IS_EQUAL((int)scalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL((int)scalar.rank(), 0);
+ VERIFY_IS_EQUAL((int)internal::array_prod(scalar), 1);
+
+ Eigen::DSizes<ptrdiff_t, 0> dscalar;
+ VERIFY_IS_EQUAL((int)dscalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL((int)dscalar.rank(), 0);
+}
+
+void test_cxx11_tensor_dimension()
+{
+ CALL_SUBTEST(test_dynamic_size());
+ CALL_SUBTEST(test_fixed_size());
+ CALL_SUBTEST(test_match());
+ CALL_SUBTEST(test_rank_zero());
+}
diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp
new file mode 100644
index 000000000..d7eea42d7
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_empty.cpp
@@ -0,0 +1,40 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
+static void test_empty_tensor()
+{
+ Tensor<float, 2> source;
+ Tensor<float, 2> tgt1 = source;
+ Tensor<float, 2> tgt2(source);
+ Tensor<float, 2> tgt3;
+ tgt3 = tgt1;
+ tgt3 = tgt2;
+}
+
+static void test_empty_fixed_size_tensor()
+{
+ TensorFixedSize<float, Sizes<0> > source;
+ TensorFixedSize<float, Sizes<0> > tgt1 = source;
+ TensorFixedSize<float, Sizes<0> > tgt2(source);
+ TensorFixedSize<float, Sizes<0> > tgt3;
+ tgt3 = tgt1;
+ tgt3 = tgt2;
+}
+
+
+void test_cxx11_tensor_empty()
+{
+ CALL_SUBTEST(test_empty_tensor());
+ CALL_SUBTEST(test_empty_fixed_size_tensor());
+}
diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp
new file mode 100644
index 000000000..77e24cb67
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_expr.cpp
@@ -0,0 +1,314 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_1d()
+{
+ Tensor<float, 1> vec1(6);
+ Tensor<float, 1, RowMajor> vec2(6);
+
+ vec1(0) = 4.0; vec2(0) = 0.0;
+ vec1(1) = 8.0; vec2(1) = 1.0;
+ vec1(2) = 15.0; vec2(2) = 2.0;
+ vec1(3) = 16.0; vec2(3) = 3.0;
+ vec1(4) = 23.0; vec2(4) = 4.0;
+ vec1(5) = 42.0; vec2(5) = 5.0;
+
+ float data3[6];
+ TensorMap<Tensor<float, 1>> vec3(data3, 6);
+ vec3 = vec1.sqrt();
+ float data4[6];
+ TensorMap<Tensor<float, 1, RowMajor>> vec4(data4, 6);
+ vec4 = vec2.square();
+ float data5[6];
+ TensorMap<Tensor<float, 1, RowMajor>> vec5(data5, 6);
+ vec5 = vec2.cube();
+
+ VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
+ VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
+ VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
+ VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
+ VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
+ VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
+
+ VERIFY_IS_APPROX(vec4(0), 0.0f);
+ VERIFY_IS_APPROX(vec4(1), 1.0f);
+ VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f);
+ VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f);
+ VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f);
+ VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f);
+
+ VERIFY_IS_APPROX(vec5(0), 0.0f);
+ VERIFY_IS_APPROX(vec5(1), 1.0f);
+ VERIFY_IS_APPROX(vec5(2), 2.0f * 2.0f * 2.0f);
+ VERIFY_IS_APPROX(vec5(3), 3.0f * 3.0f * 3.0f);
+ VERIFY_IS_APPROX(vec5(4), 4.0f * 4.0f * 4.0f);
+ VERIFY_IS_APPROX(vec5(5), 5.0f * 5.0f * 5.0f);
+
+ vec3 = vec1 + vec2;
+ VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f);
+ VERIFY_IS_APPROX(vec3(1), 8.0f + 1.0f);
+ VERIFY_IS_APPROX(vec3(2), 15.0f + 2.0f);
+ VERIFY_IS_APPROX(vec3(3), 16.0f + 3.0f);
+ VERIFY_IS_APPROX(vec3(4), 23.0f + 4.0f);
+ VERIFY_IS_APPROX(vec3(5), 42.0f + 5.0f);
+}
+
+static void test_2d()
+{
+ float data1[6];
+ TensorMap<Tensor<float, 2>> mat1(data1, 2, 3);
+ float data2[6];
+ TensorMap<Tensor<float, 2, RowMajor>> mat2(data2, 2, 3);
+
+ mat1(0,0) = 0.0;
+ mat1(0,1) = 1.0;
+ mat1(0,2) = 2.0;
+ mat1(1,0) = 3.0;
+ mat1(1,1) = 4.0;
+ mat1(1,2) = 5.0;
+
+ mat2(0,0) = -0.0;
+ mat2(0,1) = -1.0;
+ mat2(0,2) = -2.0;
+ mat2(1,0) = -3.0;
+ mat2(1,1) = -4.0;
+ mat2(1,2) = -5.0;
+
+ Tensor<float, 2> mat3(2,3);
+ Tensor<float, 2, RowMajor> mat4(2,3);
+ mat3 = mat1.abs();
+ mat4 = mat2.abs();
+
+ VERIFY_IS_APPROX(mat3(0,0), 0.0f);
+ VERIFY_IS_APPROX(mat3(0,1), 1.0f);
+ VERIFY_IS_APPROX(mat3(0,2), 2.0f);
+ VERIFY_IS_APPROX(mat3(1,0), 3.0f);
+ VERIFY_IS_APPROX(mat3(1,1), 4.0f);
+ VERIFY_IS_APPROX(mat3(1,2), 5.0f);
+
+ VERIFY_IS_APPROX(mat4(0,0), 0.0f);
+ VERIFY_IS_APPROX(mat4(0,1), 1.0f);
+ VERIFY_IS_APPROX(mat4(0,2), 2.0f);
+ VERIFY_IS_APPROX(mat4(1,0), 3.0f);
+ VERIFY_IS_APPROX(mat4(1,1), 4.0f);
+ VERIFY_IS_APPROX(mat4(1,2), 5.0f);
+}
+
+static void test_3d()
+{
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3, RowMajor> mat2(2,3,7);
+
+ float val = 1.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ mat2(i,j,k) = val;
+ val += 1.0f;
+ }
+ }
+ }
+
+ Tensor<float, 3> mat3(2,3,7);
+ mat3 = mat1 + mat1;
+ Tensor<float, 3, RowMajor> mat4(2,3,7);
+ mat4 = mat2 * 3.14f;
+ Tensor<float, 3> mat5(2,3,7);
+ mat5 = mat1.inverse().log();
+ Tensor<float, 3, RowMajor> mat6(2,3,7);
+ mat6 = mat2.pow(0.5f) * 3.14f;
+ Tensor<float, 3> mat7(2,3,7);
+ mat7 = mat1.cwiseMax(mat5 * 2.0f).exp();
+ Tensor<float, 3, RowMajor> mat8(2,3,7);
+ mat8 = (-mat2).exp() * 3.14f;
+ Tensor<float, 3, RowMajor> mat9(2,3,7);
+ mat9 = mat2 + 3.14f;
+ Tensor<float, 3, RowMajor> mat10(2,3,7);
+ mat10 = mat2 - 3.14f;
+ Tensor<float, 3, RowMajor> mat11(2,3,7);
+ mat11 = mat2 / 3.14f;
+
+ val = 1.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(i,j,k), val + val);
+ VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
+ VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val));
+ VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
+ VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
+ VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
+ VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f);
+ VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f);
+ VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f);
+ val += 1.0f;
+ }
+ }
+ }
+}
+
+static void test_constants()
+{
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+ Tensor<float, 3> mat3(2,3,7);
+
+ float val = 1.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ val += 1.0f;
+ }
+ }
+ }
+ mat2 = mat1.constant(3.14f);
+ mat3 = mat1.cwiseMax(7.3f).exp();
+
+ val = 1.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat2(i,j,k), 3.14f);
+ VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f)));
+ val += 1.0f;
+ }
+ }
+ }
+}
+
+static void test_boolean()
+{
+ Tensor<int, 1> vec(6);
+ std::copy_n(std::begin({0, 1, 2, 3, 4, 5}), 6, vec.data());
+
+ // Test ||.
+ Tensor<bool, 1> bool1 = vec < vec.constant(1) || vec > vec.constant(4);
+ VERIFY_IS_EQUAL(bool1[0], true);
+ VERIFY_IS_EQUAL(bool1[1], false);
+ VERIFY_IS_EQUAL(bool1[2], false);
+ VERIFY_IS_EQUAL(bool1[3], false);
+ VERIFY_IS_EQUAL(bool1[4], false);
+ VERIFY_IS_EQUAL(bool1[5], true);
+
+ // Test &&, including cast of operand vec.
+ Tensor<bool, 1> bool2 = vec.cast<bool>() && vec < vec.constant(4);
+ VERIFY_IS_EQUAL(bool2[0], false);
+ VERIFY_IS_EQUAL(bool2[1], true);
+ VERIFY_IS_EQUAL(bool2[2], true);
+ VERIFY_IS_EQUAL(bool2[3], true);
+ VERIFY_IS_EQUAL(bool2[4], false);
+ VERIFY_IS_EQUAL(bool2[5], false);
+
+ // Compilation tests:
+ // Test Tensor<bool> against results of cast or comparison; verifies that
+ // CoeffReturnType is set to match Op return type of bool for Unary and Binary
+ // Ops.
+ Tensor<bool, 1> bool3 = vec.cast<bool>() && bool2;
+ bool3 = vec < vec.constant(4) && bool2;
+}
+
+static void test_functors()
+{
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+ Tensor<float, 3> mat3(2,3,7);
+
+ float val = 1.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ val += 1.0f;
+ }
+ }
+ }
+ mat2 = mat1.inverse().unaryExpr(&asinf);
+ mat3 = mat1.unaryExpr(&tanhf);
+
+ val = 1.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k)));
+ VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k)));
+ val += 1.0f;
+ }
+ }
+ }
+}
+
+static void test_type_casting()
+{
+ Tensor<bool, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+ Tensor<double, 3> mat3(2,3,7);
+ mat1.setRandom();
+ mat2.setRandom();
+
+ mat3 = mat1.cast<double>();
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) ? 1.0 : 0.0);
+ }
+ }
+ }
+
+ mat3 = mat2.cast<double>();
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(i,j,k), static_cast<double>(mat2(i,j,k)));
+ }
+ }
+ }
+}
+
+static void test_select()
+{
+ Tensor<float, 3> selector(2,3,7);
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+ Tensor<float, 3> result(2,3,7);
+
+ selector.setRandom();
+ mat1.setRandom();
+ mat2.setRandom();
+ result = (selector > selector.constant(0.5f)).select(mat1, mat2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(result(i,j,k), (selector(i,j,k) > 0.5f) ? mat1(i,j,k) : mat2(i,j,k));
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_expr()
+{
+ CALL_SUBTEST(test_1d());
+ CALL_SUBTEST(test_2d());
+ CALL_SUBTEST(test_3d());
+ CALL_SUBTEST(test_constants());
+ CALL_SUBTEST(test_boolean());
+ CALL_SUBTEST(test_functors());
+ CALL_SUBTEST(test_type_casting());
+ CALL_SUBTEST(test_select());
+}
diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp
new file mode 100644
index 000000000..2f14ebc62
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_fft.cpp
@@ -0,0 +1,273 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout>
+static void test_fft_2D_golden() {
+ Tensor<float, 2, DataLayout> input(2, 3);
+ input(0, 0) = 1;
+ input(0, 1) = 2;
+ input(0, 2) = 3;
+ input(1, 0) = 4;
+ input(1, 1) = 5;
+ input(1, 2) = 6;
+
+ array<ptrdiff_t, 2> fft;
+ fft[0] = 0;
+ fft[1] = 1;
+
+ Tensor<std::complex<float>, 2, DataLayout> output = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+
+ std::complex<float> output_golden[6]; // in ColMajor order
+ output_golden[0] = std::complex<float>(21, 0);
+ output_golden[1] = std::complex<float>(-9, 0);
+ output_golden[2] = std::complex<float>(-3, 1.73205);
+ output_golden[3] = std::complex<float>( 0, 0);
+ output_golden[4] = std::complex<float>(-3, -1.73205);
+ output_golden[5] = std::complex<float>( 0, 0);
+
+ std::complex<float> c_offset = std::complex<float>(1.0, 1.0);
+
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset);
+ VERIFY_IS_APPROX(output(1) + c_offset, output_golden[1] + c_offset);
+ VERIFY_IS_APPROX(output(2) + c_offset, output_golden[2] + c_offset);
+ VERIFY_IS_APPROX(output(3) + c_offset, output_golden[3] + c_offset);
+ VERIFY_IS_APPROX(output(4) + c_offset, output_golden[4] + c_offset);
+ VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset);
+ }
+ else {
+ VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset);
+ VERIFY_IS_APPROX(output(1) + c_offset, output_golden[2] + c_offset);
+ VERIFY_IS_APPROX(output(2) + c_offset, output_golden[4] + c_offset);
+ VERIFY_IS_APPROX(output(3) + c_offset, output_golden[1] + c_offset);
+ VERIFY_IS_APPROX(output(4) + c_offset, output_golden[3] + c_offset);
+ VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset);
+ }
+}
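+
+// The c_offset trick used above (and in the real-input test below) exists
+// because VERIFY_IS_APPROX applies a relative tolerance: golden entries at or
+// near zero would demand unbounded relative precision. Adding the same
+// constant to both sides turns the check into a comparison around 1.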
+
+static void test_fft_complex_input_golden() {
+ Tensor<std::complex<float>, 1, ColMajor> input(5);
+ input(0) = std::complex<float>(1, 1);
+ input(1) = std::complex<float>(2, 2);
+ input(2) = std::complex<float>(3, 3);
+ input(3) = std::complex<float>(4, 4);
+ input(4) = std::complex<float>(5, 5);
+
+ array<ptrdiff_t, 1> fft;
+ fft[0] = 0;
+
+ Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
+ Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
+
+ Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
+
+ Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
+
+ VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
+ VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
+
+ VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0));
+ VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0));
+
+ VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0));
+ VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0));
+
+ std::complex<float> forward_golden_result[5];
+ std::complex<float> reverse_golden_result[5];
+
+ forward_golden_result[0] = std::complex<float>(15.000000000000000,+15.000000000000000);
+ forward_golden_result[1] = std::complex<float>(-5.940954801177935, +0.940954801177934);
+ forward_golden_result[2] = std::complex<float>(-3.312299240582266, -1.687700759417735);
+ forward_golden_result[3] = std::complex<float>(-1.687700759417735, -3.312299240582266);
+ forward_golden_result[4] = std::complex<float>( 0.940954801177934, -5.940954801177935);
+
+ reverse_golden_result[0] = std::complex<float>( 3.000000000000000, + 3.000000000000000);
+ reverse_golden_result[1] = std::complex<float>( 0.188190960235587, - 1.188190960235587);
+ reverse_golden_result[2] = std::complex<float>(-0.337540151883547, - 0.662459848116453);
+ reverse_golden_result[3] = std::complex<float>(-0.662459848116453, - 0.337540151883547);
+ reverse_golden_result[4] = std::complex<float>(-1.188190960235587, + 0.188190960235587);
+
+ for(int i = 0; i < 5; ++i) {
+ VERIFY_IS_APPROX(forward_output_both_parts(i), forward_golden_result[i]);
+ VERIFY_IS_APPROX(forward_output_real_part(i), forward_golden_result[i].real());
+ VERIFY_IS_APPROX(forward_output_imag_part(i), forward_golden_result[i].imag());
+ }
+
+ for(int i = 0; i < 5; ++i) {
+ VERIFY_IS_APPROX(reverse_output_both_parts(i), reverse_golden_result[i]);
+ VERIFY_IS_APPROX(reverse_output_real_part(i), reverse_golden_result[i].real());
+ VERIFY_IS_APPROX(reverse_output_imag_part(i), reverse_golden_result[i].imag());
+ }
+}
+
+static void test_fft_real_input_golden() {
+ Tensor<float, 1, ColMajor> input(5);
+ input(0) = 1.0;
+ input(1) = 2.0;
+ input(2) = 3.0;
+ input(3) = 4.0;
+ input(4) = 5.0;
+
+ array<ptrdiff_t, 1> fft;
+ fft[0] = 0;
+
+ Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft);
+ Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft);
+
+ Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft);
+
+ Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft);
+ Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft);
+
+ VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0));
+ VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0));
+
+ VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0));
+ VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0));
+
+ VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0));
+ VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0));
+
+ std::complex<float> forward_golden_result[5];
+ std::complex<float> reverse_golden_result[5];
+
+
+ forward_golden_result[0] = std::complex<float>( 15, 0);
+ forward_golden_result[1] = std::complex<float>(-2.5, +3.44095480117793);
+ forward_golden_result[2] = std::complex<float>(-2.5, +0.81229924058227);
+ forward_golden_result[3] = std::complex<float>(-2.5, -0.81229924058227);
+ forward_golden_result[4] = std::complex<float>(-2.5, -3.44095480117793);
+
+ reverse_golden_result[0] = std::complex<float>( 3.0, 0);
+ reverse_golden_result[1] = std::complex<float>(-0.5, -0.688190960235587);
+ reverse_golden_result[2] = std::complex<float>(-0.5, -0.162459848116453);
+ reverse_golden_result[3] = std::complex<float>(-0.5, +0.162459848116453);
+ reverse_golden_result[4] = std::complex<float>(-0.5, +0.688190960235587);
+
+ std::complex<float> c_offset(1.0, 1.0);
+ float r_offset = 1.0;
+
+ for(int i = 0; i < 5; ++i) {
+ VERIFY_IS_APPROX(forward_output_both_parts(i) + c_offset, forward_golden_result[i] + c_offset);
+ VERIFY_IS_APPROX(forward_output_real_part(i) + r_offset, forward_golden_result[i].real() + r_offset);
+ VERIFY_IS_APPROX(forward_output_imag_part(i) + r_offset, forward_golden_result[i].imag() + r_offset);
+ }
+
+ for(int i = 0; i < 5; ++i) {
+ VERIFY_IS_APPROX(reverse_output_both_parts(i) + c_offset, reverse_golden_result[i] + c_offset);
+ VERIFY_IS_APPROX(reverse_output_real_part(i) + r_offset, reverse_golden_result[i].real() + r_offset);
+ VERIFY_IS_APPROX(reverse_output_imag_part(i) + r_offset, reverse_golden_result[i].imag() + r_offset);
+ }
+}
+
+
+template <int DataLayout, typename RealScalar, bool isComplexInput, int FFTResultType, int FFTDirection, int TensorRank>
+static void test_fft_real_input_energy() {
+
+ Eigen::DSizes<ptrdiff_t, TensorRank> dimensions;
+ ptrdiff_t total_size = 1;
+ for (int i = 0; i < TensorRank; ++i) {
+ dimensions[i] = rand() % 20 + 1;
+ total_size *= dimensions[i];
+ }
+ const DSizes<ptrdiff_t, TensorRank> arr = dimensions;
+
+ typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
+
+ Tensor<InputScalar, TensorRank, DataLayout> input;
+ input.resize(arr);
+ input.setRandom();
+
+ array<ptrdiff_t, TensorRank> fft;
+ for (int i = 0; i < TensorRank; ++i) {
+ fft[i] = i;
+ }
+
+ typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
+ Tensor<OutputScalar, TensorRank, DataLayout> output;
+ output = input.template fft<FFTResultType, FFTDirection>(fft);
+
+ for (int i = 0; i < TensorRank; ++i) {
+ VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i));
+ }
+
+ RealScalar energy_original = 0.0;
+ RealScalar energy_after_fft = 0.0;
+
+ for (int i = 0; i < total_size; ++i) {
+ energy_original += numext::abs2(input(i));
+ }
+
+ for (int i = 0; i < total_size; ++i) {
+ energy_after_fft += numext::abs2(output(i));
+ }
+
+ if(FFTDirection == FFT_FORWARD) {
+ VERIFY_IS_APPROX(energy_original, energy_after_fft / total_size);
+ }
+ else {
+ VERIFY_IS_APPROX(energy_original, energy_after_fft * total_size);
+ }
+}
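+
+// The energy check is Parseval's relation for the unnormalized forward DFT:
+//   sum_k |X_k|^2 = N * sum_n |x_n|^2
+// hence the forward energy is divided by total_size, while FFT_REVERSE
+// carries a 1/N normalization, so the reverse energy is multiplied by it.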
+
+void test_cxx11_tensor_fft() {
+ test_fft_complex_input_golden();
+ test_fft_real_input_golden();
+
+ test_fft_2D_golden<ColMajor>();
+ test_fft_2D_golden<RowMajor>();
+
+ test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 1>();
+ test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 1>();
+ test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 1>();
+ test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>();
+
+ test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 2>();
+ test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 2>();
+ test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 2>();
+ test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>();
+
+ test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 3>();
+ test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 3>();
+ test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 3>();
+ test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>();
+
+ test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 4>();
+ test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 4>();
+ test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>();
+ test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>();
+
+ test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 1>();
+ test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 1>();
+ test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 1>();
+ test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>();
+
+ test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 2>();
+ test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 2>();
+ test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 2>();
+ test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>();
+
+ test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 3>();
+ test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 3>();
+ test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 3>();
+ test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>();
+
+ test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 4>();
+ test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 4>();
+ test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>();
+ test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>();
+}
diff --git a/unsupported/test/cxx11_tensor_fixed_size.cpp b/unsupported/test/cxx11_tensor_fixed_size.cpp
new file mode 100644
index 000000000..4c660de65
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -0,0 +1,261 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+
+static void test_0d()
+{
+ TensorFixedSize<float, Sizes<> > scalar1;
+ TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
+ VERIFY_IS_EQUAL(scalar1.rank(), 0);
+ VERIFY_IS_EQUAL(scalar1.size(), 1);
+ VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
+
+ scalar1() = 7.0;
+ scalar2() = 13.0;
+
+ // Test against shallow copy.
+ TensorFixedSize<float, Sizes<> > copy = scalar1;
+ VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data());
+ VERIFY_IS_APPROX(scalar1(), copy());
+ copy = scalar1;
+ VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data());
+ VERIFY_IS_APPROX(scalar1(), copy());
+
+ TensorFixedSize<float, Sizes<> > scalar3 = scalar1.sqrt();
+ TensorFixedSize<float, Sizes<>, RowMajor> scalar4 = scalar2.sqrt();
+ VERIFY_IS_EQUAL(scalar3.rank(), 0);
+ VERIFY_IS_APPROX(scalar3(), sqrtf(7.0));
+ VERIFY_IS_APPROX(scalar4(), sqrtf(13.0));
+
+ scalar3 = scalar1 + scalar2;
+ VERIFY_IS_APPROX(scalar3(), 7.0f + 13.0f);
+}
+
+static void test_1d()
+{
+ TensorFixedSize<float, Sizes<6> > vec1;
+ TensorFixedSize<float, Sizes<6>, RowMajor> vec2;
+
+ VERIFY_IS_EQUAL((vec1.size()), 6);
+ // VERIFY_IS_EQUAL((vec1.dimensions()[0]), 6);
+ // VERIFY_IS_EQUAL((vec1.dimension(0)), 6);
+
+ vec1(0) = 4.0; vec2(0) = 0.0;
+ vec1(1) = 8.0; vec2(1) = 1.0;
+ vec1(2) = 15.0; vec2(2) = 2.0;
+ vec1(3) = 16.0; vec2(3) = 3.0;
+ vec1(4) = 23.0; vec2(4) = 4.0;
+ vec1(5) = 42.0; vec2(5) = 5.0;
+
+ // Test against shallow copy.
+ TensorFixedSize<float, Sizes<6> > copy = vec1;
+ VERIFY_IS_NOT_EQUAL(vec1.data(), copy.data());
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_APPROX(vec1(i), copy(i));
+ }
+ copy = vec1;
+ VERIFY_IS_NOT_EQUAL(vec1.data(), copy.data());
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_APPROX(vec1(i), copy(i));
+ }
+
+ TensorFixedSize<float, Sizes<6> > vec3 = vec1.sqrt();
+ TensorFixedSize<float, Sizes<6>, RowMajor> vec4 = vec2.sqrt();
+
+ VERIFY_IS_EQUAL((vec3.size()), 6);
+ VERIFY_IS_EQUAL(vec3.rank(), 1);
+ // VERIFY_IS_EQUAL((vec3.dimensions()[0]), 6);
+ // VERIFY_IS_EQUAL((vec3.dimension(0)), 6);
+
+ VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
+ VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
+ VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
+ VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
+ VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
+ VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
+
+ VERIFY_IS_APPROX(vec4(0), sqrtf(0.0));
+ VERIFY_IS_APPROX(vec4(1), sqrtf(1.0));
+ VERIFY_IS_APPROX(vec4(2), sqrtf(2.0));
+ VERIFY_IS_APPROX(vec4(3), sqrtf(3.0));
+ VERIFY_IS_APPROX(vec4(4), sqrtf(4.0));
+ VERIFY_IS_APPROX(vec4(5), sqrtf(5.0));
+
+ vec3 = vec1 + vec2;
+ VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f);
+ VERIFY_IS_APPROX(vec3(1), 8.0f + 1.0f);
+ VERIFY_IS_APPROX(vec3(2), 15.0f + 2.0f);
+ VERIFY_IS_APPROX(vec3(3), 16.0f + 3.0f);
+ VERIFY_IS_APPROX(vec3(4), 23.0f + 4.0f);
+ VERIFY_IS_APPROX(vec3(5), 42.0f + 5.0f);
+}
+
+static void test_tensor_map()
+{
+ TensorFixedSize<float, Sizes<6> > vec1;
+ TensorFixedSize<float, Sizes<6>, RowMajor> vec2;
+
+ vec1(0) = 4.0; vec2(0) = 0.0;
+ vec1(1) = 8.0; vec2(1) = 1.0;
+ vec1(2) = 15.0; vec2(2) = 2.0;
+ vec1(3) = 16.0; vec2(3) = 3.0;
+ vec1(4) = 23.0; vec2(4) = 4.0;
+ vec1(5) = 42.0; vec2(5) = 5.0;
+
+ float data3[6];
+ TensorMap<TensorFixedSize<float, Sizes<6> > > vec3(data3, 6);
+ vec3 = vec1.sqrt() + vec2;
+
+ VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
+ VERIFY_IS_APPROX(vec3(1), sqrtf(8.0) + 1.0f);
+ VERIFY_IS_APPROX(vec3(2), sqrtf(15.0) + 2.0f);
+ VERIFY_IS_APPROX(vec3(3), sqrtf(16.0) + 3.0f);
+ VERIFY_IS_APPROX(vec3(4), sqrtf(23.0) + 4.0f);
+ VERIFY_IS_APPROX(vec3(5), sqrtf(42.0) + 5.0f);
+}
+
+static void test_2d()
+{
+ float data1[6];
+ TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3);
+ float data2[6];
+ TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3);
+
+ VERIFY_IS_EQUAL((mat1.size()), 2*3);
+ VERIFY_IS_EQUAL(mat1.rank(), 2);
+ // VERIFY_IS_EQUAL((mat1.dimension(0)), 2);
+ // VERIFY_IS_EQUAL((mat1.dimension(1)), 3);
+
+ mat1(0,0) = 0.0;
+ mat1(0,1) = 1.0;
+ mat1(0,2) = 2.0;
+ mat1(1,0) = 3.0;
+ mat1(1,1) = 4.0;
+ mat1(1,2) = 5.0;
+
+ mat2(0,0) = -0.0;
+ mat2(0,1) = -1.0;
+ mat2(0,2) = -2.0;
+ mat2(1,0) = -3.0;
+ mat2(1,1) = -4.0;
+ mat2(1,2) = -5.0;
+
+ TensorFixedSize<float, Sizes<2, 3> > mat3;
+ TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4;
+ mat3 = mat1.abs();
+ mat4 = mat2.abs();
+
+ VERIFY_IS_EQUAL((mat3.size()), 2*3);
+ // VERIFY_IS_EQUAL((mat3.dimension(0)), 2);
+ // VERIFY_IS_EQUAL((mat3.dimension(1)), 3);
+
+ VERIFY_IS_APPROX(mat3(0,0), 0.0f);
+ VERIFY_IS_APPROX(mat3(0,1), 1.0f);
+ VERIFY_IS_APPROX(mat3(0,2), 2.0f);
+ VERIFY_IS_APPROX(mat3(1,0), 3.0f);
+ VERIFY_IS_APPROX(mat3(1,1), 4.0f);
+ VERIFY_IS_APPROX(mat3(1,2), 5.0f);
+
+ VERIFY_IS_APPROX(mat4(0,0), 0.0f);
+ VERIFY_IS_APPROX(mat4(0,1), 1.0f);
+ VERIFY_IS_APPROX(mat4(0,2), 2.0f);
+ VERIFY_IS_APPROX(mat4(1,0), 3.0f);
+ VERIFY_IS_APPROX(mat4(1,1), 4.0f);
+ VERIFY_IS_APPROX(mat4(1,2), 5.0f);
+}
+
+static void test_3d()
+{
+ TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
+ TensorFixedSize<float, Sizes<2, 3, 7>, RowMajor> mat2;
+
+ VERIFY_IS_EQUAL((mat1.size()), 2*3*7);
+ VERIFY_IS_EQUAL(mat1.rank(), 3);
+ // VERIFY_IS_EQUAL((mat1.dimension(0)), 2);
+ // VERIFY_IS_EQUAL((mat1.dimension(1)), 3);
+ // VERIFY_IS_EQUAL((mat1.dimension(2)), 7);
+
+ float val = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ mat2(i,j,k) = val;
+ val += 1.0f;
+ }
+ }
+ }
+
+ TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
+ mat3 = mat1.sqrt();
+ TensorFixedSize<float, Sizes<2, 3, 7>, RowMajor> mat4;
+ mat4 = mat2.sqrt();
+
+ VERIFY_IS_EQUAL((mat3.size()), 2*3*7);
+ // VERIFY_IS_EQUAL((mat3.dimension(0)), 2);
+ // VERIFY_IS_EQUAL((mat3.dimension(1)), 3);
+ // VERIFY_IS_EQUAL((mat3.dimension(2)), 7);
+
+
+ val = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val));
+ VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val));
+ val += 1.0f;
+ }
+ }
+ }
+}
+
+
+static void test_array()
+{
+ TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
+ float val = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ val += 1.0f;
+ }
+ }
+ }
+
+ TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
+ mat3 = mat1.pow(3.5f);
+
+ val = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f));
+ val += 1.0f;
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_fixed_size()
+{
+ CALL_SUBTEST(test_0d());
+ CALL_SUBTEST(test_1d());
+ CALL_SUBTEST(test_tensor_map());
+ CALL_SUBTEST(test_2d());
+ CALL_SUBTEST(test_3d());
+ CALL_SUBTEST(test_array());
+}
diff --git a/unsupported/test/cxx11_tensor_forced_eval.cpp b/unsupported/test/cxx11_tensor_forced_eval.cpp
new file mode 100644
index 000000000..45d7345e9
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_forced_eval.cpp
@@ -0,0 +1,79 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/Core>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::MatrixXf;
+using Eigen::Tensor;
+
+static void test_simple()
+{
+ MatrixXf m1(3,3);
+ MatrixXf m2(3,3);
+ m1.setRandom();
+ m2.setRandom();
+
+ TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3);
+ TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3);
+
+ Tensor<float, 2> mat3(3,3);
+ mat3 = mat1;
+
+ typedef Tensor<float, 2>::DimensionPair DimPair;
+ Eigen::array<DimPair, 1> dims;
+ dims[0] = DimPair(1, 0);
+
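+ // Forcing evaluation here is the point of the test: mat3 appears on both
+ // sides of the assignment, so eval() must materialize the contraction into
+ // a temporary before it is written back.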
+ mat3 = mat3.contract(mat2, dims).eval();
+
+ VERIFY_IS_APPROX(mat3(0, 0), (m1*m2).eval()(0,0));
+ VERIFY_IS_APPROX(mat3(0, 1), (m1*m2).eval()(0,1));
+ VERIFY_IS_APPROX(mat3(0, 2), (m1*m2).eval()(0,2));
+ VERIFY_IS_APPROX(mat3(1, 0), (m1*m2).eval()(1,0));
+ VERIFY_IS_APPROX(mat3(1, 1), (m1*m2).eval()(1,1));
+ VERIFY_IS_APPROX(mat3(1, 2), (m1*m2).eval()(1,2));
+ VERIFY_IS_APPROX(mat3(2, 0), (m1*m2).eval()(2,0));
+ VERIFY_IS_APPROX(mat3(2, 1), (m1*m2).eval()(2,1));
+ VERIFY_IS_APPROX(mat3(2, 2), (m1*m2).eval()(2,2));
+}
+
+
+static void test_const()
+{
+ MatrixXf input(3,3);
+ input.setRandom();
+ MatrixXf output = input;
+ output.rowwise() -= input.colwise().maxCoeff();
+
+ Eigen::array<int, 1> depth_dim;
+ depth_dim[0] = 0;
+ Tensor<float, 2>::Dimensions dims2d;
+ dims2d[0] = 1;
+ dims2d[1] = 3;
+ Eigen::array<int, 2> bcast;
+ bcast[0] = 3;
+ bcast[1] = 1;
+ const TensorMap<Tensor<const float, 2> > input_tensor(input.data(), 3, 3);
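+ // The eval() forces the per-column maxima to be materialized before they are
+ // reshaped and broadcast back to the input's shape, mirroring the rowwise
+ // subtraction of colwise maxima performed on the MatrixXf above.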
+ Tensor<float, 2> output_tensor = (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_APPROX(output(i, j), output_tensor(i, j));
+ }
+ }
+}
+
+
+void test_cxx11_tensor_forced_eval()
+{
+ CALL_SUBTEST(test_simple());
+ CALL_SUBTEST(test_const());
+}
diff --git a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
new file mode 100644
index 000000000..5690da723
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
@@ -0,0 +1,70 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
+
+ int sizeDim1 = 100;
+ int sizeDim2 = 200;
+ int sizeDim3 = 200;
+ Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ Eigen::Tensor<float, 3> in1(tensorRange);
+ Eigen::Tensor<float, 3> in2(tensorRange);
+ Eigen::Tensor<float, 3> out(tensorRange);
+
+ float * gpu_in1_data = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_in2_data = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
+
+ in1 = in1.random() + in1.constant(10.0f);
+ in2 = in2.random() + in2.constant(10.0f);
+
+ // creating TensorMap from tensor
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
+ sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(float));
+ sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float));
+ /// c=(a+b)*b
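+ /// The eval() forces (a+b) into a device-side temporary before the multiply,
+ /// exercising the forced-evaluation path on the SYCL device.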
+ gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2;
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i, j, k),
+ (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
+ }
+ }
+ }
+ printf("(a+b)*b Test Passed\n");
+ sycl_device.deallocate(gpu_in1_data);
+ sycl_device.deallocate(gpu_in2_data);
+ sycl_device.deallocate(gpu_out_data);
+
+}
+
+void test_cxx11_tensor_forced_eval_sycl() {
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_forced_eval_sycl(sycl_device));
+}
diff --git a/unsupported/test/cxx11_tensor_generator.cpp b/unsupported/test/cxx11_tensor_generator.cpp
new file mode 100644
index 000000000..dcb928714
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_generator.cpp
@@ -0,0 +1,91 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+struct Generator1D {
+ Generator1D() { }
+
+ float operator()(const array<Eigen::DenseIndex, 1>& coordinates) const {
+ return coordinates[0];
+ }
+};
+
+template <int DataLayout>
+static void test_1D()
+{
+ Tensor<float, 1, DataLayout> vec(6);
+ Tensor<float, 1, DataLayout> result = vec.generate(Generator1D());
+
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_EQUAL(result(i), i);
+ }
+}
+
+
+struct Generator2D {
+ Generator2D() { }
+
+ float operator()(const array<Eigen::DenseIndex, 2>& coordinates) const {
+ return 3 * coordinates[0] + 11 * coordinates[1];
+ }
+};
+
+template <int DataLayout>
+static void test_2D()
+{
+ Tensor<float, 2, DataLayout> matrix(5, 7);
+ Tensor<float, 2, DataLayout> result = matrix.generate(Generator2D());
+
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ VERIFY_IS_EQUAL(result(i, j), 3*i + 11*j);
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_gaussian()
+{
+ int rows = 32;
+ int cols = 48;
+ array<float, 2> means;
+ means[0] = rows / 2.0f;
+ means[1] = cols / 2.0f;
+ array<float, 2> std_devs;
+ std_devs[0] = 3.14f;
+ std_devs[1] = 2.7f;
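+ // The generator is expected to produce the unnormalized Gaussian
+ // exp(-((i - mean_r)^2 / (2*sr^2) + (j - mean_c)^2 / (2*sc^2))), which the
+ // verification loop below recomputes by hand.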
+ internal::GaussianGenerator<float, Eigen::DenseIndex, 2> gaussian_gen(means, std_devs);
+
+ Tensor<float, 2, DataLayout> matrix(rows, cols);
+ Tensor<float, 2, DataLayout> result = matrix.generate(gaussian_gen);
+
+ for (int i = 0; i < rows; ++i) {
+ for (int j = 0; j < cols; ++j) {
+ float g_rows = powf(rows/2.0f - i, 2) / (3.14f * 3.14f) * 0.5f;
+ float g_cols = powf(cols/2.0f - j, 2) / (2.7f * 2.7f) * 0.5f;
+ float gaussian = expf(-g_rows - g_cols);
+ VERIFY_IS_APPROX(result(i, j), gaussian);
+ }
+ }
+}
+
+
+void test_cxx11_tensor_generator()
+{
+ CALL_SUBTEST(test_1D<ColMajor>());
+ CALL_SUBTEST(test_1D<RowMajor>());
+ CALL_SUBTEST(test_2D<ColMajor>());
+ CALL_SUBTEST(test_2D<RowMajor>());
+ CALL_SUBTEST(test_gaussian<ColMajor>());
+ CALL_SUBTEST(test_gaussian<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_ifft.cpp b/unsupported/test/cxx11_tensor_ifft.cpp
new file mode 100644
index 000000000..5fd88fa6c
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_ifft.cpp
@@ -0,0 +1,154 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <complex>
+#include <cmath>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout>
+static void test_1D_fft_ifft_invariant(int sequence_length) {
+ Tensor<double, 1, DataLayout> tensor(sequence_length);
+ tensor.setRandom();
+
+ array<int, 1> fft;
+ fft[0] = 0;
+
+ Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft;
+ Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft_ifft;
+
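+ // A forward FFT followed by an inverse FFT along the same axis should
+ // reproduce the original real input up to floating-point error.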
+ tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+ tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), sequence_length);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), sequence_length);
+
+ for (int i = 0; i < sequence_length; ++i) {
+ VERIFY_IS_APPROX(static_cast<float>(tensor(i)), static_cast<float>(std::real(tensor_after_fft_ifft(i))));
+ }
+}
+
+template <int DataLayout>
+static void test_2D_fft_ifft_invariant(int dim0, int dim1) {
+ Tensor<double, 2, DataLayout> tensor(dim0, dim1);
+ tensor.setRandom();
+
+ array<int, 2> fft;
+ fft[0] = 0;
+ fft[1] = 1;
+
+ Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft;
+ Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft_ifft;
+
+ tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+ tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
+
+ for (int i = 0; i < dim0; ++i) {
+ for (int j = 0; j < dim1; ++j) {
+ //std::cout << "[" << i << "][" << j << "]" << " Original data: " << tensor(i,j) << " Transformed data:" << tensor_after_fft_ifft(i,j) << std::endl;
+ VERIFY_IS_APPROX(static_cast<float>(tensor(i,j)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j))));
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_3D_fft_ifft_invariant(int dim0, int dim1, int dim2) {
+ Tensor<double, 3, DataLayout> tensor(dim0, dim1, dim2);
+ tensor.setRandom();
+
+ array<int, 3> fft;
+ fft[0] = 0;
+ fft[1] = 1;
+ fft[2] = 2;
+
+ Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft;
+ Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft_ifft;
+
+ tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+ tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft);
+
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2);
+
+ for (int i = 0; i < dim0; ++i) {
+ for (int j = 0; j < dim1; ++j) {
+ for (int k = 0; k < dim2; ++k) {
+ VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j,k))));
+ }
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_sub_fft_ifft_invariant(int dim0, int dim1, int dim2, int dim3) {
+ Tensor<double, 4, DataLayout> tensor(dim0, dim1, dim2, dim3);
+ tensor.setRandom();
+
+ array<int, 2> fft;
+ fft[0] = 2;
+ fft[1] = 0;
+
+ Tensor<std::complex<double>, 4, DataLayout> tensor_after_fft;
+ Tensor<double, 4, DataLayout> tensor_after_fft_ifft;
+
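+ // Only dimensions 2 and 0 are transformed. Requesting Eigen::RealPart on the
+ // inverse transform yields a real tensor, dropping the numerically tiny
+ // imaginary residue.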
+ tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft);
+ tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::RealPart, Eigen::FFT_REVERSE>(fft);
+
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0);
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1);
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2);
+ VERIFY_IS_EQUAL(tensor_after_fft.dimension(3), dim3);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2);
+ VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(3), dim3);
+
+ for (int i = 0; i < dim0; ++i) {
+ for (int j = 0; j < dim1; ++j) {
+ for (int k = 0; k < dim2; ++k) {
+ for (int l = 0; l < dim3; ++l) {
+ VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k,l)), static_cast<float>(tensor_after_fft_ifft(i,j,k,l)));
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_ifft() {
+ CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(4));
+ CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(16));
+ CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(32));
+ CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(1024*1024));
+
+ CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(4,4));
+ CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(8,16));
+ CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(16,32));
+ CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(1024,1024));
+
+ CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(4,4,4));
+ CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(8,16,32));
+ CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(16,4,8));
+ CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(256,256,256));
+
+ CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(4,4,4,4));
+ CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(8,16,32,64));
+ CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(16,4,8,12));
+ CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(64,64,64,64));
+}
diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp
new file mode 100644
index 000000000..475c59651
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_image_patch.cpp
@@ -0,0 +1,757 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_simple_patch()
+{
+ Tensor<float, 4> tensor(2,3,5,7);
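+ // ColMajor layout: dimensions are [depth, rows, cols, batch] = [2, 3, 5, 7];
+ // swap_layout() reverses them for the RowMajor checks below.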
+ tensor.setRandom();
+ Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
+
+ // Single pixel patch: ColMajor
+ Tensor<float, 5> single_pixel_patch;
+ single_pixel_patch = tensor.extract_image_patches(1, 1);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(2), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(4), 7);
+
+ // Single pixel patch: RowMajor
+ Tensor<float, 5, RowMajor> single_pixel_patch_row_major;
+ single_pixel_patch_row_major = tensor_row_major.extract_image_patches(1, 1);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(0), 7);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(1), 3*5);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(2), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(3), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(4), 2);
+
+ for (int i = 0; i < tensor.size(); ++i) {
+ // ColMajor
+ if (tensor.data()[i] != single_pixel_patch.data()[i]) {
+ std::cout << "Mismatch detected at index " << i << " : "
+ << tensor.data()[i] << " vs " << single_pixel_patch.data()[i]
+ << std::endl;
+ }
+ VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]);
+ // RowMajor
+ if (tensor_row_major.data()[i] != single_pixel_patch_row_major.data()[i]) {
+ std::cout << "Mismatch detected at index " << i << " : "
+ << tensor.data()[i] << " vs "
+ << single_pixel_patch_row_major.data()[i] << std::endl;
+ }
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.data()[i],
+ tensor_row_major.data()[i]);
+ VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
+ VERIFY_IS_EQUAL(single_pixel_patch.data()[i],
+ single_pixel_patch_row_major.data()[i]);
+ }
+
+ // Entire image patch: ColMajor
+ Tensor<float, 5> entire_image_patch;
+ entire_image_patch = tensor.extract_image_patches(3, 5);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(1), 3);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(2), 5);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(3), 3*5);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(4), 7);
+
+ // Entire image patch: RowMajor
+ Tensor<float, 5, RowMajor> entire_image_patch_row_major;
+ entire_image_patch_row_major = tensor_row_major.extract_image_patches(3, 5);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2);
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ int patchId = i+3*j;
+ for (int r = 0; r < 3; ++r) {
+ for (int c = 0; c < 5; ++c) {
+ for (int d = 0; d < 2; ++d) {
+ for (int b = 0; b < 7; ++b) {
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
+ expected = tensor(d, r-1+i, c-2+j, b);
+ expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d);
+ }
+ // ColMajor
+ if (entire_image_patch(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (entire_image_patch_row_major(b, patchId, c, r, d) !=
+ expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j
+ << " r=" << r << " c=" << c << " d=" << d << " b=" << b
+ << std::endl;
+ }
+ VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d),
+ expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // 2D patch: ColMajor
+ Tensor<float, 5> twod_patch;
+ twod_patch = tensor.extract_image_patches(2, 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5);
+ VERIFY_IS_EQUAL(twod_patch.dimension(4), 7);
+
+ // 2D patch: RowMajor
+ Tensor<float, 5, RowMajor> twod_patch_row_major;
+ twod_patch_row_major = tensor_row_major.extract_image_patches(2, 2);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2);
+
+
+ // Based on the calculation described in TensorTraits.h, the top/left padding happens to be 0 here.
+ int row_padding = 0;
+ int col_padding = 0;
+ int stride = 1;
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ int patchId = i+3*j;
+ for (int r = 0; r < 2; ++r) {
+ for (int c = 0; c < 2; ++c) {
+ for (int d = 0; d < 2; ++d) {
+ for (int b = 0; b < 7; ++b) {
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ int row_offset = r*stride + i - row_padding;
+ int col_offset = c*stride + j - col_padding;
+ // ColMajor
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) {
+ expected = tensor(d, row_offset, col_offset, b);
+ }
+ if (twod_patch(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId, b), expected);
+
+ // RowMajor
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
+ expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
+
+ }
+ if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// Verifies VALID padding (no padding) with incrementing values.
+void test_patch_padding_valid()
+{
+ int input_depth = 3;
+ int input_rows = 3;
+ int input_cols = 3;
+ int input_batches = 1;
+ int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
+ int stride = 2; // Stride for both rows and cols (only equal strides are supported).
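+ // With VALID padding, patches per spatial dimension = (input - ksize) / stride + 1
+ // = (3 - 2) / 2 + 1 = 1, so a single patch is expected below.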
+ Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
+ // Initializes tensor with incrementing numbers.
+ for (int i = 0; i < tensor.size(); ++i) {
+ tensor.data()[i] = i + 1;
+ }
+ // ColMajor
+ Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
+
+ VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
+ VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
+ VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
+ VERIFY_IS_EQUAL(result.dimension(3), 1); // number of patches
+ VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
+
+ // RowMajor
+ Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
+
+ Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
+ VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
+ VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
+ VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
+ VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
+ VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
+
+ // No padding is carried out.
+ int row_padding = 0;
+ int col_padding = 0;
+
+ for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
+ for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
+ int patchId = i+input_rows*j;
+ for (int r = 0; r < ksize; ++r) { // patch rows
+ for (int c = 0; c < ksize; ++c) { // patch cols
+ for (int d = 0; d < input_depth; ++d) { // depth
+ for (int b = 0; b < input_batches; ++b) { // batch
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ int row_offset = r + i - row_padding;
+ int col_offset = c + j - col_padding;
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
+ expected = tensor(d, row_offset, col_offset, b);
+ expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
+ }
+ // ColMajor
+ if (result(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// Verifies VALID padding (no padding) with the same value.
+void test_patch_padding_valid_same_value()
+{
+ int input_depth = 1;
+ int input_rows = 5;
+ int input_cols = 5;
+ int input_batches = 2;
+ int ksize = 3; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
+ int stride = 2; // Stride for both rows and cols (only equal strides are supported).
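+ // (5 - 3) / 2 + 1 = 2 patch positions per spatial dimension, so 2*2 = 4
+ // patches are expected below.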
+ // ColMajor
+ Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
+ tensor = tensor.constant(11.0f);
+ Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
+
+ VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
+ VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
+ VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
+ VERIFY_IS_EQUAL(result.dimension(3), 4); // number of patches
+ VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
+
+ // RowMajor
+ Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
+
+ Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
+ VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
+ VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
+ VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
+ VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
+ VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
+
+ // No padding is carried out.
+ int row_padding = 0;
+ int col_padding = 0;
+
+ for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
+ for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
+ int patchId = i+input_rows*j;
+ for (int r = 0; r < ksize; ++r) { // patch rows
+ for (int c = 0; c < ksize; ++c) { // patch cols
+ for (int d = 0; d < input_depth; ++d) { // depth
+ for (int b = 0; b < input_batches; ++b) { // batch
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ int row_offset = r + i - row_padding;
+ int col_offset = c + j - col_padding;
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
+ expected = tensor(d, row_offset, col_offset, b);
+ expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
+ }
+ // ColMajor
+ if (result(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// Verifies SAME padding.
+void test_patch_padding_same()
+{
+ int input_depth = 3;
+ int input_rows = 4;
+ int input_cols = 2;
+ int input_batches = 1;
+ int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
+ int stride = 2; // Stride for both rows and cols (only equal strides are supported).
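+ // With SAME padding, patches per dimension = ceil(input / stride):
+ // ceil(4/2) * ceil(2/2) = 2 * 1 = 2 patches are expected below.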
+ // ColMajor
+ Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
+ // Initializes tensor with incrementing numbers.
+ for (int i = 0; i < tensor.size(); ++i) {
+ tensor.data()[i] = i + 1;
+ }
+ Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
+
+ VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth
+ VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows
+ VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols
+ VERIFY_IS_EQUAL(result.dimension(3), 2); // number of patches
+ VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches
+
+ // RowMajor
+ Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
+
+ Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
+ VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
+ VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
+ VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
+ VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
+ VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
+
+ // Based on the calculation described in TensorTraits.h, padding happens to be
+ // 0.
+ int row_padding = 0;
+ int col_padding = 0;
+
+ for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
+ for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
+ int patchId = i+input_rows*j;
+ for (int r = 0; r < ksize; ++r) { // patch rows
+ for (int c = 0; c < ksize; ++c) { // patch cols
+ for (int d = 0; d < input_depth; ++d) { // depth
+ for (int b = 0; b < input_batches; ++b) { // batch
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ int row_offset = r*stride + i - row_padding;
+ int col_offset = c*stride + j - col_padding;
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
+ expected = tensor(d, row_offset, col_offset, b);
+ expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
+ }
+ // ColMajor
+ if (result(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_patch_no_extra_dim()
+{
+ Tensor<float, 3> tensor(2,3,5);
+ tensor.setRandom();
+ Tensor<float, 3, RowMajor> tensor_row_major = tensor.swap_layout();
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(0));
+
+ // Single pixel patch: ColMajor
+ Tensor<float, 4> single_pixel_patch;
+ single_pixel_patch = tensor.extract_image_patches(1, 1);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(2), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5);
+
+ // Single pixel patch: RowMajor
+ Tensor<float, 4, RowMajor> single_pixel_patch_row_major;
+ single_pixel_patch_row_major = tensor_row_major.extract_image_patches(1, 1);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(0), 3*5);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(1), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(2), 1);
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(3), 2);
+
+ for (int i = 0; i < tensor.size(); ++i) {
+ // ColMajor
+ if (tensor.data()[i] != single_pixel_patch.data()[i]) {
+ std::cout << "Mismatch detected at index " << i << " : " << tensor.data()[i] << " vs " << single_pixel_patch.data()[i] << std::endl;
+ }
+ VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]);
+ // RowMajor
+ if (tensor_row_major.data()[i] != single_pixel_patch_row_major.data()[i]) {
+ std::cout << "Mismatch detected at index " << i << " : "
+ << tensor.data()[i] << " vs "
+ << single_pixel_patch_row_major.data()[i] << std::endl;
+ }
+ VERIFY_IS_EQUAL(single_pixel_patch_row_major.data()[i],
+ tensor_row_major.data()[i]);
+ VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
+ VERIFY_IS_EQUAL(single_pixel_patch.data()[i],
+ single_pixel_patch_row_major.data()[i]);
+ }
+
+ // Entire image patch: ColMajor
+ Tensor<float, 4> entire_image_patch;
+ entire_image_patch = tensor.extract_image_patches(3, 5);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(1), 3);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(2), 5);
+ VERIFY_IS_EQUAL(entire_image_patch.dimension(3), 3*5);
+
+ // Entire image patch: RowMajor
+ Tensor<float, 4, RowMajor> entire_image_patch_row_major;
+ entire_image_patch_row_major = tensor_row_major.extract_image_patches(3, 5);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3);
+ VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2);
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ int patchId = i+3*j;
+ for (int r = 0; r < 3; ++r) {
+ for (int c = 0; c < 5; ++c) {
+ for (int d = 0; d < 2; ++d) {
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
+ expected = tensor(d, r-1+i, c-2+j);
+ expected_row_major = tensor_row_major(c-2+j, r-1+i, d);
+ }
+ // ColMajor
+ if (entire_image_patch(d, r, c, patchId) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
+ }
+ VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId), expected);
+ // RowMajor
+ if (entire_image_patch_row_major(patchId, c, r, d) !=
+ expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
+ }
+ VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d),
+ expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+
+ // 2D patch: ColMajor
+ Tensor<float, 4> twod_patch;
+ twod_patch = tensor.extract_image_patches(2, 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5);
+
+ // 2D patch: RowMajor
+ Tensor<float, 4, RowMajor> twod_patch_row_major;
+ twod_patch_row_major = tensor_row_major.extract_image_patches(2, 2);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
+ VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
+
+ // Based on the calculation described in TensorTraits.h, the top/left padding happens to be 0 here.
+ int row_padding = 0;
+ int col_padding = 0;
+ int stride = 1;
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ int patchId = i+3*j;
+ for (int r = 0; r < 2; ++r) {
+ for (int c = 0; c < 2; ++c) {
+ for (int d = 0; d < 2; ++d) {
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ int row_offset = r*stride + i - row_padding;
+ int col_offset = c*stride + j - col_padding;
+ // ColMajor
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) {
+ expected = tensor(d, row_offset, col_offset);
+ }
+ if (twod_patch(d, r, c, patchId) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
+ }
+ VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId), expected);
+ // RowMajor
+ if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
+ expected_row_major = tensor_row_major(col_offset, row_offset, d);
+ }
+ if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
+ }
+ VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
+ // Check that ColMajor and RowMajor agree.
+ VERIFY_IS_EQUAL(expected, expected_row_major);
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_imagenet_patches()
+{
+ // Test the code on typical configurations used by the 'imagenet' benchmarks at
+ // https://github.com/soumith/convnet-benchmarks
+ // ColMajor
+ Tensor<float, 4> l_in(3, 128, 128, 16);
+ l_in.setRandom();
+ Tensor<float, 5> l_out = l_in.extract_image_patches(11, 11);
+ VERIFY_IS_EQUAL(l_out.dimension(0), 3);
+ VERIFY_IS_EQUAL(l_out.dimension(1), 11);
+ VERIFY_IS_EQUAL(l_out.dimension(2), 11);
+ VERIFY_IS_EQUAL(l_out.dimension(3), 128*128);
+ VERIFY_IS_EQUAL(l_out.dimension(4), 16);
+
+ // RowMajor
+ Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 3);
+
+ for (int b = 0; b < 16; ++b) {
+ for (int i = 0; i < 128; ++i) {
+ for (int j = 0; j < 128; ++j) {
+ int patchId = i+128*j;
+ for (int c = 0; c < 11; ++c) {
+ for (int r = 0; r < 11; ++r) {
+ for (int d = 0; d < 3; ++d) {
+ float expected = 0.0f;
+ if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
+ expected = l_in(d, r-5+i, c-5+j, b);
+ }
+ // ColMajor
+ if (l_out(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (l_out_row_major(b, patchId, c, r, d) !=
+ expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j
+ << " r=" << r << " c=" << c << " d=" << d << " b=" << b
+ << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
+ expected);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // ColMajor
+ l_in.resize(16, 64, 64, 32);
+ l_in.setRandom();
+ l_out = l_in.extract_image_patches(9, 9);
+ VERIFY_IS_EQUAL(l_out.dimension(0), 16);
+ VERIFY_IS_EQUAL(l_out.dimension(1), 9);
+ VERIFY_IS_EQUAL(l_out.dimension(2), 9);
+ VERIFY_IS_EQUAL(l_out.dimension(3), 64*64);
+ VERIFY_IS_EQUAL(l_out.dimension(4), 32);
+
+ // RowMajor
+ l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16);
+
+ for (int b = 0; b < 32; ++b) {
+ for (int i = 0; i < 64; ++i) {
+ for (int j = 0; j < 64; ++j) {
+ int patchId = i+64*j;
+ for (int c = 0; c < 9; ++c) {
+ for (int r = 0; r < 9; ++r) {
+ for (int d = 0; d < 16; ++d) {
+ float expected = 0.0f;
+ if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
+ expected = l_in(d, r-4+i, c-4+j, b);
+ }
+ // ColMajor
+ if (l_out(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // ColMajor
+ l_in.resize(32, 16, 16, 32);
+ l_in.setRandom();
+ l_out = l_in.extract_image_patches(7, 7);
+ VERIFY_IS_EQUAL(l_out.dimension(0), 32);
+ VERIFY_IS_EQUAL(l_out.dimension(1), 7);
+ VERIFY_IS_EQUAL(l_out.dimension(2), 7);
+ VERIFY_IS_EQUAL(l_out.dimension(3), 16*16);
+ VERIFY_IS_EQUAL(l_out.dimension(4), 32);
+
+ // RowMajor
+ l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32);
+
+ for (int b = 0; b < 32; ++b) {
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 16; ++j) {
+ int patchId = i+16*j;
+ for (int c = 0; c < 7; ++c) {
+ for (int r = 0; r < 7; ++r) {
+ for (int d = 0; d < 32; ++d) {
+ float expected = 0.0f;
+ if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
+ expected = l_in(d, r-3+i, c-3+j, b);
+ }
+ // ColMajor
+ if (l_out(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // ColMajor
+ l_in.resize(64, 13, 13, 32);
+ l_in.setRandom();
+ l_out = l_in.extract_image_patches(3, 3);
+ VERIFY_IS_EQUAL(l_out.dimension(0), 64);
+ VERIFY_IS_EQUAL(l_out.dimension(1), 3);
+ VERIFY_IS_EQUAL(l_out.dimension(2), 3);
+ VERIFY_IS_EQUAL(l_out.dimension(3), 13*13);
+ VERIFY_IS_EQUAL(l_out.dimension(4), 32);
+
+ // RowMajor
+ l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3);
+ VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64);
+
+ for (int b = 0; b < 32; ++b) {
+ for (int i = 0; i < 13; ++i) {
+ for (int j = 0; j < 13; ++j) {
+ int patchId = i+13*j;
+ for (int c = 0; c < 3; ++c) {
+ for (int r = 0; r < 3; ++r) {
+ for (int d = 0; d < 64; ++d) {
+ float expected = 0.0f;
+ if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
+ expected = l_in(d, r-1+i, c-1+j, b);
+ }
+ // ColMajor
+ if (l_out(d, r, c, patchId, b) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected);
+ // RowMajor
+ if (l_out_row_major(b, patchId, c, r, d) != expected) {
+ std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+ }
+ VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_image_patch()
+{
+ CALL_SUBTEST_1(test_simple_patch());
+ CALL_SUBTEST_2(test_patch_no_extra_dim());
+ CALL_SUBTEST_3(test_patch_padding_valid());
+ CALL_SUBTEST_4(test_patch_padding_valid_same_value());
+ CALL_SUBTEST_5(test_patch_padding_same());
+ CALL_SUBTEST_6(test_imagenet_patches());
+}
diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp
new file mode 100644
index 000000000..4cf5df666
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_index_list.cpp
@@ -0,0 +1,386 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+#ifdef EIGEN_HAS_INDEX_LIST
+
+static void test_static_index_list()
+{
+ Tensor<float, 4> tensor(2,3,5,7);
+ tensor.setRandom();
+
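+ // make_index_list with literal constants yields an IndexList whose entries
+ // are encoded in the type, so they can also be read back at compile time,
+ // as the EIGEN_STATIC_ASSERTs below verify.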
+ constexpr auto reduction_axis = make_index_list(0, 1, 2);
+ VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 0);
+ VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1);
+ VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 2);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[0]), 0);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[1]), 1);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[2]), 2);
+
+ EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_axis) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::array_get<1>(reduction_axis) == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_axis) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ Tensor<float, 1> result = tensor.sum(reduction_axis);
+ for (int i = 0; i < result.size(); ++i) {
+ float expected = 0.0f;
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 5; ++l) {
+ expected += tensor(j,k,l,i);
+ }
+ }
+ }
+ VERIFY_IS_APPROX(result(i), expected);
+ }
+}
+
+
+static void test_type2index_list()
+{
+ Tensor<float, 5> tensor(2,3,5,7,11);
+ tensor.setRandom();
+ tensor += tensor.constant(10.0f);
+
+ typedef Eigen::IndexList<Eigen::type2index<0>> Dims0;
+ typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>> Dims1;
+ typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>> Dims2;
+ typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>> Dims3;
+ typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> Dims4;
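+ // Each type2index<N> encodes its value in the type itself, so reductions over
+ // these lists can resolve the reduced dimensions entirely at compile time,
+ // as the are_inner_most_dims checks below verify.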
+
+#if 0
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims0>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims1>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims2>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims3>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims4>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+#endif
+
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims1, 2, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims2, 3, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims3, 4, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims4, 5, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims1, 2, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims2, 3, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims3, 4, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims4, 5, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ const Dims0 reduction_axis0;
+ Tensor<float, 4> result0 = tensor.sum(reduction_axis0);
+ for (int m = 0; m < 11; ++m) {
+ for (int l = 0; l < 7; ++l) {
+ for (int k = 0; k < 5; ++k) {
+ for (int j = 0; j < 3; ++j) {
+ float expected = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ expected += tensor(i,j,k,l,m);
+ }
+ VERIFY_IS_APPROX(result0(j,k,l,m), expected);
+ }
+ }
+ }
+ }
+
+ const Dims1 reduction_axis1;
+ Tensor<float, 3> result1 = tensor.sum(reduction_axis1);
+ for (int m = 0; m < 11; ++m) {
+ for (int l = 0; l < 7; ++l) {
+ for (int k = 0; k < 5; ++k) {
+ float expected = 0.0f;
+ for (int j = 0; j < 3; ++j) {
+ for (int i = 0; i < 2; ++i) {
+ expected += tensor(i,j,k,l,m);
+ }
+ }
+ VERIFY_IS_APPROX(result1(k,l,m), expected);
+ }
+ }
+ }
+
+ const Dims2 reduction_axis2;
+ Tensor<float, 2> result2 = tensor.sum(reduction_axis2);
+ for (int m = 0; m < 11; ++m) {
+ for (int l = 0; l < 7; ++l) {
+ float expected = 0.0f;
+ for (int k = 0; k < 5; ++k) {
+ for (int j = 0; j < 3; ++j) {
+ for (int i = 0; i < 2; ++i) {
+ expected += tensor(i,j,k,l,m);
+ }
+ }
+ }
+ VERIFY_IS_APPROX(result2(l,m), expected);
+ }
+ }
+
+ const Dims3 reduction_axis3;
+ Tensor<float, 1> result3 = tensor.sum(reduction_axis3);
+ for (int m = 0; m < 11; ++m) {
+ float expected = 0.0f;
+ for (int l = 0; l < 7; ++l) {
+ for (int k = 0; k < 5; ++k) {
+ for (int j = 0; j < 3; ++j) {
+ for (int i = 0; i < 2; ++i) {
+ expected += tensor(i,j,k,l,m);
+ }
+ }
+ }
+ }
+ VERIFY_IS_APPROX(result3(m), expected);
+ }
+
+ const Dims4 reduction_axis4;
+ Tensor<float, 0> result4 = tensor.sum(reduction_axis4);
+ float expected = 0.0f;
+ for (int m = 0; m < 11; ++m) {
+ for (int l = 0; l < 7; ++l) {
+ for (int k = 0; k < 5; ++k) {
+ for (int j = 0; j < 3; ++j) {
+ for (int i = 0; i < 2; ++i) {
+ expected += tensor(i,j,k,l,m);
+ }
+ }
+ }
+ }
+ }
+ VERIFY_IS_APPROX(result4(), expected);
+}
+
+
+static void test_type2indexpair_list()
+{
+ Tensor<float, 5> tensor(2,3,5,7,11);
+ tensor.setRandom();
+ tensor += tensor.constant(10.0f);
+
+ typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>> Dims0;
+ typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::type2indexpair<1,11>, Eigen::type2indexpair<2,12>> Dims2_a;
+ typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
+ typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
+
+ Dims0 d0;
+ Dims2_a d2_a;
+
+ Dims2_b d2_b;
+ d2_b.set(1, Eigen::IndexPair<DenseIndex>(1,11));
+
+ Dims2_c d2_c;
+ d2_c.set(0, Eigen::IndexPair<DenseIndex>(0,10));
+ d2_c.set(1, Eigen::IndexPair<DenseIndex>(1,11)); // setting type2indexpair to correct value.
+ d2_c.set(2, Eigen::IndexPair<DenseIndex>(2,12));
+
+ VERIFY_IS_EQUAL(d2_a[0].first, 0);
+ VERIFY_IS_EQUAL(d2_a[0].second, 10);
+ VERIFY_IS_EQUAL(d2_a[1].first, 1);
+ VERIFY_IS_EQUAL(d2_a[1].second, 11);
+ VERIFY_IS_EQUAL(d2_a[2].first, 2);
+ VERIFY_IS_EQUAL(d2_a[2].second, 12);
+
+ VERIFY_IS_EQUAL(d2_b[0].first, 0);
+ VERIFY_IS_EQUAL(d2_b[0].second, 10);
+ VERIFY_IS_EQUAL(d2_b[1].first, 1);
+ VERIFY_IS_EQUAL(d2_b[1].second, 11);
+ VERIFY_IS_EQUAL(d2_b[2].first, 2);
+ VERIFY_IS_EQUAL(d2_b[2].second, 12);
+
+ VERIFY_IS_EQUAL(d2_c[0].first, 0);
+ VERIFY_IS_EQUAL(d2_c[0].second, 10);
+ VERIFY_IS_EQUAL(d2_c[1].first, 1);
+ VERIFY_IS_EQUAL(d2_c[1].second, 11);
+ VERIFY_IS_EQUAL(d2_c[2].first, 2);
+ VERIFY_IS_EQUAL(d2_c[2].second, 12);
+
+ EIGEN_STATIC_ASSERT((d2_a.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_a.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_a.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((d2_b.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_b.value_known_statically(1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_b.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((d2_c.value_known_statically(0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_c.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((d2_c.value_known_statically(2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 10) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+}
+
+
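+// make_index_list() with only runtime arguments builds an IndexList whose
+// entries are all dynamic; the values are retrievable both through
+// internal::array_get<N>() and through operator[].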
+static void test_dynamic_index_list()
+{
+ Tensor<float, 4> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ int dim1 = 2;
+ int dim2 = 1;
+ int dim3 = 0;
+
+ auto reduction_axis = make_index_list(dim1, dim2, dim3);
+
+ VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 2);
+ VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1);
+ VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 0);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[0]), 2);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[1]), 1);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[2]), 0);
+
+ Tensor<float, 1> result = tensor.sum(reduction_axis);
+ for (int i = 0; i < result.size(); ++i) {
+ float expected = 0.0f;
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 5; ++l) {
+ expected += tensor(j,k,l,i);
+ }
+ }
+ }
+ VERIFY_IS_APPROX(result(i), expected);
+ }
+}
+
+static void test_mixed_index_list()
+{
+ Tensor<float, 4> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ int dim2 = 1;
+ int dim4 = 3;
+
+ auto reduction_axis = make_index_list(0, dim2, 2, dim4);
+
+ VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 0);
+ VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1);
+ VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 2);
+ VERIFY_IS_EQUAL(internal::array_get<3>(reduction_axis), 3);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[0]), 0);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[1]), 1);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[2]), 2);
+ VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[3]), 3);
+
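+  // An IndexList can mix compile-time entries (type2index<N>) with runtime
+  // ints; only the runtime slots need to be filled in with set().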
+ typedef IndexList<type2index<0>, int, type2index<2>, int> ReductionIndices;
+ ReductionIndices reduction_indices;
+ reduction_indices.set(1, 1);
+ reduction_indices.set(3, 3);
+ EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_indices) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_indices) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+#if 0
+ EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE);
+#endif
+
+ typedef IndexList<type2index<0>, type2index<1>, type2index<2>, type2index<3>> ReductionList;
+ ReductionList reduction_list;
+ EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+#if 0
+ EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE);
+#endif
+
+ Tensor<float, 0> result1 = tensor.sum(reduction_axis);
+ Tensor<float, 0> result2 = tensor.sum(reduction_indices);
+ Tensor<float, 0> result3 = tensor.sum(reduction_list);
+
+ float expected = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ expected += tensor(i,j,k,l);
+ }
+ }
+ }
+ }
+ VERIFY_IS_APPROX(result1(), expected);
+ VERIFY_IS_APPROX(result2(), expected);
+ VERIFY_IS_APPROX(result3(), expected);
+}
+
+
+static void test_dim_check()
+{
+ Eigen::IndexList<Eigen::type2index<1>, int> dim1;
+ dim1.set(1, 2);
+ Eigen::IndexList<Eigen::type2index<1>, int> dim2;
+ dim2.set(1, 2);
+ VERIFY(dimensions_match(dim1, dim2));
+}
+
+
+#endif
+
+void test_cxx11_tensor_index_list()
+{
+#ifdef EIGEN_HAS_INDEX_LIST
+ CALL_SUBTEST(test_static_index_list());
+ CALL_SUBTEST(test_type2index_list());
+ CALL_SUBTEST(test_type2indexpair_list());
+ CALL_SUBTEST(test_dynamic_index_list());
+ CALL_SUBTEST(test_mixed_index_list());
+ CALL_SUBTEST(test_dim_check());
+#endif
+}
diff --git a/unsupported/test/cxx11_tensor_inflation.cpp b/unsupported/test/cxx11_tensor_inflation.cpp
new file mode 100644
index 000000000..4997935e9
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_inflation.cpp
@@ -0,0 +1,81 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
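+// Inflation is the inverse of striding: it inserts strides[d]-1 zeros between
+// consecutive elements along dimension d, so a dimension of size n grows to
+// (n - 1) * strides[d] + 1 (e.g. (2,3,5,7) inflated by (2,4,2,3) becomes
+// (3,9,9,19)).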
+template<int DataLayout>
+static void test_simple_inflation()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> strides;
+
+ strides[0] = 1;
+ strides[1] = 1;
+ strides[2] = 1;
+ strides[3] = 1;
+
+ Tensor<float, 4, DataLayout> no_stride;
+ no_stride = tensor.inflate(strides);
+
+ VERIFY_IS_EQUAL(no_stride.dimension(0), 2);
+ VERIFY_IS_EQUAL(no_stride.dimension(1), 3);
+ VERIFY_IS_EQUAL(no_stride.dimension(2), 5);
+ VERIFY_IS_EQUAL(no_stride.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
+ }
+ }
+ }
+ }
+
+ strides[0] = 2;
+ strides[1] = 4;
+ strides[2] = 2;
+ strides[3] = 3;
+ Tensor<float, 4, DataLayout> inflated;
+ inflated = tensor.inflate(strides);
+
+ VERIFY_IS_EQUAL(inflated.dimension(0), 3);
+ VERIFY_IS_EQUAL(inflated.dimension(1), 9);
+ VERIFY_IS_EQUAL(inflated.dimension(2), 9);
+ VERIFY_IS_EQUAL(inflated.dimension(3), 19);
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 9; ++k) {
+ for (int l = 0; l < 19; ++l) {
+ if (i % 2 == 0 &&
+ j % 4 == 0 &&
+ k % 2 == 0 &&
+ l % 3 == 0) {
+ VERIFY_IS_EQUAL(inflated(i,j,k,l),
+ tensor(i/2, j/4, k/2, l/3));
+ } else {
+ VERIFY_IS_EQUAL(0, inflated(i,j,k,l));
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_inflation()
+{
+ CALL_SUBTEST(test_simple_inflation<ColMajor>());
+ CALL_SUBTEST(test_simple_inflation<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_intdiv.cpp b/unsupported/test/cxx11_tensor_intdiv.cpp
new file mode 100644
index 000000000..8e2b70b75
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_intdiv.cpp
@@ -0,0 +1,147 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
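+// TensorIntDivisor replaces an integer division by a precomputed
+// multiply-and-shift ("magic number" division), which is much cheaper than a
+// hardware divide. Each test below checks that the fast path agrees with the
+// compiler-generated division over a large range of operands.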
+void test_signed_32bit()
+{
+ // Divide by one
+ const Eigen::internal::TensorIntDivisor<int32_t, false> div_by_one(1);
+
+ for (int32_t j = 0; j < 25000; ++j) {
+ const int32_t fast_div = j / div_by_one;
+ const int32_t slow_div = j / 1;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+
+ // Standard divide by 2 or more
+ for (int32_t i = 2; i < 25000; ++i) {
+ const Eigen::internal::TensorIntDivisor<int32_t, false> div(i);
+
+ for (int32_t j = 0; j < 25000; ++j) {
+ const int32_t fast_div = j / div;
+ const int32_t slow_div = j / i;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+ }
+
+ // Optimized divide by 2 or more
+ for (int32_t i = 2; i < 25000; ++i) {
+ const Eigen::internal::TensorIntDivisor<int32_t, true> div(i);
+
+ for (int32_t j = 0; j < 25000; ++j) {
+ const int32_t fast_div = j / div;
+ const int32_t slow_div = j / i;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+ }
+}
+
+
+void test_unsigned_32bit()
+{
+ for (uint32_t i = 1; i < 25000; ++i) {
+ const Eigen::internal::TensorIntDivisor<uint32_t> div(i);
+
+ for (uint32_t j = 0; j < 25000; ++j) {
+ const uint32_t fast_div = j / div;
+ const uint32_t slow_div = j / i;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+ }
+}
+
+
+void test_signed_64bit()
+{
+ for (int64_t i = 1; i < 25000; ++i) {
+ const Eigen::internal::TensorIntDivisor<int64_t> div(i);
+
+ for (int64_t j = 0; j < 25000; ++j) {
+ const int64_t fast_div = j / div;
+ const int64_t slow_div = j / i;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+ }
+}
+
+
+void test_unsigned_64bit()
+{
+ for (uint64_t i = 1; i < 25000; ++i) {
+ const Eigen::internal::TensorIntDivisor<uint64_t> div(i);
+
+ for (uint64_t j = 0; j < 25000; ++j) {
+ const uint64_t fast_div = j / div;
+ const uint64_t slow_div = j / i;
+ VERIFY_IS_EQUAL(fast_div, slow_div);
+ }
+ }
+}
+
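+// Numerators just around each power of two are the boundary cases for
+// magic-number division, so scan a small window on either side of every 2^n.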
+void test_powers_32bit() {
+ for (int expon = 1; expon < 31; expon++) {
+ int32_t div = (1 << expon);
+ for (int num_expon = 0; num_expon < 32; num_expon++) {
+ int32_t start_num = (1 << num_expon) - 100;
+ int32_t end_num = (1 << num_expon) + 100;
+ if (start_num < 0)
+ start_num = 0;
+ for (int32_t num = start_num; num < end_num; num++) {
+        Eigen::internal::TensorIntDivisor<int32_t> divider(div);
+ int32_t result = num/div;
+ int32_t result_op = divider.divide(num);
+ VERIFY_IS_EQUAL(result_op, result);
+ }
+ }
+ }
+}
+
+void test_powers_64bit() {
+ for (int expon = 0; expon < 63; expon++) {
+ int64_t div = (1ull << expon);
+ for (int num_expon = 0; num_expon < 63; num_expon++) {
+ int64_t start_num = (1ull << num_expon) - 10;
+ int64_t end_num = (1ull << num_expon) + 10;
+ if (start_num < 0)
+ start_num = 0;
+ for (int64_t num = start_num; num < end_num; num++) {
+ Eigen::internal::TensorIntDivisor<int64_t> divider(div);
+ int64_t result = num/div;
+ int64_t result_op = divider.divide(num);
+ VERIFY_IS_EQUAL(result_op, result);
+ }
+ }
+ }
+}
+
+void test_specific() {
+ // A particular combination that was previously failing
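+  // (num exceeds 2^31, which likely exercised the upper half of the
+  // internal multiply.)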
+ int64_t div = 209715200;
+ int64_t num = 3238002688ll;
+ Eigen::internal::TensorIntDivisor<int64_t> divider(div);
+ int64_t result = num/div;
+ int64_t result_op = divider.divide(num);
+ VERIFY_IS_EQUAL(result, result_op);
+}
+
+void test_cxx11_tensor_intdiv()
+{
+ CALL_SUBTEST_1(test_signed_32bit());
+ CALL_SUBTEST_2(test_unsigned_32bit());
+ CALL_SUBTEST_3(test_signed_64bit());
+ CALL_SUBTEST_4(test_unsigned_64bit());
+ CALL_SUBTEST_5(test_powers_32bit());
+ CALL_SUBTEST_6(test_powers_64bit());
+ CALL_SUBTEST_7(test_specific());
+}
diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp
new file mode 100644
index 000000000..489960529
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_io.cpp
@@ -0,0 +1,136 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <sstream>
+#include <string>
+#include <Eigen/CXX11/Tensor>
+
+
+template<int DataLayout>
+static void test_output_0d()
+{
+ Tensor<int, 0, DataLayout> tensor;
+ tensor() = 123;
+
+ std::stringstream os;
+ os << tensor;
+
+ std::string expected("123");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+}
+
+
+template<int DataLayout>
+static void test_output_1d()
+{
+ Tensor<int, 1, DataLayout> tensor(5);
+ for (int i = 0; i < 5; ++i) {
+ tensor(i) = i;
+ }
+
+ std::stringstream os;
+ os << tensor;
+
+ std::string expected("0\n1\n2\n3\n4");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+
+ Eigen::Tensor<double,1,DataLayout> empty_tensor(0);
+ std::stringstream empty_os;
+ empty_os << empty_tensor;
+ std::string empty_string;
+ VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string);
+}
+
+
+template<int DataLayout>
+static void test_output_2d()
+{
+ Tensor<int, 2, DataLayout> tensor(5, 3);
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ tensor(i, j) = i*j;
+ }
+ }
+
+ std::stringstream os;
+ os << tensor;
+
+ std::string expected("0 0 0\n0 1 2\n0 2 4\n0 3 6\n0 4 8");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+}
+
+
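+// Streaming an unevaluated expression: operator<< must first evaluate
+// tensor1 + tensor2 into a temporary and then print the result.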
+template<int DataLayout>
+static void test_output_expr()
+{
+ Tensor<int, 1, DataLayout> tensor1(5);
+ Tensor<int, 1, DataLayout> tensor2(5);
+ for (int i = 0; i < 5; ++i) {
+ tensor1(i) = i;
+ tensor2(i) = 7;
+ }
+
+ std::stringstream os;
+ os << tensor1 + tensor2;
+
+ std::string expected(" 7\n 8\n 9\n10\n11");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+}
+
+
+template<int DataLayout>
+static void test_output_string()
+{
+ Tensor<std::string, 2, DataLayout> tensor(5, 3);
+ tensor.setConstant(std::string("foo"));
+
+ std::stringstream os;
+ os << tensor;
+
+ std::string expected("foo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+}
+
+
+template<int DataLayout>
+static void test_output_const()
+{
+ Tensor<int, 1, DataLayout> tensor(5);
+ for (int i = 0; i < 5; ++i) {
+ tensor(i) = i;
+ }
+
+ TensorMap<Tensor<const int, 1, DataLayout> > tensor_map(tensor.data(), 5);
+
+ std::stringstream os;
+ os << tensor_map;
+
+ std::string expected("0\n1\n2\n3\n4");
+ VERIFY_IS_EQUAL(std::string(os.str()), expected);
+}
+
+
+void test_cxx11_tensor_io()
+{
+ CALL_SUBTEST(test_output_0d<ColMajor>());
+ CALL_SUBTEST(test_output_0d<RowMajor>());
+ CALL_SUBTEST(test_output_1d<ColMajor>());
+ CALL_SUBTEST(test_output_1d<RowMajor>());
+ CALL_SUBTEST(test_output_2d<ColMajor>());
+ CALL_SUBTEST(test_output_2d<RowMajor>());
+ CALL_SUBTEST(test_output_expr<ColMajor>());
+ CALL_SUBTEST(test_output_expr<RowMajor>());
+ CALL_SUBTEST(test_output_string<ColMajor>());
+ CALL_SUBTEST(test_output_string<RowMajor>());
+ CALL_SUBTEST(test_output_const<ColMajor>());
+ CALL_SUBTEST(test_output_const<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_layout_swap.cpp b/unsupported/test/cxx11_tensor_layout_swap.cpp
new file mode 100644
index 000000000..ae297a9da
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_layout_swap.cpp
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
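+// swap_layout() reinterprets the same data in the opposite storage order:
+// no element moves, but the dimensions come out reversed, so tensor(i,j,k)
+// must equal the swapped tensor at (k,j,i).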
+static void test_simple_swap()
+{
+ Tensor<float, 3, ColMajor> tensor(2,3,7);
+ tensor.setRandom();
+
+ Tensor<float, 3, RowMajor> tensor2 = tensor.swap_layout();
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor2.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor2.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor2.dimension(0));
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(tensor(i,j,k), tensor2(k,j,i));
+ }
+ }
+ }
+}
+
+
+static void test_swap_as_lvalue()
+{
+ Tensor<float, 3, ColMajor> tensor(2,3,7);
+ tensor.setRandom();
+
+ Tensor<float, 3, RowMajor> tensor2(7,3,2);
+ tensor2.swap_layout() = tensor;
+ VERIFY_IS_EQUAL(tensor.dimension(0), tensor2.dimension(2));
+ VERIFY_IS_EQUAL(tensor.dimension(1), tensor2.dimension(1));
+ VERIFY_IS_EQUAL(tensor.dimension(2), tensor2.dimension(0));
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(tensor(i,j,k), tensor2(k,j,i));
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_layout_swap()
+{
+ CALL_SUBTEST(test_simple_swap());
+ CALL_SUBTEST(test_swap_as_lvalue());
+}
diff --git a/unsupported/test/cxx11_tensor_lvalue.cpp b/unsupported/test/cxx11_tensor_lvalue.cpp
new file mode 100644
index 000000000..071f5b406
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_lvalue.cpp
@@ -0,0 +1,42 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+
+static void test_compound_assignment()
+{
+ Tensor<float, 3> mat1(2,3,7);
+ Tensor<float, 3> mat2(2,3,7);
+ Tensor<float, 3> mat3(2,3,7);
+
+ mat1.setRandom();
+ mat2.setRandom();
+ mat3 = mat1;
+ mat3 += mat2;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) + mat2(i,j,k));
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_lvalue()
+{
+ CALL_SUBTEST(test_compound_assignment());
+}
diff --git a/unsupported/test/cxx11_tensor_map.cpp b/unsupported/test/cxx11_tensor_map.cpp
new file mode 100644
index 000000000..3db0ee7c0
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_map.cpp
@@ -0,0 +1,277 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_0d()
+{
+ Tensor<int, 0> scalar1;
+ Tensor<int, 0, RowMajor> scalar2;
+
+ TensorMap<Tensor<const int, 0> > scalar3(scalar1.data());
+ TensorMap<Tensor<const int, 0, RowMajor> > scalar4(scalar2.data());
+
+ scalar1() = 7;
+ scalar2() = 13;
+
+ VERIFY_IS_EQUAL(scalar1.rank(), 0);
+ VERIFY_IS_EQUAL(scalar1.size(), 1);
+
+ VERIFY_IS_EQUAL(scalar3(), 7);
+ VERIFY_IS_EQUAL(scalar4(), 13);
+}
+
+static void test_1d()
+{
+ Tensor<int, 1> vec1(6);
+ Tensor<int, 1, RowMajor> vec2(6);
+
+ TensorMap<Tensor<const int, 1> > vec3(vec1.data(), 6);
+ TensorMap<Tensor<const int, 1, RowMajor> > vec4(vec2.data(), 6);
+
+ vec1(0) = 4; vec2(0) = 0;
+ vec1(1) = 8; vec2(1) = 1;
+ vec1(2) = 15; vec2(2) = 2;
+ vec1(3) = 16; vec2(3) = 3;
+ vec1(4) = 23; vec2(4) = 4;
+ vec1(5) = 42; vec2(5) = 5;
+
+ VERIFY_IS_EQUAL(vec1.rank(), 1);
+ VERIFY_IS_EQUAL(vec1.size(), 6);
+ VERIFY_IS_EQUAL(vec1.dimension(0), 6);
+
+ VERIFY_IS_EQUAL(vec3(0), 4);
+ VERIFY_IS_EQUAL(vec3(1), 8);
+ VERIFY_IS_EQUAL(vec3(2), 15);
+ VERIFY_IS_EQUAL(vec3(3), 16);
+ VERIFY_IS_EQUAL(vec3(4), 23);
+ VERIFY_IS_EQUAL(vec3(5), 42);
+
+ VERIFY_IS_EQUAL(vec4(0), 0);
+ VERIFY_IS_EQUAL(vec4(1), 1);
+ VERIFY_IS_EQUAL(vec4(2), 2);
+ VERIFY_IS_EQUAL(vec4(3), 3);
+ VERIFY_IS_EQUAL(vec4(4), 4);
+ VERIFY_IS_EQUAL(vec4(5), 5);
+}
+
+static void test_2d()
+{
+ Tensor<int, 2> mat1(2,3);
+ Tensor<int, 2, RowMajor> mat2(2,3);
+
+ mat1(0,0) = 0;
+ mat1(0,1) = 1;
+ mat1(0,2) = 2;
+ mat1(1,0) = 3;
+ mat1(1,1) = 4;
+ mat1(1,2) = 5;
+
+ mat2(0,0) = 0;
+ mat2(0,1) = 1;
+ mat2(0,2) = 2;
+ mat2(1,0) = 3;
+ mat2(1,1) = 4;
+ mat2(1,2) = 5;
+
+ TensorMap<Tensor<const int, 2> > mat3(mat1.data(), 2, 3);
+ TensorMap<Tensor<const int, 2, RowMajor> > mat4(mat2.data(), 2, 3);
+
+ VERIFY_IS_EQUAL(mat3.rank(), 2);
+ VERIFY_IS_EQUAL(mat3.size(), 6);
+ VERIFY_IS_EQUAL(mat3.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat3.dimension(1), 3);
+
+ VERIFY_IS_EQUAL(mat4.rank(), 2);
+ VERIFY_IS_EQUAL(mat4.size(), 6);
+ VERIFY_IS_EQUAL(mat4.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat4.dimension(1), 3);
+
+ VERIFY_IS_EQUAL(mat3(0,0), 0);
+ VERIFY_IS_EQUAL(mat3(0,1), 1);
+ VERIFY_IS_EQUAL(mat3(0,2), 2);
+ VERIFY_IS_EQUAL(mat3(1,0), 3);
+ VERIFY_IS_EQUAL(mat3(1,1), 4);
+ VERIFY_IS_EQUAL(mat3(1,2), 5);
+
+ VERIFY_IS_EQUAL(mat4(0,0), 0);
+ VERIFY_IS_EQUAL(mat4(0,1), 1);
+ VERIFY_IS_EQUAL(mat4(0,2), 2);
+ VERIFY_IS_EQUAL(mat4(1,0), 3);
+ VERIFY_IS_EQUAL(mat4(1,1), 4);
+ VERIFY_IS_EQUAL(mat4(1,2), 5);
+}
+
+static void test_3d()
+{
+ Tensor<int, 3> mat1(2,3,7);
+ Tensor<int, 3, RowMajor> mat2(2,3,7);
+
+ int val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ mat2(i,j,k) = val;
+ val++;
+ }
+ }
+ }
+
+ TensorMap<Tensor<const int, 3> > mat3(mat1.data(), 2, 3, 7);
+ TensorMap<Tensor<const int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7);
+
+ VERIFY_IS_EQUAL(mat3.rank(), 3);
+ VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
+ VERIFY_IS_EQUAL(mat3.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat3.dimension(1), 3);
+ VERIFY_IS_EQUAL(mat3.dimension(2), 7);
+
+ VERIFY_IS_EQUAL(mat4.rank(), 3);
+ VERIFY_IS_EQUAL(mat4.size(), 2*3*7);
+ VERIFY_IS_EQUAL(mat4.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat4.dimension(1), 3);
+ VERIFY_IS_EQUAL(mat4.dimension(2), 7);
+
+ val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(mat3(i,j,k), val);
+ VERIFY_IS_EQUAL(mat4(i,j,k), val);
+ val++;
+ }
+ }
+ }
+}
+
+
+static void test_from_tensor()
+{
+ Tensor<int, 3> mat1(2,3,7);
+ Tensor<int, 3, RowMajor> mat2(2,3,7);
+
+ int val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ mat1(i,j,k) = val;
+ mat2(i,j,k) = val;
+ val++;
+ }
+ }
+ }
+
+ TensorMap<Tensor<int, 3> > mat3(mat1);
+ TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2);
+
+ VERIFY_IS_EQUAL(mat3.rank(), 3);
+ VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
+ VERIFY_IS_EQUAL(mat3.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat3.dimension(1), 3);
+ VERIFY_IS_EQUAL(mat3.dimension(2), 7);
+
+ VERIFY_IS_EQUAL(mat4.rank(), 3);
+ VERIFY_IS_EQUAL(mat4.size(), 2*3*7);
+ VERIFY_IS_EQUAL(mat4.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat4.dimension(1), 3);
+ VERIFY_IS_EQUAL(mat4.dimension(2), 7);
+
+ val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(mat3(i,j,k), val);
+ VERIFY_IS_EQUAL(mat4(i,j,k), val);
+ val++;
+ }
+ }
+ }
+
+ TensorFixedSize<int, Sizes<2,3,7> > mat5;
+
+ val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ array<ptrdiff_t, 3> coords;
+ coords[0] = i;
+ coords[1] = j;
+ coords[2] = k;
+ mat5(coords) = val;
+ val++;
+ }
+ }
+ }
+
+ TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5);
+
+ VERIFY_IS_EQUAL(mat6.rank(), 3);
+ VERIFY_IS_EQUAL(mat6.size(), 2*3*7);
+ VERIFY_IS_EQUAL(mat6.dimension(0), 2);
+ VERIFY_IS_EQUAL(mat6.dimension(1), 3);
+ VERIFY_IS_EQUAL(mat6.dimension(2), 7);
+
+ val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(mat6(i,j,k), val);
+ val++;
+ }
+ }
+ }
+}
+
+
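+// Accepts only a TensorMap; passing a plain Tensor in test_casting() below
+// exercises the implicit Tensor -> TensorMap conversion.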
+static int f(const TensorMap<Tensor<int, 3> >& tensor) {
+ // Size<0> empty;
+ EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+ Tensor<int, 0> result = tensor.sum();
+ return result();
+}
+
+static void test_casting()
+{
+ Tensor<int, 3> tensor(2,3,7);
+
+ int val = 0;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ tensor(i,j,k) = val;
+ val++;
+ }
+ }
+ }
+
+ TensorMap<Tensor<int, 3> > map(tensor);
+ int sum1 = f(map);
+ int sum2 = f(tensor);
+
+ VERIFY_IS_EQUAL(sum1, sum2);
+ VERIFY_IS_EQUAL(sum1, 861);
+}
+
+void test_cxx11_tensor_map()
+{
+ CALL_SUBTEST(test_0d());
+ CALL_SUBTEST(test_1d());
+ CALL_SUBTEST(test_2d());
+ CALL_SUBTEST(test_3d());
+
+ CALL_SUBTEST(test_from_tensor());
+ CALL_SUBTEST(test_casting());
+}
diff --git a/unsupported/test/cxx11_tensor_math.cpp b/unsupported/test/cxx11_tensor_math.cpp
new file mode 100644
index 000000000..61c742a16
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_math.cpp
@@ -0,0 +1,46 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_tanh()
+{
+ Tensor<float, 1> vec1(6);
+ vec1.setRandom();
+
+ Tensor<float, 1> vec2 = vec1.tanh();
+
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_APPROX(vec2(i), tanhf(vec1(i)));
+ }
+}
+
+static void test_sigmoid()
+{
+ Tensor<float, 1> vec1(6);
+ vec1.setRandom();
+
+ Tensor<float, 1> vec2 = vec1.sigmoid();
+
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_APPROX(vec2(i), 1.0f / (1.0f + std::exp(-vec1(i))));
+ }
+}
+
+
+void test_cxx11_tensor_math()
+{
+ CALL_SUBTEST(test_tanh());
+ CALL_SUBTEST(test_sigmoid());
+}
diff --git a/unsupported/test/cxx11_tensor_mixed_indices.cpp b/unsupported/test/cxx11_tensor_mixed_indices.cpp
new file mode 100644
index 000000000..4fba6fdd1
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_mixed_indices.cpp
@@ -0,0 +1,53 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
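+// The fourth template parameter of Tensor selects the index type: vec1 and
+// vec3 use the default DenseIndex, while vec2 and vec4 are indexed with
+// plain int.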
+static void test_simple()
+{
+ Tensor<float, 1, ColMajor> vec1(6);
+ Tensor<float, 1, ColMajor, int> vec2(6);
+
+ vec1(0) = 4.0; vec2(0) = 0.0;
+ vec1(1) = 8.0; vec2(1) = 1.0;
+ vec1(2) = 15.0; vec2(2) = 2.0;
+ vec1(3) = 16.0; vec2(3) = 3.0;
+ vec1(4) = 23.0; vec2(4) = 4.0;
+ vec1(5) = 42.0; vec2(5) = 5.0;
+
+ float data3[6];
+ TensorMap<Tensor<float, 1, ColMajor>> vec3(data3, 6);
+ vec3 = vec1.sqrt();
+ float data4[6];
+ TensorMap<Tensor<float, 1, ColMajor, int>> vec4(data4, 6);
+ vec4 = vec2.square();
+
+ VERIFY_IS_APPROX(vec3(0), sqrtf(4.0));
+ VERIFY_IS_APPROX(vec3(1), sqrtf(8.0));
+ VERIFY_IS_APPROX(vec3(2), sqrtf(15.0));
+ VERIFY_IS_APPROX(vec3(3), sqrtf(16.0));
+ VERIFY_IS_APPROX(vec3(4), sqrtf(23.0));
+ VERIFY_IS_APPROX(vec3(5), sqrtf(42.0));
+
+ VERIFY_IS_APPROX(vec4(0), 0.0f);
+ VERIFY_IS_APPROX(vec4(1), 1.0f);
+ VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f);
+ VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f);
+ VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f);
+ VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f);
+}
+
+
+void test_cxx11_tensor_mixed_indices()
+{
+ CALL_SUBTEST(test_simple());
+}
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
new file mode 100644
index 000000000..f7de43110
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -0,0 +1,485 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<typename>
+static void test_simple_reshape()
+{
+ Tensor<float, 5> tensor1(2,3,1,7,1);
+ tensor1.setRandom();
+
+ Tensor<float, 3> tensor2(2,3,7);
+ Tensor<float, 2> tensor3(6,7);
+ Tensor<float, 2> tensor4(2,21);
+
+ Tensor<float, 3>::Dimensions dim1(2,3,7);
+ tensor2 = tensor1.reshape(dim1);
+ Tensor<float, 2>::Dimensions dim2(6,7);
+ tensor3 = tensor1.reshape(dim2);
+ Tensor<float, 2>::Dimensions dim3(2,21);
+ tensor4 = tensor1.reshape(dim1).reshape(dim3);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));
+ VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k));
+ VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k));
+ }
+ }
+ }
+}
+
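+// Reshaping the rank-5 operands down to matrices and contracting along
+// DimPair(1, 0) must reproduce the ordinary matrix product m1 * m2.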
+template<typename>
+static void test_reshape_in_expr() {
+ MatrixXf m1(2,3*5*7*11);
+ MatrixXf m2(3*5*7*11,13);
+ m1.setRandom();
+ m2.setRandom();
+ MatrixXf m3 = m1 * m2;
+
+ TensorMap<Tensor<float, 5>> tensor1(m1.data(), 2,3,5,7,11);
+ TensorMap<Tensor<float, 5>> tensor2(m2.data(), 3,5,7,11,13);
+ Tensor<float, 2>::Dimensions newDims1(2,3*5*7*11);
+ Tensor<float, 2>::Dimensions newDims2(3*5*7*11,13);
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ array<DimPair, 1> contract_along{{DimPair(1, 0)}};
+ Tensor<float, 2> tensor3(2,13);
+ tensor3 = tensor1.reshape(newDims1).contract(tensor2.reshape(newDims2), contract_along);
+
+ Map<MatrixXf> res(tensor3.data(), 2, 13);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 13; ++j) {
+ VERIFY_IS_APPROX(res(i,j), m3(i,j));
+ }
+ }
+}
+
+template<typename>
+static void test_reshape_as_lvalue()
+{
+ Tensor<float, 3> tensor(2,3,7);
+ tensor.setRandom();
+
+ Tensor<float, 2> tensor2d(6,7);
+ Tensor<float, 3>::Dimensions dim(2,3,7);
+ tensor2d.reshape(dim) = tensor;
+
+ float scratch[2*3*1*7*1];
+ TensorMap<Tensor<float, 5>> tensor5d(scratch, 2,3,1,7,1);
+ tensor5d.reshape(dim).device(Eigen::DefaultDevice()) = tensor;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k));
+ VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_simple_slice()
+{
+ Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+ tensor.setRandom();
+
+ Tensor<float, 5, DataLayout> slice1(1,1,1,1,1);
+ Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5);
+ Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1);
+ slice1 = tensor.slice(indices, sizes);
+ VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
+
+ Tensor<float, 5, DataLayout> slice2(1,1,2,2,3);
+ Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5);
+ Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3);
+ slice2 = tensor.slice(indices2, sizes2);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
+ }
+ }
+ }
+}
+
+template<typename=void>
+static void test_const_slice()
+{
+ const float b[1] = {42};
+ TensorMap<Tensor<const float, 1> > m(b, 1);
+ DSizes<DenseIndex, 1> offsets;
+ offsets[0] = 0;
+ TensorRef<Tensor<const float, 1> > slice_ref(m.slice(offsets, m.dimensions()));
+ VERIFY_IS_EQUAL(slice_ref(0), 42);
+}
+
+template<int DataLayout>
+static void test_slice_in_expr() {
+ typedef Matrix<float, Dynamic, Dynamic, DataLayout> Mtx;
+ Mtx m1(7,7);
+ Mtx m2(3,3);
+ m1.setRandom();
+ m2.setRandom();
+
+ Mtx m3 = m1.block(1, 2, 3, 3) * m2.block(0, 2, 3, 1);
+
+ TensorMap<Tensor<float, 2, DataLayout>> tensor1(m1.data(), 7, 7);
+ TensorMap<Tensor<float, 2, DataLayout>> tensor2(m2.data(), 3, 3);
+ Tensor<float, 2, DataLayout> tensor3(3,1);
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ array<DimPair, 1> contract_along{{DimPair(1, 0)}};
+
+ Eigen::DSizes<ptrdiff_t, 2> indices1(1,2);
+ Eigen::DSizes<ptrdiff_t, 2> sizes1(3,3);
+ Eigen::DSizes<ptrdiff_t, 2> indices2(0,2);
+ Eigen::DSizes<ptrdiff_t, 2> sizes2(3,1);
+ tensor3 = tensor1.slice(indices1, sizes1).contract(tensor2.slice(indices2, sizes2), contract_along);
+
+ Map<Mtx> res(tensor3.data(), 3, 1);
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 1; ++j) {
+ VERIFY_IS_APPROX(res(i,j), m3(i,j));
+ }
+ }
+
+ // Take an arbitrary slice of an arbitrarily sized tensor.
+ TensorMap<Tensor<const float, 2, DataLayout>> tensor4(m1.data(), 7, 7);
+ Tensor<float, 1, DataLayout> tensor6 = tensor4.reshape(DSizes<ptrdiff_t, 1>(7*7)).exp().slice(DSizes<ptrdiff_t, 1>(0), DSizes<ptrdiff_t, 1>(35));
+ for (int i = 0; i < 35; ++i) {
+ VERIFY_IS_APPROX(tensor6(i), expf(tensor4.data()[i]));
+ }
+}
+
+template<int DataLayout>
+static void test_slice_as_lvalue()
+{
+ Tensor<float, 3, DataLayout> tensor1(2,2,7);
+ tensor1.setRandom();
+ Tensor<float, 3, DataLayout> tensor2(2,2,7);
+ tensor2.setRandom();
+ Tensor<float, 3, DataLayout> tensor3(4,3,5);
+ tensor3.setRandom();
+ Tensor<float, 3, DataLayout> tensor4(4,3,2);
+ tensor4.setRandom();
+ Tensor<float, 3, DataLayout> tensor5(10,13,12);
+ tensor5.setRandom();
+
+ Tensor<float, 3, DataLayout> result(4,5,7);
+ Eigen::DSizes<ptrdiff_t, 3> sizes12(2,2,7);
+ Eigen::DSizes<ptrdiff_t, 3> first_slice(0,0,0);
+ result.slice(first_slice, sizes12) = tensor1;
+ Eigen::DSizes<ptrdiff_t, 3> second_slice(2,0,0);
+ result.slice(second_slice, sizes12).device(Eigen::DefaultDevice()) = tensor2;
+
+ Eigen::DSizes<ptrdiff_t, 3> sizes3(4,3,5);
+ Eigen::DSizes<ptrdiff_t, 3> third_slice(0,2,0);
+ result.slice(third_slice, sizes3) = tensor3;
+
+ Eigen::DSizes<ptrdiff_t, 3> sizes4(4,3,2);
+ Eigen::DSizes<ptrdiff_t, 3> fourth_slice(0,2,5);
+ result.slice(fourth_slice, sizes4) = tensor4;
+
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_EQUAL(result(i,j,k), tensor1(i,j,k));
+ VERIFY_IS_EQUAL(result(i+2,j,k), tensor2(i,j,k));
+ }
+ }
+ }
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 2; j < 5; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ VERIFY_IS_EQUAL(result(i,j,k), tensor3(i,j-2,k));
+ }
+ for (int k = 5; k < 7; ++k) {
+ VERIFY_IS_EQUAL(result(i,j,k), tensor4(i,j-2,k-5));
+ }
+ }
+ }
+
+ Eigen::DSizes<ptrdiff_t, 3> sizes5(4,5,7);
+ Eigen::DSizes<ptrdiff_t, 3> fifth_slice(0,0,0);
+ result.slice(fifth_slice, sizes5) = tensor5.slice(fifth_slice, sizes5);
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 2; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(result(i,j,k), tensor5(i,j,k));
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_slice_raw_data()
+{
+ Tensor<float, 4, DataLayout> tensor(3,5,7,11);
+ tensor.setRandom();
+
+ Eigen::DSizes<ptrdiff_t, 4> offsets(1,2,3,4);
+ Eigen::DSizes<ptrdiff_t, 4> extents(1,1,1,1);
+ typedef TensorEvaluator<decltype(tensor.slice(offsets, extents)), DefaultDevice> SliceEvaluator;
+ auto slice1 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice1.dimensions().TotalSize(), 1);
+ VERIFY_IS_EQUAL(slice1.data()[0], tensor(1,2,3,4));
+
+ if (DataLayout == ColMajor) {
+ extents = Eigen::DSizes<ptrdiff_t, 4>(2,1,1,1);
+ auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2);
+ VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4));
+ VERIFY_IS_EQUAL(slice2.data()[1], tensor(2,2,3,4));
+ } else {
+ extents = Eigen::DSizes<ptrdiff_t, 4>(1,1,1,2);
+ auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2);
+ VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4));
+ VERIFY_IS_EQUAL(slice2.data()[1], tensor(1,2,3,5));
+ }
+
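+  // This slice is not contiguous in memory, so it cannot expose raw data:
+  // data() is expected to return a null pointer.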
+ extents = Eigen::DSizes<ptrdiff_t, 4>(1,2,1,1);
+ auto slice3 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice3.dimensions().TotalSize(), 2);
+ VERIFY_IS_EQUAL(slice3.data(), static_cast<float*>(0));
+
+ if (DataLayout == ColMajor) {
+ offsets = Eigen::DSizes<ptrdiff_t, 4>(0,2,3,4);
+ extents = Eigen::DSizes<ptrdiff_t, 4>(3,2,1,1);
+ auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 6);
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ VERIFY_IS_EQUAL(slice4.data()[i+3*j], tensor(i,2+j,3,4));
+ }
+ }
+ } else {
+ offsets = Eigen::DSizes<ptrdiff_t, 4>(1,2,3,0);
+ extents = Eigen::DSizes<ptrdiff_t, 4>(1,1,2,11);
+ auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 22);
+ for (int l = 0; l < 11; ++l) {
+ for (int k = 0; k < 2; ++k) {
+ VERIFY_IS_EQUAL(slice4.data()[l+11*k], tensor(1,2,3+k,l));
+ }
+ }
+ }
+
+ if (DataLayout == ColMajor) {
+ offsets = Eigen::DSizes<ptrdiff_t, 4>(0,0,0,4);
+ extents = Eigen::DSizes<ptrdiff_t, 4>(3,5,7,2);
+ auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 210);
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 2; ++l) {
+ int slice_index = i + 3 * (j + 5 * (k + 7 * l));
+ VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i,j,k,l+4));
+ }
+ }
+ }
+ }
+ } else {
+ offsets = Eigen::DSizes<ptrdiff_t, 4>(1,0,0,0);
+ extents = Eigen::DSizes<ptrdiff_t, 4>(2,5,7,11);
+ auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 770);
+ for (int l = 0; l < 11; ++l) {
+ for (int k = 0; k < 7; ++k) {
+ for (int j = 0; j < 5; ++j) {
+ for (int i = 0; i < 2; ++i) {
+ int slice_index = l + 11 * (k + 7 * (j + 5 * i));
+ VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i+1,j,k,l));
+ }
+ }
+ }
+ }
+  }
+
+ offsets = Eigen::DSizes<ptrdiff_t, 4>(0,0,0,0);
+ extents = Eigen::DSizes<ptrdiff_t, 4>(3,5,7,11);
+ auto slice6 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+ VERIFY_IS_EQUAL(slice6.dimensions().TotalSize(), 3*5*7*11);
+ VERIFY_IS_EQUAL(slice6.data(), tensor.data());
+}
+
+
+template<int DataLayout>
+static void test_strided_slice()
+{
+ typedef Tensor<float, 5, DataLayout> Tensor5f;
+ typedef Eigen::DSizes<Eigen::DenseIndex, 5> Index5;
+ typedef Tensor<float, 2, DataLayout> Tensor2f;
+ typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
+ Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+ Tensor<float, 2, DataLayout> tensor2(7,11);
+ tensor.setRandom();
+ tensor2.setRandom();
+
+ if (true) {
+ Tensor2f slice(2,3);
+ Index2 strides(-2,-1);
+ Index2 indicesStart(5,7);
+ Index2 indicesStop(0,4);
+ slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(j,k), tensor2(5-2*j,7-k));
+ }
+ }
+ }
+
+ if(true) {
+ Tensor2f slice(0,1);
+ Index2 strides(1,1);
+ Index2 indicesStart(5,4);
+ Index2 indicesStop(5,5);
+ slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+ }
+
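+  // Out-of-range start/stop indices are clamped to the valid range for the
+  // given stride sign rather than triggering an error, much like numpy
+  // slicing.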
+  if(true) { // test clamped degenerate intervals
+    Tensor2f slice(7,11);
+    Index2 strides(1,-1);
+    Index2 indicesStart(-3,20); // should be clamped to 0,10
+    Index2 indicesStop(20,-11); // should be clamped to 7,-1
+ slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+ for (int j = 0; j < 7; ++j) {
+ for (int k = 0; k < 11; ++k) {
+ VERIFY_IS_EQUAL(slice(j,k), tensor2(j,10-k));
+ }
+ }
+ }
+
+ if(true) {
+ Tensor5f slice1(1,1,1,1,1);
+ Eigen::DSizes<Eigen::DenseIndex, 5> indicesStart(1, 2, 3, 4, 5);
+ Eigen::DSizes<Eigen::DenseIndex, 5> indicesStop(2, 3, 4, 5, 6);
+ Eigen::DSizes<Eigen::DenseIndex, 5> strides(1, 1, 1, 1, 1);
+ slice1 = tensor.stridedSlice(indicesStart, indicesStop, strides);
+ VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
+ }
+
+ if(true) {
+ Tensor5f slice(1,1,2,2,3);
+ Index5 start(1, 1, 3, 4, 5);
+ Index5 stop(2, 2, 5, 6, 8);
+ Index5 strides(1, 1, 1, 1, 1);
+ slice = tensor.stridedSlice(start, stop, strides);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
+ }
+ }
+ }
+ }
+
+ if(true) {
+ Tensor5f slice(1,1,2,2,3);
+ Index5 strides3(1, 1, -2, 1, -1);
+ Index5 indices3Start(1, 1, 4, 4, 7);
+ Index5 indices3Stop(2, 2, 0, 6, 4);
+ slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,4-2*i,4+j,7-k));
+ }
+ }
+ }
+ }
+
+ if(false) { // tests degenerate interval
+ Tensor5f slice(1,1,2,2,3);
+ Index5 strides3(1, 1, 2, 1, 1);
+ Index5 indices3Start(1, 1, 4, 4, 7);
+ Index5 indices3Stop(2, 2, 0, 6, 4);
+ slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3);
+ }
+}
+
+template<int DataLayout>
+static void test_strided_slice_write()
+{
+ typedef Tensor<float, 2, DataLayout> Tensor2f;
+ typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
+
+  Tensor<float, 2, DataLayout> tensor(7,11), tensor2(7,11);
+  tensor.setRandom();
+  tensor2 = tensor;
+ Tensor2f slice(2,3);
+
+ slice.setRandom();
+
+ Index2 strides(1,1);
+ Index2 indicesStart(3,4);
+ Index2 indicesStop(5,7);
+ Index2 lengths(2,3);
+
+  tensor.slice(indicesStart, lengths) = slice;
+  tensor2.stridedSlice(indicesStart, indicesStop, strides) = slice;
+
+  for (int i = 0; i < 7; ++i) {
+    for (int j = 0; j < 11; ++j) {
+      VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
+    }
+  }
+}
+
+
+template<int DataLayout>
+static void test_composition()
+{
+ Eigen::Tensor<float, 2, DataLayout> matrix(7, 11);
+ matrix.setRandom();
+
+ const DSizes<ptrdiff_t, 3> newDims(1, 1, 11);
+ Eigen::Tensor<float, 3, DataLayout> tensor =
+ matrix.slice(DSizes<ptrdiff_t, 2>(2, 0), DSizes<ptrdiff_t, 2>(1, 11)).reshape(newDims);
+
+ VERIFY_IS_EQUAL(tensor.dimensions().TotalSize(), 11);
+ VERIFY_IS_EQUAL(tensor.dimension(0), 1);
+ VERIFY_IS_EQUAL(tensor.dimension(1), 1);
+ VERIFY_IS_EQUAL(tensor.dimension(2), 11);
+ for (int i = 0; i < 11; ++i) {
+ VERIFY_IS_EQUAL(tensor(0,0,i), matrix(2,i));
+ }
+}
+
+
+void test_cxx11_tensor_morphing()
+{
+ CALL_SUBTEST_1(test_simple_reshape<void>());
+ CALL_SUBTEST_1(test_reshape_in_expr<void>());
+ CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
+
+ CALL_SUBTEST_1(test_simple_slice<ColMajor>());
+ CALL_SUBTEST_1(test_simple_slice<RowMajor>());
+ CALL_SUBTEST_1(test_const_slice());
+ CALL_SUBTEST_2(test_slice_in_expr<ColMajor>());
+ CALL_SUBTEST_3(test_slice_in_expr<RowMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<ColMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<RowMajor>());
+
+ CALL_SUBTEST_6(test_strided_slice_write<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice_write<RowMajor>());
+ CALL_SUBTEST_6(test_strided_slice<RowMajor>());
+
+ CALL_SUBTEST_7(test_composition<ColMajor>());
+ CALL_SUBTEST_7(test_composition<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_notification.cpp b/unsupported/test/cxx11_tensor_notification.cpp
new file mode 100644
index 000000000..c946007b8
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_notification.cpp
@@ -0,0 +1,81 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Vijay Vasudevan <vrv@google.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+
+#include <stdlib.h>
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+#if EIGEN_OS_WIN || EIGEN_OS_WIN64
+#include <windows.h>
+void sleep(int seconds) {
+ Sleep(seconds*1000);
+}
+#else
+#include <unistd.h>
+#endif
+
+
+namespace {
+
+void WaitAndAdd(Eigen::Notification* n, int* counter) {
+ n->Wait();
+ *counter = *counter + 1;
+}
+
+} // namespace
+
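+// Note: these tests rely on sleep() to give the scheduled closures time to
+// reach Wait(), so the checks are timing-based rather than fully
+// deterministic.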
+static void test_notification_single()
+{
+ ThreadPool thread_pool(1);
+
+ int counter = 0;
+ Eigen::Notification n;
+ std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter);
+ thread_pool.Schedule(func);
+ sleep(1);
+
+ // The thread should be waiting for the notification.
+ VERIFY_IS_EQUAL(counter, 0);
+
+ // Unblock the thread
+ n.Notify();
+
+ sleep(1);
+
+ // Verify the counter has been incremented
+ VERIFY_IS_EQUAL(counter, 1);
+}
+
+// Like test_notification_single() but enqueues multiple waiting tasks to
+// validate that a single Notify() releases every waiter.
+static void test_notification_multiple()
+{
+ ThreadPool thread_pool(1);
+
+ int counter = 0;
+ Eigen::Notification n;
+ std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter);
+ thread_pool.Schedule(func);
+ thread_pool.Schedule(func);
+ thread_pool.Schedule(func);
+ thread_pool.Schedule(func);
+ sleep(1);
+ VERIFY_IS_EQUAL(counter, 0);
+ n.Notify();
+ sleep(1);
+ VERIFY_IS_EQUAL(counter, 4);
+}
+
+void test_cxx11_tensor_notification()
+{
+ CALL_SUBTEST(test_notification_single());
+ CALL_SUBTEST(test_notification_multiple());
+}
diff --git a/unsupported/test/cxx11_tensor_of_complex.cpp b/unsupported/test/cxx11_tensor_of_complex.cpp
new file mode 100644
index 000000000..e9d1b2d3c
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_of_complex.cpp
@@ -0,0 +1,103 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+
+
+static void test_additions()
+{
+ Tensor<std::complex<float>, 1> data1(3);
+ Tensor<std::complex<float>, 1> data2(3);
+ for (int i = 0; i < 3; ++i) {
+ data1(i) = std::complex<float>(i, -i);
+ data2(i) = std::complex<float>(i, 7 * i);
+ }
+
+ Tensor<std::complex<float>, 1> sum = data1 + data2;
+ for (int i = 0; i < 3; ++i) {
+ VERIFY_IS_EQUAL(sum(i), std::complex<float>(2*i, 6*i));
+ }
+}
+
+
+static void test_abs()
+{
+ Tensor<std::complex<float>, 1> data1(3);
+ Tensor<std::complex<double>, 1> data2(3);
+ data1.setRandom();
+ data2.setRandom();
+
+ Tensor<float, 1> abs1 = data1.abs();
+ Tensor<double, 1> abs2 = data2.abs();
+ for (int i = 0; i < 3; ++i) {
+ VERIFY_IS_APPROX(abs1(i), std::abs(data1(i)));
+ VERIFY_IS_APPROX(abs2(i), std::abs(data2(i)));
+ }
+}
+
+
+static void test_conjugate()
+{
+ Tensor<std::complex<float>, 1> data1(3);
+ Tensor<std::complex<double>, 1> data2(3);
+ Tensor<int, 1> data3(3);
+ data1.setRandom();
+ data2.setRandom();
+ data3.setRandom();
+
+ Tensor<std::complex<float>, 1> conj1 = data1.conjugate();
+ Tensor<std::complex<double>, 1> conj2 = data2.conjugate();
+ Tensor<int, 1> conj3 = data3.conjugate();
+ for (int i = 0; i < 3; ++i) {
+ VERIFY_IS_APPROX(conj1(i), std::conj(data1(i)));
+ VERIFY_IS_APPROX(conj2(i), std::conj(data2(i)));
+ VERIFY_IS_APPROX(conj3(i), data3(i));
+ }
+}
+
+static void test_contractions()
+{
+ Tensor<std::complex<float>, 4> t_left(30, 50, 8, 31);
+ Tensor<std::complex<float>, 5> t_right(8, 31, 7, 20, 10);
+ Tensor<std::complex<float>, 5> t_result(30, 50, 7, 20, 10);
+
+ t_left.setRandom();
+ t_right.setRandom();
+
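+  // Flatten the operands: t_left becomes a 1500x248 matrix (30*50 by 8*31)
+  // and t_right a 248x1400 one (8*31 by 7*20*10).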
+ typedef Map<Matrix<std::complex<float>, Dynamic, Dynamic>> MapXcf;
+ MapXcf m_left(t_left.data(), 1500, 248);
+ MapXcf m_right(t_right.data(), 248, 1400);
+ Matrix<std::complex<float>, Dynamic, Dynamic> m_result(1500, 1400);
+
+ // This contraction should be equivalent to a regular matrix multiplication
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ Eigen::array<DimPair, 2> dims;
+ dims[0] = DimPair(2, 0);
+ dims[1] = DimPair(3, 1);
+ t_result = t_left.contract(t_right, dims);
+ m_result = m_left * m_right;
+ for (int i = 0; i < t_result.dimensions().TotalSize(); i++) {
+ VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
+ }
+}
+
+
+void test_cxx11_tensor_of_complex()
+{
+ CALL_SUBTEST(test_additions());
+ CALL_SUBTEST(test_abs());
+ CALL_SUBTEST(test_conjugate());
+ CALL_SUBTEST(test_contractions());
+}
diff --git a/unsupported/test/cxx11_tensor_of_const_values.cpp b/unsupported/test/cxx11_tensor_of_const_values.cpp
new file mode 100644
index 000000000..f179a0c21
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_of_const_values.cpp
@@ -0,0 +1,105 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
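+// Two flavors of read-only maps are exercised below: TensorMap<Tensor<const
+// float, 2>> maps const data, while a const TensorMap<Tensor<float, 2>> is
+// itself immutable. Both should be readable on the right-hand side.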
+static void test_assign()
+{
+ float data1[6];
+ TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3);
+ float data2[6];
+ const TensorMap<Tensor<float, 2>> mat2(data2, 2, 3);
+
+ for (int i = 0; i < 6; ++i) {
+ data1[i] = i;
+ data2[i] = -i;
+ }
+
+ Tensor<float, 2> rslt1;
+ rslt1 = mat1;
+ Tensor<float, 2> rslt2;
+ rslt2 = mat2;
+
+ Tensor<float, 2> rslt3 = mat1;
+ Tensor<float, 2> rslt4 = mat2;
+
+ Tensor<float, 2> rslt5(mat1);
+ Tensor<float, 2> rslt6(mat2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_APPROX(rslt1(i,j), static_cast<float>(i + 2*j));
+ VERIFY_IS_APPROX(rslt2(i,j), static_cast<float>(-i - 2*j));
+ VERIFY_IS_APPROX(rslt3(i,j), static_cast<float>(i + 2*j));
+ VERIFY_IS_APPROX(rslt4(i,j), static_cast<float>(-i - 2*j));
+ VERIFY_IS_APPROX(rslt5(i,j), static_cast<float>(i + 2*j));
+ VERIFY_IS_APPROX(rslt6(i,j), static_cast<float>(-i - 2*j));
+ }
+ }
+}
+
+
+static void test_plus()
+{
+ float data1[6];
+ TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3);
+ float data2[6];
+ TensorMap<Tensor<float, 2>> mat2(data2, 2, 3);
+
+ for (int i = 0; i < 6; ++i) {
+ data1[i] = i;
+ data2[i] = -i;
+ }
+
+ Tensor<float, 2> sum1;
+ sum1 = mat1 + mat2;
+ Tensor<float, 2> sum2;
+ sum2 = mat2 + mat1;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_APPROX(sum1(i,j), 0.0f);
+ VERIFY_IS_APPROX(sum2(i,j), 0.0f);
+ }
+ }
+}
+
+
+static void test_plus_equal()
+{
+ float data1[6];
+ TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3);
+ float data2[6];
+ TensorMap<Tensor<float, 2>> mat2(data2, 2, 3);
+
+ for (int i = 0; i < 6; ++i) {
+ data1[i] = i;
+ data2[i] = -i;
+ }
+ mat2 += mat1;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_APPROX(mat2(i,j), 0.0f);
+ }
+ }
+}
+
+
+void test_cxx11_tensor_of_const_values()
+{
+ CALL_SUBTEST(test_assign());
+ CALL_SUBTEST(test_plus());
+ CALL_SUBTEST(test_plus_equal());
+}
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
new file mode 100644
index 000000000..2f86980a2
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -0,0 +1,494 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_of_float16_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
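+// Common pattern for the fp16 tests: evaluate the same expression once in
+// float and once through a cast to Eigen::half and back, then compare the
+// two results on the host.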
+template<typename>
+void test_cuda_numext() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
+ bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>());
+
+ Tensor<bool, 1> half_prec(num_elem);
+ Tensor<bool, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking numext " << i << std::endl;
+ VERIFY_IS_EQUAL(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
+#ifdef EIGEN_HAS_CUDA_FP16
+
+template<typename>
+void test_cuda_conversion() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half(
+ d_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
+ d_conv, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random();
+ gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>();
+ gpu_conv.device(gpu_device) = gpu_half.cast<float>();
+
+ Tensor<float, 1> initial(num_elem);
+ Tensor<float, 1> final(num_elem);
+ gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(initial(i), final(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_half);
+ gpu_device.deallocate(d_conv);
+}
+
+template<typename>
+void test_cuda_unary() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.abs();
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().cast<float>();
+
+ Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking unary " << i << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+template<typename>
+void test_cuda_elementwise() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
+ d_float1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
+ d_float2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random();
+ gpu_float2.device(gpu_device) = gpu_float2.random();
+ gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1;
+ gpu_res_half.device(gpu_device) = ((gpu_float1.cast<Eigen::half>() + gpu_float2.cast<Eigen::half>()) * gpu_float1.cast<Eigen::half>()).cast<float>();
+
+ Tensor<float, 1> half_prec(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise " << i << ": full prec = " << full_prec(i) << " vs half prec = " << half_prec(i) << std::endl;
+ VERIFY_IS_APPROX(static_cast<Eigen::half>(full_prec(i)), static_cast<Eigen::half>(half_prec(i)));
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+template<typename>
+void test_cuda_transcendental() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
+ gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
+ gpu_float3.device(gpu_device) = gpu_float3.random();
+ gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
+ gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
+ gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>();
+
+ gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>();
+ gpu_res1_half.device(gpu_device) = gpu_res1_half.exp();
+
+ gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>();
+ gpu_res2_half.device(gpu_device) = gpu_res2_half.log();
+
+ gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
+ gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p();
+
+ Tensor<float, 1> input1(num_elem);
+ Tensor<Eigen::half, 1> half_prec1(num_elem);
+ Tensor<Eigen::half, 1> full_prec1(num_elem);
+ Tensor<float, 1> input2(num_elem);
+ Tensor<Eigen::half, 1> half_prec2(num_elem);
+ Tensor<Eigen::half, 1> full_prec2(num_elem);
+ Tensor<float, 1> input3(num_elem);
+ Tensor<Eigen::half, 1> half_prec3(num_elem);
+ Tensor<Eigen::half, 1> full_prec3(num_elem);
+ gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec1(i), half_prec1(i));
+ }
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
+    // log is inaccurate near 1: shift both results away from zero so the
+    // relative comparison performed by VERIFY_IS_APPROX stays meaningful.
+    if (std::abs(input2(i)-1.f) < 0.05f)
+      VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
+    else
+      VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
+ }
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise plog1 " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec3(i), half_prec3(i));
+ }
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_float3);
+ gpu_device.deallocate(d_res1_half);
+ gpu_device.deallocate(d_res1_float);
+ gpu_device.deallocate(d_res2_half);
+ gpu_device.deallocate(d_res2_float);
+ gpu_device.deallocate(d_res3_float);
+ gpu_device.deallocate(d_res3_half);
+}
+
+template<typename>
+void test_cuda_contractions() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int rows = 23;
+ int cols = 23;
+ int num_elem = rows*cols;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, rows, cols);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, rows, cols);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half(
+ d_res_half, rows, cols);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float(
+ d_res_float, rows, cols);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
+ gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
+
+ typedef Tensor<float, 2>::DimensionPair DimPair;
+ Eigen::array<DimPair, 1> dims(DimPair(1, 0));
+ gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims);
+
+ Tensor<Eigen::half, 2> half_prec(rows, cols);
+ Tensor<Eigen::half, 2> full_prec(rows, cols);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < rows; ++i) {
+ for (int j = 0; j < cols; ++j) {
+ std::cout << "Checking contract " << i << " " << j << full_prec(i, j) << " " << half_prec(i, j) << std::endl;
+ if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) {
+ VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+ }
+ }
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+template<typename>
+void test_cuda_reductions(int size1, int size2, int redux) {
+
+ std::cout << "Reducing " << size1 << " by " << size2
+ << " tensor along dim " << redux << std::endl;
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = size1*size2;
+ int result_size = (redux == 1 ? size1 : size2);
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, size1, size2);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, size1, size2);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, result_size);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, result_size);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random() * 2.0f;
+ gpu_float2.device(gpu_device) = gpu_float2.random() * 2.0f;
+
+ Eigen::array<int, 1> redux_dim = {{redux}};
+ gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim);
+
+ Tensor<Eigen::half, 1> half_prec(result_size);
+ Tensor<Eigen::half, 1> full_prec(result_size);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, result_size*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < result_size; ++i) {
+ std::cout << "EXPECTED " << full_prec(i) << " GOT " << half_prec(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+template<typename>
+void test_cuda_reductions() {
+ test_cuda_reductions<void>(13, 13, 0);
+ test_cuda_reductions<void>(13, 13, 1);
+
+ test_cuda_reductions<void>(35, 36, 0);
+ test_cuda_reductions<void>(35, 36, 1);
+
+ test_cuda_reductions<void>(36, 35, 0);
+ test_cuda_reductions<void>(36, 35, 1);
+}
+
+template<typename>
+void test_cuda_full_reductions() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int size = 13;
+ int num_elem = size*size;
+
+ float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
+ Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+ d_float1, size, size);
+ Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+ d_float2, size, size);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half(
+ d_res_half);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float(
+ d_res_float);
+
+ gpu_float1.device(gpu_device) = gpu_float1.random();
+ gpu_float2.device(gpu_device) = gpu_float2.random();
+
+ gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum();
+
+ Tensor<Eigen::half, 0> half_prec;
+ Tensor<Eigen::half, 0> full_prec;
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ VERIFY_IS_APPROX(full_prec(), half_prec());
+
+ gpu_res_float.device(gpu_device) = gpu_float1.maximum().cast<Eigen::half>();
+ gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().maximum();
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half));
+ gpu_device.synchronize();
+
+ VERIFY_IS_APPROX(full_prec(), half_prec());
+
+ gpu_device.deallocate(d_float1);
+ gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+template<typename>
+void test_cuda_forced_evals() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_half2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1(
+ d_res_half1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2(
+ d_res_half2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ Eigen::array<int, 1> no_bcast;
+ no_bcast[0] = 1;
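+  // Note: broadcasting by a factor of 1 leaves the values untouched but still
+  // inserts a broadcasting node into the expression tree; together with the
+  // Unaligned map used for gpu_res_half2, this presumably forces the forced
+  // evaluation down a different code path than the plain .eval() used for
+  // gpu_res_half1.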
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.abs();
+ gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
+ gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>();
+
+ Tensor<float, 1> half_prec1(num_elem);
+ Tensor<float, 1> half_prec2(num_elem);
+ Tensor<float, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float));
+  gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half2, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking forced eval " << i << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec(i), half_prec1(i));
+ VERIFY_IS_APPROX(full_prec(i), half_prec2(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half1);
+ gpu_device.deallocate(d_res_half2);
+ gpu_device.deallocate(d_res_float);
+}
+#endif
+
+
+void test_cxx11_tensor_of_float16_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_numext<void>());
+
+#ifdef EIGEN_HAS_CUDA_FP16
+ CALL_SUBTEST_1(test_cuda_conversion<void>());
+ CALL_SUBTEST_1(test_cuda_unary<void>());
+ CALL_SUBTEST_1(test_cuda_elementwise<void>());
+  CALL_SUBTEST_1(test_cuda_transcendental<void>());
+ CALL_SUBTEST_2(test_cuda_contractions<void>());
+ CALL_SUBTEST_3(test_cuda_reductions<void>());
+ CALL_SUBTEST_4(test_cuda_full_reductions<void>());
+ CALL_SUBTEST_5(test_cuda_forced_evals<void>());
+#else
+ std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
+#endif
+}
diff --git a/unsupported/test/cxx11_tensor_of_strings.cpp b/unsupported/test/cxx11_tensor_of_strings.cpp
new file mode 100644
index 000000000..4ef9aed91
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_of_strings.cpp
@@ -0,0 +1,152 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+static void test_assign()
+{
+ std::string data1[6];
+ TensorMap<Tensor<std::string, 2>> mat1(data1, 2, 3);
+ std::string data2[6];
+ const TensorMap<Tensor<const std::string, 2>> mat2(data2, 2, 3);
+
+ for (int i = 0; i < 6; ++i) {
+ std::ostringstream s1;
+ s1 << "abc" << i*3;
+ data1[i] = s1.str();
+ std::ostringstream s2;
+ s2 << "def" << i*5;
+ data2[i] = s2.str();
+ }
+
+ Tensor<std::string, 2> rslt1;
+ rslt1 = mat1;
+ Tensor<std::string, 2> rslt2;
+ rslt2 = mat2;
+
+ Tensor<std::string, 2> rslt3 = mat1;
+ Tensor<std::string, 2> rslt4 = mat2;
+
+ Tensor<std::string, 2> rslt5(mat1);
+ Tensor<std::string, 2> rslt6(mat2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(rslt1(i,j), data1[i+2*j]);
+ VERIFY_IS_EQUAL(rslt2(i,j), data2[i+2*j]);
+ VERIFY_IS_EQUAL(rslt3(i,j), data1[i+2*j]);
+ VERIFY_IS_EQUAL(rslt4(i,j), data2[i+2*j]);
+ VERIFY_IS_EQUAL(rslt5(i,j), data1[i+2*j]);
+ VERIFY_IS_EQUAL(rslt6(i,j), data2[i+2*j]);
+ }
+ }
+}
+
+
+static void test_concat()
+{
+ Tensor<std::string, 2> t1(2, 3);
+ Tensor<std::string, 2> t2(2, 3);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ std::ostringstream s1;
+ s1 << "abc" << i + j*2;
+ t1(i, j) = s1.str();
+ std::ostringstream s2;
+ s2 << "def" << i*5 + j*32;
+ t2(i, j) = s2.str();
+ }
+ }
+
+ Tensor<std::string, 2> result = t1.concatenate(t2, 1);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 6);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(result(i, j), t1(i, j));
+ VERIFY_IS_EQUAL(result(i, j+3), t2(i, j));
+ }
+ }
+}
+
+
+static void test_slices()
+{
+ Tensor<std::string, 2> data(2, 6);
+ for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 6; ++j) {
+ std::ostringstream s1;
+ s1 << "abc" << i + j*2;
+ data(i, j) = s1.str();
+ }
+ }
+
+ const Eigen::DSizes<ptrdiff_t, 2> half_size(2, 3);
+ const Eigen::DSizes<ptrdiff_t, 2> first_half(0, 0);
+ const Eigen::DSizes<ptrdiff_t, 2> second_half(0, 3);
+
+ Tensor<std::string, 2> t1 = data.slice(first_half, half_size);
+ Tensor<std::string, 2> t2 = data.slice(second_half, half_size);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(data(i, j), t1(i, j));
+ VERIFY_IS_EQUAL(data(i, j+3), t2(i, j));
+ }
+ }
+}
+
+
+static void test_additions()
+{
+ Tensor<std::string, 1> data1(3);
+ Tensor<std::string, 1> data2(3);
+ for (int i = 0; i < 3; ++i) {
+ data1(i) = "abc";
+ std::ostringstream s1;
+ s1 << i;
+ data2(i) = s1.str();
+ }
+
+ Tensor<std::string, 1> sum = data1 + data2;
+ for (int i = 0; i < 3; ++i) {
+ std::ostringstream concat;
+ concat << "abc" << i;
+ std::string expected = concat.str();
+ VERIFY_IS_EQUAL(sum(i), expected);
+ }
+}
+
+
+static void test_initialization()
+{
+ Tensor<std::string, 2> a(2, 3);
+ a.setConstant(std::string("foo"));
+ for (int i = 0; i < 2*3; ++i) {
+ VERIFY_IS_EQUAL(a(i), std::string("foo"));
+ }
+}
+
+
+void test_cxx11_tensor_of_strings()
+{
+ // Beware: none of this is likely to ever work on a GPU.
+ CALL_SUBTEST(test_assign());
+ CALL_SUBTEST(test_concat());
+ CALL_SUBTEST(test_slices());
+ CALL_SUBTEST(test_additions());
+ CALL_SUBTEST(test_initialization());
+}
diff --git a/unsupported/test/cxx11_tensor_padding.cpp b/unsupported/test/cxx11_tensor_padding.cpp
new file mode 100644
index 000000000..ffa19896e
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_padding.cpp
@@ -0,0 +1,93 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<int DataLayout>
+static void test_simple_padding()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
+ paddings[0] = std::make_pair(0, 0);
+ paddings[1] = std::make_pair(2, 1);
+ paddings[2] = std::make_pair(3, 4);
+ paddings[3] = std::make_pair(0, 0);
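+  // Each pair specifies the amount of padding added (before, after) the
+  // corresponding dimension, so the padded sizes checked below are
+  // 2+(0+0), 3+(2+1), 5+(3+4) and 7+(0+0).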
+
+ Tensor<float, 4, DataLayout> padded;
+ padded = tensor.pad(paddings);
+
+ VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
+ VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
+ VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
+ VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ for (int k = 0; k < 12; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ if (j >= 2 && j < 5 && k >= 3 && k < 8) {
+ VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l));
+ } else {
+ VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
+ }
+ }
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+static void test_padded_expr()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
+ paddings[0] = std::make_pair(0, 0);
+ paddings[1] = std::make_pair(2, 1);
+ paddings[2] = std::make_pair(3, 4);
+ paddings[3] = std::make_pair(0, 0);
+
+ Eigen::DSizes<ptrdiff_t, 2> reshape_dims;
+ reshape_dims[0] = 12;
+ reshape_dims[1] = 84;
+
+ Tensor<float, 2, DataLayout> result;
+ result = tensor.pad(paddings).reshape(reshape_dims);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ for (int k = 0; k < 12; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ const float result_value = DataLayout == ColMajor ?
+ result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
+ if (j >= 2 && j < 5 && k >= 3 && k < 8) {
+ VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
+ } else {
+ VERIFY_IS_EQUAL(result_value, 0.0f);
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_padding()
+{
+ CALL_SUBTEST(test_simple_padding<ColMajor>());
+ CALL_SUBTEST(test_simple_padding<RowMajor>());
+ CALL_SUBTEST(test_padded_expr<ColMajor>());
+ CALL_SUBTEST(test_padded_expr<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_patch.cpp b/unsupported/test/cxx11_tensor_patch.cpp
new file mode 100644
index 000000000..434359730
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_patch.cpp
@@ -0,0 +1,172 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<int DataLayout>
+static void test_simple_patch()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> patch_dims;
+
+ patch_dims[0] = 1;
+ patch_dims[1] = 1;
+ patch_dims[2] = 1;
+ patch_dims[3] = 1;
+
+ Tensor<float, 5, DataLayout> no_patch;
+ no_patch = tensor.extract_patches(patch_dims);
+
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_EQUAL(no_patch.dimension(0), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(2), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(3), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(4), tensor.size());
+ } else {
+ VERIFY_IS_EQUAL(no_patch.dimension(0), tensor.size());
+ VERIFY_IS_EQUAL(no_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(2), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(3), 1);
+ VERIFY_IS_EQUAL(no_patch.dimension(4), 1);
+ }
+
+ for (int i = 0; i < tensor.size(); ++i) {
+ VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]);
+ }
+
+ patch_dims[0] = 2;
+ patch_dims[1] = 3;
+ patch_dims[2] = 5;
+ patch_dims[3] = 7;
+ Tensor<float, 5, DataLayout> single_patch;
+ single_patch = tensor.extract_patches(patch_dims);
+
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_EQUAL(single_patch.dimension(0), 2);
+ VERIFY_IS_EQUAL(single_patch.dimension(1), 3);
+ VERIFY_IS_EQUAL(single_patch.dimension(2), 5);
+ VERIFY_IS_EQUAL(single_patch.dimension(3), 7);
+ VERIFY_IS_EQUAL(single_patch.dimension(4), 1);
+ } else {
+ VERIFY_IS_EQUAL(single_patch.dimension(0), 1);
+ VERIFY_IS_EQUAL(single_patch.dimension(1), 2);
+ VERIFY_IS_EQUAL(single_patch.dimension(2), 3);
+ VERIFY_IS_EQUAL(single_patch.dimension(3), 5);
+ VERIFY_IS_EQUAL(single_patch.dimension(4), 7);
+ }
+
+ for (int i = 0; i < tensor.size(); ++i) {
+ VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]);
+ }
+
+ patch_dims[0] = 1;
+ patch_dims[1] = 2;
+ patch_dims[2] = 2;
+ patch_dims[3] = 1;
+ Tensor<float, 5, DataLayout> twod_patch;
+ twod_patch = tensor.extract_patches(patch_dims);
+
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_EQUAL(twod_patch.dimension(0), 1);
+ VERIFY_IS_EQUAL(twod_patch.dimension(1), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(3), 1);
+ VERIFY_IS_EQUAL(twod_patch.dimension(4), 2*2*4*7);
+ } else {
+ VERIFY_IS_EQUAL(twod_patch.dimension(0), 2*2*4*7);
+ VERIFY_IS_EQUAL(twod_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(twod_patch.dimension(2), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(3), 2);
+ VERIFY_IS_EQUAL(twod_patch.dimension(4), 1);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 4; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ int patch_loc;
+ if (DataLayout == ColMajor) {
+ patch_loc = i + 2 * (j + 2 * (k + 4 * l));
+ } else {
+ patch_loc = l + 7 * (k + 4 * (j + 2 * i));
+ }
+ for (int x = 0; x < 2; ++x) {
+ for (int y = 0; y < 2; ++y) {
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(0,x,y,0,patch_loc));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(patch_loc,0,x,y,0));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ patch_dims[0] = 1;
+ patch_dims[1] = 2;
+ patch_dims[2] = 3;
+ patch_dims[3] = 5;
+ Tensor<float, 5, DataLayout> threed_patch;
+ threed_patch = tensor.extract_patches(patch_dims);
+
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_EQUAL(threed_patch.dimension(0), 1);
+ VERIFY_IS_EQUAL(threed_patch.dimension(1), 2);
+ VERIFY_IS_EQUAL(threed_patch.dimension(2), 3);
+ VERIFY_IS_EQUAL(threed_patch.dimension(3), 5);
+ VERIFY_IS_EQUAL(threed_patch.dimension(4), 2*2*3*3);
+ } else {
+ VERIFY_IS_EQUAL(threed_patch.dimension(0), 2*2*3*3);
+ VERIFY_IS_EQUAL(threed_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(threed_patch.dimension(2), 2);
+ VERIFY_IS_EQUAL(threed_patch.dimension(3), 3);
+ VERIFY_IS_EQUAL(threed_patch.dimension(4), 5);
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 3; ++l) {
+ int patch_loc;
+ if (DataLayout == ColMajor) {
+ patch_loc = i + 2 * (j + 2 * (k + 3 * l));
+ } else {
+ patch_loc = l + 3 * (k + 3 * (j + 2 * i));
+ }
+ for (int x = 0; x < 2; ++x) {
+ for (int y = 0; y < 3; ++y) {
+ for (int z = 0; z < 5; ++z) {
+ if (DataLayout == ColMajor) {
+ VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(0,x,y,z,patch_loc));
+ } else {
+ VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(patch_loc,0,x,y,z));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_patch()
+{
+ CALL_SUBTEST(test_simple_patch<ColMajor>());
+ CALL_SUBTEST(test_simple_patch<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_random.cpp b/unsupported/test/cxx11_tensor_random.cpp
new file mode 100644
index 000000000..0f3dc5787
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_random.cpp
@@ -0,0 +1,78 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+static void test_default()
+{
+ Tensor<float, 1> vec(6);
+ vec.setRandom();
+
+ // Fixme: we should check that the generated numbers follow a uniform
+ // distribution instead.
+ for (int i = 1; i < 6; ++i) {
+ VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
+ }
+}
+
+static void test_normal()
+{
+ Tensor<float, 1> vec(6);
+ vec.setRandom<Eigen::internal::NormalRandomGenerator<float>>();
+
+ // Fixme: we should check that the generated numbers follow a gaussian
+ // distribution instead.
+ for (int i = 1; i < 6; ++i) {
+ VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
+ }
+}
+
+
+struct MyGenerator {
+ MyGenerator() { }
+ MyGenerator(const MyGenerator&) { }
+
+  // Return a random value to be used. "element_location" is the
+  // location of the entry to set in the tensor; it can typically
+  // be ignored.
+ int operator()(Eigen::DenseIndex element_location, Eigen::DenseIndex /*unused*/ = 0) const {
+ return static_cast<int>(3 * element_location);
+ }
+
+ // Same as above but generates several numbers at a time.
+ internal::packet_traits<int>::type packetOp(
+ Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
+ const int packetSize = internal::packet_traits<int>::size;
+ EIGEN_ALIGN_MAX int values[packetSize];
+ for (int i = 0; i < packetSize; ++i) {
+ values[i] = static_cast<int>(3 * (packet_location + i));
+ }
+ return internal::pload<typename internal::packet_traits<int>::type>(values);
+ }
+};
+
+
+static void test_custom()
+{
+ Tensor<int, 1> vec(6);
+ vec.setRandom<MyGenerator>();
+
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_EQUAL(vec(i), 3*i);
+ }
+}
+
+void test_cxx11_tensor_random()
+{
+ CALL_SUBTEST(test_default());
+ CALL_SUBTEST(test_normal());
+ CALL_SUBTEST(test_custom());
+}
diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu
new file mode 100644
index 000000000..b3be199e1
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_random_cuda.cu
@@ -0,0 +1,88 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_random_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+
+void test_cuda_random_uniform()
+{
+ Tensor<float, 2> out(72,97);
+ out.setZero();
+
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_out;
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+ gpu_out.device(gpu_device) = gpu_out.random();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  // For now we just check that the code doesn't crash.
+ // TODO: come up with a valid test of randomness
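+  // A minimal sketch of such a check (left disabled): for 72*97 i.i.d. U(0,1)
+  // samples, the empirical mean should land very close to 0.5:
+  //   float mean = 0.0f;
+  //   for (int i = 0; i < out.size(); ++i) mean += out.data()[i];
+  //   mean /= static_cast<float>(out.size());
+  //   VERIFY(std::abs(mean - 0.5f) < 0.05f);  // ~14 standard errors, very loose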
+}
+
+
+void test_cuda_random_normal()
+{
+ Tensor<float, 2> out(72,97);
+ out.setZero();
+
+ std::size_t out_bytes = out.size() * sizeof(float);
+
+ float* d_out;
+ cudaMalloc((void**)(&d_out), out_bytes);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+ Eigen::internal::NormalRandomGenerator<float> gen(true);
+ gpu_out.device(gpu_device) = gpu_out.random(gen);
+
+ assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
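+
+  // Like the uniform test above, this only checks that nothing crashes. A
+  // sketch of a basic sanity check one could enable: the sample mean of
+  // N(0,1) draws should be near 0, e.g.
+  //   Tensor<float, 0> m = out.mean();
+  //   VERIFY(std::abs(m()) < 0.05f);  // ~4 standard errors for 72*97 samples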
+}
+
+static void test_complex()
+{
+ Tensor<std::complex<float>, 1> vec(6);
+ vec.setRandom();
+
+ // Fixme: we should check that the generated numbers follow a uniform
+ // distribution instead.
+ for (int i = 1; i < 6; ++i) {
+ VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
+ }
+}
+
+
+void test_cxx11_tensor_random_cuda()
+{
+ CALL_SUBTEST(test_cuda_random_uniform());
+ CALL_SUBTEST(test_cuda_random_normal());
+ CALL_SUBTEST(test_complex());
+}
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp
new file mode 100644
index 000000000..1490ec3da
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -0,0 +1,508 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <limits>
+#include <numeric>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout>
+static void test_trivial_reductions() {
+ {
+ Tensor<float, 0, DataLayout> tensor;
+ tensor.setRandom();
+ array<ptrdiff_t, 0> reduction_axis;
+
+ Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis);
+ VERIFY_IS_EQUAL(result(), tensor());
+ }
+
+ {
+ Tensor<float, 1, DataLayout> tensor(7);
+ tensor.setRandom();
+ array<ptrdiff_t, 0> reduction_axis;
+
+ Tensor<float, 1, DataLayout> result = tensor.sum(reduction_axis);
+ VERIFY_IS_EQUAL(result.dimension(0), 7);
+ for (int i = 0; i < 7; ++i) {
+ VERIFY_IS_EQUAL(result(i), tensor(i));
+ }
+ }
+
+ {
+ Tensor<float, 2, DataLayout> tensor(2, 3);
+ tensor.setRandom();
+ array<ptrdiff_t, 0> reduction_axis;
+
+ Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 3);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ VERIFY_IS_EQUAL(result(i, j), tensor(i, j));
+ }
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_simple_reductions() {
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+ array<ptrdiff_t, 2> reduction_axis2;
+ reduction_axis2[0] = 1;
+ reduction_axis2[1] = 3;
+
+ Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 5);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ float sum = 0.0f;
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ sum += tensor(i, k, j, l);
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), sum);
+ }
+ }
+
+ {
+ Tensor<float, 0, DataLayout> sum1 = tensor.sum();
+ VERIFY_IS_EQUAL(sum1.rank(), 0);
+
+ array<ptrdiff_t, 4> reduction_axis4;
+ reduction_axis4[0] = 0;
+ reduction_axis4[1] = 1;
+ reduction_axis4[2] = 2;
+ reduction_axis4[3] = 3;
+ Tensor<float, 0, DataLayout> sum2 = tensor.sum(reduction_axis4);
+ VERIFY_IS_EQUAL(sum2.rank(), 0);
+
+ VERIFY_IS_APPROX(sum1(), sum2());
+ }
+
+ reduction_axis2[0] = 0;
+ reduction_axis2[1] = 2;
+ result = tensor.prod(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 3);
+ VERIFY_IS_EQUAL(result.dimension(1), 7);
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ float prod = 1.0f;
+ for (int k = 0; k < 2; ++k) {
+ for (int l = 0; l < 5; ++l) {
+ prod *= tensor(k, i, l, j);
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), prod);
+ }
+ }
+
+ {
+ Tensor<float, 0, DataLayout> prod1 = tensor.prod();
+ VERIFY_IS_EQUAL(prod1.rank(), 0);
+
+ array<ptrdiff_t, 4> reduction_axis4;
+ reduction_axis4[0] = 0;
+ reduction_axis4[1] = 1;
+ reduction_axis4[2] = 2;
+ reduction_axis4[3] = 3;
+ Tensor<float, 0, DataLayout> prod2 = tensor.prod(reduction_axis4);
+ VERIFY_IS_EQUAL(prod2.rank(), 0);
+
+ VERIFY_IS_APPROX(prod1(), prod2());
+ }
+
+ reduction_axis2[0] = 0;
+ reduction_axis2[1] = 2;
+ result = tensor.maximum(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 3);
+ VERIFY_IS_EQUAL(result.dimension(1), 7);
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ float max_val = std::numeric_limits<float>::lowest();
+ for (int k = 0; k < 2; ++k) {
+ for (int l = 0; l < 5; ++l) {
+ max_val = (std::max)(max_val, tensor(k, i, l, j));
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), max_val);
+ }
+ }
+
+ {
+ Tensor<float, 0, DataLayout> max1 = tensor.maximum();
+ VERIFY_IS_EQUAL(max1.rank(), 0);
+
+ array<ptrdiff_t, 4> reduction_axis4;
+ reduction_axis4[0] = 0;
+ reduction_axis4[1] = 1;
+ reduction_axis4[2] = 2;
+ reduction_axis4[3] = 3;
+ Tensor<float, 0, DataLayout> max2 = tensor.maximum(reduction_axis4);
+ VERIFY_IS_EQUAL(max2.rank(), 0);
+
+ VERIFY_IS_APPROX(max1(), max2());
+ }
+
+ reduction_axis2[0] = 0;
+ reduction_axis2[1] = 1;
+ result = tensor.minimum(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 5);
+ VERIFY_IS_EQUAL(result.dimension(1), 7);
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ float min_val = (std::numeric_limits<float>::max)();
+ for (int k = 0; k < 2; ++k) {
+ for (int l = 0; l < 3; ++l) {
+ min_val = (std::min)(min_val, tensor(k, l, i, j));
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), min_val);
+ }
+ }
+
+ {
+ Tensor<float, 0, DataLayout> min1 = tensor.minimum();
+ VERIFY_IS_EQUAL(min1.rank(), 0);
+
+ array<ptrdiff_t, 4> reduction_axis4;
+ reduction_axis4[0] = 0;
+ reduction_axis4[1] = 1;
+ reduction_axis4[2] = 2;
+ reduction_axis4[3] = 3;
+ Tensor<float, 0, DataLayout> min2 = tensor.minimum(reduction_axis4);
+ VERIFY_IS_EQUAL(min2.rank(), 0);
+
+ VERIFY_IS_APPROX(min1(), min2());
+ }
+
+ reduction_axis2[0] = 0;
+ reduction_axis2[1] = 1;
+ result = tensor.mean(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 5);
+ VERIFY_IS_EQUAL(result.dimension(1), 7);
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ float sum = 0.0f;
+ int count = 0;
+ for (int k = 0; k < 2; ++k) {
+ for (int l = 0; l < 3; ++l) {
+ sum += tensor(k, l, i, j);
+ ++count;
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), sum / count);
+ }
+ }
+
+ {
+ Tensor<float, 0, DataLayout> mean1 = tensor.mean();
+ VERIFY_IS_EQUAL(mean1.rank(), 0);
+
+ array<ptrdiff_t, 4> reduction_axis4;
+ reduction_axis4[0] = 0;
+ reduction_axis4[1] = 1;
+ reduction_axis4[2] = 2;
+ reduction_axis4[3] = 3;
+ Tensor<float, 0, DataLayout> mean2 = tensor.mean(reduction_axis4);
+ VERIFY_IS_EQUAL(mean2.rank(), 0);
+
+ VERIFY_IS_APPROX(mean1(), mean2());
+ }
+
+ {
+ Tensor<int, 1> ints(10);
+ std::iota(ints.data(), ints.data() + ints.dimension(0), 0);
+
+ TensorFixedSize<bool, Sizes<> > all;
+ all = ints.all();
+ VERIFY(!all());
+ all = (ints >= ints.constant(0)).all();
+ VERIFY(all());
+
+ TensorFixedSize<bool, Sizes<> > any;
+ any = (ints > ints.constant(10)).any();
+ VERIFY(!any());
+ any = (ints < ints.constant(1)).any();
+ VERIFY(any());
+ }
+}
+
+
+template <int DataLayout>
+static void test_reductions_in_expr() {
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+ array<ptrdiff_t, 2> reduction_axis2;
+ reduction_axis2[0] = 1;
+ reduction_axis2[1] = 3;
+
+ Tensor<float, 2, DataLayout> result(2, 5);
+ result = result.constant(1.0f) - tensor.sum(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 5);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ float sum = 0.0f;
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ sum += tensor(i, k, j, l);
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), 1.0f - sum);
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_full_reductions() {
+ Tensor<float, 2, DataLayout> tensor(2, 3);
+ tensor.setRandom();
+ array<ptrdiff_t, 2> reduction_axis;
+ reduction_axis[0] = 0;
+ reduction_axis[1] = 1;
+
+ Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis);
+ VERIFY_IS_EQUAL(result.rank(), 0);
+
+ float sum = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ sum += tensor(i, j);
+ }
+ }
+ VERIFY_IS_APPROX(result(0), sum);
+
+ result = tensor.square().sum(reduction_axis).sqrt();
+ VERIFY_IS_EQUAL(result.rank(), 0);
+
+ sum = 0.0f;
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ sum += tensor(i, j) * tensor(i, j);
+ }
+ }
+ VERIFY_IS_APPROX(result(), sqrtf(sum));
+}
+
+struct UserReducer {
+ static const bool PacketAccess = false;
+ UserReducer(float offset) : offset_(offset) {}
+ void reduce(const float val, float* accum) { *accum += val * val; }
+ float initialize() const { return 0; }
+ float finalize(const float accum) const { return 1.0f / (accum + offset_); }
+
+ private:
+ const float offset_;
+};
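+
+// For a row {a, b} this reducer computes
+// finalize(reduce(b, reduce(a, initialize()))) = 1.0f / (a*a + b*b + offset_),
+// i.e. the reciprocal of the offset-shifted sum of squares, which is exactly
+// what the loop below recomputes on the host side.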
+
+template <int DataLayout>
+static void test_user_defined_reductions() {
+ Tensor<float, 2, DataLayout> tensor(5, 7);
+ tensor.setRandom();
+ array<ptrdiff_t, 1> reduction_axis;
+ reduction_axis[0] = 1;
+
+ UserReducer reducer(10.0f);
+ Tensor<float, 1, DataLayout> result = tensor.reduce(reduction_axis, reducer);
+ VERIFY_IS_EQUAL(result.dimension(0), 5);
+ for (int i = 0; i < 5; ++i) {
+ float expected = 10.0f;
+ for (int j = 0; j < 7; ++j) {
+ expected += tensor(i, j) * tensor(i, j);
+ }
+ expected = 1.0f / expected;
+ VERIFY_IS_APPROX(result(i), expected);
+ }
+}
+
+template <int DataLayout>
+static void test_tensor_maps() {
+ int inputs[2 * 3 * 5 * 7];
+ TensorMap<Tensor<int, 4, DataLayout> > tensor_map(inputs, 2, 3, 5, 7);
+ TensorMap<Tensor<const int, 4, DataLayout> > tensor_map_const(inputs, 2, 3, 5,
+ 7);
+ const TensorMap<Tensor<const int, 4, DataLayout> > tensor_map_const_const(
+ inputs, 2, 3, 5, 7);
+
+ tensor_map.setRandom();
+ array<ptrdiff_t, 2> reduction_axis;
+ reduction_axis[0] = 1;
+ reduction_axis[1] = 3;
+
+ Tensor<int, 2, DataLayout> result = tensor_map.sum(reduction_axis);
+ Tensor<int, 2, DataLayout> result2 = tensor_map_const.sum(reduction_axis);
+ Tensor<int, 2, DataLayout> result3 =
+ tensor_map_const_const.sum(reduction_axis);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ int sum = 0;
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ sum += tensor_map(i, k, j, l);
+ }
+ }
+ VERIFY_IS_EQUAL(result(i, j), sum);
+ VERIFY_IS_EQUAL(result2(i, j), sum);
+ VERIFY_IS_EQUAL(result3(i, j), sum);
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_static_dims() {
+ Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+ Tensor<float, 2, DataLayout> out(72, 97);
+ in.setRandom();
+
+#if !EIGEN_HAS_CONSTEXPR
+ array<int, 2> reduction_axis;
+ reduction_axis[0] = 1;
+ reduction_axis[1] = 3;
+#else
+ Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > reduction_axis;
+#endif
+
+ out = in.maximum(reduction_axis);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ float expected = -1e10f;
+ for (int k = 0; k < 53; ++k) {
+ for (int l = 0; l < 113; ++l) {
+ expected = (std::max)(expected, in(i, k, j, l));
+ }
+ }
+ VERIFY_IS_APPROX(out(i, j), expected);
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_innermost_last_dims() {
+ Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+ Tensor<float, 2, DataLayout> out(97, 113);
+ in.setRandom();
+
+// Reduce on the innermost dimensions.
+#if !EIGEN_HAS_CONSTEXPR
+ array<int, 2> reduction_axis;
+ reduction_axis[0] = 0;
+ reduction_axis[1] = 1;
+#else
+ // This triggers the use of packets for ColMajor.
+ Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> > reduction_axis;
+#endif
+
+ out = in.maximum(reduction_axis);
+
+ for (int i = 0; i < 97; ++i) {
+ for (int j = 0; j < 113; ++j) {
+ float expected = -1e10f;
+ for (int k = 0; k < 53; ++k) {
+ for (int l = 0; l < 72; ++l) {
+ expected = (std::max)(expected, in(l, k, i, j));
+ }
+ }
+ VERIFY_IS_APPROX(out(i, j), expected);
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_innermost_first_dims() {
+ Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+ Tensor<float, 2, DataLayout> out(72, 53);
+ in.setRandom();
+
+// Reduce on the innermost dimensions.
+#if !EIGEN_HAS_CONSTEXPR
+ array<int, 2> reduction_axis;
+ reduction_axis[0] = 2;
+ reduction_axis[1] = 3;
+#else
+ // This triggers the use of packets for RowMajor.
+ Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>> reduction_axis;
+#endif
+
+ out = in.maximum(reduction_axis);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 53; ++j) {
+ float expected = -1e10f;
+ for (int k = 0; k < 97; ++k) {
+ for (int l = 0; l < 113; ++l) {
+ expected = (std::max)(expected, in(i, j, k, l));
+ }
+ }
+ VERIFY_IS_APPROX(out(i, j), expected);
+ }
+ }
+}
+
+template <int DataLayout>
+static void test_reduce_middle_dims() {
+ Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+ Tensor<float, 2, DataLayout> out(72, 53);
+ in.setRandom();
+
+// Reduce on the middle dimensions.
+#if !EIGEN_HAS_CONSTEXPR
+ array<int, 2> reduction_axis;
+ reduction_axis[0] = 1;
+ reduction_axis[1] = 2;
+#else
+ // This triggers the use of packets for RowMajor.
+ Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>> reduction_axis;
+#endif
+
+ out = in.maximum(reduction_axis);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 113; ++j) {
+ float expected = -1e10f;
+ for (int k = 0; k < 53; ++k) {
+ for (int l = 0; l < 97; ++l) {
+ expected = (std::max)(expected, in(i, k, l, j));
+ }
+ }
+ VERIFY_IS_APPROX(out(i, j), expected);
+ }
+ }
+}
+
+void test_cxx11_tensor_reduction() {
+ CALL_SUBTEST(test_trivial_reductions<ColMajor>());
+ CALL_SUBTEST(test_trivial_reductions<RowMajor>());
+ CALL_SUBTEST(test_simple_reductions<ColMajor>());
+ CALL_SUBTEST(test_simple_reductions<RowMajor>());
+ CALL_SUBTEST(test_reductions_in_expr<ColMajor>());
+ CALL_SUBTEST(test_reductions_in_expr<RowMajor>());
+ CALL_SUBTEST(test_full_reductions<ColMajor>());
+ CALL_SUBTEST(test_full_reductions<RowMajor>());
+ CALL_SUBTEST(test_user_defined_reductions<ColMajor>());
+ CALL_SUBTEST(test_user_defined_reductions<RowMajor>());
+ CALL_SUBTEST(test_tensor_maps<ColMajor>());
+ CALL_SUBTEST(test_tensor_maps<RowMajor>());
+ CALL_SUBTEST(test_static_dims<ColMajor>());
+ CALL_SUBTEST(test_static_dims<RowMajor>());
+ CALL_SUBTEST(test_innermost_last_dims<ColMajor>());
+ CALL_SUBTEST(test_innermost_last_dims<RowMajor>());
+ CALL_SUBTEST(test_innermost_first_dims<ColMajor>());
+ CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
+ CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
+ CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu
new file mode 100644
index 000000000..6858b43a7
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu
@@ -0,0 +1,157 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+
+template<typename Type, int DataLayout>
+static void test_full_reductions() {
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ const int num_rows = internal::random<int>(1024, 5*1024);
+ const int num_cols = internal::random<int>(1024, 5*1024);
+
+ Tensor<Type, 2, DataLayout> in(num_rows, num_cols);
+ in.setRandom();
+
+ Tensor<Type, 0, DataLayout> full_redux;
+ full_redux = in.sum();
+
+ std::size_t in_bytes = in.size() * sizeof(Type);
+ std::size_t out_bytes = full_redux.size() * sizeof(Type);
+ Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes));
+ Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes));
+ gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
+
+ TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
+ TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr);
+
+ out_gpu.device(gpu_device) = in_gpu.sum();
+
+ Tensor<Type, 0, DataLayout> full_redux_gpu;
+ gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
+ gpu_device.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
+
+ gpu_device.deallocate(gpu_in_ptr);
+ gpu_device.deallocate(gpu_out_ptr);
+}
+
+template<typename Type, int DataLayout>
+static void test_first_dim_reductions() {
+ int dim_x = 33;
+ int dim_y = 1;
+ int dim_z = 128;
+
+ Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
+ in.setRandom();
+
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 0;
+ Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
+
+ // Create device
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice dev(&stream);
+
+ // Create data(T)
+ Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
+ Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type));
+ Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
+ Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z);
+
+ // Perform operation
+ dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
+ gpu_out.device(dev) = gpu_in.sum(red_axis);
+ gpu_out.device(dev) += gpu_in.sum(red_axis);
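+  // The accumulating assignment doubles every coefficient, presumably to
+  // exercise the += evaluation path; the host-side check below therefore
+  // compares against 2*redux.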
+ Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z);
+ dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
+ dev.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ for (int i = 0; i < gpu_out.size(); ++i) {
+ VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
+ }
+
+ dev.deallocate(in_data);
+ dev.deallocate(out_data);
+}
+
+template<typename Type, int DataLayout>
+static void test_last_dim_reductions() {
+ int dim_x = 128;
+ int dim_y = 1;
+ int dim_z = 33;
+
+ Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
+ in.setRandom();
+
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 2;
+ Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
+
+ // Create device
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice dev(&stream);
+
+ // Create data
+ Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
+ Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type));
+ Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
+ Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y);
+
+ // Perform operation
+ dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
+ gpu_out.device(dev) = gpu_in.sum(red_axis);
+ gpu_out.device(dev) += gpu_in.sum(red_axis);
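+  // As in the first-dim test, the += doubles the result, hence the 2*redux
+  // comparison below.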
+ Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y);
+ dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
+ dev.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ for (int i = 0; i < gpu_out.size(); ++i) {
+ VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
+ }
+
+ dev.deallocate(in_data);
+ dev.deallocate(out_data);
+}
+
+
+void test_cxx11_tensor_reduction_cuda() {
+ CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
+ CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
+ CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));
+ CALL_SUBTEST_2((test_full_reductions<double, RowMajor>()));
+
+ CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>()));
+ CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>()));
+ CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>()));
+// Outer reductions of doubles aren't supported just yet.
+// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>()))
+
+ CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>()));
+// Outer reductions of doubles aren't supported just yet.
+// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>()));
+ CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>()));
+ CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>()));
+}
diff --git a/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/unsupported/test/cxx11_tensor_reduction_sycl.cpp
new file mode 100644
index 000000000..a9ef82907
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_reduction_sycl.cpp
@@ -0,0 +1,138 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+
+
+static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) {
+
+ const int num_rows = 452;
+ const int num_cols = 765;
+ array<int, 2> tensorRange = {{num_rows, num_cols}};
+
+ Tensor<float, 2> in(tensorRange);
+ Tensor<float, 0> full_redux;
+ Tensor<float, 0> full_redux_gpu;
+
+ in.setRandom();
+
+ full_redux = in.sum();
+
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data = static_cast<float*>(sycl_device.allocate(sizeof(float)));
+
+ TensorMap<Tensor<float, 2> > in_gpu(gpu_in_data, tensorRange);
+ TensorMap<Tensor<float, 0> > out_gpu(gpu_out_data);
+
+ sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+ out_gpu.device(sycl_device) = in_gpu.sum();
+ sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float));
+ // Check that the CPU and GPU reductions return the same result.
+ VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
+
+ sycl_device.deallocate(gpu_in_data);
+ sycl_device.deallocate(gpu_out_data);
+}
+
+static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) {
+
+ int dim_x = 145;
+ int dim_y = 1;
+ int dim_z = 67;
+
+ array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 0;
+ array<int, 2> reduced_tensorRange = {{dim_y, dim_z}};
+
+ Tensor<float, 3> in(tensorRange);
+ Tensor<float, 2> redux(reduced_tensorRange);
+ Tensor<float, 2> redux_gpu(reduced_tensorRange);
+
+ in.setRandom();
+
+  redux = in.sum(red_axis);
+
+ float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+ float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
+
+ TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange);
+ TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange);
+
+ sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+ out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
+ sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));
+
+ // Check that the CPU and GPU reductions return the same result.
+  for (int j = 0; j < reduced_tensorRange[0]; j++)
+    for (int k = 0; k < reduced_tensorRange[1]; k++)
+      VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
+
+ sycl_device.deallocate(gpu_in_data);
+ sycl_device.deallocate(gpu_out_data);
+}
+
+static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) {
+
+ int dim_x = 567;
+ int dim_y = 1;
+ int dim_z = 47;
+
+ array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
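+  // Reducing along axis 2 collapses dim_z, leaving a (dim_x, dim_y) result.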
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 2;
+ array<int, 2> reduced_tensorRange = {{dim_x, dim_y}};
+
+ Tensor<float, 3> in(tensorRange);
+ Tensor<float, 2> redux(reduced_tensorRange);
+ Tensor<float, 2> redux_gpu(reduced_tensorRange);
+
+ in.setRandom();
+
+  redux = in.sum(red_axis);
+
+ float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+ float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
+
+ TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange);
+ TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange);
+
+ sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+ out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
+ sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));
+ // Check that the CPU and GPU reductions return the same result.
+  for (int j = 0; j < reduced_tensorRange[0]; j++)
+    for (int k = 0; k < reduced_tensorRange[1]; k++)
+      VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
+
+ sycl_device.deallocate(gpu_in_data);
+ sycl_device.deallocate(gpu_out_data);
+}
+
+void test_cxx11_tensor_reduction_sycl() {
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST((test_full_reductions_sycl(sycl_device)));
+ CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device)));
+ CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device)));
+}
diff --git a/unsupported/test/cxx11_tensor_ref.cpp b/unsupported/test/cxx11_tensor_ref.cpp
new file mode 100644
index 000000000..c8f105e3d
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_ref.cpp
@@ -0,0 +1,248 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_simple_lvalue_ref()
+{
+ Tensor<int, 1> input(6);
+ input.setRandom();
+
+ TensorRef<Tensor<int, 1>> ref3(input);
+ TensorRef<Tensor<int, 1>> ref4 = input;
+
+ VERIFY_IS_EQUAL(ref3.data(), input.data());
+ VERIFY_IS_EQUAL(ref4.data(), input.data());
+
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_EQUAL(ref3(i), input(i));
+ VERIFY_IS_EQUAL(ref4(i), input(i));
+ }
+
+ for (int i = 0; i < 6; ++i) {
+ ref3.coeffRef(i) = i;
+ }
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_EQUAL(input(i), i);
+ }
+ for (int i = 0; i < 6; ++i) {
+ ref4.coeffRef(i) = -i * 2;
+ }
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_EQUAL(input(i), -i*2);
+ }
+}
+
+
+static void test_simple_rvalue_ref()
+{
+ Tensor<int, 1> input1(6);
+ input1.setRandom();
+ Tensor<int, 1> input2(6);
+ input2.setRandom();
+
+ TensorRef<Tensor<int, 1>> ref3(input1 + input2);
+ TensorRef<Tensor<int, 1>> ref4 = input1 + input2;
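+  // Binding a ref to an rvalue expression forces evaluation into a buffer
+  // owned by the ref, so its data pointer differs from both inputs.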
+
+ VERIFY_IS_NOT_EQUAL(ref3.data(), input1.data());
+ VERIFY_IS_NOT_EQUAL(ref4.data(), input1.data());
+ VERIFY_IS_NOT_EQUAL(ref3.data(), input2.data());
+ VERIFY_IS_NOT_EQUAL(ref4.data(), input2.data());
+
+ for (int i = 0; i < 6; ++i) {
+ VERIFY_IS_EQUAL(ref3(i), input1(i) + input2(i));
+ VERIFY_IS_EQUAL(ref4(i), input1(i) + input2(i));
+ }
+}
+
+
+static void test_multiple_dims()
+{
+ Tensor<float, 3> input(3,5,7);
+ input.setRandom();
+
+ TensorRef<Tensor<float, 3>> ref(input);
+ VERIFY_IS_EQUAL(ref.data(), input.data());
+ VERIFY_IS_EQUAL(ref.dimension(0), 3);
+ VERIFY_IS_EQUAL(ref.dimension(1), 5);
+ VERIFY_IS_EQUAL(ref.dimension(2), 7);
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(ref(i,j,k), input(i,j,k));
+ }
+ }
+ }
+}
+
+
+static void test_slice()
+{
+ Tensor<float, 5> tensor(2,3,5,7,11);
+ tensor.setRandom();
+
+ Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5);
+ Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1);
+ TensorRef<Tensor<float, 5>> slice = tensor.slice(indices, sizes);
+ VERIFY_IS_EQUAL(slice(0,0,0,0,0), tensor(1,2,3,4,5));
+
+ Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5);
+ Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3);
+ slice = tensor.slice(indices2, sizes2);
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
+ }
+ }
+ }
+
+ Eigen::DSizes<ptrdiff_t, 5> indices3(0,0,0,0,0);
+ Eigen::DSizes<ptrdiff_t, 5> sizes3(2,3,1,1,1);
+ slice = tensor.slice(indices3, sizes3);
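+  // A slice anchored at the origin that spans the full leading extents is
+  // contiguous, so the ref can alias the original buffer directly.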
+ VERIFY_IS_EQUAL(slice.data(), tensor.data());
+}
+
+
+static void test_ref_of_ref()
+{
+ Tensor<float, 3> input(3,5,7);
+ input.setRandom();
+
+ TensorRef<Tensor<float, 3>> ref(input);
+ TensorRef<Tensor<float, 3>> ref_of_ref(ref);
+ TensorRef<Tensor<float, 3>> ref_of_ref2;
+ ref_of_ref2 = ref;
+
+ VERIFY_IS_EQUAL(ref_of_ref.data(), input.data());
+ VERIFY_IS_EQUAL(ref_of_ref.dimension(0), 3);
+ VERIFY_IS_EQUAL(ref_of_ref.dimension(1), 5);
+ VERIFY_IS_EQUAL(ref_of_ref.dimension(2), 7);
+
+ VERIFY_IS_EQUAL(ref_of_ref2.data(), input.data());
+ VERIFY_IS_EQUAL(ref_of_ref2.dimension(0), 3);
+ VERIFY_IS_EQUAL(ref_of_ref2.dimension(1), 5);
+ VERIFY_IS_EQUAL(ref_of_ref2.dimension(2), 7);
+
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(ref_of_ref(i,j,k), input(i,j,k));
+ VERIFY_IS_EQUAL(ref_of_ref2(i,j,k), input(i,j,k));
+ }
+ }
+ }
+}
+
+
+static void test_ref_in_expr()
+{
+ Tensor<float, 3> input(3,5,7);
+ input.setRandom();
+ TensorRef<Tensor<float, 3>> input_ref(input);
+
+ Tensor<float, 3> result(3,5,7);
+ result.setRandom();
+ TensorRef<Tensor<float, 3>> result_ref(result);
+
+ Tensor<float, 3> bias(3,5,7);
+ bias.setRandom();
+
+ result_ref = input_ref + bias;
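+  // Assigning an expression to a TensorRef rebinds the ref to the evaluated
+  // result; `result` itself is only updated by the explicit copy below.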
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(result_ref(i,j,k), input(i,j,k) + bias(i,j,k));
+ VERIFY_IS_NOT_EQUAL(result(i,j,k), input(i,j,k) + bias(i,j,k));
+ }
+ }
+ }
+
+ result = result_ref;
+ for (int i = 0; i < 3; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_EQUAL(result(i,j,k), input(i,j,k) + bias(i,j,k));
+ }
+ }
+ }
+}
+
+
+static void test_coeff_ref()
+{
+ Tensor<float, 5> tensor(2,3,5,7,11);
+ tensor.setRandom();
+ Tensor<float, 5> original = tensor;
+
+ TensorRef<Tensor<float, 4>> slice = tensor.chip(7, 4);
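+  // chip(7, 4) views the 8th slice along the last dimension; coeffRef writes
+  // through the ref into the underlying tensor.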
+ slice.coeffRef(0, 0, 0, 0) = 1.0f;
+ slice.coeffRef(1, 0, 0, 0) += 2.0f;
+
+ VERIFY_IS_EQUAL(tensor(0,0,0,0,7), 1.0f);
+ VERIFY_IS_EQUAL(tensor(1,0,0,0,7), original(1,0,0,0,7) + 2.0f);
+}
+
+
+static void test_nested_ops_with_ref()
+{
+ Tensor<float, 4> t(2, 3, 5, 7);
+ t.setRandom();
+ TensorMap<Tensor<const float, 4> > m(t.data(), 2, 3, 5, 7);
+ array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
+ paddings[0] = std::make_pair(0, 0);
+ paddings[1] = std::make_pair(2, 1);
+ paddings[2] = std::make_pair(3, 4);
+ paddings[3] = std::make_pair(0, 0);
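+  // pad() grows dim 1 by 2+1 and dim 2 by 3+4, so the ref'd expression has
+  // shape (2, 6, 12, 7).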
+ DSizes<Eigen::DenseIndex, 4> shuffle_dims(0, 1, 2, 3);
+ TensorRef<Tensor<const float, 4> > ref(m.pad(paddings));
+ array<std::pair<ptrdiff_t, ptrdiff_t>, 4> trivial;
+ trivial[0] = std::make_pair(0, 0);
+ trivial[1] = std::make_pair(0, 0);
+ trivial[2] = std::make_pair(0, 0);
+ trivial[3] = std::make_pair(0, 0);
+ Tensor<float, 4> padded = ref.shuffle(shuffle_dims).pad(trivial);
+ VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
+ VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
+ VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
+ VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ for (int k = 0; k < 12; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ if (j >= 2 && j < 5 && k >= 3 && k < 8) {
+ VERIFY_IS_EQUAL(padded(i,j,k,l), t(i,j-2,k-3,l));
+ } else {
+ VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
+ }
+ }
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_ref()
+{
+ CALL_SUBTEST(test_simple_lvalue_ref());
+ CALL_SUBTEST(test_simple_rvalue_ref());
+ CALL_SUBTEST(test_multiple_dims());
+ CALL_SUBTEST(test_slice());
+ CALL_SUBTEST(test_ref_of_ref());
+ CALL_SUBTEST(test_ref_in_expr());
+ CALL_SUBTEST(test_coeff_ref());
+ CALL_SUBTEST(test_nested_ops_with_ref());
+}
diff --git a/unsupported/test/cxx11_tensor_reverse.cpp b/unsupported/test/cxx11_tensor_reverse.cpp
new file mode 100644
index 000000000..b35b8d29e
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_reverse.cpp
@@ -0,0 +1,190 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com> and
+// Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::array;
+
+template <int DataLayout>
+static void test_simple_reverse()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ array<bool, 4> dim_rev;
+ dim_rev[0] = false;
+ dim_rev[1] = true;
+ dim_rev[2] = true;
+ dim_rev[3] = false;
+
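+  // Reversing dims 1 and 2 maps (i,j,k,l) to (i, 2-j, 4-k, l), as checked
+  // below.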
+ Tensor<float, 4, DataLayout> reversed_tensor;
+ reversed_tensor = tensor.reverse(dim_rev);
+
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l));
+ }
+ }
+ }
+ }
+
+ dim_rev[0] = true;
+ dim_rev[1] = false;
+ dim_rev[2] = false;
+ dim_rev[3] = false;
+
+ reversed_tensor = tensor.reverse(dim_rev);
+
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l));
+ }
+ }
+ }
+ }
+
+ dim_rev[0] = true;
+ dim_rev[1] = false;
+ dim_rev[2] = false;
+ dim_rev[3] = true;
+
+ reversed_tensor = tensor.reverse(dim_rev);
+
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5);
+ VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l));
+ }
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_expr_reverse(bool LValue)
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ array<bool, 4> dim_rev;
+ dim_rev[0] = false;
+ dim_rev[1] = true;
+ dim_rev[2] = false;
+ dim_rev[3] = true;
+
+ Tensor<float, 4, DataLayout> expected(2, 3, 5, 7);
+ if (LValue) {
+ expected.reverse(dim_rev) = tensor;
+ } else {
+ expected = tensor.reverse(dim_rev);
+ }
+
+ Tensor<float, 4, DataLayout> result(2,3,5,7);
+
+ array<ptrdiff_t, 4> src_slice_dim;
+ src_slice_dim[0] = 2;
+ src_slice_dim[1] = 3;
+ src_slice_dim[2] = 1;
+ src_slice_dim[3] = 7;
+ array<ptrdiff_t, 4> src_slice_start;
+ src_slice_start[0] = 0;
+ src_slice_start[1] = 0;
+ src_slice_start[2] = 0;
+ src_slice_start[3] = 0;
+ array<ptrdiff_t, 4> dst_slice_dim = src_slice_dim;
+ array<ptrdiff_t, 4> dst_slice_start = src_slice_start;
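+  // Build the result one dim-2 slab at a time; dim 2 itself is not reversed,
+  // so slab-wise reversal must agree with reversing the whole tensor.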
+
+ for (int i = 0; i < 5; ++i) {
+ if (LValue) {
+ result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) =
+ tensor.slice(src_slice_start, src_slice_dim);
+ } else {
+ result.slice(dst_slice_start, dst_slice_dim) =
+ tensor.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
+ }
+ src_slice_start[2] += 1;
+ dst_slice_start[2] += 1;
+ }
+
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 3);
+ VERIFY_IS_EQUAL(result.dimension(2), 5);
+ VERIFY_IS_EQUAL(result.dimension(3), 7);
+
+ for (int i = 0; i < expected.dimension(0); ++i) {
+ for (int j = 0; j < expected.dimension(1); ++j) {
+ for (int k = 0; k < expected.dimension(2); ++k) {
+ for (int l = 0; l < expected.dimension(3); ++l) {
+ VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
+ }
+ }
+ }
+ }
+
+ dst_slice_start[2] = 0;
+ result.setRandom();
+ for (int i = 0; i < 5; ++i) {
+ if (LValue) {
+ result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) =
+ tensor.slice(dst_slice_start, dst_slice_dim);
+ } else {
+ result.slice(dst_slice_start, dst_slice_dim) =
+ tensor.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
+ }
+ dst_slice_start[2] += 1;
+ }
+
+ for (int i = 0; i < expected.dimension(0); ++i) {
+ for (int j = 0; j < expected.dimension(1); ++j) {
+ for (int k = 0; k < expected.dimension(2); ++k) {
+ for (int l = 0; l < expected.dimension(3); ++l) {
+ VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
+ }
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_reverse()
+{
+ CALL_SUBTEST(test_simple_reverse<ColMajor>());
+ CALL_SUBTEST(test_simple_reverse<RowMajor>());
+ CALL_SUBTEST(test_expr_reverse<ColMajor>(true));
+ CALL_SUBTEST(test_expr_reverse<RowMajor>(true));
+ CALL_SUBTEST(test_expr_reverse<ColMajor>(false));
+ CALL_SUBTEST(test_expr_reverse<RowMajor>(false));
+}
diff --git a/unsupported/test/cxx11_tensor_roundings.cpp b/unsupported/test/cxx11_tensor_roundings.cpp
new file mode 100644
index 000000000..2c26151ab
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_roundings.cpp
@@ -0,0 +1,62 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
+static void test_float_rounding()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
+
+ Tensor<float, 2> result = ftensor.round();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(result(i,j), numext::round(ftensor(i,j)));
+ }
+ }
+}
+
+static void test_float_flooring()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
+
+ Tensor<float, 2> result = ftensor.floor();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(result(i,j), numext::floor(ftensor(i,j)));
+ }
+ }
+}
+
+static void test_float_ceiling()
+{
+ Tensor<float, 2> ftensor(20,30);
+ ftensor = ftensor.random() * 100.f;
+
+ Tensor<float, 2> result = ftensor.ceil();
+
+ for (int i = 0; i < 20; ++i) {
+ for (int j = 0; j < 30; ++j) {
+ VERIFY_IS_EQUAL(result(i,j), numext::ceil(ftensor(i,j)));
+ }
+ }
+}
+
+void test_cxx11_tensor_roundings()
+{
+ CALL_SUBTEST(test_float_rounding());
+ CALL_SUBTEST(test_float_ceiling());
+ CALL_SUBTEST(test_float_flooring());
+}
diff --git a/unsupported/test/cxx11_tensor_scan.cpp b/unsupported/test/cxx11_tensor_scan.cpp
new file mode 100644
index 000000000..af59aa3ef
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_scan.cpp
@@ -0,0 +1,110 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <limits>
+#include <numeric>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template <int DataLayout, typename Type = float, bool Exclusive = false>
+static void test_1d_scan()
+{
+ int size = 50;
+ Tensor<Type, 1, DataLayout> tensor(size);
+ tensor.setRandom();
+ Tensor<Type, 1, DataLayout> result = tensor.cumsum(0, Exclusive);
+
+ VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0));
+
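+  // An exclusive scan accumulates *before* the current element (result(0) is
+  // the identity); an inclusive scan accumulates after, as checked below.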
+  Type accum = 0;
+ for (int i = 0; i < size; i++) {
+ if (Exclusive) {
+ VERIFY_IS_EQUAL(result(i), accum);
+ accum += tensor(i);
+ } else {
+ accum += tensor(i);
+ VERIFY_IS_EQUAL(result(i), accum);
+ }
+ }
+
+ accum = 1;
+ result = tensor.cumprod(0, Exclusive);
+ for (int i = 0; i < size; i++) {
+ if (Exclusive) {
+ VERIFY_IS_EQUAL(result(i), accum);
+ accum *= tensor(i);
+ } else {
+ accum *= tensor(i);
+ VERIFY_IS_EQUAL(result(i), accum);
+ }
+ }
+}
+
+template <int DataLayout, typename Type = float>
+static void test_4d_scan()
+{
+ int size = 5;
+ Tensor<Type, 4, DataLayout> tensor(size, size, size, size);
+ tensor.setRandom();
+
+ Tensor<Type, 4, DataLayout> result(size, size, size, size);
+
+ result = tensor.cumsum(0);
+  Type accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(i, 1, 2, 3);
+ VERIFY_IS_EQUAL(result(i, 1, 2, 3), accum);
+ }
+ result = tensor.cumsum(1);
+ accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(1, i, 2, 3);
+ VERIFY_IS_EQUAL(result(1, i, 2, 3), accum);
+ }
+ result = tensor.cumsum(2);
+ accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(1, 2, i, 3);
+ VERIFY_IS_EQUAL(result(1, 2, i, 3), accum);
+ }
+ result = tensor.cumsum(3);
+ accum = 0;
+ for (int i = 0; i < size; i++) {
+ accum += tensor(1, 2, 3, i);
+ VERIFY_IS_EQUAL(result(1, 2, 3, i), accum);
+ }
+}
+
+template <int DataLayout>
+static void test_tensor_maps() {
+ int inputs[20];
+ TensorMap<Tensor<int, 1, DataLayout> > tensor_map(inputs, 20);
+ tensor_map.setRandom();
+
+ Tensor<int, 1, DataLayout> result = tensor_map.cumsum(0);
+
+ int accum = 0;
+ for (int i = 0; i < 20; ++i) {
+ accum += tensor_map(i);
+ VERIFY_IS_EQUAL(result(i), accum);
+ }
+}
+
+void test_cxx11_tensor_scan() {
+ CALL_SUBTEST((test_1d_scan<ColMajor, float, true>()));
+ CALL_SUBTEST((test_1d_scan<ColMajor, float, false>()));
+ CALL_SUBTEST((test_1d_scan<RowMajor, float, true>()));
+ CALL_SUBTEST((test_1d_scan<RowMajor, float, false>()));
+ CALL_SUBTEST(test_4d_scan<ColMajor>());
+ CALL_SUBTEST(test_4d_scan<RowMajor>());
+ CALL_SUBTEST(test_tensor_maps<ColMajor>());
+ CALL_SUBTEST(test_tensor_maps<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu
new file mode 100644
index 000000000..5f146f3c9
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_scan_cuda.cu
@@ -0,0 +1,79 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_scan_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+typedef Tensor<float, 1>::DimensionPair DimPair;
+
+template<int DataLayout>
+void test_cuda_cumsum(int m_size, int k_size, int n_size)
+{
+ std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
+ Tensor<float, 3, DataLayout> t_input(m_size, k_size, n_size);
+ Tensor<float, 3, DataLayout> t_result(m_size, k_size, n_size);
+ Tensor<float, 3, DataLayout> t_result_gpu(m_size, k_size, n_size);
+
+ t_input.setRandom();
+
+ std::size_t t_input_bytes = t_input.size() * sizeof(float);
+ std::size_t t_result_bytes = t_result.size() * sizeof(float);
+
+ float* d_t_input;
+ float* d_t_result;
+
+ cudaMalloc((void**)(&d_t_input), t_input_bytes);
+ cudaMalloc((void**)(&d_t_result), t_result_bytes);
+
+ cudaMemcpy(d_t_input, t_input.data(), t_input_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> >
+ gpu_t_input(d_t_input, Eigen::array<int, 3>(m_size, k_size, n_size));
+ Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> >
+ gpu_t_result(d_t_result, Eigen::array<int, 3>(m_size, k_size, n_size));
+
+ gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1);
+ t_result = t_input.cumsum(1);
+
+ cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost);
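+  // Accept either a small absolute error or a small relative error; only a
+  // genuine mismatch reaches the assert below.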
+ for (DenseIndex i = 0; i < t_result.size(); i++) {
+ if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) {
+ continue;
+ }
+ if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) {
+ continue;
+ }
+ std::cout << "mismatch detected at index " << i << ": " << t_result(i)
+ << " vs " << t_result_gpu(i) << std::endl;
+ assert(false);
+ }
+
+ cudaFree((void*)d_t_input);
+ cudaFree((void*)d_t_result);
+}
+
+
+void test_cxx11_tensor_scan_cuda()
+{
+ CALL_SUBTEST_1(test_cuda_cumsum<ColMajor>(128, 128, 128));
+ CALL_SUBTEST_2(test_cuda_cumsum<RowMajor>(128, 128, 128));
+}
diff --git a/unsupported/test/cxx11_tensor_shuffling.cpp b/unsupported/test/cxx11_tensor_shuffling.cpp
new file mode 100644
index 000000000..d11444a14
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_shuffling.cpp
@@ -0,0 +1,228 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::array;
+
+template <int DataLayout>
+static void test_simple_shuffling()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> shuffles;
+ shuffles[0] = 0;
+ shuffles[1] = 1;
+ shuffles[2] = 2;
+ shuffles[3] = 3;
+
+ Tensor<float, 4, DataLayout> no_shuffle;
+ no_shuffle = tensor.shuffle(shuffles);
+
+ VERIFY_IS_EQUAL(no_shuffle.dimension(0), 2);
+ VERIFY_IS_EQUAL(no_shuffle.dimension(1), 3);
+ VERIFY_IS_EQUAL(no_shuffle.dimension(2), 5);
+ VERIFY_IS_EQUAL(no_shuffle.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
+ }
+ }
+ }
+ }
+
+ shuffles[0] = 2;
+ shuffles[1] = 3;
+ shuffles[2] = 1;
+ shuffles[3] = 0;
+ Tensor<float, 4, DataLayout> shuffle;
+ shuffle = tensor.shuffle(shuffles);
+
+ VERIFY_IS_EQUAL(shuffle.dimension(0), 5);
+ VERIFY_IS_EQUAL(shuffle.dimension(1), 7);
+ VERIFY_IS_EQUAL(shuffle.dimension(2), 3);
+ VERIFY_IS_EQUAL(shuffle.dimension(3), 2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
+ }
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_expr_shuffling()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ array<ptrdiff_t, 4> shuffles;
+ shuffles[0] = 2;
+ shuffles[1] = 3;
+ shuffles[2] = 1;
+ shuffles[3] = 0;
+ Tensor<float, 4, DataLayout> expected;
+ expected = tensor.shuffle(shuffles);
+
+ Tensor<float, 4, DataLayout> result(5,7,3,2);
+
+ array<int, 4> src_slice_dim{{2,3,1,7}};
+ array<int, 4> src_slice_start{{0,0,0,0}};
+ array<int, 4> dst_slice_dim{{1,7,3,2}};
+ array<int, 4> dst_slice_start{{0,0,0,0}};
+
+ for (int i = 0; i < 5; ++i) {
+ result.slice(dst_slice_start, dst_slice_dim) =
+ tensor.slice(src_slice_start, src_slice_dim).shuffle(shuffles);
+ src_slice_start[2] += 1;
+ dst_slice_start[0] += 1;
+ }
+
+ VERIFY_IS_EQUAL(result.dimension(0), 5);
+ VERIFY_IS_EQUAL(result.dimension(1), 7);
+ VERIFY_IS_EQUAL(result.dimension(2), 3);
+ VERIFY_IS_EQUAL(result.dimension(3), 2);
+
+ for (int i = 0; i < expected.dimension(0); ++i) {
+ for (int j = 0; j < expected.dimension(1); ++j) {
+ for (int k = 0; k < expected.dimension(2); ++k) {
+ for (int l = 0; l < expected.dimension(3); ++l) {
+ VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
+ }
+ }
+ }
+ }
+
+ dst_slice_start[0] = 0;
+ result.setRandom();
+ for (int i = 0; i < 5; ++i) {
+ result.slice(dst_slice_start, dst_slice_dim) =
+ tensor.shuffle(shuffles).slice(dst_slice_start, dst_slice_dim);
+ dst_slice_start[0] += 1;
+ }
+
+ for (int i = 0; i < expected.dimension(0); ++i) {
+ for (int j = 0; j < expected.dimension(1); ++j) {
+ for (int k = 0; k < expected.dimension(2); ++k) {
+ for (int l = 0; l < expected.dimension(3); ++l) {
+ VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
+ }
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_shuffling_as_value()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> shuffles;
+ shuffles[2] = 0;
+ shuffles[3] = 1;
+ shuffles[1] = 2;
+ shuffles[0] = 3;
+ Tensor<float, 4, DataLayout> shuffle(5,7,3,2);
+ shuffle.shuffle(shuffles) = tensor;
+
+ VERIFY_IS_EQUAL(shuffle.dimension(0), 5);
+ VERIFY_IS_EQUAL(shuffle.dimension(1), 7);
+ VERIFY_IS_EQUAL(shuffle.dimension(2), 3);
+ VERIFY_IS_EQUAL(shuffle.dimension(3), 2);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
+ }
+ }
+ }
+ }
+
+ array<ptrdiff_t, 4> no_shuffle;
+ no_shuffle[0] = 0;
+ no_shuffle[1] = 1;
+ no_shuffle[2] = 2;
+ no_shuffle[3] = 3;
+ Tensor<float, 4, DataLayout> shuffle2(5,7,3,2);
+ shuffle2.shuffle(shuffles) = tensor.shuffle(no_shuffle);
+ for (int i = 0; i < 5; ++i) {
+ for (int j = 0; j < 7; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 2; ++l) {
+ VERIFY_IS_EQUAL(shuffle2(i,j,k,l), shuffle(i,j,k,l));
+ }
+ }
+ }
+ }
+}
+
+
+template <int DataLayout>
+static void test_shuffle_unshuffle()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+
+ // Choose a random permutation.
+ array<ptrdiff_t, 4> shuffles;
+ for (int i = 0; i < 4; ++i) {
+ shuffles[i] = i;
+ }
+ array<ptrdiff_t, 4> shuffles_inverse;
+ for (int i = 0; i < 4; ++i) {
+ const ptrdiff_t index = internal::random<ptrdiff_t>(i, 3);
+ shuffles_inverse[shuffles[index]] = i;
+ std::swap(shuffles[i], shuffles[index]);
+ }
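+  // The loop above is a Fisher-Yates shuffle that also records the inverse
+  // permutation: after each swap, position i holds shuffles[index], whose
+  // preimage is i.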
+
+ Tensor<float, 4, DataLayout> shuffle;
+ shuffle = tensor.shuffle(shuffles).shuffle(shuffles_inverse);
+
+ VERIFY_IS_EQUAL(shuffle.dimension(0), 2);
+ VERIFY_IS_EQUAL(shuffle.dimension(1), 3);
+ VERIFY_IS_EQUAL(shuffle.dimension(2), 5);
+ VERIFY_IS_EQUAL(shuffle.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(i,j,k,l));
+ }
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_shuffling()
+{
+ CALL_SUBTEST(test_simple_shuffling<ColMajor>());
+ CALL_SUBTEST(test_simple_shuffling<RowMajor>());
+ CALL_SUBTEST(test_expr_shuffling<ColMajor>());
+ CALL_SUBTEST(test_expr_shuffling<RowMajor>());
+ CALL_SUBTEST(test_shuffling_as_value<ColMajor>());
+ CALL_SUBTEST(test_shuffling_as_value<RowMajor>());
+ CALL_SUBTEST(test_shuffle_unshuffle<ColMajor>());
+ CALL_SUBTEST(test_shuffle_unshuffle<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp
new file mode 100644
index 000000000..5a0d339ef
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_simple.cpp
@@ -0,0 +1,327 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_0d()
+{
+ Tensor<int, 0> scalar1;
+ Tensor<int, 0, RowMajor> scalar2;
+ Tensor<int, 0> scalar3;
+ Tensor<int, 0, RowMajor> scalar4;
+
+ scalar3.resize();
+ scalar4.resize();
+
+ scalar1() = 7;
+ scalar2() = 13;
+ scalar3.setValues(17);
+ scalar4.setZero();
+
+ VERIFY_IS_EQUAL(scalar1.rank(), 0);
+ VERIFY_IS_EQUAL(scalar1.size(), 1);
+
+ VERIFY_IS_EQUAL(scalar1(), 7);
+ VERIFY_IS_EQUAL(scalar2(), 13);
+ VERIFY_IS_EQUAL(scalar3(), 17);
+ VERIFY_IS_EQUAL(scalar4(), 0);
+
+ Tensor<int, 0> scalar5(scalar1);
+
+ VERIFY_IS_EQUAL(scalar5(), 7);
+ VERIFY_IS_EQUAL(scalar5.data()[0], 7);
+}
+
+static void test_1d()
+{
+ Tensor<int, 1> vec1(6);
+ Tensor<int, 1, RowMajor> vec2(6);
+ Tensor<int, 1> vec3;
+ Tensor<int, 1, RowMajor> vec4;
+
+ vec3.resize(6);
+ vec4.resize(6);
+
+ vec1(0) = 4; vec2(0) = 0; vec3(0) = 5;
+ vec1(1) = 8; vec2(1) = 1; vec3(1) = 4;
+ vec1(2) = 15; vec2(2) = 2; vec3(2) = 3;
+ vec1(3) = 16; vec2(3) = 3; vec3(3) = 2;
+ vec1(4) = 23; vec2(4) = 4; vec3(4) = 1;
+ vec1(5) = 42; vec2(5) = 5; vec3(5) = 0;
+ vec4.setZero();
+
+ VERIFY_IS_EQUAL((vec1.rank()), 1);
+ VERIFY_IS_EQUAL((vec1.size()), 6);
+ VERIFY_IS_EQUAL((vec1.dimensions()[0]), 6);
+
+ VERIFY_IS_EQUAL((vec1[0]), 4);
+ VERIFY_IS_EQUAL((vec1[1]), 8);
+ VERIFY_IS_EQUAL((vec1[2]), 15);
+ VERIFY_IS_EQUAL((vec1[3]), 16);
+ VERIFY_IS_EQUAL((vec1[4]), 23);
+ VERIFY_IS_EQUAL((vec1[5]), 42);
+
+ VERIFY_IS_EQUAL((vec2[0]), 0);
+ VERIFY_IS_EQUAL((vec2[1]), 1);
+ VERIFY_IS_EQUAL((vec2[2]), 2);
+ VERIFY_IS_EQUAL((vec2[3]), 3);
+ VERIFY_IS_EQUAL((vec2[4]), 4);
+ VERIFY_IS_EQUAL((vec2[5]), 5);
+
+ VERIFY_IS_EQUAL((vec3[0]), 5);
+ VERIFY_IS_EQUAL((vec3[1]), 4);
+ VERIFY_IS_EQUAL((vec3[2]), 3);
+ VERIFY_IS_EQUAL((vec3[3]), 2);
+ VERIFY_IS_EQUAL((vec3[4]), 1);
+ VERIFY_IS_EQUAL((vec3[5]), 0);
+
+ VERIFY_IS_EQUAL((vec4[0]), 0);
+ VERIFY_IS_EQUAL((vec4[1]), 0);
+ VERIFY_IS_EQUAL((vec4[2]), 0);
+ VERIFY_IS_EQUAL((vec4[3]), 0);
+ VERIFY_IS_EQUAL((vec4[4]), 0);
+ VERIFY_IS_EQUAL((vec4[5]), 0);
+
+ Tensor<int, 1> vec5(vec1);
+
+ VERIFY_IS_EQUAL((vec5(0)), 4);
+ VERIFY_IS_EQUAL((vec5(1)), 8);
+ VERIFY_IS_EQUAL((vec5(2)), 15);
+ VERIFY_IS_EQUAL((vec5(3)), 16);
+ VERIFY_IS_EQUAL((vec5(4)), 23);
+ VERIFY_IS_EQUAL((vec5(5)), 42);
+
+ VERIFY_IS_EQUAL((vec5.data()[0]), 4);
+ VERIFY_IS_EQUAL((vec5.data()[1]), 8);
+ VERIFY_IS_EQUAL((vec5.data()[2]), 15);
+ VERIFY_IS_EQUAL((vec5.data()[3]), 16);
+ VERIFY_IS_EQUAL((vec5.data()[4]), 23);
+ VERIFY_IS_EQUAL((vec5.data()[5]), 42);
+}
+
+static void test_2d()
+{
+ Tensor<int, 2> mat1(2,3);
+ Tensor<int, 2, RowMajor> mat2(2,3);
+
+ mat1(0,0) = 0;
+ mat1(0,1) = 1;
+ mat1(0,2) = 2;
+ mat1(1,0) = 3;
+ mat1(1,1) = 4;
+ mat1(1,2) = 5;
+
+ mat2(0,0) = 0;
+ mat2(0,1) = 1;
+ mat2(0,2) = 2;
+ mat2(1,0) = 3;
+ mat2(1,1) = 4;
+ mat2(1,2) = 5;
+
+ VERIFY_IS_EQUAL((mat1.rank()), 2);
+ VERIFY_IS_EQUAL((mat1.size()), 6);
+ VERIFY_IS_EQUAL((mat1.dimensions()[0]), 2);
+ VERIFY_IS_EQUAL((mat1.dimensions()[1]), 3);
+
+ VERIFY_IS_EQUAL((mat2.rank()), 2);
+ VERIFY_IS_EQUAL((mat2.size()), 6);
+ VERIFY_IS_EQUAL((mat2.dimensions()[0]), 2);
+ VERIFY_IS_EQUAL((mat2.dimensions()[1]), 3);
+
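+  // ColMajor (the default) stores the first index fastest, RowMajor the last;
+  // the flat data() orders below reflect this.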
+ VERIFY_IS_EQUAL((mat1.data()[0]), 0);
+ VERIFY_IS_EQUAL((mat1.data()[1]), 3);
+ VERIFY_IS_EQUAL((mat1.data()[2]), 1);
+ VERIFY_IS_EQUAL((mat1.data()[3]), 4);
+ VERIFY_IS_EQUAL((mat1.data()[4]), 2);
+ VERIFY_IS_EQUAL((mat1.data()[5]), 5);
+
+ VERIFY_IS_EQUAL((mat2.data()[0]), 0);
+ VERIFY_IS_EQUAL((mat2.data()[1]), 1);
+ VERIFY_IS_EQUAL((mat2.data()[2]), 2);
+ VERIFY_IS_EQUAL((mat2.data()[3]), 3);
+ VERIFY_IS_EQUAL((mat2.data()[4]), 4);
+ VERIFY_IS_EQUAL((mat2.data()[5]), 5);
+}
+
+static void test_3d()
+{
+ Tensor<int, 3> epsilon(3,3,3);
+ epsilon.setZero();
+ epsilon(0,1,2) = epsilon(2,0,1) = epsilon(1,2,0) = 1;
+ epsilon(2,1,0) = epsilon(0,2,1) = epsilon(1,0,2) = -1;
+
+ VERIFY_IS_EQUAL((epsilon.size()), 27);
+ VERIFY_IS_EQUAL((epsilon.dimensions()[0]), 3);
+ VERIFY_IS_EQUAL((epsilon.dimensions()[1]), 3);
+ VERIFY_IS_EQUAL((epsilon.dimensions()[2]), 3);
+
+ VERIFY_IS_EQUAL((epsilon(0,0,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(0,0,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(0,0,2)), 0);
+ VERIFY_IS_EQUAL((epsilon(0,1,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(0,1,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(0,2,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(0,2,2)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,0,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,0,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,1,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,1,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,1,2)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,2,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(1,2,2)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,0,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,0,2)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,1,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,1,2)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,2,0)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,2,1)), 0);
+ VERIFY_IS_EQUAL((epsilon(2,2,2)), 0);
+
+ VERIFY_IS_EQUAL((epsilon(0,1,2)), 1);
+ VERIFY_IS_EQUAL((epsilon(2,0,1)), 1);
+ VERIFY_IS_EQUAL((epsilon(1,2,0)), 1);
+ VERIFY_IS_EQUAL((epsilon(2,1,0)), -1);
+ VERIFY_IS_EQUAL((epsilon(0,2,1)), -1);
+ VERIFY_IS_EQUAL((epsilon(1,0,2)), -1);
+
+ array<Eigen::DenseIndex, 3> dims;
+ dims[0] = 2;
+ dims[1] = 3;
+ dims[2] = 4;
+ Tensor<int, 3> t1(dims);
+ Tensor<int, 3, RowMajor> t2(dims);
+
+ VERIFY_IS_EQUAL((t1.size()), 24);
+ VERIFY_IS_EQUAL((t1.dimensions()[0]), 2);
+ VERIFY_IS_EQUAL((t1.dimensions()[1]), 3);
+ VERIFY_IS_EQUAL((t1.dimensions()[2]), 4);
+
+ VERIFY_IS_EQUAL((t2.size()), 24);
+ VERIFY_IS_EQUAL((t2.dimensions()[0]), 2);
+ VERIFY_IS_EQUAL((t2.dimensions()[1]), 3);
+ VERIFY_IS_EQUAL((t2.dimensions()[2]), 4);
+
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 3; j++) {
+ for (int k = 0; k < 4; k++) {
+ t1(i, j, k) = 100 * i + 10 * j + k;
+ t2(i, j, k) = 100 * i + 10 * j + k;
+ }
+ }
+ }
+
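+  // With coefficient 100*i + 10*j + k, the flat data() orders below read off
+  // directly: ColMajor varies i fastest, RowMajor varies k fastest.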
+ VERIFY_IS_EQUAL((t1.data()[0]), 0);
+ VERIFY_IS_EQUAL((t1.data()[1]), 100);
+ VERIFY_IS_EQUAL((t1.data()[2]), 10);
+ VERIFY_IS_EQUAL((t1.data()[3]), 110);
+ VERIFY_IS_EQUAL((t1.data()[4]), 20);
+ VERIFY_IS_EQUAL((t1.data()[5]), 120);
+ VERIFY_IS_EQUAL((t1.data()[6]), 1);
+ VERIFY_IS_EQUAL((t1.data()[7]), 101);
+ VERIFY_IS_EQUAL((t1.data()[8]), 11);
+ VERIFY_IS_EQUAL((t1.data()[9]), 111);
+ VERIFY_IS_EQUAL((t1.data()[10]), 21);
+ VERIFY_IS_EQUAL((t1.data()[11]), 121);
+ VERIFY_IS_EQUAL((t1.data()[12]), 2);
+ VERIFY_IS_EQUAL((t1.data()[13]), 102);
+ VERIFY_IS_EQUAL((t1.data()[14]), 12);
+ VERIFY_IS_EQUAL((t1.data()[15]), 112);
+ VERIFY_IS_EQUAL((t1.data()[16]), 22);
+ VERIFY_IS_EQUAL((t1.data()[17]), 122);
+ VERIFY_IS_EQUAL((t1.data()[18]), 3);
+ VERIFY_IS_EQUAL((t1.data()[19]), 103);
+ VERIFY_IS_EQUAL((t1.data()[20]), 13);
+ VERIFY_IS_EQUAL((t1.data()[21]), 113);
+ VERIFY_IS_EQUAL((t1.data()[22]), 23);
+ VERIFY_IS_EQUAL((t1.data()[23]), 123);
+
+ VERIFY_IS_EQUAL((t2.data()[0]), 0);
+ VERIFY_IS_EQUAL((t2.data()[1]), 1);
+ VERIFY_IS_EQUAL((t2.data()[2]), 2);
+ VERIFY_IS_EQUAL((t2.data()[3]), 3);
+ VERIFY_IS_EQUAL((t2.data()[4]), 10);
+ VERIFY_IS_EQUAL((t2.data()[5]), 11);
+ VERIFY_IS_EQUAL((t2.data()[6]), 12);
+ VERIFY_IS_EQUAL((t2.data()[7]), 13);
+ VERIFY_IS_EQUAL((t2.data()[8]), 20);
+ VERIFY_IS_EQUAL((t2.data()[9]), 21);
+ VERIFY_IS_EQUAL((t2.data()[10]), 22);
+ VERIFY_IS_EQUAL((t2.data()[11]), 23);
+ VERIFY_IS_EQUAL((t2.data()[12]), 100);
+ VERIFY_IS_EQUAL((t2.data()[13]), 101);
+ VERIFY_IS_EQUAL((t2.data()[14]), 102);
+ VERIFY_IS_EQUAL((t2.data()[15]), 103);
+ VERIFY_IS_EQUAL((t2.data()[16]), 110);
+ VERIFY_IS_EQUAL((t2.data()[17]), 111);
+ VERIFY_IS_EQUAL((t2.data()[18]), 112);
+ VERIFY_IS_EQUAL((t2.data()[19]), 113);
+ VERIFY_IS_EQUAL((t2.data()[20]), 120);
+ VERIFY_IS_EQUAL((t2.data()[21]), 121);
+ VERIFY_IS_EQUAL((t2.data()[22]), 122);
+ VERIFY_IS_EQUAL((t2.data()[23]), 123);
+}
+
+static void test_simple_assign()
+{
+ Tensor<int, 3> epsilon(3,3,3);
+ epsilon.setZero();
+ epsilon(0,1,2) = epsilon(2,0,1) = epsilon(1,2,0) = 1;
+ epsilon(2,1,0) = epsilon(0,2,1) = epsilon(1,0,2) = -1;
+
+ Tensor<int, 3> e2(3,3,3);
+ e2.setZero();
+ VERIFY_IS_EQUAL((e2(1,2,0)), 0);
+
+ e2 = epsilon;
+ VERIFY_IS_EQUAL((e2(1,2,0)), 1);
+ VERIFY_IS_EQUAL((e2(0,1,2)), 1);
+ VERIFY_IS_EQUAL((e2(2,0,1)), 1);
+ VERIFY_IS_EQUAL((e2(2,1,0)), -1);
+ VERIFY_IS_EQUAL((e2(0,2,1)), -1);
+ VERIFY_IS_EQUAL((e2(1,0,2)), -1);
+}
+
+static void test_resize()
+{
+ Tensor<int, 3> epsilon;
+ epsilon.resize(2,3,7);
+ VERIFY_IS_EQUAL(epsilon.dimension(0), 2);
+ VERIFY_IS_EQUAL(epsilon.dimension(1), 3);
+ VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
+ VERIFY_IS_EQUAL(epsilon.size(), 2*3*7);
+
+ const int* old_data = epsilon.data();
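+  // The new shape has the same total size (2*3*7 == 3*2*7), so the resize
+  // should keep the existing allocation.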
+ epsilon.resize(3,2,7);
+ VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
+ VERIFY_IS_EQUAL(epsilon.dimension(1), 2);
+ VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
+ VERIFY_IS_EQUAL(epsilon.size(), 2*3*7);
+ VERIFY_IS_EQUAL(epsilon.data(), old_data);
+
+ epsilon.resize(3,5,7);
+ VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
+ VERIFY_IS_EQUAL(epsilon.dimension(1), 5);
+ VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
+ VERIFY_IS_EQUAL(epsilon.size(), 3*5*7);
+}
+
+void test_cxx11_tensor_simple()
+{
+ CALL_SUBTEST(test_0d());
+ CALL_SUBTEST(test_1d());
+ CALL_SUBTEST(test_2d());
+ CALL_SUBTEST(test_3d());
+ CALL_SUBTEST(test_simple_assign());
+ CALL_SUBTEST(test_resize());
+}
diff --git a/unsupported/test/cxx11_tensor_striding.cpp b/unsupported/test/cxx11_tensor_striding.cpp
new file mode 100644
index 000000000..935b908cc
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_striding.cpp
@@ -0,0 +1,119 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+template<int DataLayout>
+static void test_simple_striding()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> strides;
+ strides[0] = 1;
+ strides[1] = 1;
+ strides[2] = 1;
+ strides[3] = 1;
+
+ Tensor<float, 4, DataLayout> no_stride;
+ no_stride = tensor.stride(strides);
+
+ VERIFY_IS_EQUAL(no_stride.dimension(0), 2);
+ VERIFY_IS_EQUAL(no_stride.dimension(1), 3);
+ VERIFY_IS_EQUAL(no_stride.dimension(2), 5);
+ VERIFY_IS_EQUAL(no_stride.dimension(3), 7);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
+ }
+ }
+ }
+ }
+
+ strides[0] = 2;
+ strides[1] = 4;
+ strides[2] = 2;
+ strides[3] = 3;
+ Tensor<float, 4, DataLayout> stride;
+ stride = tensor.stride(strides);
+
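+  // Each output extent is ceil(input extent / stride): (2,3,5,7) strided by
+  // (2,4,2,3) yields (1,1,3,3).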
+ VERIFY_IS_EQUAL(stride.dimension(0), 1);
+ VERIFY_IS_EQUAL(stride.dimension(1), 1);
+ VERIFY_IS_EQUAL(stride.dimension(2), 3);
+ VERIFY_IS_EQUAL(stride.dimension(3), 3);
+
+ for (int i = 0; i < 1; ++i) {
+ for (int j = 0; j < 1; ++j) {
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 3; ++l) {
+ VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l));
+ }
+ }
+ }
+ }
+}
+
+
+template<int DataLayout>
+static void test_striding_as_lvalue()
+{
+ Tensor<float, 4, DataLayout> tensor(2,3,5,7);
+ tensor.setRandom();
+ array<ptrdiff_t, 4> strides;
+ strides[0] = 2;
+ strides[1] = 4;
+ strides[2] = 2;
+ strides[3] = 3;
+
+ Tensor<float, 4, DataLayout> result(3, 12, 10, 21);
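+  // As an lvalue, stride() scatters writes: each destination dim needs at
+  // least stride*(extent-1)+1 entries, so (3,12,10,21) fits (2,3,5,7).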
+ result.stride(strides) = tensor;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), result(2*i,4*j,2*k,3*l));
+ }
+ }
+ }
+ }
+
+ array<ptrdiff_t, 4> no_strides;
+ no_strides[0] = 1;
+ no_strides[1] = 1;
+ no_strides[2] = 1;
+ no_strides[3] = 1;
+ Tensor<float, 4, DataLayout> result2(3, 12, 10, 21);
+ result2.stride(strides) = tensor.stride(no_strides);
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), result2(2*i,4*j,2*k,3*l));
+ }
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_striding()
+{
+ CALL_SUBTEST(test_simple_striding<ColMajor>());
+ CALL_SUBTEST(test_simple_striding<RowMajor>());
+ CALL_SUBTEST(test_striding_as_lvalue<ColMajor>());
+ CALL_SUBTEST(test_striding_as_lvalue<RowMajor>());
+}
diff --git a/unsupported/test/cxx11_tensor_sugar.cpp b/unsupported/test/cxx11_tensor_sugar.cpp
new file mode 100644
index 000000000..2f56eb495
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sugar.cpp
@@ -0,0 +1,81 @@
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_comparison_sugar() {
+  // we already trust comparisons between tensors; here we simply check that
+  // the sugared versions do the same thing
+ Tensor<int, 3> t(6, 7, 5);
+
+ t.setRandom();
+ // make sure we have at least one value == 0
+ t(0,0,0) = 0;
+
+ Tensor<bool,0> b;
+
+#define TEST_TENSOR_EQUAL(e1, e2) \
+ b = ((e1) == (e2)).all(); \
+ VERIFY(b())
+
+#define TEST_OP(op) TEST_TENSOR_EQUAL(t op 0, t op t.constant(0))
+
+ TEST_OP(==);
+ TEST_OP(!=);
+ TEST_OP(<=);
+ TEST_OP(>=);
+ TEST_OP(<);
+ TEST_OP(>);
+#undef TEST_OP
+#undef TEST_TENSOR_EQUAL
+}
+
+
+static void test_scalar_sugar_add_mul() {
+ Tensor<float, 3> A(6, 7, 5);
+ Tensor<float, 3> B(6, 7, 5);
+ A.setRandom();
+ B.setRandom();
+
+ const float alpha = 0.43f;
+ const float beta = 0.21f;
+ const float gamma = 0.14f;
+
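+  // Scalars in tensor expressions are sugar for .constant() broadcasts, so S
+  // and T below must match the explicit form R coefficient-wise.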
+ Tensor<float, 3> R = A.constant(gamma) + A * A.constant(alpha) + B * B.constant(beta);
+ Tensor<float, 3> S = A * alpha + B * beta + gamma;
+ Tensor<float, 3> T = gamma + alpha * A + beta * B;
+
+ for (int i = 0; i < 6*7*5; ++i) {
+ VERIFY_IS_APPROX(R(i), S(i));
+ VERIFY_IS_APPROX(R(i), T(i));
+ }
+}
+
+static void test_scalar_sugar_sub_div() {
+ Tensor<float, 3> A(6, 7, 5);
+ Tensor<float, 3> B(6, 7, 5);
+ A.setRandom();
+ B.setRandom();
+
+ const float alpha = 0.43f;
+ const float beta = 0.21f;
+ const float gamma = 0.14f;
+ const float delta = 0.32f;
+
+ Tensor<float, 3> R = A.constant(gamma) - A / A.constant(alpha)
+ - B.constant(beta) / B - A.constant(delta);
+ Tensor<float, 3> S = gamma - A / alpha - beta / B - delta;
+
+ for (int i = 0; i < 6*7*5; ++i) {
+ VERIFY_IS_APPROX(R(i), S(i));
+ }
+}
+
+void test_cxx11_tensor_sugar()
+{
+ CALL_SUBTEST(test_comparison_sugar());
+ CALL_SUBTEST(test_scalar_sugar_add_mul());
+ CALL_SUBTEST(test_scalar_sugar_sub_div());
+}
diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp
new file mode 100644
index 000000000..6a9c33422
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_sycl.cpp
@@ -0,0 +1,159 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+// Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) {
+
+ int sizeDim1 = 100;
+ int sizeDim2 = 100;
+ int sizeDim3 = 100;
+ array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ Tensor<float, 3> in1(tensorRange);
+ Tensor<float, 3> in2(tensorRange);
+ Tensor<float, 3> in3(tensorRange);
+ Tensor<float, 3> out(tensorRange);
+
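+  // Only in2 and in3 are initialized on the host; in1 is filled directly on
+  // the device below.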
+ in2 = in2.random();
+ in3 = in3.random();
+
+ float * gpu_in1_data = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_in2_data = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_in3_data = static_cast<float*>(sycl_device.allocate(in3.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
+
+ TensorMap<Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_in3(gpu_in3_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
+
+ /// a=1.2f
+ gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
+ sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
+ }
+ }
+ }
+ printf("a=1.2f Test passed\n");
+
+ /// a=b*1.2f
+ gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) * 1.2f);
+ }
+ }
+ }
+ printf("a=b*1.2f Test Passed\n");
+
+ /// c=a*b
+ sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float));
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) *
+ in2(i,j,k));
+ }
+ }
+ }
+ printf("c=a*b Test Passed\n");
+
+ /// c=a+b
+ gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) +
+ in2(i,j,k));
+ }
+ }
+ }
+ printf("c=a+b Test Passed\n");
+
+ /// c=a*a
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) *
+ in1(i,j,k));
+ }
+ }
+ }
+ printf("c= a*a Test Passed\n");
+
+  /// a*3.14f + b*2.7f
+ gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
+ sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k),
+ in1(i,j,k) * 3.14f
+ + in2(i,j,k) * 2.7f);
+ }
+ }
+ }
+ printf("a*3.14f + b*2.7f Test Passed\n");
+
+ ///d= (a>0.5? b:c)
+ sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.dimensions().TotalSize())*sizeof(float));
+ gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
+ VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f)
+ ? in2(i, j, k)
+ : in3(i, j, k));
+ }
+ }
+ }
+ printf("d= (a>0.5? b:c) Test Passed\n");
+ sycl_device.deallocate(gpu_in1_data);
+ sycl_device.deallocate(gpu_in2_data);
+ sycl_device.deallocate(gpu_in3_data);
+ sycl_device.deallocate(gpu_out_data);
+}
+
+void test_cxx11_tensor_sycl() {
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_sycl_cpu(sycl_device));
+}
diff --git a/unsupported/test/cxx11_tensor_symmetry.cpp b/unsupported/test/cxx11_tensor_symmetry.cpp
new file mode 100644
index 000000000..d680e9b3b
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_symmetry.cpp
@@ -0,0 +1,818 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+#include <Eigen/CXX11/TensorSymmetry>
+
+#include <map>
+#include <set>
+
+using Eigen::Tensor;
+using Eigen::SGroup;
+using Eigen::DynamicSGroup;
+using Eigen::StaticSGroup;
+using Eigen::Symmetry;
+using Eigen::AntiSymmetry;
+using Eigen::Hermiticity;
+using Eigen::AntiHermiticity;
+
+using Eigen::NegationFlag;
+using Eigen::ConjugationFlag;
+using Eigen::GlobalZeroFlag;
+using Eigen::GlobalRealFlag;
+using Eigen::GlobalImagFlag;
+
+// helper function to determine if the compiler instantiated a static
+// or dynamic symmetry group
+template<typename... Sym>
+bool isDynGroup(StaticSGroup<Sym...> const& dummy)
+{
+ (void)dummy;
+ return false;
+}
+
+bool isDynGroup(DynamicSGroup const& dummy)
+{
+ (void)dummy;
+ return true;
+}
+
+// helper class for checking that the symmetry groups are correct
+struct checkIdx {
+ template<typename ArrType>
+ static inline int doCheck_(ArrType e, int flags, int dummy, std::set<uint64_t>& found, std::map<uint64_t, int> const& expected)
+ {
+ // use decimal representation of value
+ uint64_t value = e[0];
+ for (std::size_t i = 1; i < e.size(); i++)
+ value = value * 10 + e[i];
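+    // e.g. the permutation {2,1,0,3,4,5,6} encodes as 2103456, the key
+    // format used by the `expected` maps in the tests below.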
+
+ // we want to make sure that we find each element
+ auto it = expected.find(value);
+ VERIFY((it != expected.end()));
+ VERIFY_IS_EQUAL(it->second, flags);
+
+ // we want to make sure we only have each element once;
+ // set::insert returns true for the second part of the pair
+ // if the element was really inserted and not already there
+ auto p = found.insert(value);
+ VERIFY((p.second));
+
+ return dummy;
+ }
+
+ static inline int run(std::vector<int> e, int flags, int dummy, std::set<uint64_t>& found, std::map<uint64_t, int> const& expected)
+ {
+ return doCheck_(e, flags, dummy, found, expected);
+ }
+
+ template<std::size_t N>
+ static inline int run(std::array<int, N> e, int flags, int dummy, std::set<uint64_t>& found, std::map<uint64_t, int> const& expected)
+ {
+ return doCheck_(e, flags, dummy, found, expected);
+ }
+};
+
+static void test_symgroups_static()
+{
+ std::array<int, 7> identity{{0,1,2,3,4,5,6}};
+
+ // Simple static symmetry group
+ StaticSGroup<
+ AntiSymmetry<0,1>,
+ Hermiticity<0,2>
+ > group;
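+  // AntiSymmetry<0,1> and Hermiticity<0,2> generate all 3! = 6 permutations
+  // of indices {0,1,2}, each tagged with negation/conjugation flags.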
+
+ std::set<uint64_t> found;
+ std::map<uint64_t, int> expected;
+ expected[ 123456] = 0;
+ expected[1023456] = NegationFlag;
+ expected[2103456] = ConjugationFlag;
+ expected[1203456] = ConjugationFlag | NegationFlag;
+ expected[2013456] = ConjugationFlag | NegationFlag;
+ expected[ 213456] = ConjugationFlag;
+
+ VERIFY_IS_EQUAL(group.size(), 6u);
+ VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag);
+ group.apply<checkIdx, int>(identity, 0, found, expected);
+ VERIFY_IS_EQUAL(found.size(), 6u);
+}
+
+static void test_symgroups_dynamic()
+{
+ std::vector<int> identity;
+ for (int i = 0; i <= 6; i++)
+ identity.push_back(i);
+
+ // Simple dynamic symmetry group
+ DynamicSGroup group;
+ group.add(0,1,NegationFlag);
+ group.add(0,2,ConjugationFlag);
+
+ VERIFY_IS_EQUAL(group.size(), 6u);
+ VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag);
+
+ std::set<uint64_t> found;
+ std::map<uint64_t, int> expected;
+ expected[ 123456] = 0;
+ expected[1023456] = NegationFlag;
+ expected[2103456] = ConjugationFlag;
+ expected[1203456] = ConjugationFlag | NegationFlag;
+ expected[2013456] = ConjugationFlag | NegationFlag;
+ expected[ 213456] = ConjugationFlag;
+
+ VERIFY_IS_EQUAL(group.size(), 6u);
+ VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag);
+ group.apply<checkIdx, int>(identity, 0, found, expected);
+ VERIFY_IS_EQUAL(found.size(), 6u);
+}
+
+static void test_symgroups_selection()
+{
+ std::array<int, 7> identity7{{0,1,2,3,4,5,6}};
+ std::array<int, 10> identity10{{0,1,2,3,4,5,6,7,8,9}};
+
+ {
+ // Do the same test as in test_symgroups_static but
+ // require selection via SGroup
+ SGroup<
+ AntiSymmetry<0,1>,
+ Hermiticity<0,2>
+ > group;
+
+ std::set<uint64_t> found;
+ std::map<uint64_t, int> expected;
+ expected[ 123456] = 0;
+ expected[1023456] = NegationFlag;
+ expected[2103456] = ConjugationFlag;
+ expected[1203456] = ConjugationFlag | NegationFlag;
+ expected[2013456] = ConjugationFlag | NegationFlag;
+ expected[ 213456] = ConjugationFlag;
+
+ VERIFY(!isDynGroup(group));
+ VERIFY_IS_EQUAL(group.size(), 6u);
+ VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag);
+ group.apply<checkIdx, int>(identity7, 0, found, expected);
+ VERIFY_IS_EQUAL(found.size(), 6u);
+ }
+
+ {
+ // simple factorizing group: 5 generators, 2^5 = 32 elements
+    // selection should make this dynamic, although a static group
+    // can still be reasonably generated
+ SGroup<
+ Symmetry<0,1>,
+ Symmetry<2,3>,
+ Symmetry<4,5>,
+ Symmetry<6,7>,
+ Symmetry<8,9>
+ > group;
+
+ std::set<uint64_t> found;
+ std::map<uint64_t, int> expected;
+ expected[ 123456789] = 0; expected[ 123456798] = 0; expected[ 123457689] = 0; expected[ 123457698] = 0;
+ expected[ 123546789] = 0; expected[ 123546798] = 0; expected[ 123547689] = 0; expected[ 123547698] = 0;
+ expected[ 132456789] = 0; expected[ 132456798] = 0; expected[ 132457689] = 0; expected[ 132457698] = 0;
+ expected[ 132546789] = 0; expected[ 132546798] = 0; expected[ 132547689] = 0; expected[ 132547698] = 0;
+ expected[1023456789] = 0; expected[1023456798] = 0; expected[1023457689] = 0; expected[1023457698] = 0;
+ expected[1023546789] = 0; expected[1023546798] = 0; expected[1023547689] = 0; expected[1023547698] = 0;
+ expected[1032456789] = 0; expected[1032456798] = 0; expected[1032457689] = 0; expected[1032457698] = 0;
+ expected[1032546789] = 0; expected[1032546798] = 0; expected[1032547689] = 0; expected[1032547698] = 0;
+
+ VERIFY(isDynGroup(group));
+ VERIFY_IS_EQUAL(group.size(), 32u);
+ VERIFY_IS_EQUAL(group.globalFlags(), 0);
+ group.apply<checkIdx, int>(identity10, 0, found, expected);
+ VERIFY_IS_EQUAL(found.size(), 32u);
+
+    // now verify that we could also generate a static group
+ // with these generators
+ found.clear();
+ StaticSGroup<
+ Symmetry<0,1>,
+ Symmetry<2,3>,
+ Symmetry<4,5>,
+ Symmetry<6,7>,
+ Symmetry<8,9>
+ > group_static;
+ VERIFY_IS_EQUAL(group_static.size(), 32u);
+ VERIFY_IS_EQUAL(group_static.globalFlags(), 0);
+ group_static.apply<checkIdx, int>(identity10, 0, found, expected);
+ VERIFY_IS_EQUAL(found.size(), 32u);
+ }
+
+ {
+ // try to create a HUGE group
+ SGroup<
+ Symmetry<0,1>,
+ Symmetry<1,2>,
+ Symmetry<2,3>,
+ Symmetry<3,4>,
+ Symmetry<4,5>,
+ Symmetry<5,6>
+ > group;
+
+ std::set<uint64_t> found;
+ uint64_t pre_expected[5040] = {
+ 123456, 1023456, 213456, 2013456, 1203456, 2103456, 132456, 1032456, 312456, 3012456, 1302456, 3102456,
+ 231456, 2031456, 321456, 3021456, 2301456, 3201456, 1230456, 2130456, 1320456, 3120456, 2310456, 3210456,
+ 124356, 1024356, 214356, 2014356, 1204356, 2104356, 142356, 1042356, 412356, 4012356, 1402356, 4102356,
+ 241356, 2041356, 421356, 4021356, 2401356, 4201356, 1240356, 2140356, 1420356, 4120356, 2410356, 4210356,
+ 134256, 1034256, 314256, 3014256, 1304256, 3104256, 143256, 1043256, 413256, 4013256, 1403256, 4103256,
+ 341256, 3041256, 431256, 4031256, 3401256, 4301256, 1340256, 3140256, 1430256, 4130256, 3410256, 4310256,
+ 234156, 2034156, 324156, 3024156, 2304156, 3204156, 243156, 2043156, 423156, 4023156, 2403156, 4203156,
+ 342156, 3042156, 432156, 4032156, 3402156, 4302156, 2340156, 3240156, 2430156, 4230156, 3420156, 4320156,
+ 1234056, 2134056, 1324056, 3124056, 2314056, 3214056, 1243056, 2143056, 1423056, 4123056, 2413056, 4213056,
+ 1342056, 3142056, 1432056, 4132056, 3412056, 4312056, 2341056, 3241056, 2431056, 4231056, 3421056, 4321056,
+ 123546, 1023546, 213546, 2013546, 1203546, 2103546, 132546, 1032546, 312546, 3012546, 1302546, 3102546,
+ 231546, 2031546, 321546, 3021546, 2301546, 3201546, 1230546, 2130546, 1320546, 3120546, 2310546, 3210546,
+ 125346, 1025346, 215346, 2015346, 1205346, 2105346, 152346, 1052346, 512346, 5012346, 1502346, 5102346,
+ 251346, 2051346, 521346, 5021346, 2501346, 5201346, 1250346, 2150346, 1520346, 5120346, 2510346, 5210346,
+ 135246, 1035246, 315246, 3015246, 1305246, 3105246, 153246, 1053246, 513246, 5013246, 1503246, 5103246,
+ 351246, 3051246, 531246, 5031246, 3501246, 5301246, 1350246, 3150246, 1530246, 5130246, 3510246, 5310246,
+ 235146, 2035146, 325146, 3025146, 2305146, 3205146, 253146, 2053146, 523146, 5023146, 2503146, 5203146,
+ 352146, 3052146, 532146, 5032146, 3502146, 5302146, 2350146, 3250146, 2530146, 5230146, 3520146, 5320146,
+ 1235046, 2135046, 1325046, 3125046, 2315046, 3215046, 1253046, 2153046, 1523046, 5123046, 2513046, 5213046,
+ 1352046, 3152046, 1532046, 5132046, 3512046, 5312046, 2351046, 3251046, 2531046, 5231046, 3521046, 5321046,
+ 124536, 1024536, 214536, 2014536, 1204536, 2104536, 142536, 1042536, 412536, 4012536, 1402536, 4102536,
+ 241536, 2041536, 421536, 4021536, 2401536, 4201536, 1240536, 2140536, 1420536, 4120536, 2410536, 4210536,
+ 125436, 1025436, 215436, 2015436, 1205436, 2105436, 152436, 1052436, 512436, 5012436, 1502436, 5102436,
+ 251436, 2051436, 521436, 5021436, 2501436, 5201436, 1250436, 2150436, 1520436, 5120436, 2510436, 5210436,
+ 145236, 1045236, 415236, 4015236, 1405236, 4105236, 154236, 1054236, 514236, 5014236, 1504236, 5104236,
+ 451236, 4051236, 541236, 5041236, 4501236, 5401236, 1450236, 4150236, 1540236, 5140236, 4510236, 5410236,
+ 245136, 2045136, 425136, 4025136, 2405136, 4205136, 254136, 2054136, 524136, 5024136, 2504136, 5204136,
+ 452136, 4052136, 542136, 5042136, 4502136, 5402136, 2450136, 4250136, 2540136, 5240136, 4520136, 5420136,
+ 1245036, 2145036, 1425036, 4125036, 2415036, 4215036, 1254036, 2154036, 1524036, 5124036, 2514036, 5214036,
+ 1452036, 4152036, 1542036, 5142036, 4512036, 5412036, 2451036, 4251036, 2541036, 5241036, 4521036, 5421036,
+ 134526, 1034526, 314526, 3014526, 1304526, 3104526, 143526, 1043526, 413526, 4013526, 1403526, 4103526,
+ 341526, 3041526, 431526, 4031526, 3401526, 4301526, 1340526, 3140526, 1430526, 4130526, 3410526, 4310526,
+ 135426, 1035426, 315426, 3015426, 1305426, 3105426, 153426, 1053426, 513426, 5013426, 1503426, 5103426,
+ 351426, 3051426, 531426, 5031426, 3501426, 5301426, 1350426, 3150426, 1530426, 5130426, 3510426, 5310426,
+ 145326, 1045326, 415326, 4015326, 1405326, 4105326, 154326, 1054326, 514326, 5014326, 1504326, 5104326,
+ 451326, 4051326, 541326, 5041326, 4501326, 5401326, 1450326, 4150326, 1540326, 5140326, 4510326, 5410326,
+ 345126, 3045126, 435126, 4035126, 3405126, 4305126, 354126, 3054126, 534126, 5034126, 3504126, 5304126,
+ 453126, 4053126, 543126, 5043126, 4503126, 5403126, 3450126, 4350126, 3540126, 5340126, 4530126, 5430126,
+ 1345026, 3145026, 1435026, 4135026, 3415026, 4315026, 1354026, 3154026, 1534026, 5134026, 3514026, 5314026,
+ 1453026, 4153026, 1543026, 5143026, 4513026, 5413026, 3451026, 4351026, 3541026, 5341026, 4531026, 5431026,
+ 234516, 2034516, 324516, 3024516, 2304516, 3204516, 243516, 2043516, 423516, 4023516, 2403516, 4203516,
+ 342516, 3042516, 432516, 4032516, 3402516, 4302516, 2340516, 3240516, 2430516, 4230516, 3420516, 4320516,
+ 235416, 2035416, 325416, 3025416, 2305416, 3205416, 253416, 2053416, 523416, 5023416, 2503416, 5203416,
+ 352416, 3052416, 532416, 5032416, 3502416, 5302416, 2350416, 3250416, 2530416, 5230416, 3520416, 5320416,
+ 245316, 2045316, 425316, 4025316, 2405316, 4205316, 254316, 2054316, 524316, 5024316, 2504316, 5204316,
+ 452316, 4052316, 542316, 5042316, 4502316, 5402316, 2450316, 4250316, 2540316, 5240316, 4520316, 5420316,
+ 345216, 3045216, 435216, 4035216, 3405216, 4305216, 354216, 3054216, 534216, 5034216, 3504216, 5304216,
+ 453216, 4053216, 543216, 5043216, 4503216, 5403216, 3450216, 4350216, 3540216, 5340216, 4530216, 5430216,
+ 2345016, 3245016, 2435016, 4235016, 3425016, 4325016, 2354016, 3254016, 2534016, 5234016, 3524016, 5324016,
+ 2453016, 4253016, 2543016, 5243016, 4523016, 5423016, 3452016, 4352016, 3542016, 5342016, 4532016, 5432016,
+ 1234506, 2134506, 1324506, 3124506, 2314506, 3214506, 1243506, 2143506, 1423506, 4123506, 2413506, 4213506,
+ 1342506, 3142506, 1432506, 4132506, 3412506, 4312506, 2341506, 3241506, 2431506, 4231506, 3421506, 4321506,
+ 1235406, 2135406, 1325406, 3125406, 2315406, 3215406, 1253406, 2153406, 1523406, 5123406, 2513406, 5213406,
+ 1352406, 3152406, 1532406, 5132406, 3512406, 5312406, 2351406, 3251406, 2531406, 5231406, 3521406, 5321406,
+ 1245306, 2145306, 1425306, 4125306, 2415306, 4215306, 1254306, 2154306, 1524306, 5124306, 2514306, 5214306,
+ 1452306, 4152306, 1542306, 5142306, 4512306, 5412306, 2451306, 4251306, 2541306, 5241306, 4521306, 5421306,
+ 1345206, 3145206, 1435206, 4135206, 3415206, 4315206, 1354206, 3154206, 1534206, 5134206, 3514206, 5314206,
+ 1453206, 4153206, 1543206, 5143206, 4513206, 5413206, 3451206, 4351206, 3541206, 5341206, 4531206, 5431206,
+ 2345106, 3245106, 2435106, 4235106, 3425106, 4325106, 2354106, 3254106, 2534106, 5234106, 3524106, 5324106,
+ 2453106, 4253106, 2543106, 5243106, 4523106, 5423106, 3452106, 4352106, 3542106, 5342106, 4532106, 5432106,
+ 123465, 1023465, 213465, 2013465, 1203465, 2103465, 132465, 1032465, 312465, 3012465, 1302465, 3102465,
+ 231465, 2031465, 321465, 3021465, 2301465, 3201465, 1230465, 2130465, 1320465, 3120465, 2310465, 3210465,
+ 124365, 1024365, 214365, 2014365, 1204365, 2104365, 142365, 1042365, 412365, 4012365, 1402365, 4102365,
+ 241365, 2041365, 421365, 4021365, 2401365, 4201365, 1240365, 2140365, 1420365, 4120365, 2410365, 4210365,
+ 134265, 1034265, 314265, 3014265, 1304265, 3104265, 143265, 1043265, 413265, 4013265, 1403265, 4103265,
+ 341265, 3041265, 431265, 4031265, 3401265, 4301265, 1340265, 3140265, 1430265, 4130265, 3410265, 4310265,
+ 234165, 2034165, 324165, 3024165, 2304165, 3204165, 243165, 2043165, 423165, 4023165, 2403165, 4203165,
+ 342165, 3042165, 432165, 4032165, 3402165, 4302165, 2340165, 3240165, 2430165, 4230165, 3420165, 4320165,
+ 1234065, 2134065, 1324065, 3124065, 2314065, 3214065, 1243065, 2143065, 1423065, 4123065, 2413065, 4213065,
+ 1342065, 3142065, 1432065, 4132065, 3412065, 4312065, 2341065, 3241065, 2431065, 4231065, 3421065, 4321065,
+ 123645, 1023645, 213645, 2013645, 1203645, 2103645, 132645, 1032645, 312645, 3012645, 1302645, 3102645,
+ 231645, 2031645, 321645, 3021645, 2301645, 3201645, 1230645, 2130645, 1320645, 3120645, 2310645, 3210645,
+ 126345, 1026345, 216345, 2016345, 1206345, 2106345, 162345, 1062345, 612345, 6012345, 1602345, 6102345,
+ 261345, 2061345, 621345, 6021345, 2601345, 6201345, 1260345, 2160345, 1620345, 6120345, 2610345, 6210345,
+ 136245, 1036245, 316245, 3016245, 1306245, 3106245, 163245, 1063245, 613245, 6013245, 1603245, 6103245,
+ 361245, 3061245, 631245, 6031245, 3601245, 6301245, 1360245, 3160245, 1630245, 6130245, 3610245, 6310245,
+ 236145, 2036145, 326145, 3026145, 2306145, 3206145, 263145, 2063145, 623145, 6023145, 2603145, 6203145,
+ 362145, 3062145, 632145, 6032145, 3602145, 6302145, 2360145, 3260145, 2630145, 6230145, 3620145, 6320145,
+ 1236045, 2136045, 1326045, 3126045, 2316045, 3216045, 1263045, 2163045, 1623045, 6123045, 2613045, 6213045,
+ 1362045, 3162045, 1632045, 6132045, 3612045, 6312045, 2361045, 3261045, 2631045, 6231045, 3621045, 6321045,
+ 124635, 1024635, 214635, 2014635, 1204635, 2104635, 142635, 1042635, 412635, 4012635, 1402635, 4102635,
+ 241635, 2041635, 421635, 4021635, 2401635, 4201635, 1240635, 2140635, 1420635, 4120635, 2410635, 4210635,
+ 126435, 1026435, 216435, 2016435, 1206435, 2106435, 162435, 1062435, 612435, 6012435, 1602435, 6102435,
+ 261435, 2061435, 621435, 6021435, 2601435, 6201435, 1260435, 2160435, 1620435, 6120435, 2610435, 6210435,
+ 146235, 1046235, 416235, 4016235, 1406235, 4106235, 164235, 1064235, 614235, 6014235, 1604235, 6104235,
+ 461235, 4061235, 641235, 6041235, 4601235, 6401235, 1460235, 4160235, 1640235, 6140235, 4610235, 6410235,
+ 246135, 2046135, 426135, 4026135, 2406135, 4206135, 264135, 2064135, 624135, 6024135, 2604135, 6204135,
+ 462135, 4062135, 642135, 6042135, 4602135, 6402135, 2460135, 4260135, 2640135, 6240135, 4620135, 6420135,
+ 1246035, 2146035, 1426035, 4126035, 2416035, 4216035, 1264035, 2164035, 1624035, 6124035, 2614035, 6214035,
+ 1462035, 4162035, 1642035, 6142035, 4612035, 6412035, 2461035, 4261035, 2641035, 6241035, 4621035, 6421035,
+ 134625, 1034625, 314625, 3014625, 1304625, 3104625, 143625, 1043625, 413625, 4013625, 1403625, 4103625,
+ 341625, 3041625, 431625, 4031625, 3401625, 4301625, 1340625, 3140625, 1430625, 4130625, 3410625, 4310625,
+ 136425, 1036425, 316425, 3016425, 1306425, 3106425, 163425, 1063425, 613425, 6013425, 1603425, 6103425,
+ 361425, 3061425, 631425, 6031425, 3601425, 6301425, 1360425, 3160425, 1630425, 6130425, 3610425, 6310425,
+ 146325, 1046325, 416325, 4016325, 1406325, 4106325, 164325, 1064325, 614325, 6014325, 1604325, 6104325,
+ 461325, 4061325, 641325, 6041325, 4601325, 6401325, 1460325, 4160325, 1640325, 6140325, 4610325, 6410325,
+ 346125, 3046125, 436125, 4036125, 3406125, 4306125, 364125, 3064125, 634125, 6034125, 3604125, 6304125,
+ 463125, 4063125, 643125, 6043125, 4603125, 6403125, 3460125, 4360125, 3640125, 6340125, 4630125, 6430125,
+ 1346025, 3146025, 1436025, 4136025, 3416025, 4316025, 1364025, 3164025, 1634025, 6134025, 3614025, 6314025,
+ 1463025, 4163025, 1643025, 6143025, 4613025, 6413025, 3461025, 4361025, 3641025, 6341025, 4631025, 6431025,
+ 234615, 2034615, 324615, 3024615, 2304615, 3204615, 243615, 2043615, 423615, 4023615, 2403615, 4203615,
+ 342615, 3042615, 432615, 4032615, 3402615, 4302615, 2340615, 3240615, 2430615, 4230615, 3420615, 4320615,
+ 236415, 2036415, 326415, 3026415, 2306415, 3206415, 263415, 2063415, 623415, 6023415, 2603415, 6203415,
+ 362415, 3062415, 632415, 6032415, 3602415, 6302415, 2360415, 3260415, 2630415, 6230415, 3620415, 6320415,
+ 246315, 2046315, 426315, 4026315, 2406315, 4206315, 264315, 2064315, 624315, 6024315, 2604315, 6204315,
+ 462315, 4062315, 642315, 6042315, 4602315, 6402315, 2460315, 4260315, 2640315, 6240315, 4620315, 6420315,
+ 346215, 3046215, 436215, 4036215, 3406215, 4306215, 364215, 3064215, 634215, 6034215, 3604215, 6304215,
+ 463215, 4063215, 643215, 6043215, 4603215, 6403215, 3460215, 4360215, 3640215, 6340215, 4630215, 6430215,
+ 2346015, 3246015, 2436015, 4236015, 3426015, 4326015, 2364015, 3264015, 2634015, 6234015, 3624015, 6324015,
+ 2463015, 4263015, 2643015, 6243015, 4623015, 6423015, 3462015, 4362015, 3642015, 6342015, 4632015, 6432015,
+ 1234605, 2134605, 1324605, 3124605, 2314605, 3214605, 1243605, 2143605, 1423605, 4123605, 2413605, 4213605,
+ 1342605, 3142605, 1432605, 4132605, 3412605, 4312605, 2341605, 3241605, 2431605, 4231605, 3421605, 4321605,
+ 1236405, 2136405, 1326405, 3126405, 2316405, 3216405, 1263405, 2163405, 1623405, 6123405, 2613405, 6213405,
+ 1362405, 3162405, 1632405, 6132405, 3612405, 6312405, 2361405, 3261405, 2631405, 6231405, 3621405, 6321405,
+ 1246305, 2146305, 1426305, 4126305, 2416305, 4216305, 1264305, 2164305, 1624305, 6124305, 2614305, 6214305,
+ 1462305, 4162305, 1642305, 6142305, 4612305, 6412305, 2461305, 4261305, 2641305, 6241305, 4621305, 6421305,
+ 1346205, 3146205, 1436205, 4136205, 3416205, 4316205, 1364205, 3164205, 1634205, 6134205, 3614205, 6314205,
+ 1463205, 4163205, 1643205, 6143205, 4613205, 6413205, 3461205, 4361205, 3641205, 6341205, 4631205, 6431205,
+ 2346105, 3246105, 2436105, 4236105, 3426105, 4326105, 2364105, 3264105, 2634105, 6234105, 3624105, 6324105,
+ 2463105, 4263105, 2643105, 6243105, 4623105, 6423105, 3462105, 4362105, 3642105, 6342105, 4632105, 6432105,
+ 123564, 1023564, 213564, 2013564, 1203564, 2103564, 132564, 1032564, 312564, 3012564, 1302564, 3102564,
+ 231564, 2031564, 321564, 3021564, 2301564, 3201564, 1230564, 2130564, 1320564, 3120564, 2310564, 3210564,
+ 125364, 1025364, 215364, 2015364, 1205364, 2105364, 152364, 1052364, 512364, 5012364, 1502364, 5102364,
+ 251364, 2051364, 521364, 5021364, 2501364, 5201364, 1250364, 2150364, 1520364, 5120364, 2510364, 5210364,
+ 135264, 1035264, 315264, 3015264, 1305264, 3105264, 153264, 1053264, 513264, 5013264, 1503264, 5103264,
+ 351264, 3051264, 531264, 5031264, 3501264, 5301264, 1350264, 3150264, 1530264, 5130264, 3510264, 5310264,
+ 235164, 2035164, 325164, 3025164, 2305164, 3205164, 253164, 2053164, 523164, 5023164, 2503164, 5203164,
+ 352164, 3052164, 532164, 5032164, 3502164, 5302164, 2350164, 3250164, 2530164, 5230164, 3520164, 5320164,
+ 1235064, 2135064, 1325064, 3125064, 2315064, 3215064, 1253064, 2153064, 1523064, 5123064, 2513064, 5213064,
+ 1352064, 3152064, 1532064, 5132064, 3512064, 5312064, 2351064, 3251064, 2531064, 5231064, 3521064, 5321064,
+ 123654, 1023654, 213654, 2013654, 1203654, 2103654, 132654, 1032654, 312654, 3012654, 1302654, 3102654,
+ 231654, 2031654, 321654, 3021654, 2301654, 3201654, 1230654, 2130654, 1320654, 3120654, 2310654, 3210654,
+ 126354, 1026354, 216354, 2016354, 1206354, 2106354, 162354, 1062354, 612354, 6012354, 1602354, 6102354,
+ 261354, 2061354, 621354, 6021354, 2601354, 6201354, 1260354, 2160354, 1620354, 6120354, 2610354, 6210354,
+ 136254, 1036254, 316254, 3016254, 1306254, 3106254, 163254, 1063254, 613254, 6013254, 1603254, 6103254,
+ 361254, 3061254, 631254, 6031254, 3601254, 6301254, 1360254, 3160254, 1630254, 6130254, 3610254, 6310254,
+ 236154, 2036154, 326154, 3026154, 2306154, 3206154, 263154, 2063154, 623154, 6023154, 2603154, 6203154,
+ 362154, 3062154, 632154, 6032154, 3602154, 6302154, 2360154, 3260154, 2630154, 6230154, 3620154, 6320154,
+ 1236054, 2136054, 1326054, 3126054, 2316054, 3216054, 1263054, 2163054, 1623054, 6123054, 2613054, 6213054,
+ 1362054, 3162054, 1632054, 6132054, 3612054, 6312054, 2361054, 3261054, 2631054, 6231054, 3621054, 6321054,
+ 125634, 1025634, 215634, 2015634, 1205634, 2105634, 152634, 1052634, 512634, 5012634, 1502634, 5102634,
+ 251634, 2051634, 521634, 5021634, 2501634, 5201634, 1250634, 2150634, 1520634, 5120634, 2510634, 5210634,
+ 126534, 1026534, 216534, 2016534, 1206534, 2106534, 162534, 1062534, 612534, 6012534, 1602534, 6102534,
+ 261534, 2061534, 621534, 6021534, 2601534, 6201534, 1260534, 2160534, 1620534, 6120534, 2610534, 6210534,
+ 156234, 1056234, 516234, 5016234, 1506234, 5106234, 165234, 1065234, 615234, 6015234, 1605234, 6105234,
+ 561234, 5061234, 651234, 6051234, 5601234, 6501234, 1560234, 5160234, 1650234, 6150234, 5610234, 6510234,
+ 256134, 2056134, 526134, 5026134, 2506134, 5206134, 265134, 2065134, 625134, 6025134, 2605134, 6205134,
+ 562134, 5062134, 652134, 6052134, 5602134, 6502134, 2560134, 5260134, 2650134, 6250134, 5620134, 6520134,
+ 1256034, 2156034, 1526034, 5126034, 2516034, 5216034, 1265034, 2165034, 1625034, 6125034, 2615034, 6215034,
+ 1562034, 5162034, 1652034, 6152034, 5612034, 6512034, 2561034, 5261034, 2651034, 6251034, 5621034, 6521034,
+ 135624, 1035624, 315624, 3015624, 1305624, 3105624, 153624, 1053624, 513624, 5013624, 1503624, 5103624,
+ 351624, 3051624, 531624, 5031624, 3501624, 5301624, 1350624, 3150624, 1530624, 5130624, 3510624, 5310624,
+ 136524, 1036524, 316524, 3016524, 1306524, 3106524, 163524, 1063524, 613524, 6013524, 1603524, 6103524,
+ 361524, 3061524, 631524, 6031524, 3601524, 6301524, 1360524, 3160524, 1630524, 6130524, 3610524, 6310524,
+ 156324, 1056324, 516324, 5016324, 1506324, 5106324, 165324, 1065324, 615324, 6015324, 1605324, 6105324,
+ 561324, 5061324, 651324, 6051324, 5601324, 6501324, 1560324, 5160324, 1650324, 6150324, 5610324, 6510324,
+ 356124, 3056124, 536124, 5036124, 3506124, 5306124, 365124, 3065124, 635124, 6035124, 3605124, 6305124,
+ 563124, 5063124, 653124, 6053124, 5603124, 6503124, 3560124, 5360124, 3650124, 6350124, 5630124, 6530124,
+ 1356024, 3156024, 1536024, 5136024, 3516024, 5316024, 1365024, 3165024, 1635024, 6135024, 3615024, 6315024,
+ 1563024, 5163024, 1653024, 6153024, 5613024, 6513024, 3561024, 5361024, 3651024, 6351024, 5631024, 6531024,
+ 235614, 2035614, 325614, 3025614, 2305614, 3205614, 253614, 2053614, 523614, 5023614, 2503614, 5203614,
+ 352614, 3052614, 532614, 5032614, 3502614, 5302614, 2350614, 3250614, 2530614, 5230614, 3520614, 5320614,
+ 236514, 2036514, 326514, 3026514, 2306514, 3206514, 263514, 2063514, 623514, 6023514, 2603514, 6203514,
+ 362514, 3062514, 632514, 6032514, 3602514, 6302514, 2360514, 3260514, 2630514, 6230514, 3620514, 6320514,
+ 256314, 2056314, 526314, 5026314, 2506314, 5206314, 265314, 2065314, 625314, 6025314, 2605314, 6205314,
+ 562314, 5062314, 652314, 6052314, 5602314, 6502314, 2560314, 5260314, 2650314, 6250314, 5620314, 6520314,
+ 356214, 3056214, 536214, 5036214, 3506214, 5306214, 365214, 3065214, 635214, 6035214, 3605214, 6305214,
+ 563214, 5063214, 653214, 6053214, 5603214, 6503214, 3560214, 5360214, 3650214, 6350214, 5630214, 6530214,
+ 2356014, 3256014, 2536014, 5236014, 3526014, 5326014, 2365014, 3265014, 2635014, 6235014, 3625014, 6325014,
+ 2563014, 5263014, 2653014, 6253014, 5623014, 6523014, 3562014, 5362014, 3652014, 6352014, 5632014, 6532014,
+ 1235604, 2135604, 1325604, 3125604, 2315604, 3215604, 1253604, 2153604, 1523604, 5123604, 2513604, 5213604,
+ 1352604, 3152604, 1532604, 5132604, 3512604, 5312604, 2351604, 3251604, 2531604, 5231604, 3521604, 5321604,
+ 1236504, 2136504, 1326504, 3126504, 2316504, 3216504, 1263504, 2163504, 1623504, 6123504, 2613504, 6213504,
+ 1362504, 3162504, 1632504, 6132504, 3612504, 6312504, 2361504, 3261504, 2631504, 6231504, 3621504, 6321504,
+ 1256304, 2156304, 1526304, 5126304, 2516304, 5216304, 1265304, 2165304, 1625304, 6125304, 2615304, 6215304,
+ 1562304, 5162304, 1652304, 6152304, 5612304, 6512304, 2561304, 5261304, 2651304, 6251304, 5621304, 6521304,
+ 1356204, 3156204, 1536204, 5136204, 3516204, 5316204, 1365204, 3165204, 1635204, 6135204, 3615204, 6315204,
+ 1563204, 5163204, 1653204, 6153204, 5613204, 6513204, 3561204, 5361204, 3651204, 6351204, 5631204, 6531204,
+ 2356104, 3256104, 2536104, 5236104, 3526104, 5326104, 2365104, 3265104, 2635104, 6235104, 3625104, 6325104,
+ 2563104, 5263104, 2653104, 6253104, 5623104, 6523104, 3562104, 5362104, 3652104, 6352104, 5632104, 6532104,
+ 124563, 1024563, 214563, 2014563, 1204563, 2104563, 142563, 1042563, 412563, 4012563, 1402563, 4102563,
+ 241563, 2041563, 421563, 4021563, 2401563, 4201563, 1240563, 2140563, 1420563, 4120563, 2410563, 4210563,
+ 125463, 1025463, 215463, 2015463, 1205463, 2105463, 152463, 1052463, 512463, 5012463, 1502463, 5102463,
+ 251463, 2051463, 521463, 5021463, 2501463, 5201463, 1250463, 2150463, 1520463, 5120463, 2510463, 5210463,
+ 145263, 1045263, 415263, 4015263, 1405263, 4105263, 154263, 1054263, 514263, 5014263, 1504263, 5104263,
+ 451263, 4051263, 541263, 5041263, 4501263, 5401263, 1450263, 4150263, 1540263, 5140263, 4510263, 5410263,
+ 245163, 2045163, 425163, 4025163, 2405163, 4205163, 254163, 2054163, 524163, 5024163, 2504163, 5204163,
+ 452163, 4052163, 542163, 5042163, 4502163, 5402163, 2450163, 4250163, 2540163, 5240163, 4520163, 5420163,
+ 1245063, 2145063, 1425063, 4125063, 2415063, 4215063, 1254063, 2154063, 1524063, 5124063, 2514063, 5214063,
+ 1452063, 4152063, 1542063, 5142063, 4512063, 5412063, 2451063, 4251063, 2541063, 5241063, 4521063, 5421063,
+ 124653, 1024653, 214653, 2014653, 1204653, 2104653, 142653, 1042653, 412653, 4012653, 1402653, 4102653,
+ 241653, 2041653, 421653, 4021653, 2401653, 4201653, 1240653, 2140653, 1420653, 4120653, 2410653, 4210653,
+ 126453, 1026453, 216453, 2016453, 1206453, 2106453, 162453, 1062453, 612453, 6012453, 1602453, 6102453,
+ 261453, 2061453, 621453, 6021453, 2601453, 6201453, 1260453, 2160453, 1620453, 6120453, 2610453, 6210453,
+ 146253, 1046253, 416253, 4016253, 1406253, 4106253, 164253, 1064253, 614253, 6014253, 1604253, 6104253,
+ 461253, 4061253, 641253, 6041253, 4601253, 6401253, 1460253, 4160253, 1640253, 6140253, 4610253, 6410253,
+ 246153, 2046153, 426153, 4026153, 2406153, 4206153, 264153, 2064153, 624153, 6024153, 2604153, 6204153,
+ 462153, 4062153, 642153, 6042153, 4602153, 6402153, 2460153, 4260153, 2640153, 6240153, 4620153, 6420153,
+ 1246053, 2146053, 1426053, 4126053, 2416053, 4216053, 1264053, 2164053, 1624053, 6124053, 2614053, 6214053,
+ 1462053, 4162053, 1642053, 6142053, 4612053, 6412053, 2461053, 4261053, 2641053, 6241053, 4621053, 6421053,
+ 125643, 1025643, 215643, 2015643, 1205643, 2105643, 152643, 1052643, 512643, 5012643, 1502643, 5102643,
+ 251643, 2051643, 521643, 5021643, 2501643, 5201643, 1250643, 2150643, 1520643, 5120643, 2510643, 5210643,
+ 126543, 1026543, 216543, 2016543, 1206543, 2106543, 162543, 1062543, 612543, 6012543, 1602543, 6102543,
+ 261543, 2061543, 621543, 6021543, 2601543, 6201543, 1260543, 2160543, 1620543, 6120543, 2610543, 6210543,
+ 156243, 1056243, 516243, 5016243, 1506243, 5106243, 165243, 1065243, 615243, 6015243, 1605243, 6105243,
+ 561243, 5061243, 651243, 6051243, 5601243, 6501243, 1560243, 5160243, 1650243, 6150243, 5610243, 6510243,
+ 256143, 2056143, 526143, 5026143, 2506143, 5206143, 265143, 2065143, 625143, 6025143, 2605143, 6205143,
+ 562143, 5062143, 652143, 6052143, 5602143, 6502143, 2560143, 5260143, 2650143, 6250143, 5620143, 6520143,
+ 1256043, 2156043, 1526043, 5126043, 2516043, 5216043, 1265043, 2165043, 1625043, 6125043, 2615043, 6215043,
+ 1562043, 5162043, 1652043, 6152043, 5612043, 6512043, 2561043, 5261043, 2651043, 6251043, 5621043, 6521043,
+ 145623, 1045623, 415623, 4015623, 1405623, 4105623, 154623, 1054623, 514623, 5014623, 1504623, 5104623,
+ 451623, 4051623, 541623, 5041623, 4501623, 5401623, 1450623, 4150623, 1540623, 5140623, 4510623, 5410623,
+ 146523, 1046523, 416523, 4016523, 1406523, 4106523, 164523, 1064523, 614523, 6014523, 1604523, 6104523,
+ 461523, 4061523, 641523, 6041523, 4601523, 6401523, 1460523, 4160523, 1640523, 6140523, 4610523, 6410523,
+ 156423, 1056423, 516423, 5016423, 1506423, 5106423, 165423, 1065423, 615423, 6015423, 1605423, 6105423,
+ 561423, 5061423, 651423, 6051423, 5601423, 6501423, 1560423, 5160423, 1650423, 6150423, 5610423, 6510423,
+ 456123, 4056123, 546123, 5046123, 4506123, 5406123, 465123, 4065123, 645123, 6045123, 4605123, 6405123,
+ 564123, 5064123, 654123, 6054123, 5604123, 6504123, 4560123, 5460123, 4650123, 6450123, 5640123, 6540123,
+ 1456023, 4156023, 1546023, 5146023, 4516023, 5416023, 1465023, 4165023, 1645023, 6145023, 4615023, 6415023,
+ 1564023, 5164023, 1654023, 6154023, 5614023, 6514023, 4561023, 5461023, 4651023, 6451023, 5641023, 6541023,
+ 245613, 2045613, 425613, 4025613, 2405613, 4205613, 254613, 2054613, 524613, 5024613, 2504613, 5204613,
+ 452613, 4052613, 542613, 5042613, 4502613, 5402613, 2450613, 4250613, 2540613, 5240613, 4520613, 5420613,
+ 246513, 2046513, 426513, 4026513, 2406513, 4206513, 264513, 2064513, 624513, 6024513, 2604513, 6204513,
+ 462513, 4062513, 642513, 6042513, 4602513, 6402513, 2460513, 4260513, 2640513, 6240513, 4620513, 6420513,
+ 256413, 2056413, 526413, 5026413, 2506413, 5206413, 265413, 2065413, 625413, 6025413, 2605413, 6205413,
+ 562413, 5062413, 652413, 6052413, 5602413, 6502413, 2560413, 5260413, 2650413, 6250413, 5620413, 6520413,
+ 456213, 4056213, 546213, 5046213, 4506213, 5406213, 465213, 4065213, 645213, 6045213, 4605213, 6405213,
+ 564213, 5064213, 654213, 6054213, 5604213, 6504213, 4560213, 5460213, 4650213, 6450213, 5640213, 6540213,
+ 2456013, 4256013, 2546013, 5246013, 4526013, 5426013, 2465013, 4265013, 2645013, 6245013, 4625013, 6425013,
+ 2564013, 5264013, 2654013, 6254013, 5624013, 6524013, 4562013, 5462013, 4652013, 6452013, 5642013, 6542013,
+ 1245603, 2145603, 1425603, 4125603, 2415603, 4215603, 1254603, 2154603, 1524603, 5124603, 2514603, 5214603,
+ 1452603, 4152603, 1542603, 5142603, 4512603, 5412603, 2451603, 4251603, 2541603, 5241603, 4521603, 5421603,
+ 1246503, 2146503, 1426503, 4126503, 2416503, 4216503, 1264503, 2164503, 1624503, 6124503, 2614503, 6214503,
+ 1462503, 4162503, 1642503, 6142503, 4612503, 6412503, 2461503, 4261503, 2641503, 6241503, 4621503, 6421503,
+ 1256403, 2156403, 1526403, 5126403, 2516403, 5216403, 1265403, 2165403, 1625403, 6125403, 2615403, 6215403,
+ 1562403, 5162403, 1652403, 6152403, 5612403, 6512403, 2561403, 5261403, 2651403, 6251403, 5621403, 6521403,
+ 1456203, 4156203, 1546203, 5146203, 4516203, 5416203, 1465203, 4165203, 1645203, 6145203, 4615203, 6415203,
+ 1564203, 5164203, 1654203, 6154203, 5614203, 6514203, 4561203, 5461203, 4651203, 6451203, 5641203, 6541203,
+ 2456103, 4256103, 2546103, 5246103, 4526103, 5426103, 2465103, 4265103, 2645103, 6245103, 4625103, 6425103,
+ 2564103, 5264103, 2654103, 6254103, 5624103, 6524103, 4562103, 5462103, 4652103, 6452103, 5642103, 6542103,
+ 134562, 1034562, 314562, 3014562, 1304562, 3104562, 143562, 1043562, 413562, 4013562, 1403562, 4103562,
+ 341562, 3041562, 431562, 4031562, 3401562, 4301562, 1340562, 3140562, 1430562, 4130562, 3410562, 4310562,
+ 135462, 1035462, 315462, 3015462, 1305462, 3105462, 153462, 1053462, 513462, 5013462, 1503462, 5103462,
+ 351462, 3051462, 531462, 5031462, 3501462, 5301462, 1350462, 3150462, 1530462, 5130462, 3510462, 5310462,
+ 145362, 1045362, 415362, 4015362, 1405362, 4105362, 154362, 1054362, 514362, 5014362, 1504362, 5104362,
+ 451362, 4051362, 541362, 5041362, 4501362, 5401362, 1450362, 4150362, 1540362, 5140362, 4510362, 5410362,
+ 345162, 3045162, 435162, 4035162, 3405162, 4305162, 354162, 3054162, 534162, 5034162, 3504162, 5304162,
+ 453162, 4053162, 543162, 5043162, 4503162, 5403162, 3450162, 4350162, 3540162, 5340162, 4530162, 5430162,
+ 1345062, 3145062, 1435062, 4135062, 3415062, 4315062, 1354062, 3154062, 1534062, 5134062, 3514062, 5314062,
+ 1453062, 4153062, 1543062, 5143062, 4513062, 5413062, 3451062, 4351062, 3541062, 5341062, 4531062, 5431062,
+ 134652, 1034652, 314652, 3014652, 1304652, 3104652, 143652, 1043652, 413652, 4013652, 1403652, 4103652,
+ 341652, 3041652, 431652, 4031652, 3401652, 4301652, 1340652, 3140652, 1430652, 4130652, 3410652, 4310652,
+ 136452, 1036452, 316452, 3016452, 1306452, 3106452, 163452, 1063452, 613452, 6013452, 1603452, 6103452,
+ 361452, 3061452, 631452, 6031452, 3601452, 6301452, 1360452, 3160452, 1630452, 6130452, 3610452, 6310452,
+ 146352, 1046352, 416352, 4016352, 1406352, 4106352, 164352, 1064352, 614352, 6014352, 1604352, 6104352,
+ 461352, 4061352, 641352, 6041352, 4601352, 6401352, 1460352, 4160352, 1640352, 6140352, 4610352, 6410352,
+ 346152, 3046152, 436152, 4036152, 3406152, 4306152, 364152, 3064152, 634152, 6034152, 3604152, 6304152,
+ 463152, 4063152, 643152, 6043152, 4603152, 6403152, 3460152, 4360152, 3640152, 6340152, 4630152, 6430152,
+ 1346052, 3146052, 1436052, 4136052, 3416052, 4316052, 1364052, 3164052, 1634052, 6134052, 3614052, 6314052,
+ 1463052, 4163052, 1643052, 6143052, 4613052, 6413052, 3461052, 4361052, 3641052, 6341052, 4631052, 6431052,
+ 135642, 1035642, 315642, 3015642, 1305642, 3105642, 153642, 1053642, 513642, 5013642, 1503642, 5103642,
+ 351642, 3051642, 531642, 5031642, 3501642, 5301642, 1350642, 3150642, 1530642, 5130642, 3510642, 5310642,
+ 136542, 1036542, 316542, 3016542, 1306542, 3106542, 163542, 1063542, 613542, 6013542, 1603542, 6103542,
+ 361542, 3061542, 631542, 6031542, 3601542, 6301542, 1360542, 3160542, 1630542, 6130542, 3610542, 6310542,
+ 156342, 1056342, 516342, 5016342, 1506342, 5106342, 165342, 1065342, 615342, 6015342, 1605342, 6105342,
+ 561342, 5061342, 651342, 6051342, 5601342, 6501342, 1560342, 5160342, 1650342, 6150342, 5610342, 6510342,
+ 356142, 3056142, 536142, 5036142, 3506142, 5306142, 365142, 3065142, 635142, 6035142, 3605142, 6305142,
+ 563142, 5063142, 653142, 6053142, 5603142, 6503142, 3560142, 5360142, 3650142, 6350142, 5630142, 6530142,
+ 1356042, 3156042, 1536042, 5136042, 3516042, 5316042, 1365042, 3165042, 1635042, 6135042, 3615042, 6315042,
+ 1563042, 5163042, 1653042, 6153042, 5613042, 6513042, 3561042, 5361042, 3651042, 6351042, 5631042, 6531042,
+ 145632, 1045632, 415632, 4015632, 1405632, 4105632, 154632, 1054632, 514632, 5014632, 1504632, 5104632,
+ 451632, 4051632, 541632, 5041632, 4501632, 5401632, 1450632, 4150632, 1540632, 5140632, 4510632, 5410632,
+ 146532, 1046532, 416532, 4016532, 1406532, 4106532, 164532, 1064532, 614532, 6014532, 1604532, 6104532,
+ 461532, 4061532, 641532, 6041532, 4601532, 6401532, 1460532, 4160532, 1640532, 6140532, 4610532, 6410532,
+ 156432, 1056432, 516432, 5016432, 1506432, 5106432, 165432, 1065432, 615432, 6015432, 1605432, 6105432,
+ 561432, 5061432, 651432, 6051432, 5601432, 6501432, 1560432, 5160432, 1650432, 6150432, 5610432, 6510432,
+ 456132, 4056132, 546132, 5046132, 4506132, 5406132, 465132, 4065132, 645132, 6045132, 4605132, 6405132,
+ 564132, 5064132, 654132, 6054132, 5604132, 6504132, 4560132, 5460132, 4650132, 6450132, 5640132, 6540132,
+ 1456032, 4156032, 1546032, 5146032, 4516032, 5416032, 1465032, 4165032, 1645032, 6145032, 4615032, 6415032,
+ 1564032, 5164032, 1654032, 6154032, 5614032, 6514032, 4561032, 5461032, 4651032, 6451032, 5641032, 6541032,
+ 345612, 3045612, 435612, 4035612, 3405612, 4305612, 354612, 3054612, 534612, 5034612, 3504612, 5304612,
+ 453612, 4053612, 543612, 5043612, 4503612, 5403612, 3450612, 4350612, 3540612, 5340612, 4530612, 5430612,
+ 346512, 3046512, 436512, 4036512, 3406512, 4306512, 364512, 3064512, 634512, 6034512, 3604512, 6304512,
+ 463512, 4063512, 643512, 6043512, 4603512, 6403512, 3460512, 4360512, 3640512, 6340512, 4630512, 6430512,
+ 356412, 3056412, 536412, 5036412, 3506412, 5306412, 365412, 3065412, 635412, 6035412, 3605412, 6305412,
+ 563412, 5063412, 653412, 6053412, 5603412, 6503412, 3560412, 5360412, 3650412, 6350412, 5630412, 6530412,
+ 456312, 4056312, 546312, 5046312, 4506312, 5406312, 465312, 4065312, 645312, 6045312, 4605312, 6405312,
+ 564312, 5064312, 654312, 6054312, 5604312, 6504312, 4560312, 5460312, 4650312, 6450312, 5640312, 6540312,
+ 3456012, 4356012, 3546012, 5346012, 4536012, 5436012, 3465012, 4365012, 3645012, 6345012, 4635012, 6435012,
+ 3564012, 5364012, 3654012, 6354012, 5634012, 6534012, 4563012, 5463012, 4653012, 6453012, 5643012, 6543012,
+ 1345602, 3145602, 1435602, 4135602, 3415602, 4315602, 1354602, 3154602, 1534602, 5134602, 3514602, 5314602,
+ 1453602, 4153602, 1543602, 5143602, 4513602, 5413602, 3451602, 4351602, 3541602, 5341602, 4531602, 5431602,
+ 1346502, 3146502, 1436502, 4136502, 3416502, 4316502, 1364502, 3164502, 1634502, 6134502, 3614502, 6314502,
+ 1463502, 4163502, 1643502, 6143502, 4613502, 6413502, 3461502, 4361502, 3641502, 6341502, 4631502, 6431502,
+ 1356402, 3156402, 1536402, 5136402, 3516402, 5316402, 1365402, 3165402, 1635402, 6135402, 3615402, 6315402,
+ 1563402, 5163402, 1653402, 6153402, 5613402, 6513402, 3561402, 5361402, 3651402, 6351402, 5631402, 6531402,
+ 1456302, 4156302, 1546302, 5146302, 4516302, 5416302, 1465302, 4165302, 1645302, 6145302, 4615302, 6415302,
+ 1564302, 5164302, 1654302, 6154302, 5614302, 6514302, 4561302, 5461302, 4651302, 6451302, 5641302, 6541302,
+ 3456102, 4356102, 3546102, 5346102, 4536102, 5436102, 3465102, 4365102, 3645102, 6345102, 4635102, 6435102,
+ 3564102, 5364102, 3654102, 6354102, 5634102, 6534102, 4563102, 5463102, 4653102, 6453102, 5643102, 6543102,
+ 234561, 2034561, 324561, 3024561, 2304561, 3204561, 243561, 2043561, 423561, 4023561, 2403561, 4203561,
+ 342561, 3042561, 432561, 4032561, 3402561, 4302561, 2340561, 3240561, 2430561, 4230561, 3420561, 4320561,
+ 235461, 2035461, 325461, 3025461, 2305461, 3205461, 253461, 2053461, 523461, 5023461, 2503461, 5203461,
+ 352461, 3052461, 532461, 5032461, 3502461, 5302461, 2350461, 3250461, 2530461, 5230461, 3520461, 5320461,
+ 245361, 2045361, 425361, 4025361, 2405361, 4205361, 254361, 2054361, 524361, 5024361, 2504361, 5204361,
+ 452361, 4052361, 542361, 5042361, 4502361, 5402361, 2450361, 4250361, 2540361, 5240361, 4520361, 5420361,
+ 345261, 3045261, 435261, 4035261, 3405261, 4305261, 354261, 3054261, 534261, 5034261, 3504261, 5304261,
+ 453261, 4053261, 543261, 5043261, 4503261, 5403261, 3450261, 4350261, 3540261, 5340261, 4530261, 5430261,
+ 2345061, 3245061, 2435061, 4235061, 3425061, 4325061, 2354061, 3254061, 2534061, 5234061, 3524061, 5324061,
+ 2453061, 4253061, 2543061, 5243061, 4523061, 5423061, 3452061, 4352061, 3542061, 5342061, 4532061, 5432061,
+ 234651, 2034651, 324651, 3024651, 2304651, 3204651, 243651, 2043651, 423651, 4023651, 2403651, 4203651,
+ 342651, 3042651, 432651, 4032651, 3402651, 4302651, 2340651, 3240651, 2430651, 4230651, 3420651, 4320651,
+ 236451, 2036451, 326451, 3026451, 2306451, 3206451, 263451, 2063451, 623451, 6023451, 2603451, 6203451,
+ 362451, 3062451, 632451, 6032451, 3602451, 6302451, 2360451, 3260451, 2630451, 6230451, 3620451, 6320451,
+ 246351, 2046351, 426351, 4026351, 2406351, 4206351, 264351, 2064351, 624351, 6024351, 2604351, 6204351,
+ 462351, 4062351, 642351, 6042351, 4602351, 6402351, 2460351, 4260351, 2640351, 6240351, 4620351, 6420351,
+ 346251, 3046251, 436251, 4036251, 3406251, 4306251, 364251, 3064251, 634251, 6034251, 3604251, 6304251,
+ 463251, 4063251, 643251, 6043251, 4603251, 6403251, 3460251, 4360251, 3640251, 6340251, 4630251, 6430251,
+ 2346051, 3246051, 2436051, 4236051, 3426051, 4326051, 2364051, 3264051, 2634051, 6234051, 3624051, 6324051,
+ 2463051, 4263051, 2643051, 6243051, 4623051, 6423051, 3462051, 4362051, 3642051, 6342051, 4632051, 6432051,
+ 235641, 2035641, 325641, 3025641, 2305641, 3205641, 253641, 2053641, 523641, 5023641, 2503641, 5203641,
+ 352641, 3052641, 532641, 5032641, 3502641, 5302641, 2350641, 3250641, 2530641, 5230641, 3520641, 5320641,
+ 236541, 2036541, 326541, 3026541, 2306541, 3206541, 263541, 2063541, 623541, 6023541, 2603541, 6203541,
+ 362541, 3062541, 632541, 6032541, 3602541, 6302541, 2360541, 3260541, 2630541, 6230541, 3620541, 6320541,
+ 256341, 2056341, 526341, 5026341, 2506341, 5206341, 265341, 2065341, 625341, 6025341, 2605341, 6205341,
+ 562341, 5062341, 652341, 6052341, 5602341, 6502341, 2560341, 5260341, 2650341, 6250341, 5620341, 6520341,
+ 356241, 3056241, 536241, 5036241, 3506241, 5306241, 365241, 3065241, 635241, 6035241, 3605241, 6305241,
+ 563241, 5063241, 653241, 6053241, 5603241, 6503241, 3560241, 5360241, 3650241, 6350241, 5630241, 6530241,
+ 2356041, 3256041, 2536041, 5236041, 3526041, 5326041, 2365041, 3265041, 2635041, 6235041, 3625041, 6325041,
+ 2563041, 5263041, 2653041, 6253041, 5623041, 6523041, 3562041, 5362041, 3652041, 6352041, 5632041, 6532041,
+ 245631, 2045631, 425631, 4025631, 2405631, 4205631, 254631, 2054631, 524631, 5024631, 2504631, 5204631,
+ 452631, 4052631, 542631, 5042631, 4502631, 5402631, 2450631, 4250631, 2540631, 5240631, 4520631, 5420631,
+ 246531, 2046531, 426531, 4026531, 2406531, 4206531, 264531, 2064531, 624531, 6024531, 2604531, 6204531,
+ 462531, 4062531, 642531, 6042531, 4602531, 6402531, 2460531, 4260531, 2640531, 6240531, 4620531, 6420531,
+ 256431, 2056431, 526431, 5026431, 2506431, 5206431, 265431, 2065431, 625431, 6025431, 2605431, 6205431,
+ 562431, 5062431, 652431, 6052431, 5602431, 6502431, 2560431, 5260431, 2650431, 6250431, 5620431, 6520431,
+ 456231, 4056231, 546231, 5046231, 4506231, 5406231, 465231, 4065231, 645231, 6045231, 4605231, 6405231,
+ 564231, 5064231, 654231, 6054231, 5604231, 6504231, 4560231, 5460231, 4650231, 6450231, 5640231, 6540231,
+ 2456031, 4256031, 2546031, 5246031, 4526031, 5426031, 2465031, 4265031, 2645031, 6245031, 4625031, 6425031,
+ 2564031, 5264031, 2654031, 6254031, 5624031, 6524031, 4562031, 5462031, 4652031, 6452031, 5642031, 6542031,
+ 345621, 3045621, 435621, 4035621, 3405621, 4305621, 354621, 3054621, 534621, 5034621, 3504621, 5304621,
+ 453621, 4053621, 543621, 5043621, 4503621, 5403621, 3450621, 4350621, 3540621, 5340621, 4530621, 5430621,
+ 346521, 3046521, 436521, 4036521, 3406521, 4306521, 364521, 3064521, 634521, 6034521, 3604521, 6304521,
+ 463521, 4063521, 643521, 6043521, 4603521, 6403521, 3460521, 4360521, 3640521, 6340521, 4630521, 6430521,
+ 356421, 3056421, 536421, 5036421, 3506421, 5306421, 365421, 3065421, 635421, 6035421, 3605421, 6305421,
+ 563421, 5063421, 653421, 6053421, 5603421, 6503421, 3560421, 5360421, 3650421, 6350421, 5630421, 6530421,
+ 456321, 4056321, 546321, 5046321, 4506321, 5406321, 465321, 4065321, 645321, 6045321, 4605321, 6405321,
+ 564321, 5064321, 654321, 6054321, 5604321, 6504321, 4560321, 5460321, 4650321, 6450321, 5640321, 6540321,
+ 3456021, 4356021, 3546021, 5346021, 4536021, 5436021, 3465021, 4365021, 3645021, 6345021, 4635021, 6435021,
+ 3564021, 5364021, 3654021, 6354021, 5634021, 6534021, 4563021, 5463021, 4653021, 6453021, 5643021, 6543021,
+ 2345601, 3245601, 2435601, 4235601, 3425601, 4325601, 2354601, 3254601, 2534601, 5234601, 3524601, 5324601,
+ 2453601, 4253601, 2543601, 5243601, 4523601, 5423601, 3452601, 4352601, 3542601, 5342601, 4532601, 5432601,
+ 2346501, 3246501, 2436501, 4236501, 3426501, 4326501, 2364501, 3264501, 2634501, 6234501, 3624501, 6324501,
+ 2463501, 4263501, 2643501, 6243501, 4623501, 6423501, 3462501, 4362501, 3642501, 6342501, 4632501, 6432501,
+ 2356401, 3256401, 2536401, 5236401, 3526401, 5326401, 2365401, 3265401, 2635401, 6235401, 3625401, 6325401,
+ 2563401, 5263401, 2653401, 6253401, 5623401, 6523401, 3562401, 5362401, 3652401, 6352401, 5632401, 6532401,
+ 2456301, 4256301, 2546301, 5246301, 4526301, 5426301, 2465301, 4265301, 2645301, 6245301, 4625301, 6425301,
+ 2564301, 5264301, 2654301, 6254301, 5624301, 6524301, 4562301, 5462301, 4652301, 6452301, 5642301, 6542301,
+ 3456201, 4356201, 3546201, 5346201, 4536201, 5436201, 3465201, 4365201, 3645201, 6345201, 4635201, 6435201,
+ 3564201, 5364201, 3654201, 6354201, 5634201, 6534201, 4563201, 5463201, 4653201, 6453201, 5643201, 6543201,
+ 1234560, 2134560, 1324560, 3124560, 2314560, 3214560, 1243560, 2143560, 1423560, 4123560, 2413560, 4213560,
+ 1342560, 3142560, 1432560, 4132560, 3412560, 4312560, 2341560, 3241560, 2431560, 4231560, 3421560, 4321560,
+ 1235460, 2135460, 1325460, 3125460, 2315460, 3215460, 1253460, 2153460, 1523460, 5123460, 2513460, 5213460,
+ 1352460, 3152460, 1532460, 5132460, 3512460, 5312460, 2351460, 3251460, 2531460, 5231460, 3521460, 5321460,
+ 1245360, 2145360, 1425360, 4125360, 2415360, 4215360, 1254360, 2154360, 1524360, 5124360, 2514360, 5214360,
+ 1452360, 4152360, 1542360, 5142360, 4512360, 5412360, 2451360, 4251360, 2541360, 5241360, 4521360, 5421360,
+ 1345260, 3145260, 1435260, 4135260, 3415260, 4315260, 1354260, 3154260, 1534260, 5134260, 3514260, 5314260,
+ 1453260, 4153260, 1543260, 5143260, 4513260, 5413260, 3451260, 4351260, 3541260, 5341260, 4531260, 5431260,
+ 2345160, 3245160, 2435160, 4235160, 3425160, 4325160, 2354160, 3254160, 2534160, 5234160, 3524160, 5324160,
+ 2453160, 4253160, 2543160, 5243160, 4523160, 5423160, 3452160, 4352160, 3542160, 5342160, 4532160, 5432160,
+ 1234650, 2134650, 1324650, 3124650, 2314650, 3214650, 1243650, 2143650, 1423650, 4123650, 2413650, 4213650,
+ 1342650, 3142650, 1432650, 4132650, 3412650, 4312650, 2341650, 3241650, 2431650, 4231650, 3421650, 4321650,
+ 1236450, 2136450, 1326450, 3126450, 2316450, 3216450, 1263450, 2163450, 1623450, 6123450, 2613450, 6213450,
+ 1362450, 3162450, 1632450, 6132450, 3612450, 6312450, 2361450, 3261450, 2631450, 6231450, 3621450, 6321450,
+ 1246350, 2146350, 1426350, 4126350, 2416350, 4216350, 1264350, 2164350, 1624350, 6124350, 2614350, 6214350,
+ 1462350, 4162350, 1642350, 6142350, 4612350, 6412350, 2461350, 4261350, 2641350, 6241350, 4621350, 6421350,
+ 1346250, 3146250, 1436250, 4136250, 3416250, 4316250, 1364250, 3164250, 1634250, 6134250, 3614250, 6314250,
+ 1463250, 4163250, 1643250, 6143250, 4613250, 6413250, 3461250, 4361250, 3641250, 6341250, 4631250, 6431250,
+ 2346150, 3246150, 2436150, 4236150, 3426150, 4326150, 2364150, 3264150, 2634150, 6234150, 3624150, 6324150,
+ 2463150, 4263150, 2643150, 6243150, 4623150, 6423150, 3462150, 4362150, 3642150, 6342150, 4632150, 6432150,
+ 1235640, 2135640, 1325640, 3125640, 2315640, 3215640, 1253640, 2153640, 1523640, 5123640, 2513640, 5213640,
+ 1352640, 3152640, 1532640, 5132640, 3512640, 5312640, 2351640, 3251640, 2531640, 5231640, 3521640, 5321640,
+ 1236540, 2136540, 1326540, 3126540, 2316540, 3216540, 1263540, 2163540, 1623540, 6123540, 2613540, 6213540,
+ 1362540, 3162540, 1632540, 6132540, 3612540, 6312540, 2361540, 3261540, 2631540, 6231540, 3621540, 6321540,
+ 1256340, 2156340, 1526340, 5126340, 2516340, 5216340, 1265340, 2165340, 1625340, 6125340, 2615340, 6215340,
+ 1562340, 5162340, 1652340, 6152340, 5612340, 6512340, 2561340, 5261340, 2651340, 6251340, 5621340, 6521340,
+ 1356240, 3156240, 1536240, 5136240, 3516240, 5316240, 1365240, 3165240, 1635240, 6135240, 3615240, 6315240,
+ 1563240, 5163240, 1653240, 6153240, 5613240, 6513240, 3561240, 5361240, 3651240, 6351240, 5631240, 6531240,
+ 2356140, 3256140, 2536140, 5236140, 3526140, 5326140, 2365140, 3265140, 2635140, 6235140, 3625140, 6325140,
+ 2563140, 5263140, 2653140, 6253140, 5623140, 6523140, 3562140, 5362140, 3652140, 6352140, 5632140, 6532140,
+ 1245630, 2145630, 1425630, 4125630, 2415630, 4215630, 1254630, 2154630, 1524630, 5124630, 2514630, 5214630,
+ 1452630, 4152630, 1542630, 5142630, 4512630, 5412630, 2451630, 4251630, 2541630, 5241630, 4521630, 5421630,
+ 1246530, 2146530, 1426530, 4126530, 2416530, 4216530, 1264530, 2164530, 1624530, 6124530, 2614530, 6214530,
+ 1462530, 4162530, 1642530, 6142530, 4612530, 6412530, 2461530, 4261530, 2641530, 6241530, 4621530, 6421530,
+ 1256430, 2156430, 1526430, 5126430, 2516430, 5216430, 1265430, 2165430, 1625430, 6125430, 2615430, 6215430,
+ 1562430, 5162430, 1652430, 6152430, 5612430, 6512430, 2561430, 5261430, 2651430, 6251430, 5621430, 6521430,
+ 1456230, 4156230, 1546230, 5146230, 4516230, 5416230, 1465230, 4165230, 1645230, 6145230, 4615230, 6415230,
+ 1564230, 5164230, 1654230, 6154230, 5614230, 6514230, 4561230, 5461230, 4651230, 6451230, 5641230, 6541230,
+ 2456130, 4256130, 2546130, 5246130, 4526130, 5426130, 2465130, 4265130, 2645130, 6245130, 4625130, 6425130,
+ 2564130, 5264130, 2654130, 6254130, 5624130, 6524130, 4562130, 5462130, 4652130, 6452130, 5642130, 6542130,
+ 1345620, 3145620, 1435620, 4135620, 3415620, 4315620, 1354620, 3154620, 1534620, 5134620, 3514620, 5314620,
+ 1453620, 4153620, 1543620, 5143620, 4513620, 5413620, 3451620, 4351620, 3541620, 5341620, 4531620, 5431620,
+ 1346520, 3146520, 1436520, 4136520, 3416520, 4316520, 1364520, 3164520, 1634520, 6134520, 3614520, 6314520,
+ 1463520, 4163520, 1643520, 6143520, 4613520, 6413520, 3461520, 4361520, 3641520, 6341520, 4631520, 6431520,
+ 1356420, 3156420, 1536420, 5136420, 3516420, 5316420, 1365420, 3165420, 1635420, 6135420, 3615420, 6315420,
+ 1563420, 5163420, 1653420, 6153420, 5613420, 6513420, 3561420, 5361420, 3651420, 6351420, 5631420, 6531420,
+ 1456320, 4156320, 1546320, 5146320, 4516320, 5416320, 1465320, 4165320, 1645320, 6145320, 4615320, 6415320,
+ 1564320, 5164320, 1654320, 6154320, 5614320, 6514320, 4561320, 5461320, 4651320, 6451320, 5641320, 6541320,
+ 3456120, 4356120, 3546120, 5346120, 4536120, 5436120, 3465120, 4365120, 3645120, 6345120, 4635120, 6435120,
+ 3564120, 5364120, 3654120, 6354120, 5634120, 6534120, 4563120, 5463120, 4653120, 6453120, 5643120, 6543120,
+ 2345610, 3245610, 2435610, 4235610, 3425610, 4325610, 2354610, 3254610, 2534610, 5234610, 3524610, 5324610,
+ 2453610, 4253610, 2543610, 5243610, 4523610, 5423610, 3452610, 4352610, 3542610, 5342610, 4532610, 5432610,
+ 2346510, 3246510, 2436510, 4236510, 3426510, 4326510, 2364510, 3264510, 2634510, 6234510, 3624510, 6324510,
+ 2463510, 4263510, 2643510, 6243510, 4623510, 6423510, 3462510, 4362510, 3642510, 6342510, 4632510, 6432510,
+ 2356410, 3256410, 2536410, 5236410, 3526410, 5326410, 2365410, 3265410, 2635410, 6235410, 3625410, 6325410,
+ 2563410, 5263410, 2653410, 6253410, 5623410, 6523410, 3562410, 5362410, 3652410, 6352410, 5632410, 6532410,
+ 2456310, 4256310, 2546310, 5246310, 4526310, 5426310, 2465310, 4265310, 2645310, 6245310, 4625310, 6425310,
+ 2564310, 5264310, 2654310, 6254310, 5624310, 6524310, 4562310, 5462310, 4652310, 6452310, 5642310, 6542310,
+ 3456210, 4356210, 3546210, 5346210, 4536210, 5436210, 3465210, 4365210, 3645210, 6345210, 4635210, 6435210,
+ 3564210, 5364210, 3654210, 6354210, 5634210, 6534210, 4563210, 5463210, 4653210, 6453210, 5643210, 6543210
+ };
+ std::map<uint64_t, int> expected;
+ for (std::size_t i = 0; i < 5040; i++)
+ expected[pre_expected[i]] = 0; // flags are 0, everything is symmetric here
+
+ VERIFY(isDynGroup(group));
+ VERIFY_IS_EQUAL(group.size(), 5040u);
+ VERIFY_IS_EQUAL(group.globalFlags(), 0);
+ group.apply<checkIdx, int>(identity7, 0, found, expected);
+ VERIFY_IS_EQUAL(found.size(), 5040u);
+ }
+}
+
+static void test_tensor_epsilon()
+{
+ SGroup<AntiSymmetry<0,1>, AntiSymmetry<1,2>> sym;
+ Tensor<int, 3> epsilon(3,3,3);
+
+ epsilon.setZero();
+ sym(epsilon, 0, 1, 2) = 1;
+
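+  // for i,j,k in {0,1,2}, -(j-i)*(k-j)*(i-k)/2 is the Levi-Civita symbol:
+  // +1 for even permutations of (0,1,2), -1 for odd ones, and 0 whenever
+  // two indices coincide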
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 3; j++) {
+ for (int k = 0; k < 3; k++) {
+ VERIFY_IS_EQUAL((epsilon(i,j,k)), (- (j - i) * (k - j) * (i - k) / 2) );
+ }
+ }
+ }
+}
+
+static void test_tensor_sym()
+{
+ SGroup<Symmetry<0,1>, Symmetry<2,3>> sym;
+ Tensor<int, 4> t(10,10,10,10);
+
+ t.setZero();
+
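+  // write only canonical entries (i >= j, k >= l); assigning through
+  // sym(...) also stores every entry related by the two symmetries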
+ for (int l = 0; l < 10; l++) {
+ for (int k = l; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = j; i < 10; i++) {
+ sym(t, i, j, k, l) = (i + j) * (k + l);
+ }
+ }
+ }
+ }
+
+ for (int l = 0; l < 10; l++) {
+ for (int k = 0; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ VERIFY_IS_EQUAL((t(i, j, k, l)), ((i + j) * (k + l)));
+ }
+ }
+ }
+ }
+}
+
+static void test_tensor_asym()
+{
+ SGroup<AntiSymmetry<0,1>, AntiSymmetry<2,3>> sym;
+ Tensor<int, 4> t(10,10,10,10);
+
+ t.setZero();
+
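+  // write only strictly canonical entries (i > j, k > l); antisymmetry
+  // forces entries with i == j or k == l to remain zero, as the else
+  // branch below verifies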
+ for (int l = 0; l < 10; l++) {
+ for (int k = l + 1; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = j + 1; i < 10; i++) {
+ sym(t, i, j, k, l) = ((i * j) + (k * l));
+ }
+ }
+ }
+ }
+
+ for (int l = 0; l < 10; l++) {
+ for (int k = 0; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ if (i < j && k < l)
+ VERIFY_IS_EQUAL((t(i, j, k, l)), (((i * j) + (k * l))));
+ else if (i > j && k > l)
+ VERIFY_IS_EQUAL((t(i, j, k, l)), (((i * j) + (k * l))));
+ else if (i < j && k > l)
+ VERIFY_IS_EQUAL((t(i, j, k, l)), (- ((i * j) + (k * l))));
+ else if (i > j && k < l)
+ VERIFY_IS_EQUAL((t(i, j, k, l)), (- ((i * j) + (k * l))));
+ else
+ VERIFY_IS_EQUAL((t(i, j, k, l)), 0);
+ }
+ }
+ }
+ }
+}
+
+static void test_tensor_dynsym()
+{
+ DynamicSGroup sym;
+ sym.addSymmetry(0,1);
+ sym.addSymmetry(2,3);
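+  // runtime-built equivalent of the static
+  // SGroup<Symmetry<0,1>, Symmetry<2,3>> used in test_tensor_sym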
+ Tensor<int, 4> t(10,10,10,10);
+
+ t.setZero();
+
+ for (int l = 0; l < 10; l++) {
+ for (int k = l; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = j; i < 10; i++) {
+ sym(t, i, j, k, l) = (i + j) * (k + l);
+ }
+ }
+ }
+ }
+
+ for (int l = 0; l < 10; l++) {
+ for (int k = 0; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ VERIFY_IS_EQUAL((t(i, j, k, l)), ((i + j) * (k + l)));
+ }
+ }
+ }
+ }
+}
+
+static void test_tensor_randacc()
+{
+ SGroup<Symmetry<0,1>, Symmetry<2,3>> sym;
+ Tensor<int, 4> t(10,10,10,10);
+
+ t.setZero();
+
+  // set elements one million times; with 10^6 random draws over only
+  // 10^4 index combinations, every entry of the tensor is covered with
+  // overwhelming probability
+ for (int n = 0; n < 1000000; n++) {
+ int i = rand() % 10;
+ int j = rand() % 10;
+ int k = rand() % 10;
+ int l = rand() % 10;
+    // canonicalize the indices (i >= j, k >= l) so every entry is
+    // written through the same representative
+ if (i < j)
+ std::swap(i, j);
+ if (k < l)
+ std::swap(k, l);
+ sym(t, i, j, k, l) = (i + j) * (k + l);
+ }
+
+ for (int l = 0; l < 10; l++) {
+ for (int k = 0; k < 10; k++) {
+ for (int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ VERIFY_IS_EQUAL((t(i, j, k, l)), ((i + j) * (k + l)));
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_symmetry()
+{
+ CALL_SUBTEST(test_symgroups_static());
+ CALL_SUBTEST(test_symgroups_dynamic());
+ CALL_SUBTEST(test_symgroups_selection());
+ CALL_SUBTEST(test_tensor_epsilon());
+ CALL_SUBTEST(test_tensor_sym());
+ CALL_SUBTEST(test_tensor_asym());
+ CALL_SUBTEST(test_tensor_dynsym());
+ CALL_SUBTEST(test_tensor_randacc());
+}
+
+/*
+ * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
+ */
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
new file mode 100644
index 000000000..2ef665f30
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -0,0 +1,373 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+
+
+#include "main.h"
+#include <iostream>
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+
+void test_multithread_elementwise()
+{
+ Tensor<float, 3> in1(2,3,7);
+ Tensor<float, 3> in2(2,3,7);
+ Tensor<float, 3> out(2,3,7);
+
+ in1.setRandom();
+ in2.setRandom();
+
+ Eigen::ThreadPool tp(internal::random<int>(3, 11));
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
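+  // the pool size and the core count reported to the device are drawn
+  // independently, so the device may claim more or fewer cores than the
+  // pool actually has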
+ out.device(thread_pool_device) = in1 + in2 * 3.14f;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
+ }
+ }
+ }
+}
+
+
+void test_multithread_compound_assignment()
+{
+ Tensor<float, 3> in1(2,3,7);
+ Tensor<float, 3> in2(2,3,7);
+ Tensor<float, 3> out(2,3,7);
+
+ in1.setRandom();
+ in2.setRandom();
+
+ Eigen::ThreadPool tp(internal::random<int>(3, 11));
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11));
+ out.device(thread_pool_device) = in1;
+ out.device(thread_pool_device) += in2 * 3.14f;
+
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 3; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
+ }
+ }
+ }
+}
+
+template<int DataLayout>
+void test_multithread_contraction()
+{
+ Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31);
+ Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10);
+ Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10);
+
+ t_left.setRandom();
+ t_right.setRandom();
+
+ // this contraction should be equivalent to a single matrix multiplication
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
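+  // contracting left dims (2,3) against right dims (0,1) collapses the
+  // shared 37*31 = 1147 entries, leaving a 30*50 = 1500 by 70*2*10 = 1400
+  // matrix product, matching the Map shapes used below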
+
+ typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+ MapXf m_left(t_left.data(), 1500, 1147);
+ MapXf m_right(t_right.data(), 1147, 1400);
+ Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400);
+
+ Eigen::ThreadPool tp(4);
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, 4);
+
+ // compute results by separate methods
+ t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
+ m_result = m_left * m_right;
+
+ for (ptrdiff_t i = 0; i < t_result.size(); i++) {
+ VERIFY(&t_result.data()[i] != &m_result.data()[i]);
+ if (fabsf(t_result(i) - m_result(i)) < 1e-4f) {
+ continue;
+ }
+ if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) {
+ continue;
+ }
+ std::cout << "mismatch detected at index " << i << ": " << t_result(i)
+ << " vs " << m_result(i) << std::endl;
+ assert(false);
+ }
+}
+
+template<int DataLayout>
+void test_contraction_corner_cases()
+{
+ Tensor<float, 2, DataLayout> t_left(32, 500);
+ Tensor<float, 2, DataLayout> t_right(32, 28*28);
+ Tensor<float, 2, DataLayout> t_result(500, 28*28);
+
+ t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
+ t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
+ t_result = t_result.constant(NAN);
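+  // pre-fill the result with NaN so any output element the contraction
+  // fails to write is caught by the isnan asserts below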
+
+ // this contraction should be equivalent to a single matrix multiplication
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};
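+  // DimPair(0, 0) contracts the first index of both operands; as a
+  // matrix product this is m_left.transpose() * m_right, computed below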
+
+ typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf;
+ MapXf m_left(t_left.data(), 32, 500);
+ MapXf m_right(t_right.data(), 32, 28*28);
+ Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28);
+
+ Eigen::ThreadPool tp(12);
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, 12);
+
+ // compute results by separate methods
+ t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
+ m_result = m_left.transpose() * m_right;
+
+ for (ptrdiff_t i = 0; i < t_result.size(); i++) {
+ assert(!(numext::isnan)(t_result.data()[i]));
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
+ std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
+ assert(false);
+ }
+ }
+
+ t_left.resize(32, 1);
+ t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
+ t_result.resize (1, 28*28);
+ t_result = t_result.constant(NAN);
+ t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
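+  // placement new rebinds the Map to the reallocated tensor data;
+  // assigning to a Map would copy coefficients instead of reseating it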
+ new(&m_left) MapXf(t_left.data(), 32, 1);
+ m_result = m_left.transpose() * m_right;
+ for (ptrdiff_t i = 0; i < t_result.size(); i++) {
+ assert(!(numext::isnan)(t_result.data()[i]));
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
+ std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
+ assert(false);
+ }
+ }
+
+ t_left.resize(32, 500);
+ t_right.resize(32, 4);
+ t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
+ t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
+ t_result.resize (500, 4);
+ t_result = t_result.constant(NAN);
+ t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
+ new(&m_left) MapXf(t_left.data(), 32, 500);
+ new(&m_right) MapXf(t_right.data(), 32, 4);
+ m_result = m_left.transpose() * m_right;
+ for (ptrdiff_t i = 0; i < t_result.size(); i++) {
+ assert(!(numext::isnan)(t_result.data()[i]));
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
+ std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
+ assert(false);
+ }
+ }
+
+ t_left.resize(32, 1);
+ t_right.resize(32, 4);
+ t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
+ t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
+ t_result.resize (1, 4);
+ t_result = t_result.constant(NAN);
+ t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
+ new(&m_left) MapXf(t_left.data(), 32, 1);
+ new(&m_right) MapXf(t_right.data(), 32, 4);
+ m_result = m_left.transpose() * m_right;
+ for (ptrdiff_t i = 0; i < t_result.size(); i++) {
+ assert(!(numext::isnan)(t_result.data()[i]));
+ if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) {
+ std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
+ assert(false);
+ }
+ }
+}
+
+template<int DataLayout>
+void test_multithread_contraction_agrees_with_singlethread() {
+ int contract_size = internal::random<int>(1, 5000);
+
+ Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80),
+ contract_size,
+ internal::random<int>(1, 100));
+
+ Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25),
+ internal::random<int>(1, 37),
+ contract_size,
+ internal::random<int>(1, 51));
+
+ left.setRandom();
+ right.setRandom();
+
+ // add constants to shift values away from 0 for more precision
+ left += left.constant(1.5f);
+ right += right.constant(1.5f);
+
+ typedef Tensor<float, 1>::DimensionPair DimPair;
+ Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
+
+ Eigen::ThreadPool tp(internal::random<int>(2, 11));
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
+
+ Tensor<float, 5, DataLayout> st_result;
+ st_result = left.contract(right, dims);
+
+ Tensor<float, 5, DataLayout> tp_result(st_result.dimensions());
+ tp_result.device(thread_pool_device) = left.contract(right, dims);
+
+ VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
+ for (ptrdiff_t i = 0; i < st_result.size(); i++) {
+    // skip the comparison when the absolute difference is already tiny:
+    // VERIFY_IS_APPROX checks relative precision, which is unreliable
+    // when both values are close to zero
+ if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) {
+ VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
+ }
+ }
+}
+
+
+template<int DataLayout>
+void test_full_contraction() {
+ int contract_size1 = internal::random<int>(1, 500);
+ int contract_size2 = internal::random<int>(1, 500);
+
+ Tensor<float, 2, DataLayout> left(contract_size1,
+ contract_size2);
+ Tensor<float, 2, DataLayout> right(contract_size1,
+ contract_size2);
+ left.setRandom();
+ right.setRandom();
+
+ // add constants to shift values away from 0 for more precision
+ left += left.constant(1.5f);
+ right += right.constant(1.5f);
+
+ typedef Tensor<float, 2>::DimensionPair DimPair;
+ Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}});
+
+ Eigen::ThreadPool tp(internal::random<int>(2, 11));
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11));
+
+ Tensor<float, 0, DataLayout> st_result;
+ st_result = left.contract(right, dims);
+
+ Tensor<float, 0, DataLayout> tp_result;
+ tp_result.device(thread_pool_device) = left.contract(right, dims);
+
+ VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
+ // If the absolute difference is tiny, accept it outright: the relative check
+ // in VERIFY_IS_APPROX can fail spuriously when both values are very small.
+ if (numext::abs(st_result() - tp_result()) >= 1e-4f) {
+ VERIFY_IS_APPROX(st_result(), tp_result());
+ }
+}
+
+template<int DataLayout>
+void test_multithreaded_reductions() {
+ const int num_threads = internal::random<int>(3, 11);
+ ThreadPool thread_pool(num_threads);
+ Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
+
+ const int num_rows = internal::random<int>(13, 732);
+ const int num_cols = internal::random<int>(13, 732);
+ Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
+ t1.setRandom();
+
+ Tensor<float, 0, DataLayout> full_redux;
+ full_redux = t1.sum();
+
+ Tensor<float, 0, DataLayout> full_redux_tp;
+ full_redux_tp.device(thread_pool_device) = t1.sum();
+
+ // Check that the single threaded and the multi threaded reductions return
+ // the same result.
+ VERIFY_IS_APPROX(full_redux(), full_redux_tp());
+}
+
+
+void test_memcpy() {
+
+ for (int i = 0; i < 5; ++i) {
+ const int num_threads = internal::random<int>(3, 11);
+ Eigen::ThreadPool tp(num_threads);
+ Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
+
+ const int size = internal::random<int>(13, 7632);
+ Tensor<float, 1> t1(size);
+ t1.setRandom();
+ std::vector<float> result(size);
+ thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float));
+ for (int j = 0; j < size; j++) {
+ VERIFY_IS_EQUAL(t1(j), result[j]);
+ }
+ }
+}
+
+
+void test_multithread_random()
+{
+ Eigen::ThreadPool tp(2);
+ Eigen::ThreadPoolDevice device(&tp, 2);
+ Tensor<float, 1> t(1 << 20);
+ t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>();
+}
+
+template<int DataLayout>
+void test_multithread_shuffle()
+{
+ Tensor<float, 4, DataLayout> tensor(17,5,7,11);
+ tensor.setRandom();
+
+ const int num_threads = internal::random<int>(2, 11);
+ ThreadPool threads(num_threads);
+ Eigen::ThreadPoolDevice device(&threads, num_threads);
+
+ Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
+ array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};
+ shuffle.device(device) = tensor.shuffle(shuffles);
+
+ for (int i = 0; i < 17; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ for (int k = 0; k < 7; ++k) {
+ for (int l = 0; l < 11; ++l) {
+ VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
+ }
+ }
+ }
+ }
+}
+
+
+void test_cxx11_tensor_thread_pool()
+{
+ CALL_SUBTEST_1(test_multithread_elementwise());
+ CALL_SUBTEST_1(test_multithread_compound_assignment());
+
+ CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
+ CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
+
+ CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
+ CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
+
+ // Exercise various cases that have been problematic in the past.
+ CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>());
+ CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>());
+
+ CALL_SUBTEST_4(test_full_contraction<ColMajor>());
+ CALL_SUBTEST_4(test_full_contraction<RowMajor>());
+
+ CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>());
+ CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>());
+
+ CALL_SUBTEST_6(test_memcpy());
+ CALL_SUBTEST_6(test_multithread_random());
+ CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
+ CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
+}
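
All of the thread-pool tests above share one pattern: construct an Eigen::ThreadPool, wrap it in an Eigen::ThreadPoolDevice, and evaluate a tensor expression through .device(...). A minimal sketch of that pattern (editor's illustration, assuming the unsupported Tensor module with EIGEN_USE_THREADS defined, as these tests do):

    #define EIGEN_USE_THREADS
    #include <Eigen/CXX11/Tensor>

    void threaded_elementwise_example() {
      // Four worker threads; the device is told how many it may use.
      Eigen::ThreadPool pool(4);
      Eigen::ThreadPoolDevice device(&pool, 4);

      Eigen::Tensor<float, 2> a(256, 256), b(256, 256), c(256, 256);
      a.setRandom();
      b.setRandom();

      // Identical expression to the single-threaded "c = a + b", but the
      // evaluation is partitioned across the pool's worker threads.
      c.device(device) = a + b;
    }
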
diff --git a/unsupported/test/cxx11_tensor_uint128.cpp b/unsupported/test/cxx11_tensor_uint128.cpp
new file mode 100644
index 000000000..d2a1e8673
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_uint128.cpp
@@ -0,0 +1,160 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+
+#if EIGEN_COMP_MSVC
+#define EIGEN_NO_INT128
+#else
+typedef __uint128_t uint128_t;
+#endif
+
+// Only run the test on compilers that support 128-bit integers natively
+#ifndef EIGEN_NO_INT128
+
+using Eigen::internal::TensorUInt128;
+using Eigen::internal::static_val;
+
+void VERIFY_EQUAL(TensorUInt128<uint64_t, uint64_t> actual, uint128_t expected) {
+ bool matchl = actual.lower() == static_cast<uint64_t>(expected);
+ bool matchh = actual.upper() == static_cast<uint64_t>(expected >> 64);
+ if (!matchl || !matchh) {
+ const char* testname = g_test_stack.back().c_str();
+ std::cerr << "Test " << testname << " failed in " << __FILE__
+ << " (" << __LINE__ << ")"
+ << std::endl;
+ abort();
+ }
+}
+
+
+void test_add() {
+ uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+ for (uint64_t i1 = 0; i1 < 100; ++i1) {
+ for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
+ for (uint64_t j1 = 0; j1 < 100; ++j1) {
+ for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
+ TensorUInt128<uint64_t, uint64_t> actual = i + j;
+ uint128_t expected = a + b;
+ VERIFY_EQUAL(actual, expected);
+ }
+ }
+ }
+ }
+}
+
+void test_sub() {
+ uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+ for (uint64_t i1 = 0; i1 < 100; ++i1) {
+ for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
+ for (uint64_t j1 = 0; j1 < 100; ++j1) {
+ for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
+ TensorUInt128<uint64_t, uint64_t> actual = i - j;
+ uint128_t expected = a - b;
+ VERIFY_EQUAL(actual, expected);
+ }
+ }
+ }
+ }
+}
+
+void test_mul() {
+ uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+ for (uint64_t i1 = 0; i1 < 100; ++i1) {
+ for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
+ for (uint64_t j1 = 0; j1 < 100; ++j1) {
+ for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
+ TensorUInt128<uint64_t, uint64_t> actual = i * j;
+ uint128_t expected = a * b;
+ VERIFY_EQUAL(actual, expected);
+ }
+ }
+ }
+ }
+}
+
+void test_div() {
+ uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+ for (uint64_t i1 = 0; i1 < 100; ++i1) {
+ for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> i(i1, i2);
+ uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2);
+ for (uint64_t j1 = 0; j1 < 100; ++j1) {
+ for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+ TensorUInt128<uint64_t, uint64_t> j(j1, j2);
+ uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2);
+ TensorUInt128<uint64_t, uint64_t> actual = i / j;
+ uint128_t expected = a / b;
+ VERIFY_EQUAL(actual, expected);
+ }
+ }
+ }
+ }
+}
+
+void test_misc1() {
+ uint64_t incr = internal::random<uint64_t>(1, 9999999999);
+ for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) {
+ TensorUInt128<static_val<0>, uint64_t> i(0, i2);
+ uint128_t a = static_cast<uint128_t>(i2);
+ for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) {
+ TensorUInt128<static_val<0>, uint64_t> j(0, j2);
+ uint128_t b = static_cast<uint128_t>(j2);
+ uint64_t actual = (i * j).upper();
+ uint64_t expected = (a * b) >> 64;
+ VERIFY_IS_EQUAL(actual, expected);
+ }
+ }
+}
+
+void test_misc2() {
+ int64_t incr = internal::random<int64_t>(1, 100);
+ for (int64_t log_div = 0; log_div < 63; ++log_div) {
+ for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) {
+ uint64_t expected = (static_cast<uint128_t>(1) << (64+log_div)) / static_cast<uint128_t>(divider) - (static_cast<uint128_t>(1) << 64) + 1;
+ uint64_t shift = 1ULL << log_div;
+
+ TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+ uint64_t actual = static_cast<uint64_t>(result);
+ VERIFY_IS_EQUAL(actual, expected);
+ }
+ }
+}
+#endif
+
+
+void test_cxx11_tensor_uint128()
+{
+#ifdef EIGEN_NO_INT128
+ // Skip the test on compilers that don't support 128-bit integers natively
+ return;
+#else
+ CALL_SUBTEST_1(test_add());
+ CALL_SUBTEST_2(test_sub());
+ CALL_SUBTEST_3(test_mul());
+ CALL_SUBTEST_4(test_div());
+ CALL_SUBTEST_5(test_misc1());
+ CALL_SUBTEST_6(test_misc2());
+#endif
+}
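
The tests above validate TensorUInt128, which emulates a 128-bit unsigned integer as a pair of 64-bit halves; each VERIFY_EQUAL compares that pair against a native __uint128_t. A minimal sketch of the (upper, lower) correspondence, assuming a compiler with __uint128_t (the same condition the test guards on):

    #include <cstdint>

    int main() {
      // A 128-bit value and its two halves satisfy
      //   value == (upper << 64) + lower
      uint64_t upper = 0x0123456789abcdefULL;
      uint64_t lower = 0xfedcba9876543210ULL;
      __uint128_t value = (static_cast<__uint128_t>(upper) << 64) + lower;

      // Recovering the halves mirrors TensorUInt128::upper()/lower().
      uint64_t lo = static_cast<uint64_t>(value);        // low 64 bits
      uint64_t hi = static_cast<uint64_t>(value >> 64);  // high 64 bits
      return (lo == lower && hi == upper) ? 0 : 1;
    }
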
diff --git a/unsupported/test/cxx11_tensor_volume_patch.cpp b/unsupported/test/cxx11_tensor_volume_patch.cpp
new file mode 100644
index 000000000..ca6840f3b
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_volume_patch.cpp
@@ -0,0 +1,112 @@
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+static void test_single_voxel_patch()
+{
+ Tensor<float, 5> tensor(4,2,3,5,7);
+ tensor.setRandom();
+ Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout();
+
+ Tensor<float, 6> single_voxel_patch;
+ single_voxel_patch = tensor.extract_volume_patches(1, 1, 1);
+ VERIFY_IS_EQUAL(single_voxel_patch.dimension(0), 4);
+ VERIFY_IS_EQUAL(single_voxel_patch.dimension(1), 1);
+ VERIFY_IS_EQUAL(single_voxel_patch.dimension(2), 1);
+ VERIFY_IS_EQUAL(single_voxel_patch.dimension(3), 1);
+ VERIFY_IS_EQUAL(single_voxel_patch.dimension(4), 2 * 3 * 5);
+ VERIFY_IS_EQUAL(single_voxel_patch.dimension(5), 7);
+
+ Tensor<float, 6, RowMajor> single_voxel_patch_row_major;
+ single_voxel_patch_row_major = tensor_row_major.extract_volume_patches(1, 1, 1);
+ VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7);
+ VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5);
+ VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1);
+ VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1);
+ VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1);
+ VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4);
+
+ for (int i = 0; i < tensor.size(); ++i) {
+ VERIFY_IS_EQUAL(tensor.data()[i], single_voxel_patch.data()[i]);
+ VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]);
+ VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
+ }
+}
+
+
+static void test_entire_volume_patch()
+{
+ const int depth = 4;
+ const int patch_z = 2;
+ const int patch_y = 3;
+ const int patch_x = 5;
+ const int batch = 7;
+
+ Tensor<float, 5> tensor(depth, patch_z, patch_y, patch_x, batch);
+ tensor.setRandom();
+ Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout();
+
+ Tensor<float, 6> entire_volume_patch;
+ entire_volume_patch = tensor.extract_volume_patches(patch_z, patch_y, patch_x);
+ VERIFY_IS_EQUAL(entire_volume_patch.dimension(0), depth);
+ VERIFY_IS_EQUAL(entire_volume_patch.dimension(1), patch_z);
+ VERIFY_IS_EQUAL(entire_volume_patch.dimension(2), patch_y);
+ VERIFY_IS_EQUAL(entire_volume_patch.dimension(3), patch_x);
+ VERIFY_IS_EQUAL(entire_volume_patch.dimension(4), patch_z * patch_y * patch_x);
+ VERIFY_IS_EQUAL(entire_volume_patch.dimension(5), batch);
+
+ Tensor<float, 6, RowMajor> entire_volume_patch_row_major;
+ entire_volume_patch_row_major = tensor_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth);
+
+ const int dz = patch_z - 1;
+ const int dy = patch_y - 1;
+ const int dx = patch_x - 1;
+
+ const int forward_pad_z = dz - dz / 2;
+ const int forward_pad_y = dy - dy / 2;
+ const int forward_pad_x = dx - dx / 2;
+
+ for (int pz = 0; pz < patch_z; pz++) {
+ for (int py = 0; py < patch_y; py++) {
+ for (int px = 0; px < patch_x; px++) {
+ const int patchId = pz + patch_z * (py + px * patch_y);
+ for (int z = 0; z < patch_z; z++) {
+ for (int y = 0; y < patch_y; y++) {
+ for (int x = 0; x < patch_x; x++) {
+ for (int b = 0; b < batch; b++) {
+ for (int d = 0; d < depth; d++) {
+ float expected = 0.0f;
+ float expected_row_major = 0.0f;
+ const int eff_z = z - forward_pad_z + pz;
+ const int eff_y = y - forward_pad_y + py;
+ const int eff_x = x - forward_pad_x + px;
+ if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
+ eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
+ expected = tensor(d, eff_z, eff_y, eff_x, b);
+ expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
+ }
+ VERIFY_IS_EQUAL(entire_volume_patch(d, z, y, x, patchId, b), expected);
+ VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void test_cxx11_tensor_volume_patch()
+{
+ CALL_SUBTEST(test_single_voxel_patch());
+ CALL_SUBTEST(test_entire_volume_patch());
+}
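
test_entire_volume_patch relies on SAME-style padding: a patch of extent p overhangs the input by p - 1 entries in that dimension, and the test places ceil((p-1)/2) of them before the data (the forward pad) and the rest after. A worked sketch of that split for the sizes used above:

    // Padding split per dimension, as computed in the test above.
    int patch_x = 5;
    int dx = patch_x - 1;             // 4 overhanging entries in total
    int forward_pad_x = dx - dx / 2;  // 4 - 2 = 2 entries before the data
    int backward_pad_x = dx / 2;      // 2 entries after the data
    // patch_y = 3 gives dy = 2, hence forward_pad_y = backward_pad_y = 1.
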
diff --git a/unsupported/test/forward_adolc.cpp b/unsupported/test/forward_adolc.cpp
index d4baafe62..866db8e86 100644
--- a/unsupported/test/forward_adolc.cpp
+++ b/unsupported/test/forward_adolc.cpp
@@ -13,8 +13,6 @@
#define NUMBER_DIRECTIONS 16
#include <unsupported/Eigen/AdolcForward>
-int adtl::ADOLC_numDir;
-
template<typename Vector>
EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
{
@@ -123,7 +121,7 @@ template<typename Func> void adolc_forward_jacobian(const Func& f)
void test_forward_adolc()
{
- adtl::ADOLC_numDir = NUMBER_DIRECTIONS;
+ adtl::setNumDir(NUMBER_DIRECTIONS);
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST(( adolc_forward_jacobian(TestFunc1<double,2,2>()) ));
diff --git a/unsupported/test/jacobisvd.cpp b/unsupported/test/jacobisvd.cpp
deleted file mode 100644
index b4e884eee..000000000
--- a/unsupported/test/jacobisvd.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#include "svd_common.h"
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPreconditioner>& svd)
-{
- svd_check_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner > >(m, svd);
-}
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const JacobiSVD<MatrixType, QRPreconditioner>& referenceSvd)
-{
- svd_compare_to_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner> >(m, computationOptions, referenceSvd);
-}
-
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- svd_solve< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, computationOptions);
-}
-
-
-
-template<typename MatrixType, int QRPreconditioner>
-void jacobisvd_test_all_computation_options(const MatrixType& m)
-{
-
- if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols())
- return;
-
- JacobiSVD< MatrixType, QRPreconditioner > fullSvd(m, ComputeFullU|ComputeFullV);
- svd_test_computation_options_1< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd);
-
- if(QRPreconditioner == FullPivHouseholderQRPreconditioner)
- return;
- svd_test_computation_options_2< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd);
-
-}
-
-template<typename MatrixType>
-void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true)
-{
- MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a;
-
- jacobisvd_test_all_computation_options<MatrixType, FullPivHouseholderQRPreconditioner>(m);
- jacobisvd_test_all_computation_options<MatrixType, ColPivHouseholderQRPreconditioner>(m);
- jacobisvd_test_all_computation_options<MatrixType, HouseholderQRPreconditioner>(m);
- jacobisvd_test_all_computation_options<MatrixType, NoQRPreconditioner>(m);
-}
-
-
-template<typename MatrixType>
-void jacobisvd_verify_assert(const MatrixType& m)
-{
-
- svd_verify_assert<MatrixType, JacobiSVD< MatrixType > >(m);
-
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- MatrixType a = MatrixType::Zero(rows, cols);
- a.setZero();
-
- if (ColsAtCompileTime == Dynamic)
- {
- JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> svd_fullqr;
- VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV))
- VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV))
- VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV))
- }
-}
-
-template<typename MatrixType>
-void jacobisvd_method()
-{
- enum { Size = MatrixType::RowsAtCompileTime };
- typedef typename MatrixType::RealScalar RealScalar;
- typedef Matrix<RealScalar, Size, 1> RealVecType;
- MatrixType m = MatrixType::Identity();
- VERIFY_IS_APPROX(m.jacobiSvd().singularValues(), RealVecType::Ones());
- VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixU());
- VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixV());
- VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m);
-}
-
-
-
-template<typename MatrixType>
-void jacobisvd_inf_nan()
-{
- svd_inf_nan<MatrixType, JacobiSVD< MatrixType > >();
-}
-
-
-// Regression test for bug 286: JacobiSVD loops indefinitely with some
-// matrices containing denormal numbers.
-void jacobisvd_bug286()
-{
-#if defined __INTEL_COMPILER
-// shut up warning #239: floating point underflow
-#pragma warning push
-#pragma warning disable 239
-#endif
- Matrix2d M;
- M << -7.90884e-313, -4.94e-324,
- 0, 5.60844e-313;
-#if defined __INTEL_COMPILER
-#pragma warning pop
-#endif
- JacobiSVD<Matrix2d> svd;
- svd.compute(M); // just check we don't loop indefinitely
-}
-
-
-void jacobisvd_preallocate()
-{
- svd_preallocate< JacobiSVD <MatrixXf> >();
-}
-
-void test_jacobisvd()
-{
- CALL_SUBTEST_11(( jacobisvd<Matrix<double,Dynamic,Dynamic> >
- (Matrix<double,Dynamic,Dynamic>(16, 6)) ));
-
- CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) ));
- CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) ));
- CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) ));
- CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) ));
-
- for(int i = 0; i < g_repeat; i++) {
- Matrix2cd m;
- m << 0, 1,
- 0, 1;
- CALL_SUBTEST_1(( jacobisvd(m, false) ));
- m << 1, 0,
- 1, 0;
- CALL_SUBTEST_1(( jacobisvd(m, false) ));
-
- Matrix2d n;
- n << 0, 0,
- 0, 0;
- CALL_SUBTEST_2(( jacobisvd(n, false) ));
- n << 0, 0,
- 0, 1;
- CALL_SUBTEST_2(( jacobisvd(n, false) ));
-
- CALL_SUBTEST_3(( jacobisvd<Matrix3f>() ));
- CALL_SUBTEST_4(( jacobisvd<Matrix4d>() ));
- CALL_SUBTEST_5(( jacobisvd<Matrix<float,3,5> >() ));
- CALL_SUBTEST_6(( jacobisvd<Matrix<double,Dynamic,2> >(Matrix<double,Dynamic,2>(10,2)) ));
-
- int r = internal::random<int>(1, 30),
- c = internal::random<int>(1, 30);
- CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(r,c)) ));
- CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(r,c)) ));
- (void) r;
- (void) c;
-
- // Test on inf/nan matrix
- CALL_SUBTEST_7( jacobisvd_inf_nan<MatrixXf>() );
- }
-
- CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) ));
- CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) ));
-
-
- // test matrixbase method
- CALL_SUBTEST_1(( jacobisvd_method<Matrix2cd>() ));
- CALL_SUBTEST_3(( jacobisvd_method<Matrix3f>() ));
-
-
- // Test problem size constructors
- CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
-
- // Check that preallocation avoids subsequent mallocs
- CALL_SUBTEST_9( jacobisvd_preallocate() );
-
- // Regression check for bug 286
- CALL_SUBTEST_2( jacobisvd_bug286() );
-}
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 8ddc6ec28..e770049e5 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -9,12 +9,12 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifdef EIGEN_TEST_PART_1
#include "sparse.h"
#include <Eigen/SparseExtra>
#include <Eigen/KroneckerProduct>
-
template<typename MatrixType>
void check_dimension(const MatrixType& ab, const int rows, const int cols)
{
@@ -107,31 +107,34 @@ void test_kronecker_product()
SparseMatrix<double,RowMajor> SM_row_a(SM_a), SM_row_b(SM_b);
- // test kroneckerProduct(DM_block,DM,DM_fixedSize)
+ // test DM_fixedSize = kroneckerProduct(DM_block,DM)
Matrix<double, 6, 6> DM_fix_ab = kroneckerProduct(DM_a.topLeftCorner<2,3>(),DM_b);
CALL_SUBTEST(check_kronecker_product(DM_fix_ab));
+ CALL_SUBTEST(check_kronecker_product(kroneckerProduct(DM_a.topLeftCorner<2,3>(),DM_b)));
for(int i=0;i<DM_fix_ab.rows();++i)
for(int j=0;j<DM_fix_ab.cols();++j)
VERIFY_IS_APPROX(kroneckerProduct(DM_a,DM_b).coeff(i,j), DM_fix_ab(i,j));
- // test kroneckerProduct(DM,DM,DM_block)
+ // test DM_block = kroneckerProduct(DM,DM)
MatrixXd DM_block_ab(10,15);
DM_block_ab.block<6,6>(2,5) = kroneckerProduct(DM_a,DM_b);
CALL_SUBTEST(check_kronecker_product(DM_block_ab.block<6,6>(2,5)));
- // test kroneckerProduct(DM,DM,DM)
+ // test DM = kroneckerProduct(DM,DM)
MatrixXd DM_ab = kroneckerProduct(DM_a,DM_b);
CALL_SUBTEST(check_kronecker_product(DM_ab));
+ CALL_SUBTEST(check_kronecker_product(kroneckerProduct(DM_a,DM_b)));
- // test kroneckerProduct(SM,DM,SM)
+ // test SM = kroneckerProduct(SM,DM)
SparseMatrix<double> SM_ab = kroneckerProduct(SM_a,DM_b);
CALL_SUBTEST(check_kronecker_product(SM_ab));
SparseMatrix<double,RowMajor> SM_ab2 = kroneckerProduct(SM_a,DM_b);
CALL_SUBTEST(check_kronecker_product(SM_ab2));
+ CALL_SUBTEST(check_kronecker_product(kroneckerProduct(SM_a,DM_b)));
- // test kroneckerProduct(DM,SM,SM)
+ // test SM = kroneckerProduct(DM,SM)
SM_ab.setZero();
SM_ab.insert(0,0)=37.0;
SM_ab = kroneckerProduct(DM_a,SM_b);
@@ -140,8 +143,9 @@ void test_kronecker_product()
SM_ab2.insert(0,0)=37.0;
SM_ab2 = kroneckerProduct(DM_a,SM_b);
CALL_SUBTEST(check_kronecker_product(SM_ab2));
+ CALL_SUBTEST(check_kronecker_product(kroneckerProduct(DM_a,SM_b)));
- // test kroneckerProduct(SM,SM,SM)
+ // test SM = kroneckerProduct(SM,SM)
SM_ab.resize(2,33);
SM_ab.insert(0,0)=37.0;
SM_ab = kroneckerProduct(SM_a,SM_b);
@@ -150,8 +154,9 @@ void test_kronecker_product()
SM_ab2.insert(0,0)=37.0;
SM_ab2 = kroneckerProduct(SM_a,SM_b);
CALL_SUBTEST(check_kronecker_product(SM_ab2));
+ CALL_SUBTEST(check_kronecker_product(kroneckerProduct(SM_a,SM_b)));
- // test kroneckerProduct(SM,SM,SM) with sparse pattern
+ // test SM = kroneckerProduct(SM,SM) with sparse pattern
SM_a.resize(4,5);
SM_b.resize(3,2);
SM_a.resizeNonZeros(0);
@@ -169,7 +174,7 @@ void test_kronecker_product()
SM_ab = kroneckerProduct(SM_a,SM_b);
CALL_SUBTEST(check_sparse_kronecker_product(SM_ab));
- // test dimension of result of kroneckerProduct(DM,DM,DM)
+ // test dimension of result of DM = kroneckerProduct(DM,DM)
MatrixXd DM_a2(2,1);
MatrixXd DM_b2(5,4);
MatrixXd DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
@@ -178,4 +183,70 @@ void test_kronecker_product()
DM_b2.resize(4,8);
DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8));
+
+ for(int i = 0; i < g_repeat; i++)
+ {
+ double density = Eigen::internal::random<double>(0.01,0.5);
+ int ra = Eigen::internal::random<int>(1,50);
+ int ca = Eigen::internal::random<int>(1,50);
+ int rb = Eigen::internal::random<int>(1,50);
+ int cb = Eigen::internal::random<int>(1,50);
+ SparseMatrix<float,ColMajor> sA(ra,ca), sB(rb,cb), sC;
+ SparseMatrix<float,RowMajor> sC2;
+ MatrixXf dA(ra,ca), dB(rb,cb), dC;
+ initSparse(density, dA, sA);
+ initSparse(density, dB, sB);
+
+ sC = kroneckerProduct(sA,sB);
+ dC = kroneckerProduct(dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC),dC);
+
+ sC = kroneckerProduct(sA.transpose(),sB);
+ dC = kroneckerProduct(dA.transpose(),dB);
+ VERIFY_IS_APPROX(MatrixXf(sC),dC);
+
+ sC = kroneckerProduct(sA.transpose(),sB.transpose());
+ dC = kroneckerProduct(dA.transpose(),dB.transpose());
+ VERIFY_IS_APPROX(MatrixXf(sC),dC);
+
+ sC = kroneckerProduct(sA,sB.transpose());
+ dC = kroneckerProduct(dA,dB.transpose());
+ VERIFY_IS_APPROX(MatrixXf(sC),dC);
+
+ sC2 = kroneckerProduct(sA,sB);
+ dC = kroneckerProduct(dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+
+ sC2 = kroneckerProduct(dA,sB);
+ dC = kroneckerProduct(dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+
+ sC2 = kroneckerProduct(sA,dB);
+ dC = kroneckerProduct(dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+
+ sC2 = kroneckerProduct(2*sA,sB);
+ dC = kroneckerProduct(2*dA,dB);
+ VERIFY_IS_APPROX(MatrixXf(sC2),dC);
+ }
+}
+
+#endif
+
+#ifdef EIGEN_TEST_PART_2
+
+// Simply check that for a dense Kronecker product, the sparse module is not needed
+
+#include "main.h"
+#include <Eigen/KroneckerProduct>
+
+void test_kronecker_product()
+{
+ MatrixXd a(2,2), b(3,3), c;
+ a.setRandom();
+ b.setRandom();
+ c = kroneckerProduct(a,b);
+ VERIFY_IS_APPROX(c.block(3,3,3,3), a(1,1)*b);
}
+
+#endif
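
Both parts of the test lean on the block structure of the Kronecker product: for A of size m-by-n and B of size p-by-q, kroneckerProduct(A, B) is mp-by-nq and its (i, j) block of size p-by-q equals A(i,j) * B, which is exactly what the c.block(3,3,3,3) check asserts. A naive reference sketch of that definition (kron_ref is a hypothetical helper, not Eigen's implementation):

    #include <Eigen/Dense>
    using Eigen::MatrixXd;

    // Entrywise definition: (A kron B)(i*p + k, j*q + l) = A(i,j) * B(k,l).
    MatrixXd kron_ref(const MatrixXd& A, const MatrixXd& B) {
      MatrixXd C(A.rows() * B.rows(), A.cols() * B.cols());
      for (int i = 0; i < A.rows(); ++i)
        for (int j = 0; j < A.cols(); ++j)
          C.block(i * B.rows(), j * B.cols(), B.rows(), B.cols()) = A(i, j) * B;
      return C;
    }
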
diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp
index 04464727d..64f168c16 100644
--- a/unsupported/test/levenberg_marquardt.cpp
+++ b/unsupported/test/levenberg_marquardt.cpp
@@ -9,6 +9,9 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+// FIXME: These tests all check for hard-coded values. Ideally, parameters and start estimates should be randomized.
+
+
#include <stdio.h>
#include "main.h"
@@ -20,6 +23,9 @@
using std::sqrt;
+// tolerance for checking the number of iterations
+#define LM_EVAL_COUNT_TOL 4/3
+
struct lmder_functor : DenseFunctor<double>
{
lmder_functor(void): DenseFunctor<double>(3,15) {}
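
Note that LM_EVAL_COUNT_TOL expands textually, so the tolerance checks below rely on left-to-right integer arithmetic:

    // Expansion of the check used further down:
    //   lm.nfev() < 284 * LM_EVAL_COUNT_TOL
    //   => lm.nfev() < 284 * 4 / 3      (macro substitution)
    //   => lm.nfev() < (284 * 4) / 3    (left-to-right evaluation)
    //   => lm.nfev() < 378              (~33% slack, as intended)
    // The reversed form LM_EVAL_COUNT_TOL * 284 would instead collapse to
    //   (4 / 3) * 284 == 1 * 284 under integer division.
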
@@ -275,7 +281,7 @@ const double chwirut2_functor::m_y[54] = { 92.9000E0 ,57.1000E0 ,31.0500E0 ,11.5
void testNistChwirut2(void)
{
const int n=3;
- int info;
+ LevenbergMarquardtSpace::Status info;
VectorXd x(n);
@@ -610,7 +616,7 @@ const double lanczos1_functor::y[24] = { 2.513400000000E+00 ,2.044333373291E+00
void testNistLanczos1(void)
{
const int n=6;
- int info;
+ LevenbergMarquardtSpace::Status info;
VectorXd x(n);
@@ -624,11 +630,11 @@ void testNistLanczos1(void)
info = lm.minimize(x);
// check return value
- VERIFY_IS_EQUAL(info, 2);
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 79);
VERIFY_IS_EQUAL(lm.njev(), 72);
// check norm^2
-// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -645,11 +651,11 @@ void testNistLanczos1(void)
info = lm.minimize(x);
// check return value
- VERIFY_IS_EQUAL(info, 2);
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
-// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
+ VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
@@ -696,7 +702,7 @@ const double rat42_functor::y[9] = { 8.930E0 ,10.800E0 ,18.590E0 ,22.330E0 ,39.3
void testNistRat42(void)
{
const int n=3;
- int info;
+ LevenbergMarquardtSpace::Status info;
VectorXd x(n);
@@ -710,7 +716,7 @@ void testNistRat42(void)
info = lm.minimize(x);
// check return value
- VERIFY_IS_EQUAL(info, 1);
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 10);
VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
@@ -728,7 +734,7 @@ void testNistRat42(void)
info = lm.minimize(x);
// check return value
- VERIFY_IS_EQUAL(info, 1);
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm^2
@@ -774,7 +780,7 @@ const double MGH10_functor::y[16] = { 3.478000E+04, 2.861000E+04, 2.365000E+04,
void testNistMGH10(void)
{
const int n=3;
- int info;
+ LevenbergMarquardtSpace::Status info;
VectorXd x(n);
@@ -786,17 +792,26 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
+ ++g_test_level;
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ --g_test_level;
+ // was: VERIFY_IS_EQUAL(info, 1);
- // check return value
- VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 284 );
- VERIFY_IS_EQUAL(lm.njev(), 249 );
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
// check x
VERIFY_IS_APPROX(x[0], 5.6096364710E-03);
VERIFY_IS_APPROX(x[1], 6.1813463463E+03);
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
+
+ // check return value
+
+ ++g_test_level;
+ VERIFY_IS_EQUAL(lm.nfev(), 284 );
+ VERIFY_IS_EQUAL(lm.njev(), 249 );
+ --g_test_level;
+ VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);
/*
* Second try
@@ -804,17 +819,25 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
+ ++g_test_level;
+ VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
+ // was: VERIFY_IS_EQUAL(info, 1);
+ --g_test_level;
- // check return value
- VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 126);
- VERIFY_IS_EQUAL(lm.njev(), 116);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
// check x
VERIFY_IS_APPROX(x[0], 5.6096364710E-03);
VERIFY_IS_APPROX(x[1], 6.1813463463E+03);
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
+
+ // check return value
+ ++g_test_level;
+ VERIFY_IS_EQUAL(lm.nfev(), 126);
+ VERIFY_IS_EQUAL(lm.njev(), 116);
+ --g_test_level;
+ VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
}
@@ -866,15 +889,16 @@ void testNistBoxBOD(void)
lm.setFactor(10);
info = lm.minimize(x);
- // check return value
- VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 31);
- VERIFY_IS_EQUAL(lm.njev(), 25);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
// check x
VERIFY_IS_APPROX(x[0], 2.1380940889E+02);
VERIFY_IS_APPROX(x[1], 5.4723748542E-01);
+
+ // check return value
+ VERIFY_IS_EQUAL(info, 1);
+ VERIFY(lm.nfev() < 31); // 31
+ VERIFY(lm.njev() < 25); // 25
/*
* Second try
@@ -888,8 +912,12 @@ void testNistBoxBOD(void)
// check return value
VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 15 );
- VERIFY_IS_EQUAL(lm.njev(), 14 );
+ ++g_test_level;
+ VERIFY_IS_EQUAL(lm.nfev(), 16 );
+ VERIFY_IS_EQUAL(lm.njev(), 15 );
+ --g_test_level;
+ VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
+ VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
// check x
@@ -948,10 +976,6 @@ void testNistMGH17(void)
lm.setMaxfev(1000);
info = lm.minimize(x);
- // check return value
-// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success)
-// VERIFY_IS_EQUAL(lm.nfev(), 602 );
- VERIFY_IS_EQUAL(lm.njev(), 545 );
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
// check x
@@ -960,6 +984,11 @@ void testNistMGH17(void)
VERIFY_IS_APPROX(x[2], -1.4646871366E+00);
VERIFY_IS_APPROX(x[3], 1.2867534640E-02);
VERIFY_IS_APPROX(x[4], 2.2122699662E-02);
+
+ // check return value
+// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success)
+ VERIFY(lm.nfev() < 700 ); // 602
+ VERIFY(lm.njev() < 600 ); // 545
/*
* Second try
@@ -1035,10 +1064,6 @@ void testNistMGH09(void)
lm.setMaxfev(1000);
info = lm.minimize(x);
- // check return value
- VERIFY_IS_EQUAL(info, 1);
- VERIFY_IS_EQUAL(lm.nfev(), 490 );
- VERIFY_IS_EQUAL(lm.njev(), 376 );
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
// check x
@@ -1046,6 +1071,10 @@ void testNistMGH09(void)
VERIFY_IS_APPROX(x[1], 0.19126423573); // should be 1.9128232873E-01
VERIFY_IS_APPROX(x[2], 0.12305309914); // should be 1.2305650693E-01
VERIFY_IS_APPROX(x[3], 0.13605395375); // should be 1.3606233068E-01
+ // check return value
+ VERIFY_IS_EQUAL(info, 1);
+ VERIFY(lm.nfev() < 510 ); // 490
+ VERIFY(lm.njev() < 400 ); // 376
/*
* Second try
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 3c76cfb65..7c9b68a3c 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -102,7 +102,7 @@ void testMatrixExponential(const MatrixType& A)
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef std::complex<RealScalar> ComplexScalar;
- VERIFY_IS_APPROX(A.exp(), A.matrixFunction(StdStemFunctions<ComplexScalar>::exp));
+ VERIFY_IS_APPROX(A.exp(), A.matrixFunction(internal::stem_function_exp<ComplexScalar>));
}
template<typename MatrixType>
@@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A)
MatrixType scaledA;
RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff();
- if (maxImagPartOfSpectrum >= 0.9 * M_PI)
- scaledA = A * 0.9 * M_PI / maxImagPartOfSpectrum;
+ if (maxImagPartOfSpectrum >= RealScalar(0.9L * EIGEN_PI))
+ scaledA = A * RealScalar(0.9L * EIGEN_PI) / maxImagPartOfSpectrum;
else
scaledA = A;
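
The rescaling above exists because the principal matrix logarithm inverts the exponential only when every eigenvalue's imaginary part lies strictly inside (-pi, pi): the test enforces max_k |Im lambda_k(A)| <= 0.9*pi by replacing A with A * 0.9*pi / max_k |Im lambda_k(A)| whenever the bound would otherwise be exceeded, keeping a safety margin away from the branch cut.
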
diff --git a/unsupported/test/matrix_functions.h b/unsupported/test/matrix_functions.h
index 5817caef6..4e2636404 100644
--- a/unsupported/test/matrix_functions.h
+++ b/unsupported/test/matrix_functions.h
@@ -10,27 +10,47 @@
#include "main.h"
#include <unsupported/Eigen/MatrixFunctions>
+// For complex matrices, any matrix is fine.
+template<typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
+struct processTriangularMatrix
+{
+ static void run(MatrixType&, MatrixType&, const MatrixType&)
+ { }
+};
+
+// For real matrices, make sure none of the eigenvalues are negative.
+template<typename MatrixType>
+struct processTriangularMatrix<MatrixType,0>
+{
+ static void run(MatrixType& m, MatrixType& T, const MatrixType& U)
+ {
+ const Index size = m.cols();
+
+ for (Index i=0; i < size; ++i) {
+ if (i == size - 1 || T.coeff(i+1,i) == 0)
+ T.coeffRef(i,i) = std::abs(T.coeff(i,i));
+ else
+ ++i;
+ }
+ m = U * T * U.transpose();
+ }
+};
+
template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
struct generateTestMatrix;
-// for real matrices, make sure none of the eigenvalues are negative
template <typename MatrixType>
struct generateTestMatrix<MatrixType,0>
{
static void run(MatrixType& result, typename MatrixType::Index size)
{
- MatrixType mat = MatrixType::Random(size, size);
- EigenSolver<MatrixType> es(mat);
- typename EigenSolver<MatrixType>::EigenvalueType eivals = es.eigenvalues();
- for (typename MatrixType::Index i = 0; i < size; ++i) {
- if (eivals(i).imag() == 0 && eivals(i).real() < 0)
- eivals(i) = -eivals(i);
- }
- result = (es.eigenvectors() * eivals.asDiagonal() * es.eigenvectors().inverse()).real();
+ result = MatrixType::Random(size, size);
+ RealSchur<MatrixType> schur(result);
+ MatrixType T = schur.matrixT();
+ processTriangularMatrix<MatrixType>::run(result, T, schur.matrixU());
}
};
-// for complex matrices, any matrix is fine
template <typename MatrixType>
struct generateTestMatrix<MatrixType,1>
{
@@ -41,7 +61,7 @@ struct generateTestMatrix<MatrixType,1>
};
template <typename Derived, typename OtherDerived>
-double relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B)
+typename Derived::RealScalar relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B)
{
return std::sqrt((A - B).cwiseAbs2().sum() / (std::min)(A.cwiseAbs2().sum(), B.cwiseAbs2().sum()));
}
diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp
index b9d513b45..7ccfacfdf 100644
--- a/unsupported/test/matrix_power.cpp
+++ b/unsupported/test/matrix_power.cpp
@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net>
+// Copyright (C) 2012, 2013 Chen-Pang He <jdh8@ms63.hinet.net>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -9,35 +9,8 @@
#include "matrix_functions.h"
-template <typename MatrixType, int IsComplex = NumTraits<typename MatrixType::Scalar>::IsComplex>
-struct generateTriangularMatrix;
-
-// for real matrices, make sure none of the eigenvalues are negative
-template <typename MatrixType>
-struct generateTriangularMatrix<MatrixType,0>
-{
- static void run(MatrixType& result, typename MatrixType::Index size)
- {
- result.resize(size, size);
- result.template triangularView<Upper>() = MatrixType::Random(size, size);
- for (typename MatrixType::Index i = 0; i < size; ++i)
- result.coeffRef(i,i) = std::abs(result.coeff(i,i));
- }
-};
-
-// for complex matrices, any matrix is fine
-template <typename MatrixType>
-struct generateTriangularMatrix<MatrixType,1>
-{
- static void run(MatrixType& result, typename MatrixType::Index size)
- {
- result.resize(size, size);
- result.template triangularView<Upper>() = MatrixType::Random(size, size);
- }
-};
-
template<typename T>
-void test2dRotation(double tol)
+void test2dRotation(const T& tol)
{
Matrix<T,2,2> A, B, C;
T angle, c, s;
@@ -46,19 +19,19 @@ void test2dRotation(double tol)
MatrixPower<Matrix<T,2,2> > Apow(A);
for (int i=0; i<=20; ++i) {
- angle = pow(10, (i-10) / 5.);
+ angle = std::pow(T(10), (i-10) / T(5.));
c = std::cos(angle);
s = std::sin(angle);
B << c, s, -s, c;
- C = Apow(std::ldexp(angle,1) / M_PI);
+ C = Apow(std::ldexp(angle,1) / T(EIGEN_PI));
std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n';
- VERIFY(C.isApprox(B, static_cast<T>(tol)));
+ VERIFY(C.isApprox(B, tol));
}
}
template<typename T>
-void test2dHyperbolicRotation(double tol)
+void test2dHyperbolicRotation(const T& tol)
{
Matrix<std::complex<T>,2,2> A, B, C;
T angle, ch = std::cosh((T)1);
@@ -75,12 +48,26 @@ void test2dHyperbolicRotation(double tol)
C = Apow(angle);
std::cout << "test2dHyperbolicRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n';
- VERIFY(C.isApprox(B, static_cast<T>(tol)));
+ VERIFY(C.isApprox(B, tol));
+ }
+}
+
+template<typename T>
+void test3dRotation(const T& tol)
+{
+ Matrix<T,3,1> v;
+ T angle;
+
+ for (int i=0; i<=20; ++i) {
+ v = Matrix<T,3,1>::Random();
+ v.normalize();
+ angle = std::pow(T(10), (i-10) / T(5.));
+ VERIFY(AngleAxis<T>(angle, v).matrix().isApprox(AngleAxis<T>(1,v).matrix().pow(angle), tol));
}
}
template<typename MatrixType>
-void testExponentLaws(const MatrixType& m, double tol)
+void testGeneral(const MatrixType& m, const typename MatrixType::RealScalar& tol)
{
typedef typename MatrixType::RealScalar RealScalar;
MatrixType m1, m2, m3, m4, m5;
@@ -97,37 +84,121 @@ void testExponentLaws(const MatrixType& m, double tol)
m4 = mpow(x+y);
m5.noalias() = m2 * m3;
- VERIFY(m4.isApprox(m5, static_cast<RealScalar>(tol)));
+ VERIFY(m4.isApprox(m5, tol));
m4 = mpow(x*y);
m5 = m2.pow(y);
- VERIFY(m4.isApprox(m5, static_cast<RealScalar>(tol)));
+ VERIFY(m4.isApprox(m5, tol));
m4 = (std::abs(x) * m1).pow(y);
m5 = std::pow(std::abs(x), y) * m3;
- VERIFY(m4.isApprox(m5, static_cast<RealScalar>(tol)));
+ VERIFY(m4.isApprox(m5, tol));
+ }
+}
+
+template<typename MatrixType>
+void testSingular(const MatrixType& m_const, const typename MatrixType::RealScalar& tol)
+{
+ // we need to pass by reference in order to prevent errors with
+ // MSVC for aligned data types ...
+ MatrixType& m = const_cast<MatrixType&>(m_const);
+
+ const int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex;
+ typedef typename internal::conditional<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&>::type TriangularType;
+ typename internal::conditional< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> >::type schur;
+ MatrixType T;
+
+ for (int i=0; i < g_repeat; ++i) {
+ m.setRandom();
+ m.col(0).fill(0);
+
+ schur.compute(m);
+ T = schur.matrixT();
+ const MatrixType& U = schur.matrixU();
+ processTriangularMatrix<MatrixType>::run(m, T, U);
+ MatrixPower<MatrixType> mpow(m);
+
+ T = T.sqrt();
+ VERIFY(mpow(0.5L).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
+
+ T = T.sqrt();
+ VERIFY(mpow(0.25L).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
+
+ T = T.sqrt();
+ VERIFY(mpow(0.125L).isApprox(U * (TriangularType(T) * U.adjoint()), tol));
+ }
+}
+
+template<typename MatrixType>
+void testLogThenExp(const MatrixType& m_const, const typename MatrixType::RealScalar& tol)
+{
+ // we need to pass by reference in order to prevent errors with
+ // MSVC for aligned data types ...
+ MatrixType& m = const_cast<MatrixType&>(m_const);
+
+ typedef typename MatrixType::Scalar Scalar;
+ Scalar x;
+
+ for (int i=0; i < g_repeat; ++i) {
+ generateTestMatrix<MatrixType>::run(m, m.rows());
+ x = internal::random<Scalar>();
+ VERIFY(m.pow(x).isApprox((x * m.log()).exp(), tol));
}
}
typedef Matrix<double,3,3,RowMajor> Matrix3dRowMajor;
+typedef Matrix<long double,3,3> Matrix3e;
typedef Matrix<long double,Dynamic,Dynamic> MatrixXe;
void test_matrix_power()
{
CALL_SUBTEST_2(test2dRotation<double>(1e-13));
CALL_SUBTEST_1(test2dRotation<float>(2e-5)); // was 1e-5, relaxed for clang 2.8 / linux / x86-64
- CALL_SUBTEST_9(test2dRotation<long double>(1e-13));
+ CALL_SUBTEST_9(test2dRotation<long double>(1e-13L));
CALL_SUBTEST_2(test2dHyperbolicRotation<double>(1e-14));
CALL_SUBTEST_1(test2dHyperbolicRotation<float>(1e-5));
- CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14));
-
- CALL_SUBTEST_2(testExponentLaws(Matrix2d(), 1e-13));
- CALL_SUBTEST_7(testExponentLaws(Matrix3dRowMajor(), 1e-13));
- CALL_SUBTEST_3(testExponentLaws(Matrix4cd(), 1e-13));
- CALL_SUBTEST_4(testExponentLaws(MatrixXd(8,8), 2e-12));
- CALL_SUBTEST_1(testExponentLaws(Matrix2f(), 1e-4));
- CALL_SUBTEST_5(testExponentLaws(Matrix3cf(), 1e-4));
- CALL_SUBTEST_8(testExponentLaws(Matrix4f(), 1e-4));
- CALL_SUBTEST_6(testExponentLaws(MatrixXf(2,2), 1e-3)); // see bug 614
- CALL_SUBTEST_9(testExponentLaws(MatrixXe(7,7), 1e-13));
+ CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14L));
+
+ CALL_SUBTEST_10(test3dRotation<double>(1e-13));
+ CALL_SUBTEST_11(test3dRotation<float>(1e-5));
+ CALL_SUBTEST_12(test3dRotation<long double>(1e-13L));
+
+ CALL_SUBTEST_2(testGeneral(Matrix2d(), 1e-13));
+ CALL_SUBTEST_7(testGeneral(Matrix3dRowMajor(), 1e-13));
+ CALL_SUBTEST_3(testGeneral(Matrix4cd(), 1e-13));
+ CALL_SUBTEST_4(testGeneral(MatrixXd(8,8), 2e-12));
+ CALL_SUBTEST_1(testGeneral(Matrix2f(), 1e-4));
+ CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4));
+ CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4));
+ CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3)); // see bug 614
+ CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L));
+ CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13));
+ CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4));
+ CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L));
+
+ CALL_SUBTEST_2(testSingular(Matrix2d(), 1e-13));
+ CALL_SUBTEST_7(testSingular(Matrix3dRowMajor(), 1e-13));
+ CALL_SUBTEST_3(testSingular(Matrix4cd(), 1e-13));
+ CALL_SUBTEST_4(testSingular(MatrixXd(8,8), 2e-12));
+ CALL_SUBTEST_1(testSingular(Matrix2f(), 1e-4));
+ CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4));
+ CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4));
+ CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3));
+ CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L));
+ CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13));
+ CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4));
+ CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L));
+
+ CALL_SUBTEST_2(testLogThenExp(Matrix2d(), 1e-13));
+ CALL_SUBTEST_7(testLogThenExp(Matrix3dRowMajor(), 1e-13));
+ CALL_SUBTEST_3(testLogThenExp(Matrix4cd(), 1e-13));
+ CALL_SUBTEST_4(testLogThenExp(MatrixXd(8,8), 2e-12));
+ CALL_SUBTEST_1(testLogThenExp(Matrix2f(), 1e-4));
+ CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4));
+ CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4));
+ CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3));
+ CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L));
+ CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13));
+ CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4));
+ CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L));
}
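
testGeneral above exercises the exponent laws A^(x+y) = A^x A^y and (A^x)^y = A^(xy) through the MatrixPower class, which caches the underlying decomposition so that repeated exponents of the same matrix are cheap. A minimal usage sketch, assuming the unsupported MatrixFunctions module:

    #include <unsupported/Eigen/MatrixFunctions>
    #include <Eigen/Dense>

    void matrix_power_example() {
      // A symmetric positive-definite matrix, safely away from singularity.
      Eigen::Matrix3d A = Eigen::Matrix3d::Random();
      A = A * A.transpose() + 3.0 * Eigen::Matrix3d::Identity();

      Eigen::MatrixPower<Eigen::Matrix3d> Apow(A);  // reusable for many exponents
      Eigen::Matrix3d half = Apow(0.5);

      // Exponent law: A^0.5 * A^0.5 should reproduce A.
      bool ok = (half * half).isApprox(A, 1e-12);
      (void)ok;
    }
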
diff --git a/unsupported/test/minres.cpp b/unsupported/test/minres.cpp
index 509ebe09a..8b300b78a 100644
--- a/unsupported/test/minres.cpp
+++ b/unsupported/test/minres.cpp
@@ -1,8 +1,8 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
+// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,21 +14,14 @@
template<typename T> void test_minres_T()
{
- MINRES<SparseMatrix<T>, Lower|Upper, DiagonalPreconditioner<T> > minres_colmajor_diag;
+ // Identity preconditioner
MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_lower_I;
MINRES<SparseMatrix<T>, Upper, IdentityPreconditioner > minres_colmajor_upper_I;
-// MINRES<SparseMatrix<T>, Lower, IncompleteLUT<T> > minres_colmajor_ilut;
- //minres<SparseMatrix<T>, SSORPreconditioner<T> > minres_colmajor_ssor;
-
-
-// CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_diag) );
- // CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ilut) );
- //CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ssor) );
// Diagonal preconditioner
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_lower_diag;
MINRES<SparseMatrix<T>, Upper, DiagonalPreconditioner<T> > minres_colmajor_upper_diag;
- MINRES<SparseMatrix<T>, Upper|Lower, DiagonalPreconditioner<T> > minres_colmajor_uplo_diag;
+ MINRES<SparseMatrix<T>, Lower|Upper, DiagonalPreconditioner<T> > minres_colmajor_uplo_diag;
// call tests for SPD matrix
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_I) );
@@ -36,14 +29,16 @@ template<typename T> void test_minres_T()
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_diag) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_diag) );
-// CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_uplo_diag) );
+ CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_uplo_diag) );
// TO DO: symmetric semi-definite matrix
// TO DO: symmetric indefinite matrix
+
}
void test_minres()
{
CALL_SUBTEST_1(test_minres_T<double>());
-// CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
+// CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
+
}
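
The MINRES instantiations above differ only in which triangle of the matrix is read and which preconditioner is applied. A minimal solve sketch for the Lower|Upper, diagonally preconditioned variant the patch re-enables, assuming an SPD sparse matrix:

    #include <unsupported/Eigen/IterativeSolvers>
    #include <Eigen/IterativeLinearSolvers>
    #include <Eigen/SparseCore>

    Eigen::VectorXd minres_solve(const Eigen::SparseMatrix<double>& A,
                                 const Eigen::VectorXd& b) {
      // Read both triangles of the symmetric matrix; precondition with
      // the inverse of its diagonal.
      Eigen::MINRES<Eigen::SparseMatrix<double>, Eigen::Lower | Eigen::Upper,
                    Eigen::DiagonalPreconditioner<double> > solver;
      solver.compute(A);
      return solver.solve(b);
    }
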
diff --git a/unsupported/test/mpreal/mpreal.h b/unsupported/test/mpreal/mpreal.h
index 7d6f4e79f..8404f1ff8 100644
--- a/unsupported/test/mpreal/mpreal.h
+++ b/unsupported/test/mpreal/mpreal.h
@@ -1,33 +1,34 @@
/*
- MPFR C++: Multi-precision floating point number class for C++.
+ MPFR C++: Multi-precision floating point number class for C++.
Based on MPFR library: http://mpfr.org
Project homepage: http://www.holoborodko.com/pavel/mpfr
Contact e-mail: pavel@holoborodko.com
- Copyright (c) 2008-2014 Pavel Holoborodko
+ Copyright (c) 2008-2015 Pavel Holoborodko
Contributors:
- Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman,
- Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen,
- Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng,
+ Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman,
+ Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen,
+ Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng,
Alexei Zubanov, Jauhien Piatlicki, Victor Berger, John Westwood,
- Petr Aleksandrov, Orion Poplawski, Charles Karney.
+ Petr Aleksandrov, Orion Poplawski, Charles Karney, Arash Partow,
+ Rodney James, Jorge Leitao.
Licensing:
(A) MPFR C++ is under GNU General Public License ("GPL").
-
- (B) Non-free licenses may also be purchased from the author, for users who
+
+ (B) Non-free licenses may also be purchased from the author, for users who
do not want their programs protected by the GPL.
- The non-free licenses are for users that wish to use MPFR C++ in
- their products but are unwilling to release their software
- under the GPL (which would require them to release source code
+ The non-free licenses are for users that wish to use MPFR C++ in
+ their products but are unwilling to release their software
+ under the GPL (which would require them to release source code
and allow free redistribution).
Such users can purchase an unlimited-use license from the author.
Contact us for more details.
-
+
GNU General Public License ("GPL") copyright permissions statement:
**************************************************************************
This program is free software: you can redistribute it and/or modify
@@ -55,10 +56,10 @@
#include <cmath>
#include <cstring>
#include <limits>
+#include <complex>
+#include <algorithm>
// Options
-// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems.
-//#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC.
#define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC.
#define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits<mpfr::mpreal> specialization.
// Meaning that "digits", "round_style" and similar members are defined as functions, not constants.
@@ -66,19 +67,17 @@
// Library version
#define MPREAL_VERSION_MAJOR 3
-#define MPREAL_VERSION_MINOR 5
-#define MPREAL_VERSION_PATCHLEVEL 9
-#define MPREAL_VERSION_STRING "3.5.9"
+#define MPREAL_VERSION_MINOR 6
+#define MPREAL_VERSION_PATCHLEVEL 2
+#define MPREAL_VERSION_STRING "3.6.2"
// Detect compiler using signatures from http://predef.sourceforge.net/
-#if defined(__GNUC__) && defined(__INTEL_COMPILER)
- #define IsInf(x) isinf(x) // Intel ICC compiler on Linux
-
-#elif defined(_MSC_VER) // Microsoft Visual C++
- #define IsInf(x) (!_finite(x))
-
+#if defined(__GNUC__)
+ #define IsInf(x) (isinf)(x) // GNU C++/Intel ICC compiler on Linux
+#elif defined(_MSC_VER) // Microsoft Visual C++
+ #define IsInf(x) (!_finite(x))
#else
- #define IsInf(x) std::isinf(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance
+ #define IsInf(x) (std::isinf)(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance
#endif
// A Clang feature extension to determine compiler features.
@@ -93,54 +92,27 @@
#define MPREAL_HAVE_MOVE_SUPPORT
- // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization
+ // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization
#define mpfr_is_initialized(x) (0 != (x)->_mpfr_d)
#define mpfr_set_uninitialized(x) ((x)->_mpfr_d = 0 )
#endif
-// Detect support for explicit converters.
+// Detect support for explicit converters.
#if (__has_feature(cxx_explicit_conversions) || \
- defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \
- (defined(_MSC_VER) && _MSC_VER >= 1800))
+ (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR__ >= 5) || __cplusplus >= 201103L || \
+ (defined(_MSC_VER) && _MSC_VER >= 1800))
#define MPREAL_HAVE_EXPLICIT_CONVERTERS
#endif
-// Detect available 64-bit capabilities
-#if defined(MPREAL_HAVE_INT64_SUPPORT)
-
- #define MPFR_USE_INTMAX_T // Should be defined before mpfr.h
-
- #if defined(_MSC_VER) // MSVC + Windows
- #if (_MSC_VER >= 1600)
- #include <stdint.h> // <stdint.h> is available only in msvc2010!
-
- #else // MPFR relies on intmax_t which is available only in msvc2010
- #undef MPREAL_HAVE_INT64_SUPPORT // Besides, MPFR & MPIR have to be compiled with msvc2010
- #undef MPFR_USE_INTMAX_T // Since we cannot detect this, disable x64 by default
- // Someone should change this manually if needed.
- #endif
-
- #elif defined (__GNUC__) && defined(__linux__)
- #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) || defined (__PPC64__)
- #undef MPREAL_HAVE_INT64_SUPPORT // Remove all shaman dances for x64 builds since
- #undef MPFR_USE_INTMAX_T // GCC already supports x64 as of "long int" is 64-bit integer, nothing left to do
- #else
- #include <stdint.h> // use int64_t, uint64_t otherwise
- #endif
-
- #else
- #include <stdint.h> // rely on int64_t, uint64_t in all other cases, Mac OSX, etc.
- #endif
-
-#endif
+#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h
#if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG)
#define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString();
#define MPREAL_MSVC_DEBUGVIEW_DATA std::string DebugView;
#else
- #define MPREAL_MSVC_DEBUGVIEW_CODE
- #define MPREAL_MSVC_DEBUGVIEW_DATA
+ #define MPREAL_MSVC_DEBUGVIEW_CODE
+ #define MPREAL_MSVC_DEBUGVIEW_DATA
#endif
#include <mpfr.h>
@@ -150,9 +122,15 @@
#endif
// Less important options
-#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal
+#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal
// cannot fit in MPREAL_DOUBLE_BITS_OVERFLOW bits
// = -1 disables overflow checks (default)
+
+// Fast replacement for mpfr_set_zero(x, +1):
+// (a) uses low-level data members, might not be compatible with new versions of MPFR
+// (b) the sign is not set; add (x)->_mpfr_sign = 1; if a positive sign is required
+#define mpfr_set_zero_fast(x) ((x)->_mpfr_exp = __MPFR_EXP_ZERO)
+
#if defined(__GNUC__)
#define MPREAL_PERMISSIVE_EXPR __extension__
#else
@@ -164,9 +142,9 @@ namespace mpfr {
class mpreal {
private:
mpfr_t mp;
-
+
public:
-
+
// Get default rounding mode & precision
inline static mp_rnd_t get_default_rnd() { return (mp_rnd_t)(mpfr_get_default_rounding_mode()); }
inline static mp_prec_t get_default_prec() { return mpfr_get_default_prec(); }
@@ -174,29 +152,26 @@ public:
// Constructors && type conversions
mpreal();
mpreal(const mpreal& u);
- mpreal(const mpf_t u);
- mpreal(const mpz_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const mpq_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const long double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const unsigned int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-
- // Construct mpreal from mpfr_t structure.
- // shared = true allows to avoid deep copy, so that mpreal and 'u' share the same data & pointers.
- mpreal(const mpfr_t u, bool shared = false);
+ mpreal(const mpf_t u);
+ mpreal(const mpz_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const mpq_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const long double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const unsigned long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const unsigned int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- mpreal(const uint64_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const int64_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-#endif
+ // Construct mpreal from mpfr_t structure.
+ // shared = true avoids a deep copy, so that mpreal and 'u' share the same data & pointers.
+ mpreal(const mpfr_t u, bool shared = false);
mpreal(const char* s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
mpreal(const std::string& s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
- ~mpreal();
+ ~mpreal();
#ifdef MPREAL_HAVE_MOVE_SUPPORT
mpreal& operator=(mpreal&& v);
@@ -205,7 +180,7 @@ public:
// Operations
// =
- // +, -, *, /, ++, --, <<, >>
+ // +, -, *, /, ++, --, <<, >>
// *=, +=, -=, /=,
// <, >, ==, <=, >=
@@ -215,13 +190,16 @@ public:
mpreal& operator=(const mpz_t v);
mpreal& operator=(const mpq_t v);
mpreal& operator=(const long double v);
- mpreal& operator=(const double v);
+ mpreal& operator=(const double v);
mpreal& operator=(const unsigned long int v);
+ mpreal& operator=(const unsigned long long int v);
+ mpreal& operator=(const long long int v);
mpreal& operator=(const unsigned int v);
mpreal& operator=(const long int v);
mpreal& operator=(const int v);
mpreal& operator=(const char* s);
mpreal& operator=(const std::string& s);
+ template <typename real_t> mpreal& operator= (const std::complex<real_t>& z);
// +
mpreal& operator+=(const mpreal& v);
@@ -235,20 +213,18 @@ public:
mpreal& operator+=(const long int u);
mpreal& operator+=(const int u);
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- mpreal& operator+=(const int64_t u);
- mpreal& operator+=(const uint64_t u);
- mpreal& operator-=(const int64_t u);
- mpreal& operator-=(const uint64_t u);
- mpreal& operator*=(const int64_t u);
- mpreal& operator*=(const uint64_t u);
- mpreal& operator/=(const int64_t u);
- mpreal& operator/=(const uint64_t u);
-#endif
+ mpreal& operator+=(const long long int u);
+ mpreal& operator+=(const unsigned long long int u);
+ mpreal& operator-=(const long long int u);
+ mpreal& operator-=(const unsigned long long int u);
+ mpreal& operator*=(const long long int u);
+ mpreal& operator*=(const unsigned long long int u);
+ mpreal& operator/=(const long long int u);
+ mpreal& operator/=(const unsigned long long int u);
const mpreal operator+() const;
mpreal& operator++ ();
- const mpreal operator++ (int);
+ const mpreal operator++ (int);
// -
mpreal& operator-=(const mpreal& v);
@@ -266,7 +242,7 @@ public:
friend const mpreal operator-(const long int b, const mpreal& a);
friend const mpreal operator-(const int b, const mpreal& a);
friend const mpreal operator-(const double b, const mpreal& a);
- mpreal& operator-- ();
+ mpreal& operator-- ();
const mpreal operator-- (int);
// *
@@ -279,7 +255,7 @@ public:
mpreal& operator*=(const unsigned int v);
mpreal& operator*=(const long int v);
mpreal& operator*=(const int v);
-
+
// /
mpreal& operator/=(const mpreal& v);
mpreal& operator/=(const mpz_t v);
@@ -308,51 +284,27 @@ public:
mpreal& operator>>=(const long int u);
mpreal& operator>>=(const int u);
- // Boolean Operators
- friend bool operator > (const mpreal& a, const mpreal& b);
- friend bool operator >= (const mpreal& a, const mpreal& b);
- friend bool operator < (const mpreal& a, const mpreal& b);
- friend bool operator <= (const mpreal& a, const mpreal& b);
- friend bool operator == (const mpreal& a, const mpreal& b);
- friend bool operator != (const mpreal& a, const mpreal& b);
-
- // Optimized specializations for boolean operators
- friend bool operator == (const mpreal& a, const unsigned long int b);
- friend bool operator == (const mpreal& a, const unsigned int b);
- friend bool operator == (const mpreal& a, const long int b);
- friend bool operator == (const mpreal& a, const int b);
- friend bool operator == (const mpreal& a, const long double b);
- friend bool operator == (const mpreal& a, const double b);
-
// Type Conversion operators
- bool toBool (mp_rnd_t mode = GMP_RNDZ) const;
- long toLong (mp_rnd_t mode = GMP_RNDZ) const;
- unsigned long toULong (mp_rnd_t mode = GMP_RNDZ) const;
- float toFloat (mp_rnd_t mode = GMP_RNDN) const;
- double toDouble (mp_rnd_t mode = GMP_RNDN) const;
- long double toLDouble (mp_rnd_t mode = GMP_RNDN) const;
+ bool toBool ( ) const;
+ long toLong (mp_rnd_t mode = GMP_RNDZ) const;
+ unsigned long toULong (mp_rnd_t mode = GMP_RNDZ) const;
+ long long toLLong (mp_rnd_t mode = GMP_RNDZ) const;
+ unsigned long long toULLong (mp_rnd_t mode = GMP_RNDZ) const;
+ float toFloat (mp_rnd_t mode = GMP_RNDN) const;
+ double toDouble (mp_rnd_t mode = GMP_RNDN) const;
+ long double toLDouble (mp_rnd_t mode = GMP_RNDN) const;
#if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
- explicit operator bool () const { return toBool(); }
- explicit operator int () const { return toLong(); }
- explicit operator long () const { return toLong(); }
- explicit operator long long () const { return toLong(); }
- explicit operator unsigned () const { return toULong(); }
- explicit operator unsigned long () const { return toULong(); }
- explicit operator unsigned long long () const { return toULong(); }
- explicit operator float () const { return toFloat(); }
- explicit operator double () const { return toDouble(); }
- explicit operator long double () const { return toLDouble(); }
-#endif
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- int64_t toInt64 (mp_rnd_t mode = GMP_RNDZ) const;
- uint64_t toUInt64 (mp_rnd_t mode = GMP_RNDZ) const;
-
- #if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
- explicit operator int64_t () const { return toInt64(); }
- explicit operator uint64_t () const { return toUInt64(); }
- #endif
+ explicit operator bool () const { return toBool(); }
+ explicit operator int () const { return int(toLong()); }
+ explicit operator long () const { return toLong(); }
+ explicit operator long long () const { return toLLong(); }
+ explicit operator unsigned () const { return unsigned(toULong()); }
+ explicit operator unsigned long () const { return toULong(); }
+ explicit operator unsigned long long () const { return toULLong(); }
+ explicit operator float () const { return toFloat(); }
+ explicit operator double () const { return toDouble(); }
+ explicit operator long double () const { return toLDouble(); }
#endif
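// Usage sketch (with MPREAL_HAVE_EXPLICIT_CONVERTERS defined): integer
// conversions round toward zero, since toLong()/toLLong() default to GMP_RNDZ:
//
//   mpfr::mpreal x = 3.75;
//   long long n = static_cast<long long>(x);   // n == 3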
// Get raw pointers so that mpreal can be directly used in raw mpfr_* functions
@@ -391,11 +343,12 @@ public:
friend inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
friend inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode);
friend int cmpabs(const mpreal& a,const mpreal& b);
-
+
friend const mpreal log (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal log2 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal logb (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal log10(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal exp (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal exp (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal exp2 (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal exp10(const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal log1p(const mpreal& v, mp_rnd_t rnd_mode);
@@ -436,21 +389,22 @@ public:
friend const mpreal eint (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal gamma (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal tgamma (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal lngamma (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal lgamma (const mpreal& v, int *signp, mp_rnd_t rnd_mode);
friend const mpreal zeta (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal erf (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal erfc (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal besseljn (long n, const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal bessely0 (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal bessely1 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
friend const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
friend const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode);
- friend const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode);
+ friend const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t rnd_mode);
 friend int sgn(const mpreal& v); // returns the sign of v: negative, zero, or positive
// MPFR 2.4.0 Specifics
@@ -465,28 +419,26 @@ public:
friend const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); // Modulus after division
#endif
-// MPFR 3.0.0 Specifics
#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
friend const mpreal digamma (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal ai (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear
+#endif
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
friend const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear
friend const mpreal grandom (unsigned int seed);
#endif
-
+
// Uniformly distributed random number generation in [0,1] using
// Mersenne-Twister algorithm by default.
// Use parameter to setup seed, e.g.: random((unsigned)time(NULL))
// Check urandom() for more precise control.
friend const mpreal random(unsigned int seed);
- // Exponent and mantissa manipulation
- friend const mpreal frexp(const mpreal& v, mp_exp_t* exp);
- friend const mpreal ldexp(const mpreal& v, mp_exp_t exp);
-
// Splits mpreal value into fractional and integer parts.
// Returns fractional part and stores integer part in n.
- friend const mpreal modf(const mpreal& v, mpreal& n);
+ friend const mpreal modf(const mpreal& v, mpreal& n);
// Constants
// don't forget to call mpfr_free_cache() for every thread where you are using const-functions
@@ -515,14 +467,14 @@ public:
friend const mpreal frac (const mpreal& v, mp_rnd_t rnd_mode);
friend const mpreal remainder ( const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
friend const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
-
+
// Miscellaneous Functions
friend const mpreal nexttoward (const mpreal& x, const mpreal& y);
friend const mpreal nextabove (const mpreal& x);
friend const mpreal nextbelow (const mpreal& x);
// use gmp_randinit_default() to init state, gmp_randclear() to clear
- friend const mpreal urandomb (gmp_randstate_t& state);
+ friend const mpreal urandomb (gmp_randstate_t& state);
// MPFR < 2.4.2 Specifics
#if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2))
@@ -530,9 +482,9 @@ public:
#endif
// Instance Checkers
- friend bool isnan (const mpreal& v);
- friend bool isinf (const mpreal& v);
- friend bool isfinite (const mpreal& v);
+ friend bool (isnan) (const mpreal& v);
+ friend bool (isinf) (const mpreal& v);
+ friend bool (isfinite) (const mpreal& v);
friend bool isnum (const mpreal& v);
friend bool iszero (const mpreal& v);
@@ -549,9 +501,9 @@ public:
// Aliases for get_prec(), set_prec() - needed for compatibility with std::complex<mpreal> interface
inline mpreal& setPrecision(int Precision, mp_rnd_t RoundingMode = get_default_rnd());
inline int getPrecision() const;
-
+
// Set mpreal to +/- inf, NaN, +/-0
- mpreal& setInf (int Sign = +1);
+ mpreal& setInf (int Sign = +1);
mpreal& setNan ();
mpreal& setZero (int Sign = +1);
mpreal& setSign (int Sign, mp_rnd_t RoundingMode = get_default_rnd());
@@ -560,7 +512,7 @@ public:
mp_exp_t get_exp();
int set_exp(mp_exp_t e);
int check_range (int t, mp_rnd_t rnd_mode = get_default_rnd());
- int subnormalize (int t,mp_rnd_t rnd_mode = get_default_rnd());
+ int subnormalize (int t, mp_rnd_t rnd_mode = get_default_rnd());
// Inexact conversion from float
inline bool fits_in_bits(double x, int n);
@@ -580,7 +532,7 @@ public:
// Efficient swapping of two mpreal values - needed for std algorithms
friend void swap(mpreal& x, mpreal& y);
-
+
friend const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
friend const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
@@ -590,7 +542,7 @@ private:
//
// mpfr::mpreal=<DebugView> ; Show value only
// mpfr::mpreal=<DebugView>, <mp[0]._mpfr_prec,u>bits ; Show value & precision
- //
+ //
// at the beginning of
// [Visual Studio Installation Folder]\Common7\Packages\Debugger\autoexp.dat
MPREAL_MSVC_DEBUGVIEW_DATA
@@ -609,15 +561,15 @@ public:
//////////////////////////////////////////////////////////////////////////
// Constructors & converters
// Default constructor: creates mp number and initializes it to 0.
-inline mpreal::mpreal()
-{
- mpfr_init2 (mpfr_ptr(), mpreal::get_default_prec());
- mpfr_set_ui(mpfr_ptr(), 0, mpreal::get_default_rnd());
+inline mpreal::mpreal()
+{
+ mpfr_init2(mpfr_ptr(), mpreal::get_default_prec());
+ mpfr_set_zero_fast(mpfr_ptr());
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-inline mpreal::mpreal(const mpreal& u)
+inline mpreal::mpreal(const mpreal& u)
{
mpfr_init2(mpfr_ptr(),mpfr_get_prec(u.mpfr_srcptr()));
mpfr_set (mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd());
@@ -628,7 +580,7 @@ inline mpreal::mpreal(const mpreal& u)
#ifdef MPREAL_HAVE_MOVE_SUPPORT
inline mpreal::mpreal(mpreal&& other)
{
- mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pinter to actual data
+ mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pointer to actual data
mpfr_swap(mpfr_ptr(), other.mpfr_ptr());
MPREAL_MSVC_DEBUGVIEW_CODE;
@@ -700,67 +652,65 @@ inline mpreal::mpreal(const double u, mp_prec_t prec, mp_rnd_t mode)
}
inline mpreal::mpreal(const long double u, mp_prec_t prec, mp_rnd_t mode)
-{
+{
mpfr_init2 (mpfr_ptr(), prec);
mpfr_set_ld(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode)
-{
+inline mpreal::mpreal(const unsigned long long int u, mp_prec_t prec, mp_rnd_t mode)
+{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_ui(mpfr_ptr(), u, mode);
+ mpfr_set_uj(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode)
-{
+inline mpreal::mpreal(const long long int u, mp_prec_t prec, mp_rnd_t mode)
+{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_ui(mpfr_ptr(), u, mode);
+ mpfr_set_sj(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode)
-{
+inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode)
+{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_si(mpfr_ptr(), u, mode);
+ mpfr_set_ui(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-inline mpreal::mpreal(const int u, mp_prec_t prec, mp_rnd_t mode)
-{
+inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode)
+{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_si(mpfr_ptr(), u, mode);
+ mpfr_set_ui(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-inline mpreal::mpreal(const uint64_t u, mp_prec_t prec, mp_rnd_t mode)
+inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode)
{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_uj(mpfr_ptr(), u, mode);
+ mpfr_set_si(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-inline mpreal::mpreal(const int64_t u, mp_prec_t prec, mp_rnd_t mode)
+inline mpreal::mpreal(const int u, mp_prec_t prec, mp_rnd_t mode)
{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_sj(mpfr_ptr(), u, mode);
+ mpfr_set_si(mpfr_ptr(), u, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
-#endif
inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_str(mpfr_ptr(), s, base, mode);
+ mpfr_set_str(mpfr_ptr(), s, base, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
@@ -768,7 +718,7 @@ inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode)
{
mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode);
+ mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode);
MPREAL_MSVC_DEBUGVIEW_CODE;
}
@@ -776,15 +726,15 @@ inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t m
inline void mpreal::clear(::mpfr_ptr x)
{
#ifdef MPREAL_HAVE_MOVE_SUPPORT
- if(mpfr_is_initialized(x))
+ if(mpfr_is_initialized(x))
#endif
mpfr_clear(x);
}
-inline mpreal::~mpreal()
-{
+inline mpreal::~mpreal()
+{
clear(mpfr_ptr());
-}
+}
// internal namespace needed for template magic
namespace internal{
@@ -792,58 +742,55 @@ namespace internal{
// Use SFINAE to restrict arithmetic operations instantiation only for numeric types
// This is needed for smooth integration with libraries based on expression templates, like Eigen.
// TODO: Do the same for boolean operators.
- template <typename ArgumentType> struct result_type {};
-
- template <> struct result_type<mpreal> {typedef mpreal type;};
- template <> struct result_type<mpz_t> {typedef mpreal type;};
- template <> struct result_type<mpq_t> {typedef mpreal type;};
- template <> struct result_type<long double> {typedef mpreal type;};
- template <> struct result_type<double> {typedef mpreal type;};
- template <> struct result_type<unsigned long int> {typedef mpreal type;};
- template <> struct result_type<unsigned int> {typedef mpreal type;};
- template <> struct result_type<long int> {typedef mpreal type;};
- template <> struct result_type<int> {typedef mpreal type;};
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- template <> struct result_type<int64_t > {typedef mpreal type;};
- template <> struct result_type<uint64_t > {typedef mpreal type;};
-#endif
+ template <typename ArgumentType> struct result_type {};
+
+ template <> struct result_type<mpreal> {typedef mpreal type;};
+ template <> struct result_type<mpz_t> {typedef mpreal type;};
+ template <> struct result_type<mpq_t> {typedef mpreal type;};
+ template <> struct result_type<long double> {typedef mpreal type;};
+ template <> struct result_type<double> {typedef mpreal type;};
+ template <> struct result_type<unsigned long int> {typedef mpreal type;};
+ template <> struct result_type<unsigned int> {typedef mpreal type;};
+ template <> struct result_type<long int> {typedef mpreal type;};
+ template <> struct result_type<int> {typedef mpreal type;};
+ template <> struct result_type<long long> {typedef mpreal type;};
+ template <> struct result_type<unsigned long long> {typedef mpreal type;};
}
// + Addition
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
operator+(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) += rhs; }
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
- operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; }
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
+ operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; }
// - Subtraction
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
operator-(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) -= rhs; }
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
operator-(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) -= rhs; }
// * Multiplication
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
operator*(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) *= rhs; }
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
- operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; }
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
+ operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; }
// / Division
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
operator/(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) /= rhs; }
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
operator/(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) /= rhs; }
//////////////////////////////////////////////////////////////////////////
@@ -893,17 +840,17 @@ const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode = mpreal::g
const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
@@ -920,9 +867,9 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpr
inline mpreal machine_epsilon(mp_prec_t prec = mpreal::get_default_prec());
// Returns smallest eps such that x + eps != x (relative machine epsilon)
-inline mpreal machine_epsilon(const mpreal& x);
+inline mpreal machine_epsilon(const mpreal& x);
-// Gives max & min values for the required precision,
+// Gives max & min values for the required precision,
// minval is 'safe' meaning 1 / minval does not overflow
// maxval is 'safe' meaning 1 / maxval does not underflow
inline mpreal minval(mp_prec_t prec = mpreal::get_default_prec());
@@ -935,13 +882,13 @@ inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps);
inline bool isEqualFuzzy(const mpreal& a, const mpreal& b);
// 'Bitwise' equality check
-// maxUlps - a and b can be apart by maxUlps binary numbers.
+// maxUlps - a and b may differ by at most maxUlps units in the last place (ULPs).
inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps);
//////////////////////////////////////////////////////////////////////////
-// Convert precision in 'bits' to decimal digits and vice versa.
-// bits = ceil(digits*log[2](10))
-// digits = floor(bits*log[10](2))
+// Convert precision in 'bits' to decimal digits and vice versa.
+// bits = ceil(digits*log[2](10))
+// digits = floor(bits*log[10](2))
inline mp_prec_t digits2bits(int d);
inline int bits2digits(mp_prec_t b);
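// Worked example: requesting 15 decimal digits needs
// digits2bits(15) = ceil(15 * log2(10)) = ceil(49.83) = 50 bits, while a
// 53-bit double yields bits2digits(53) = floor(53 * log10(2)) = floor(15.95) = 15 digits.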
@@ -979,7 +926,7 @@ inline mpreal& mpreal::operator=(const mpreal& v)
inline mpreal& mpreal::operator=(const mpf_t v)
{
mpfr_set_f(mpfr_ptr(), v, mpreal::get_default_rnd());
-
+
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
@@ -987,7 +934,7 @@ inline mpreal& mpreal::operator=(const mpf_t v)
inline mpreal& mpreal::operator=(const mpz_t v)
{
mpfr_set_z(mpfr_ptr(), v, mpreal::get_default_rnd());
-
+
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
@@ -1000,16 +947,16 @@ inline mpreal& mpreal::operator=(const mpq_t v)
return *this;
}
-inline mpreal& mpreal::operator=(const long double v)
-{
+inline mpreal& mpreal::operator=(const long double v)
+{
mpfr_set_ld(mpfr_ptr(), v, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
-inline mpreal& mpreal::operator=(const double v)
-{
+inline mpreal& mpreal::operator=(const double v)
+{
#if (MPREAL_DOUBLE_BITS_OVERFLOW > -1)
if(fits_in_bits(v, MPREAL_DOUBLE_BITS_OVERFLOW))
{
@@ -1024,33 +971,49 @@ inline mpreal& mpreal::operator=(const double v)
return *this;
}
-inline mpreal& mpreal::operator=(const unsigned long int v)
-{
- mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
+inline mpreal& mpreal::operator=(const unsigned long int v)
+{
+ mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const unsigned int v)
+{
+ mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const unsigned long long int v)
+{
+ mpfr_set_uj(mpfr_ptr(), v, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
-inline mpreal& mpreal::operator=(const unsigned int v)
-{
- mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
+inline mpreal& mpreal::operator=(const long long int v)
+{
+ mpfr_set_sj(mpfr_ptr(), v, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
-inline mpreal& mpreal::operator=(const long int v)
-{
- mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
+inline mpreal& mpreal::operator=(const long int v)
+{
+ mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
inline mpreal& mpreal::operator=(const int v)
-{
- mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
+{
+ mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
@@ -1071,7 +1034,7 @@ inline mpreal& mpreal::operator=(const char* s)
if(0 == mpfr_set_str(t, s, 10, mpreal::get_default_rnd()))
{
- mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
+ mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
}
@@ -1094,7 +1057,7 @@ inline mpreal& mpreal::operator=(const std::string& s)
if(0 == mpfr_set_str(t, s.c_str(), 10, mpreal::get_default_rnd()))
{
- mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
+ mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
MPREAL_MSVC_DEBUGVIEW_CODE;
}
@@ -1102,6 +1065,11 @@ inline mpreal& mpreal::operator=(const std::string& s)
return *this;
}
+template <typename real_t>
+inline mpreal& mpreal::operator= (const std::complex<real_t>& z)
+{
+ return *this = z.real();
+}
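// Note: only z.real() is assigned; the imaginary part of z is discarded, e.g.
//   mpfr::mpreal x; x = std::complex<double>(2.0, 5.0);   // x == 2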
//////////////////////////////////////////////////////////////////////////
// + Addition
@@ -1135,9 +1103,9 @@ inline mpreal& mpreal::operator+=(const mpq_t u)
inline mpreal& mpreal::operator+= (const long double u)
{
- *this += mpreal(u);
+ *this += mpreal(u);
MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
+ return *this;
}
inline mpreal& mpreal::operator+= (const double u)
@@ -1180,16 +1148,14 @@ inline mpreal& mpreal::operator+=(const int u)
return *this;
}
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-inline mpreal& mpreal::operator+=(const int64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator+=(const uint64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator-=(const int64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator-=(const uint64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator*=(const int64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator*=(const uint64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator/=(const int64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator/=(const uint64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-#endif
+inline mpreal& mpreal::operator+=(const long long int u) { *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator+=(const unsigned long long int u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator-=(const long long int u) { *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator-=(const unsigned long long int u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator*=(const long long int u) { *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator*=(const unsigned long long int u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator/=(const long long int u) { *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator/=(const unsigned long long int u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
inline const mpreal mpreal::operator+()const { return mpreal(*this); }
@@ -1200,7 +1166,7 @@ inline const mpreal operator+(const mpreal& a, const mpreal& b)
return c;
}
-inline mpreal& mpreal::operator++()
+inline mpreal& mpreal::operator++()
{
return *this += 1;
}
@@ -1212,7 +1178,7 @@ inline const mpreal mpreal::operator++ (int)
return x;
}
-inline mpreal& mpreal::operator--()
+inline mpreal& mpreal::operator--()
{
return *this -= 1;
}
@@ -1249,9 +1215,9 @@ inline mpreal& mpreal::operator-=(const mpq_t v)
inline mpreal& mpreal::operator-=(const long double v)
{
- *this -= mpreal(v);
+ *this -= mpreal(v);
MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
+ return *this;
}
inline mpreal& mpreal::operator-=(const double v)
@@ -1259,7 +1225,7 @@ inline mpreal& mpreal::operator-=(const double v)
#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
mpfr_sub_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
#else
- *this -= mpreal(v);
+ *this -= mpreal(v);
#endif
MPREAL_MSVC_DEBUGVIEW_CODE;
@@ -1374,9 +1340,9 @@ inline mpreal& mpreal::operator*=(const mpq_t v)
inline mpreal& mpreal::operator*=(const long double v)
{
- *this *= mpreal(v);
+ *this *= mpreal(v);
MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
+ return *this;
}
inline mpreal& mpreal::operator*=(const double v)
@@ -1384,7 +1350,7 @@ inline mpreal& mpreal::operator*=(const double v)
#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
mpfr_mul_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
#else
- *this *= mpreal(v);
+ *this *= mpreal(v);
#endif
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
@@ -1452,7 +1418,7 @@ inline mpreal& mpreal::operator/=(const long double v)
{
*this /= mpreal(v);
MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
+ return *this;
}
inline mpreal& mpreal::operator/=(const double v)
@@ -1460,7 +1426,7 @@ inline mpreal& mpreal::operator/=(const double v)
#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
mpfr_div_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
#else
- *this /= mpreal(v);
+ *this /= mpreal(v);
#endif
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
@@ -1671,45 +1637,86 @@ inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode)
}
//////////////////////////////////////////////////////////////////////////
-//Boolean operators
-inline bool operator > (const mpreal& a, const mpreal& b){ return (mpfr_greater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator >= (const mpreal& a, const mpreal& b){ return (mpfr_greaterequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator < (const mpreal& a, const mpreal& b){ return (mpfr_less_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator <= (const mpreal& a, const mpreal& b){ return (mpfr_lessequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator == (const mpreal& a, const mpreal& b){ return (mpfr_equal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator != (const mpreal& a, const mpreal& b){ return (mpfr_lessgreater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-
-inline bool operator == (const mpreal& a, const unsigned long int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const unsigned int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const long int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const long double b ){ return (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const double b ){ return (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); }
-
-
-inline bool isnan (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); }
-inline bool isinf (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); }
-inline bool isfinite (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); }
+//Relational operators
+
+// WARNING:
+//
+// Please note that the following checks for double-NaN are guaranteed to work only in IEEE math mode:
+//
+// isnan(b) = (b != b)
+// isnan(b) = !(b == b) (which is what the code below uses)
+//
+// Be cautious if you use compiler options which break strict IEEE compliance (e.g. -ffast-math in GCC).
+// Use std::isnan instead (C++11).
+
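// Illustration of the warning above (a sketch, not part of the operators):
// with -ffast-math the compiler may fold (b == b) to true even when b is NaN,
// defeating the guard used below:
//
//   double b = std::numeric_limits<double>::quiet_NaN();
//   mpfr::mpreal a = 1;
//   bool r = (a < b);   // false under strict IEEE; unreliable once (b == b) is folded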
+inline bool operator > (const mpreal& a, const mpreal& b ){ return (mpfr_greater_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator > (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) > 0 ); }
+
+inline bool operator >= (const mpreal& a, const mpreal& b ){ return (mpfr_greaterequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator >= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); }
+// inline bool operator >= (const mpreal& a, const unsigned int b     ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 );    }
+inline bool operator >= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) >= 0 ); }
+
+inline bool operator < (const mpreal& a, const mpreal& b ){ return (mpfr_less_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator < (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) < 0 ); }
+
+inline bool operator <= (const mpreal& a, const mpreal& b ){ return (mpfr_lessequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator <= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) <= 0 ); }
+
+inline bool operator == (const mpreal& a, const mpreal& b ){ return (mpfr_equal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator == (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); }
+
+inline bool operator != (const mpreal& a, const mpreal& b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const unsigned long int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const unsigned int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const long int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const long double b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const double b ){ return !(a == b); }
+
+inline bool (isnan) (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); }
+inline bool (isinf) (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); }
+inline bool (isfinite) (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); }
inline bool iszero (const mpreal& op){ return (mpfr_zero_p (op.mpfr_srcptr()) != 0 ); }
inline bool isint (const mpreal& op){ return (mpfr_integer_p(op.mpfr_srcptr()) != 0 ); }
#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
inline bool isregular(const mpreal& op){ return (mpfr_regular_p(op.mpfr_srcptr()));}
-#endif
+#endif
//////////////////////////////////////////////////////////////////////////
// Type Converters
-inline bool mpreal::toBool (mp_rnd_t /*mode*/) const { return mpfr_zero_p (mpfr_srcptr()) == 0; }
-inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); }
-inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); }
-inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); }
-inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); }
-inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); }
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-inline int64_t mpreal::toInt64 (mp_rnd_t mode) const{ return mpfr_get_sj(mpfr_srcptr(), mode); }
-inline uint64_t mpreal::toUInt64(mp_rnd_t mode) const{ return mpfr_get_uj(mpfr_srcptr(), mode); }
-#endif
+inline bool mpreal::toBool ( ) const { return mpfr_zero_p (mpfr_srcptr()) == 0; }
+inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); }
+inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); }
+inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); }
+inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); }
+inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); }
+inline long long mpreal::toLLong (mp_rnd_t mode) const { return mpfr_get_sj (mpfr_srcptr(), mode); }
+inline unsigned long long mpreal::toULLong (mp_rnd_t mode) const { return mpfr_get_uj (mpfr_srcptr(), mode); }
inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; }
inline ::mpfr_srcptr mpreal::mpfr_ptr() const { return mp; }
@@ -1755,21 +1762,21 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
std::ostringstream format;
- int digits = (n >= 0) ? n : bits2digits(mpfr_get_prec(mpfr_srcptr()));
-
+ int digits = (n >= 0) ? n : 1 + bits2digits(mpfr_get_prec(mpfr_srcptr()));
+
format << "%." << digits << "RNg";
return toString(format.str());
#else
- char *s, *ns = NULL;
+ char *s, *ns = NULL;
size_t slen, nslen;
mp_exp_t exp;
std::string out;
if(mpfr_inf_p(mp))
- {
+ {
if(mpfr_sgn(mp)>0) return "+Inf";
else return "-Inf";
}
@@ -1784,7 +1791,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
slen = strlen(s);
nslen = strlen(ns);
- if(nslen<=slen)
+ if(nslen<=slen)
{
mpfr_free_str(s);
s = ns;
@@ -1801,7 +1808,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
// Remove zeros starting from right end
char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s+exp) ptr--;
+ while (*ptr=='0' && ptr>s+exp) ptr--;
if(ptr==s+exp) out = std::string(s,exp+1);
else out = std::string(s,exp+1)+'.'+std::string(s+exp+1,ptr-(s+exp+1)+1);
@@ -1812,7 +1819,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
// Remove zeros starting from right end
char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s+exp-1) ptr--;
+ while (*ptr=='0' && ptr>s+exp-1) ptr--;
if(ptr==s+exp-1) out = std::string(s,exp);
else out = std::string(s,exp)+'.'+std::string(s+exp,ptr-(s+exp)+1);
@@ -1825,7 +1832,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
// Remove zeros starting from right end
char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s+1) ptr--;
+ while (*ptr=='0' && ptr>s+1) ptr--;
if(ptr==s+1) out = std::string(s,2);
else out = std::string(s,2)+'.'+std::string(s+2,ptr-(s+2)+1);
@@ -1836,7 +1843,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
{
// Remove zeros starting from right end
char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s) ptr--;
+ while (*ptr=='0' && ptr>s) ptr--;
if(ptr==s) out = std::string(s,1);
else out = std::string(s,1)+'.'+std::string(s+1,ptr-(s+1)+1);
@@ -1863,7 +1870,7 @@ inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
//////////////////////////////////////////////////////////////////////////
// I/O
-inline std::ostream& mpreal::output(std::ostream& os) const
+inline std::ostream& mpreal::output(std::ostream& os) const
{
std::ostringstream format;
const std::ios::fmtflags flags = os.flags();
@@ -1926,8 +1933,7 @@ inline int bits2digits(mp_prec_t b)
// Set/Get number properties
inline int sgn(const mpreal& op)
{
- int r = mpfr_signbit(op.mpfr_srcptr());
- return (r > 0? -1 : 1);
+ return mpfr_sgn(op.mpfr_srcptr());
}
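// Behavioral note: unlike the previous signbit-based version, mpfr_sgn()
// returns 0 for zero (and for NaN, setting the erange flag), not strictly -1/+1.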
inline mpreal& mpreal::setSign(int sign, mp_rnd_t RoundingMode)
@@ -1949,29 +1955,28 @@ inline mpreal& mpreal::setPrecision(int Precision, mp_rnd_t RoundingMode)
return *this;
}
-inline mpreal& mpreal::setInf(int sign)
-{
+inline mpreal& mpreal::setInf(int sign)
+{
mpfr_set_inf(mpfr_ptr(), sign);
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
-}
+}
-inline mpreal& mpreal::setNan()
+inline mpreal& mpreal::setNan()
{
mpfr_set_nan(mpfr_ptr());
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
}
-inline mpreal& mpreal::setZero(int sign)
+inline mpreal& mpreal::setZero(int sign)
{
-
#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
mpfr_set_zero(mpfr_ptr(), sign);
#else
mpfr_set_si(mpfr_ptr(), 0, (mpfr_get_default_rounding_mode)());
setSign(sign);
-#endif
+#endif
MPREAL_MSVC_DEBUGVIEW_CODE;
return *this;
@@ -2000,23 +2005,32 @@ inline int mpreal::set_exp (mp_exp_t e)
return x;
}
-inline const mpreal frexp(const mpreal& v, mp_exp_t* exp)
+inline const mpreal frexp(const mpreal& x, mp_exp_t* exp, mp_rnd_t mode = mpreal::get_default_rnd())
{
- mpreal x(v);
- *exp = x.get_exp();
- x.set_exp(0);
- return x;
+ mpreal y(x);
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
+ mpfr_frexp(exp,y.mpfr_ptr(),x.mpfr_srcptr(),mode);
+#else
+ *exp = mpfr_get_exp(y.mpfr_srcptr());
+ mpfr_set_exp(y.mpfr_ptr(),0);
+#endif
+ return y;
}
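// Round-trip sketch: frexp() returns a mantissa m with |m| in [0.5, 1) such
// that v == ldexp(m, e):
//
//   mp_exp_t e;
//   mpreal m = frexp(mpreal(24), &e);   // m == 0.75, e == 5, since 24 == 0.75 * 2^5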
inline const mpreal ldexp(const mpreal& v, mp_exp_t exp)
{
mpreal x(v);
- // rounding is not important since we just increasing the exponent
- mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd());
+ // rounding is not important since we are just increasing the exponent (= exact operation)
+ mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd());
return x;
}
+inline const mpreal scalbn(const mpreal& v, mp_exp_t exp)
+{
+ return ldexp(v, exp);
+}
+
inline mpreal machine_epsilon(mp_prec_t prec)
{
/* the smallest eps such that 1 + eps != 1 */
@@ -2024,7 +2038,7 @@ inline mpreal machine_epsilon(mp_prec_t prec)
}
inline mpreal machine_epsilon(const mpreal& x)
-{
+{
/* the smallest eps such that x + eps != x */
if( x < 0)
{
@@ -2045,7 +2059,7 @@ inline mpreal minval(mp_prec_t prec)
inline mpreal maxval(mp_prec_t prec)
{
/* max = (1 - eps) * 2^emax, eps is machine epsilon */
- return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax();
+ return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax();
}
inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps)
@@ -2063,12 +2077,26 @@ inline bool isEqualFuzzy(const mpreal& a, const mpreal& b)
return isEqualFuzzy(a, b, machine_epsilon((max)(1, (min)(abs(a), abs(b)))));
}
+//////////////////////////////////////////////////////////////////////////
+// C++11 sign functions.
+inline mpreal copysign(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal rop(0, mpfr_get_prec(x.mpfr_ptr()));
+ mpfr_setsign(rop.mpfr_ptr(), x.mpfr_srcptr(), mpfr_signbit(y.mpfr_srcptr()), rnd_mode);
+ return rop;
+}
+
+inline bool signbit(const mpreal& x)
+{
+ return mpfr_signbit(x.mpfr_srcptr());
+}
+
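// Quick check of the semantics above:
//   signbit(mpreal(-0.0)) == true
//   copysign(mpreal(3), mpreal(-1)) == -3   // magnitude of x, sign of y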
inline const mpreal modf(const mpreal& v, mpreal& n)
{
mpreal f(v);
// rounding is not important since we are using the same number
- mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd());
+ mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd());
mpfr_trunc(n.mpfr_ptr(),v.mpfr_srcptr());
return f;
}
@@ -2131,7 +2159,7 @@ inline mp_exp_t mpreal::get_emax_max (void)
#define MPREAL_UNARY_MATH_FUNCTION_BODY(f) \
mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); \
mpfr_##f(y.mpfr_ptr(), x.mpfr_srcptr(), r); \
- return y;
+ return y;
inline const mpreal sqr (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
{ MPREAL_UNARY_MATH_FUNCTION_BODY(sqr ); }
@@ -2154,7 +2182,7 @@ inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode)
inline const mpreal sqrt(const long int v, mp_rnd_t rnd_mode)
{
if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
- else return mpreal().setNan(); // NaN
+ else return mpreal().setNan(); // NaN
}
inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode)
@@ -2165,9 +2193,9 @@ inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode)
inline const mpreal root(const mpreal& x, unsigned long int k, mp_rnd_t r = mpreal::get_default_rnd())
{
- mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
- mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r);
- return y;
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
+ mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r);
+ return y;
}
inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t r = mpreal::get_default_rnd())
@@ -2209,6 +2237,8 @@ inline const mpreal acos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd
inline const mpreal asin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asin ); }
inline const mpreal atan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atan ); }
+inline const mpreal logb (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { return log2 (abs(x),r); }
+
inline const mpreal acot (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atan (1/v, r); }
inline const mpreal asec (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acos (1/v, r); }
inline const mpreal acsc (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asin (1/v, r); }
@@ -2230,6 +2260,7 @@ inline const mpreal log1p (const mpreal& x, mp_rnd_t r = mpreal::get_default_r
inline const mpreal expm1 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(expm1 ); }
inline const mpreal eint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(eint ); }
inline const mpreal gamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); }
+inline const mpreal tgamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); }
inline const mpreal lngamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(lngamma); }
inline const mpreal zeta (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(zeta ); }
inline const mpreal erf (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erf ); }
@@ -2254,7 +2285,7 @@ inline const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode =
}
inline const mpreal remainder (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
-{
+{
mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision()));
mpfr_remainder(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode);
return a;
@@ -2307,9 +2338,9 @@ inline const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, m
mpreal a;
mp_prec_t p1, p2, p3;
- p1 = v1.get_prec();
- p2 = v2.get_prec();
- p3 = v3.get_prec();
+ p1 = v1.get_prec();
+ p2 = v2.get_prec();
+ p3 = v3.get_prec();
a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1));
@@ -2322,9 +2353,9 @@ inline const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, m
mpreal a;
mp_prec_t p1, p2, p3;
- p1 = v1.get_prec();
- p2 = v2.get_prec();
- p3 = v3.get_prec();
+ p1 = v1.get_prec();
+ p2 = v2.get_prec();
+ p3 = v3.get_prec();
a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1));
@@ -2337,8 +2368,8 @@ inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode =
mpreal a;
mp_prec_t p1, p2;
- p1 = v1.get_prec();
- p2 = v2.get_prec();
+ p1 = v1.get_prec();
+ p2 = v2.get_prec();
a.set_prec(p1>p2?p1:p2);
@@ -2347,16 +2378,17 @@ inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode =
return a;
}
-inline const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+inline const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t mode = mpreal::get_default_rnd())
{
+ mpfr_srcptr *p = new mpfr_srcptr[n];
+
+ for (unsigned long int i = 0; i < n; i++)
+ p[i] = tab[i].mpfr_srcptr();
+
mpreal x;
- mpfr_ptr* t;
- unsigned long int i;
+ status = mpfr_sum(x.mpfr_ptr(), (mpfr_ptr*)p, n, mode);
- t = new mpfr_ptr[n];
- for (i=0;i<n;i++) t[i] = (mpfr_ptr)tab[i].mp;
- mpfr_sum(x.mp,t,n,rnd_mode);
- delete[] t;
+ delete [] p;
return x;
}
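// Usage sketch for the new signature: 'status' receives the ternary value of
// mpfr_sum() (the sign of the rounding error; 0 when the result is exact):
//
//   mpreal tab[3] = { 1, 2, 3 };
//   int status;
//   mpreal total = sum(tab, 3, status);   // total == 6, status == 0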
@@ -2369,9 +2401,9 @@ inline int sinh_cosh(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode =
return mpfr_sinh_cosh(s.mp,c.mp,v.mp,rnd_mode);
}
-inline const mpreal li2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
-{
- MPREAL_UNARY_MATH_FUNCTION_BODY(li2);
+inline const mpreal li2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
+{
+ MPREAL_UNARY_MATH_FUNCTION_BODY(li2);
}
inline const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
@@ -2383,23 +2415,23 @@ inline const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = m
inline const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
{
(void)rnd_mode;
-
- /*
+
+ /*
 m = mod(x,y) returns x - n*y, where n = floor(x/y), provided y != 0.
The following are true by convention:
- mod(x,0) is x
- mod(x,x) is 0
- - mod(x,y) for x != y and y != 0 has the same sign as y.
-
+ - mod(x,y) for x != y and y != 0 has the same sign as y.
+
*/
if(iszero(y)) return x;
if(x == y) return 0;
mpreal m = x - floor(x / y) * y;
-
+
m.setSign(sgn(y)); // make sure result has the same sign as Y
return m;
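// Worked examples of the convention: mod(5,3) == 2; mod(-5,3) == 1, since
// floor(-5/3) == -2; and mod(5,-3) == -1, taking the sign of y.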
@@ -2410,8 +2442,8 @@ inline const mpreal fmod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode =
mpreal a;
mp_prec_t yp, xp;
- yp = y.get_prec();
- xp = x.get_prec();
+ yp = y.get_prec();
+ xp = x.get_prec();
a.set_prec(yp>xp?yp:xp);
@@ -2553,33 +2585,24 @@ inline const mpreal nextbelow (const mpreal& x)
inline const mpreal urandomb (gmp_randstate_t& state)
{
mpreal x;
- mpfr_urandomb(x.mp,state);
+ mpfr_urandomb(x.mpfr_ptr(),state);
return x;
}
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
-// use gmp_randinit_default() to init state, gmp_randclear() to clear
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
inline const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
{
mpreal x;
- mpfr_urandom(x.mp,state,rnd_mode);
- return x;
-}
-
-inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
-{
- mpreal x;
- mpfr_grandom(x.mp, NULL, state, rnd_mode);
+ mpfr_urandom(x.mpfr_ptr(), state, rnd_mode);
return x;
}
-
-#endif
+#endif
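
(For context: mpfr_urandom first appeared in MPFR 3.0, while mpfr_grandom was only added in MPFR 3.1, which is why this change splits the old single version guard into the two separate guards seen here and further below.)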
#if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2))
inline const mpreal random2 (mp_size_t size, mp_exp_t exp)
{
mpreal x;
- mpfr_random2(x.mp,size,exp);
+ mpfr_random2(x.mpfr_ptr(),size,exp);
return x;
}
#endif
@@ -2590,16 +2613,15 @@ inline const mpreal random2 (mp_size_t size, mp_exp_t exp)
// seed != 0
inline const mpreal random(unsigned int seed = 0)
{
-
#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
static gmp_randstate_t state;
- static bool isFirstTime = true;
+ static bool initialize = true;
- if(isFirstTime)
+ if(initialize)
{
gmp_randinit_default(state);
gmp_randseed_ui(state,0);
- isFirstTime = false;
+ initialize = false;
}
if(seed != 0) gmp_randseed_ui(state,seed);
@@ -2612,17 +2634,25 @@ inline const mpreal random(unsigned int seed = 0)
}
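
(A short usage sketch of random() as defined above, assuming an MPFR >= 3.0 build; names illustrative.)

mpfr::mpreal a = mpfr::random();    // first call lazily creates the state, seeded with 0
mpfr::mpreal b = mpfr::random(42);  // a non-zero argument reseeds the shared state first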
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
+
+inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x;
+ mpfr_grandom(x.mpfr_ptr(), NULL, state, rnd_mode);
+ return x;
+}
+
inline const mpreal grandom(unsigned int seed = 0)
{
static gmp_randstate_t state;
- static bool isFirstTime = true;
+ static bool initialize = true;
- if(isFirstTime)
+ if(initialize)
{
gmp_randinit_default(state);
gmp_randseed_ui(state,0);
- isFirstTime = false;
+ initialize = false;
}
if(seed != 0) gmp_randseed_ui(state,seed);
@@ -2634,17 +2664,17 @@ inline const mpreal grandom(unsigned int seed = 0)
//////////////////////////////////////////////////////////////////////////
// Set/Get global properties
inline void mpreal::set_default_prec(mp_prec_t prec)
-{
- mpfr_set_default_prec(prec);
+{
+ mpfr_set_default_prec(prec);
}
inline void mpreal::set_default_rnd(mp_rnd_t rnd_mode)
-{
- mpfr_set_default_rounding_mode(rnd_mode);
+{
+ mpfr_set_default_rounding_mode(rnd_mode);
}
inline bool mpreal::fits_in_bits(double x, int n)
-{
+{
int i;
double t;
return IsInf(x) || (std::modf ( std::ldexp ( std::frexp ( x, &i ), n ), &t ) == 0.0);
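
(Why the fits_in_bits() test above works, restated as a stand-alone sketch with the IsInf shortcut omitted: frexp(x, &i) writes x = m * 2^i with m in [0.5, 1); ldexp(m, n) is m * 2^n; and x fits in n mantissa bits exactly when m * 2^n is an integer, which modf detects.)

#include <cmath>
bool fits_in_bits_demo(double x, int n)
{
    int e;
    double ipart;
    return std::modf(std::ldexp(std::frexp(x, &e), n), &ipart) == 0.0;
}
// fits_in_bits_demo(0.625, 3) -> true   (0.625 = 0.101 in binary)
// fits_in_bits_demo(0.1, 10)  -> false  (0.1 as a double uses all 53 bits)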
@@ -2894,7 +2924,7 @@ inline const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode)
else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
}
-// pow long double
+// pow long double
inline const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode)
{
return pow(mpreal(a),mpreal(b),rnd_mode);
@@ -2953,9 +2983,9 @@ namespace std
{
// we are allowed to extend namespace std with specializations only
template <>
- inline void swap(mpfr::mpreal& x, mpfr::mpreal& y)
- {
- return mpfr::swap(x, y);
+ inline void swap(mpfr::mpreal& x, mpfr::mpreal& y)
+ {
+ return mpfr::swap(x, y);
}
template<>
@@ -2966,7 +2996,7 @@ namespace std
static const bool is_signed = true;
static const bool is_integer = false;
static const bool is_exact = false;
- static const int radix = 2;
+ static const int radix = 2;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
@@ -2986,7 +3016,7 @@ namespace std
// Returns smallest eps such that 1 + eps != 1 (classic machine epsilon)
inline static mpfr::mpreal epsilon(mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(precision); }
-
+
// Returns smallest eps such that x + eps != x (relative machine epsilon)
inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); }
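
(A brief sketch of the classic definition quoted above, assuming the usual 2^(1-p) machine epsilon at p bits of precision.)

mpfr::mpreal::set_default_prec(53);                               // double-like precision
mpfr::mpreal eps = std::numeric_limits<mpfr::mpreal>::epsilon();  // 2^(1-53) = 2^-52 ~ 2.22e-16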
@@ -2994,8 +3024,8 @@ namespace std
{
mp_rnd_t r = mpfr::mpreal::get_default_rnd();
- if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision);
- else return mpfr::mpreal(1.0, precision);
+ if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision);
+ else return mpfr::mpreal(1.0, precision);
}
inline static const mpfr::mpreal infinity() { return mpfr::const_infinity(); }
@@ -3006,17 +3036,17 @@ namespace std
// Please note, exponent range is not fixed in MPFR
static const int min_exponent = MPFR_EMIN_DEFAULT;
static const int max_exponent = MPFR_EMAX_DEFAULT;
- MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811);
- MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811);
+ MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811);
+ MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811);
#ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS
// Following members should be constant according to standard, but they can be variable in MPFR
- // So we define them as functions here.
+ // So we define them as functions here.
//
  // This is the preferable way to specialize std::numeric_limits<mpfr::mpreal>.
- // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost.
- // See below for compatible implementation.
+ // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost.
+ // See below for compatible implementation.
inline static float_round_style round_style()
{
mp_rnd_t r = mpfr::mpreal::get_default_rnd();
@@ -3024,9 +3054,9 @@ namespace std
switch (r)
{
case GMP_RNDN: return round_to_nearest;
- case GMP_RNDZ: return round_toward_zero;
- case GMP_RNDU: return round_toward_infinity;
- case GMP_RNDD: return round_toward_neg_infinity;
+ case GMP_RNDZ: return round_toward_zero;
+ case GMP_RNDU: return round_toward_infinity;
+ case GMP_RNDD: return round_toward_neg_infinity;
default: return round_indeterminate;
}
}
@@ -3053,13 +3083,13 @@ namespace std
// If possible, please use functions digits() and round_style() defined above.
//
// These (default) values are preserved for compatibility with existing libraries, e.g. boost.
- // Change them accordingly to your application.
+    // Change them according to your application.
//
// For example, if you use 256 bits of precision uniformly in your program, then:
// digits = 256
- // digits10 = 77
+ // digits10 = 77
// max_digits10 = 78
- //
+ //
// Approximate formula for decimal digits is: digits10 = floor(log10(2) * digits). See bits2digits() for more details.
static const std::float_round_style round_style = round_to_nearest;
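
(The digits10 example in the comment above checks out numerically; a tiny compile-time restatement, with bits2digits_demo standing in for the library's bits2digits().)

constexpr int bits2digits_demo(int digits)
{
    return static_cast<int>(0.3010299956639811 * digits);  // truncation == floor for digits > 0
}
static_assert(bits2digits_demo(256) == 77, "256 bits correspond to 77 decimal digits");
// max_digits10 is then digits10 + 1 = 78, matching the example values above.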
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp
index bc00382be..685e7ea45 100644
--- a/unsupported/test/mpreal_support.cpp
+++ b/unsupported/test/mpreal_support.cpp
@@ -12,11 +12,13 @@ void test_mpreal_support()
// set precision to 256 bits (double has only 53 bits)
mpreal::set_default_prec(256);
typedef Matrix<mpreal,Eigen::Dynamic,Eigen::Dynamic> MatrixXmp;
+ typedef Matrix<std::complex<mpreal>,Eigen::Dynamic,Eigen::Dynamic> MatrixXcmp;
std::cerr << "epsilon = " << NumTraits<mpreal>::epsilon() << "\n";
std::cerr << "dummy_precision = " << NumTraits<mpreal>::dummy_precision() << "\n";
std::cerr << "highest = " << NumTraits<mpreal>::highest() << "\n";
std::cerr << "lowest = " << NumTraits<mpreal>::lowest() << "\n";
+ std::cerr << "digits10 = " << NumTraits<mpreal>::digits10() << "\n";
for(int i = 0; i < g_repeat; i++) {
int s = Eigen::internal::random<int>(1,100);
@@ -24,6 +26,10 @@ void test_mpreal_support()
MatrixXmp B = MatrixXmp::Random(s,s);
MatrixXmp S = A.adjoint() * A;
MatrixXmp X;
+ MatrixXcmp Ac = MatrixXcmp::Random(s,s);
+ MatrixXcmp Bc = MatrixXcmp::Random(s,s);
+ MatrixXcmp Sc = Ac.adjoint() * Ac;
+ MatrixXcmp Xc;
  // Basic stuff
VERIFY_IS_APPROX(A.real(), A);
@@ -32,12 +38,14 @@ void test_mpreal_support()
VERIFY_IS_APPROX(A.array().abs2().sqrt(), A.array().abs());
VERIFY_IS_APPROX(A.array().sin(), sin(A.array()));
VERIFY_IS_APPROX(A.array().cos(), cos(A.array()));
-
// Cholesky
X = S.selfadjointView<Lower>().llt().solve(B);
VERIFY_IS_APPROX((S.selfadjointView<Lower>()*X).eval(),B);
+ Xc = Sc.selfadjointView<Lower>().llt().solve(Bc);
+ VERIFY_IS_APPROX((Sc.selfadjointView<Lower>()*Xc).eval(),Bc);
+
// partial LU
X = A.lu().solve(B);
VERIFY_IS_APPROX((A*X).eval(),B);
diff --git a/unsupported/test/polynomialsolver.cpp b/unsupported/test/polynomialsolver.cpp
index de79f1538..0c87478dd 100644
--- a/unsupported/test/polynomialsolver.cpp
+++ b/unsupported/test/polynomialsolver.cpp
@@ -38,6 +38,9 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
const Index deg = pols.size()-1;
+ // Test template constructor from coefficient vector
+ SOLVER solve_constr (pols);
+
psolve.compute( pols );
const RootsType& roots( psolve.roots() );
EvalRootsType evr( deg );
@@ -104,6 +107,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
// 1) the roots found are correct
// 2) the roots have distinct moduli
+ typedef typename POLYNOMIAL::Scalar Scalar;
typedef typename REAL_ROOTS::Scalar Real;
//Test realRoots
@@ -118,7 +122,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
bool found = false;
for( size_t j=0; j<calc_realRoots.size()&& !found; ++j )
{
- if( internal::isApprox( calc_realRoots[i], real_roots[j] ), psPrec ){
+ if( internal::isApprox( calc_realRoots[i], real_roots[j], psPrec ) ){
found = true; }
}
VERIFY( found );
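
(The one-character fix above is worth a note: the old line compiled cleanly because of the comma operator, so psPrec was never passed as a tolerance. A minimal reproduction of the pitfall, with is_approx_stub standing in for internal::isApprox.)

#include <cassert>
bool is_approx_stub(double, double) { return false; }
int main()
{
    double psPrec = 1e-6;
    // The comma operator discards the comparison and tests psPrec instead,
    // which is non-zero and therefore always truthy.
    bool buggy_condition = (is_approx_stub(1.0, 2.0), psPrec);
    assert(buggy_condition);  // passes even though the stub returned false
    return 0;
}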
@@ -209,5 +213,6 @@ void test_polynomialsolver()
CALL_SUBTEST_10((polynomialsolver<double,Dynamic>(
internal::random<int>(9,13)
)) );
+ CALL_SUBTEST_11((polynomialsolver<float,Dynamic>(1)) );
}
}
diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp
index 1ee791b0f..a010ceb93 100644
--- a/unsupported/test/sparse_extra.cpp
+++ b/unsupported/test/sparse_extra.cpp
@@ -49,7 +49,6 @@ bool test_random_setter(DynamicSparseMatrix<T>& sm, const DenseType& ref, const
template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& ref)
{
- typedef typename SparseMatrixType::Index Index;
const Index rows = ref.rows();
const Index cols = ref.cols();
typedef typename SparseMatrixType::Scalar Scalar;
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
new file mode 100644
index 000000000..057fb3e92
--- /dev/null
+++ b/unsupported/test/special_functions.cpp
@@ -0,0 +1,345 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include "../Eigen/SpecialFunctions"
+
+template<typename X, typename Y>
+void verify_component_wise(const X& x, const Y& y)
+{
+ for(Index i=0; i<x.size(); ++i)
+ {
+ if((numext::isfinite)(y(i)))
+ VERIFY_IS_APPROX( x(i), y(i) );
+ else if((numext::isnan)(y(i)))
+ VERIFY((numext::isnan)(x(i)));
+ else
+ VERIFY_IS_EQUAL( x(i), y(i) );
+ }
+}
+
+template<typename ArrayType> void array_special_functions()
+{
+ using std::abs;
+ using std::sqrt;
+ typedef typename ArrayType::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ Scalar plusinf = std::numeric_limits<Scalar>::infinity();
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+
+ Index rows = internal::random<Index>(1,30);
+ Index cols = 1;
+
+ // API
+ {
+ ArrayType m1 = ArrayType::Random(rows,cols);
+#if EIGEN_HAS_C99_MATH
+ VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1));
+ VERIFY_IS_APPROX(m1.digamma(), digamma(m1));
+ VERIFY_IS_APPROX(m1.erf(), erf(m1));
+ VERIFY_IS_APPROX(m1.erfc(), erfc(m1));
+#endif // EIGEN_HAS_C99_MATH
+ }
+
+
+#if EIGEN_HAS_C99_MATH
+ // check special functions (comparing against numpy implementation)
+ if (!NumTraits<Scalar>::IsComplex)
+ {
+
+ {
+ ArrayType m1 = ArrayType::Random(rows,cols);
+ ArrayType m2 = ArrayType::Random(rows,cols);
+
+      // Test various properties of igamma & igammac.  These are normalized
+ // gamma integrals where
+ // igammac(a, x) = Gamma(a, x) / Gamma(a)
+ // igamma(a, x) = gamma(a, x) / Gamma(a)
+ // where Gamma and gamma are considered the standard unnormalized
+ // upper and lower incomplete gamma functions, respectively.
+ ArrayType a = m1.abs() + 2;
+ ArrayType x = m2.abs() + 2;
+ ArrayType zero = ArrayType::Zero(rows, cols);
+ ArrayType one = ArrayType::Constant(rows, cols, Scalar(1.0));
+ ArrayType a_m1 = a - one;
+ ArrayType Gamma_a_x = Eigen::igammac(a, x) * a.lgamma().exp();
+ ArrayType Gamma_a_m1_x = Eigen::igammac(a_m1, x) * a_m1.lgamma().exp();
+ ArrayType gamma_a_x = Eigen::igamma(a, x) * a.lgamma().exp();
+ ArrayType gamma_a_m1_x = Eigen::igamma(a_m1, x) * a_m1.lgamma().exp();
+
+ // Gamma(a, 0) == Gamma(a)
+ VERIFY_IS_APPROX(Eigen::igammac(a, zero), one);
+
+ // Gamma(a, x) + gamma(a, x) == Gamma(a)
+ VERIFY_IS_APPROX(Gamma_a_x + gamma_a_x, a.lgamma().exp());
+
+ // Gamma(a, x) == (a - 1) * Gamma(a-1, x) + x^(a-1) * exp(-x)
+ VERIFY_IS_APPROX(Gamma_a_x, (a - 1) * Gamma_a_m1_x + x.pow(a-1) * (-x).exp());
+
+ // gamma(a, x) == (a - 1) * gamma(a-1, x) - x^(a-1) * exp(-x)
+ VERIFY_IS_APPROX(gamma_a_x, (a - 1) * gamma_a_m1_x - x.pow(a-1) * (-x).exp());
+ }
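
(A spot check on the reference tables that follow: at a == 1 the regularized incomplete gamma functions have closed forms.)

// igamma(1, x)  = 1 - exp(-x)   e.g. 1 - exp(-1) = 0.6321205588285578
// igammac(1, x) = exp(-x)       e.g.     exp(-1) = 0.36787944117144233
// Both values appear verbatim in the a == 1 rows of igamma_s / igammac_s below.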
+
+ {
+ // Check exact values of igamma and igammac against a third party calculation.
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+ // location i*6+j corresponds to a_s[i], x_s[j].
+ Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+ {0.0, 0.6321205588285578, 0.7768698398515702,
+ 0.9816843611112658, 9.999500016666262e-05, 1.0},
+ {0.0, 0.4275932955291202, 0.608374823728911,
+ 0.9539882943107686, 7.522076445089201e-07, 1.0},
+ {0.0, 0.01898815687615381, 0.06564245437845008,
+ 0.5665298796332909, 4.166333347221828e-18, 1.0},
+ {0.0, 0.9999780593618628, 0.9999899967080838,
+ 0.9999996219837988, 0.9991370418689945, 1.0},
+ {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+ Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+ {1.0, 0.36787944117144233, 0.22313016014842982,
+ 0.018315638888734182, 0.9999000049998333, 0.0},
+ {1.0, 0.5724067044708798, 0.3916251762710878,
+ 0.04601170568923136, 0.9999992477923555, 0.0},
+ {1.0, 0.9810118431238462, 0.9343575456215499,
+ 0.4334701203667089, 1.0, 0.0},
+ {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+ 3.7801620118431334e-07, 0.0008629581310054535,
+ 0.0},
+ {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igamma_s[i][j])) {
+ VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j])));
+ } else {
+ VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]);
+ }
+
+ if ((std::isnan)(igammac_s[i][j])) {
+ VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j])));
+ } else {
+ VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]);
+ }
+ }
+ }
+ }
+ }
+#endif // EIGEN_HAS_C99_MATH
+
+ // Check the zeta function against scipy.special.zeta
+ {
+ ArrayType x(7), q(7), res(7), ref(7);
+ x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9;
+ q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345;
+ ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+ CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
+ }
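
(For orientation, the two-argument zeta under test is the Hurwitz zeta, matching scipy.special.zeta.)

// zeta(x, q) = sum over k >= 0 of 1 / (k + q)^x, convergent for x > 1;
// q == 1 reduces it to the Riemann zeta, and x == 1 is the pole that the
// reference vector above encodes as plusinf.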
+
+ // digamma
+ {
+ ArrayType x(7), res(7), ref(7);
+ x << 1, 1.5, 4, -10.5, 10000.5, 0, -1;
+ ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, plusinf, plusinf;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+
+ CALL_SUBTEST( res = x.digamma(); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = digamma(x); verify_component_wise(res, ref); );
+ }
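
(A quick anchor for the digamma reference values above.)

// digamma(1) = -gamma (the Euler-Mascheroni constant) = -0.5772156649015329,
// the first entry of ref; the non-positive integer inputs 0 and -1 hit poles,
// which the reference vector encodes as plusinf.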
+
+
+#if EIGEN_HAS_C99_MATH
+ {
+ ArrayType n(11), x(11), res(11), ref(11);
+ n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170;
+ x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64;
+ ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+
+ if(sizeof(RealScalar)>=8) { // double
+ // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232
+ // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); );
+ }
+ else {
+ // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); );
+ CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); );
+ }
+ }
+#endif
+
+#if EIGEN_HAS_C99_MATH
+ {
+ // Inputs and ground truth generated with scipy via:
+ // a = np.logspace(-3, 3, 5) - 1e-3
+ // b = np.logspace(-3, 3, 5) - 1e-3
+ // x = np.linspace(-0.1, 1.1, 5)
+ // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x))
+ // full_a = full_a.flatten().tolist() # same for full_b, full_x
+ // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist()
+ //
+    // Note that in Eigen, betainc is called with arguments in the order (a, b, x),
+    // as in the calls below.
+ ArrayType a(125);
+ ArrayType b(125);
+ ArrayType x(125);
+ ArrayType v(125);
+ ArrayType res(125);
+
+ a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999;
+
+ b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999;
+
+ x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1,
+ -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1;
+
+ v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan,
+ 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan,
+ 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan,
+ 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan,
+ nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256,
+ 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001,
+ 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403,
+ 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999,
+ 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan,
+ 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06,
+ nan, nan, 7.864342668429763e-23, 3.015969667594166e-10,
+ 0.0008598571564165444, nan, nan, 6.031987710123844e-08,
+ 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999,
+ 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan,
+ nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan,
+ 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0,
+ 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan,
+ 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan;
+
+ CALL_SUBTEST(res = betainc(a, b, x);
+ verify_component_wise(res, v););
+ }
+
+ // Test various properties of betainc
+ {
+ ArrayType m1 = ArrayType::Random(32);
+ ArrayType m2 = ArrayType::Random(32);
+ ArrayType m3 = ArrayType::Random(32);
+ ArrayType one = ArrayType::Constant(32, Scalar(1.0));
+ const Scalar eps = std::numeric_limits<Scalar>::epsilon();
+ ArrayType a = (m1 * 4.0).exp();
+ ArrayType b = (m2 * 4.0).exp();
+ ArrayType x = m3.abs();
+
+ // betainc(a, 1, x) == x**a
+ CALL_SUBTEST(
+ ArrayType test = betainc(a, one, x);
+ ArrayType expected = x.pow(a);
+ verify_component_wise(test, expected););
+
+ // betainc(1, b, x) == 1 - (1 - x)**b
+ CALL_SUBTEST(
+ ArrayType test = betainc(one, b, x);
+ ArrayType expected = one - (one - x).pow(b);
+ verify_component_wise(test, expected););
+
+ // betainc(a, b, x) == 1 - betainc(b, a, 1-x)
+ CALL_SUBTEST(
+ ArrayType test = betainc(a, b, x) + betainc(b, a, one - x);
+ ArrayType expected = one;
+ verify_component_wise(test, expected););
+
+ // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b))
+ CALL_SUBTEST(
+ ArrayType num = x.pow(a) * (one - x).pow(b);
+ ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp();
+ // Add eps to rhs and lhs so that component-wise test doesn't result in
+ // nans when both outputs are zeros.
+ ArrayType expected = betainc(a, b, x) - num / denom + eps;
+ ArrayType test = betainc(a + one, b, x) + eps;
+ if (sizeof(Scalar) >= 8) { // double
+ verify_component_wise(test, expected);
+ } else {
+ // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232
+ verify_component_wise(test.head(8), expected.head(8));
+ });
+
+ // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b))
+ CALL_SUBTEST(
+ // Add eps to rhs and lhs so that component-wise test doesn't result in
+ // nans when both outputs are zeros.
+ ArrayType num = x.pow(a) * (one - x).pow(b);
+ ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp();
+ ArrayType expected = betainc(a, b, x) + num / denom + eps;
+ ArrayType test = betainc(a, b + one, x) + eps;
+ verify_component_wise(test, expected););
+ }
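
(A numeric instance of the first identity checked in this block; hand-evaluated.)

// betainc(a, 1, x) == x**a:  with a = 2, x = 0.5, I_x(2, 1) = 0.5^2 = 0.25.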
+#endif
+}
+
+void test_special_functions()
+{
+ CALL_SUBTEST_1(array_special_functions<ArrayXf>());
+ CALL_SUBTEST_2(array_special_functions<ArrayXd>());
+}
diff --git a/unsupported/test/splines.cpp b/unsupported/test/splines.cpp
index a7eb3e0c4..3be020434 100644
--- a/unsupported/test/splines.cpp
+++ b/unsupported/test/splines.cpp
@@ -13,23 +13,23 @@
namespace Eigen {
-// lets do some explicit instantiations and thus
-// force the compilation of all spline functions...
-template class Spline<double, 2, Dynamic>;
-template class Spline<double, 3, Dynamic>;
+  // let's do some explicit instantiations and thus
+ // force the compilation of all spline functions...
+ template class Spline<double, 2, Dynamic>;
+ template class Spline<double, 3, Dynamic>;
-template class Spline<double, 2, 2>;
-template class Spline<double, 2, 3>;
-template class Spline<double, 2, 4>;
-template class Spline<double, 2, 5>;
+ template class Spline<double, 2, 2>;
+ template class Spline<double, 2, 3>;
+ template class Spline<double, 2, 4>;
+ template class Spline<double, 2, 5>;
-template class Spline<float, 2, Dynamic>;
-template class Spline<float, 3, Dynamic>;
+ template class Spline<float, 2, Dynamic>;
+ template class Spline<float, 3, Dynamic>;
-template class Spline<float, 3, 2>;
-template class Spline<float, 3, 3>;
-template class Spline<float, 3, 4>;
-template class Spline<float, 3, 5>;
+ template class Spline<float, 3, 2>;
+ template class Spline<float, 3, 3>;
+ template class Spline<float, 3, 4>;
+ template class Spline<float, 3, 5>;
}
@@ -234,11 +234,48 @@ void check_global_interpolation2d()
}
}
+void check_global_interpolation_with_derivatives2d()
+{
+ typedef Spline2d::PointType PointType;
+ typedef Spline2d::KnotVectorType KnotVectorType;
+
+ const Eigen::DenseIndex numPoints = 100;
+ const unsigned int dimension = 2;
+ const unsigned int degree = 3;
+
+ ArrayXXd points = ArrayXXd::Random(dimension, numPoints);
+
+ KnotVectorType knots;
+ Eigen::ChordLengths(points, knots);
+
+ ArrayXXd derivatives = ArrayXXd::Random(dimension, numPoints);
+ VectorXd derivativeIndices(numPoints);
+
+ for (Eigen::DenseIndex i = 0; i < numPoints; ++i)
+ derivativeIndices(i) = static_cast<double>(i);
+
+ const Spline2d spline = SplineFitting<Spline2d>::InterpolateWithDerivatives(
+ points, derivatives, derivativeIndices, degree);
+
+ for (Eigen::DenseIndex i = 0; i < points.cols(); ++i)
+ {
+ PointType point = spline(knots(i));
+ PointType referencePoint = points.col(i);
+ VERIFY_IS_APPROX(point, referencePoint);
+ PointType derivative = spline.derivatives(knots(i), 1).col(1);
+ PointType referenceDerivative = derivatives.col(i);
+ VERIFY_IS_APPROX(derivative, referenceDerivative);
+ }
+}
void test_splines()
{
- CALL_SUBTEST( eval_spline3d() );
- CALL_SUBTEST( eval_spline3d_onbrks() );
- CALL_SUBTEST( eval_closed_spline2d() );
- CALL_SUBTEST( check_global_interpolation2d() );
+ for (int i = 0; i < g_repeat; ++i)
+ {
+ CALL_SUBTEST( eval_spline3d() );
+ CALL_SUBTEST( eval_spline3d_onbrks() );
+ CALL_SUBTEST( eval_closed_spline2d() );
+ CALL_SUBTEST( check_global_interpolation2d() );
+ CALL_SUBTEST( check_global_interpolation_with_derivatives2d() );
+ }
}
diff --git a/unsupported/test/svd_common.h b/unsupported/test/svd_common.h
deleted file mode 100644
index b40c23a2b..000000000
--- a/unsupported/test/svd_common.h
+++ /dev/null
@@ -1,261 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-// discard stack allocation as that too bypasses malloc
-#define EIGEN_STACK_ALLOCATION_LIMIT 0
-#define EIGEN_RUNTIME_NO_MALLOC
-
-#include "main.h"
-#include <unsupported/Eigen/SVD>
-#include <Eigen/LU>
-
-
-// check if "svd" is the good image of "m"
-template<typename MatrixType, typename SVD>
-void svd_check_full(const MatrixType& m, const SVD& svd)
-{
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef typename MatrixType::Scalar Scalar;
- typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
- typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
-
-
- MatrixType sigma = MatrixType::Zero(rows, cols);
- sigma.diagonal() = svd.singularValues().template cast<Scalar>();
- MatrixUType u = svd.matrixU();
- MatrixVType v = svd.matrixV();
- VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
- VERIFY_IS_UNITARY(u);
- VERIFY_IS_UNITARY(v);
-} // end svd_check_full
-
-
-
-// Compare to a reference value
-template<typename MatrixType, typename SVD>
-void svd_compare_to_full(const MatrixType& m,
- unsigned int computationOptions,
- const SVD& referenceSvd)
-{
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
- Index diagSize = (std::min)(rows, cols);
-
- SVD svd(m, computationOptions);
-
- VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
- if(computationOptions & ComputeFullU)
- VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
- if(computationOptions & ComputeThinU)
- VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
- if(computationOptions & ComputeFullV)
- VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
- if(computationOptions & ComputeThinV)
- VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
-} // end svd_compare_to_full
-
-
-
-template<typename MatrixType, typename SVD>
-void svd_solve(const MatrixType& m, unsigned int computationOptions)
-{
- typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
- typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
-
- RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
- SVD svd(m, computationOptions);
- SolutionType x = svd.solve(rhs);
- // evaluate normal equation which works also for least-squares solutions
- VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs);
-} // end svd_solve
-
-
-// test computations options
-// 2 functions because Jacobisvd can return before the second function
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_1(const MatrixType& m, const SVD& fullSvd)
-{
- svd_check_full< MatrixType, SVD >(m, fullSvd);
- svd_solve< MatrixType, SVD >(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_2(const MatrixType& m, const SVD& fullSvd)
-{
- svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeFullV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, 0, fullSvd);
-
- if (MatrixType::ColsAtCompileTime == Dynamic) {
- // thin U/V are only available with dynamic number of columns
-
- svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU|ComputeThinV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeFullV, fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU , fullSvd);
- svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeThinV, fullSvd);
- svd_solve<MatrixType, SVD>(m, ComputeFullU | ComputeThinV);
- svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeFullV);
- svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeThinV);
-
- typedef typename MatrixType::Index Index;
- Index diagSize = (std::min)(m.rows(), m.cols());
- SVD svd(m, ComputeThinU | ComputeThinV);
- VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
- }
-}
-
-template<typename MatrixType, typename SVD>
-void svd_verify_assert(const MatrixType& m)
-{
- typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::Index Index;
- Index rows = m.rows();
- Index cols = m.cols();
-
- enum {
- RowsAtCompileTime = MatrixType::RowsAtCompileTime,
- ColsAtCompileTime = MatrixType::ColsAtCompileTime
- };
-
- typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
- RhsType rhs(rows);
- SVD svd;
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.singularValues())
- VERIFY_RAISES_ASSERT(svd.matrixV())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
- MatrixType a = MatrixType::Zero(rows, cols);
- a.setZero();
- svd.compute(a, 0);
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.matrixV())
- svd.singularValues();
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
- if (ColsAtCompileTime == Dynamic)
- {
- svd.compute(a, ComputeThinU);
- svd.matrixU();
- VERIFY_RAISES_ASSERT(svd.matrixV())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
- svd.compute(a, ComputeThinV);
- svd.matrixV();
- VERIFY_RAISES_ASSERT(svd.matrixU())
- VERIFY_RAISES_ASSERT(svd.solve(rhs))
- }
- else
- {
- VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
- VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
- }
-}
-
-// work around stupid msvc error when constructing at compile time an expression that involves
-// a division by zero, even if the numeric type has floating point
-template<typename Scalar>
-EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
-
-// workaround aggressive optimization in ICC
-template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
-
-
-template<typename MatrixType, typename SVD>
-void svd_inf_nan()
-{
- // all this function does is verify we don't iterate infinitely on nan/inf values
-
- SVD svd;
- typedef typename MatrixType::Scalar Scalar;
- Scalar some_inf = Scalar(1) / zero<Scalar>();
- VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
- svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
-
- Scalar some_nan = zero<Scalar> () / zero<Scalar> ();
- VERIFY(some_nan != some_nan);
- svd.compute(MatrixType::Constant(10,10,some_nan), ComputeFullU | ComputeFullV);
-
- MatrixType m = MatrixType::Zero(10,10);
- m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
- svd.compute(m, ComputeFullU | ComputeFullV);
-
- m = MatrixType::Zero(10,10);
- m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_nan;
- svd.compute(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename SVD>
-void svd_preallocate()
-{
- Vector3f v(3.f, 2.f, 1.f);
- MatrixXf m = v.asDiagonal();
-
- internal::set_is_malloc_allowed(false);
- VERIFY_RAISES_ASSERT(VectorXf v(10);)
- SVD svd;
- internal::set_is_malloc_allowed(true);
- svd.compute(m);
- VERIFY_IS_APPROX(svd.singularValues(), v);
-
- SVD svd2(3,3);
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
- VERIFY_IS_APPROX(svd2.singularValues(), v);
- VERIFY_RAISES_ASSERT(svd2.matrixU());
- VERIFY_RAISES_ASSERT(svd2.matrixV());
- svd2.compute(m, ComputeFullU | ComputeFullV);
- VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
- VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
-
- SVD svd3(3,3,ComputeFullU|ComputeFullV);
- internal::set_is_malloc_allowed(false);
- svd2.compute(m);
- internal::set_is_malloc_allowed(true);
- VERIFY_IS_APPROX(svd2.singularValues(), v);
- VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
- VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
- internal::set_is_malloc_allowed(false);
- svd2.compute(m, ComputeFullU|ComputeFullV);
- internal::set_is_malloc_allowed(true);
-}
-
-
-
-
-