Merge changes Iee153445,Iee274471 am: 79df15ea88 am: 10f298fc41 am: 7cb5001398t_frc_odp_330442040 t_frc_odp_330442000 t_frc_ase_330444010 android-wear-13.0.0-gpl_r3 android-wear-13.0.0-gpl_r2 android-wear-13.0.0-gpl_r1 android-vts-13.0_r8 android-vts-13.0_r7 android-vts-13.0_r6 android-vts-13.0_r5 android-vts-13.0_r4 android-vts-13.0_r3 android-vts-13.0_r2 android-t-qpr3-beta-3-gpl android-t-qpr3-beta-1-gpl android-t-qpr2-beta-3-gpl android-t-qpr2-beta-2-gpl android-t-qpr1-beta-3-gpl android-t-qpr1-beta-1-gpl android-cts-13.0_r8 android-cts-13.0_r7 android-cts-13.0_r6 android-cts-13.0_r5 android-cts-13.0_r4 android-cts-13.0_r3 android-cts-13.0_r2 android-13.0.0_r83 android-13.0.0_r82 android-13.0.0_r81 android-13.0.0_r80 android-13.0.0_r79 android-13.0.0_r78 android-13.0.0_r77 android-13.0.0_r76 android-13.0.0_r75 android-13.0.0_r74 android-13.0.0_r73 android-13.0.0_r72 android-13.0.0_r71 android-13.0.0_r70 android-13.0.0_r69 android-13.0.0_r68 android-13.0.0_r67 android-13.0.0_r66 android-13.0.0_r65 android-13.0.0_r64 android-13.0.0_r63 android-13.0.0_r62 android-13.0.0_r61 android-13.0.0_r60 android-13.0.0_r59 android-13.0.0_r58 android-13.0.0_r57 android-13.0.0_r56 android-13.0.0_r55 android-13.0.0_r54 android-13.0.0_r53 android-13.0.0_r52 android-13.0.0_r51 android-13.0.0_r50 android-13.0.0_r49 android-13.0.0_r48 android-13.0.0_r47 android-13.0.0_r46 android-13.0.0_r45 android-13.0.0_r44 android-13.0.0_r43 android-13.0.0_r42 android-13.0.0_r41 android-13.0.0_r40 android-13.0.0_r39 android-13.0.0_r38 android-13.0.0_r37 android-13.0.0_r36 android-13.0.0_r35 android-13.0.0_r34 android-13.0.0_r33 android-13.0.0_r32 android-13.0.0_r30 android-13.0.0_r29 android-13.0.0_r28 android-13.0.0_r27 android-13.0.0_r24 android-13.0.0_r23 android-13.0.0_r22 android-13.0.0_r21 android-13.0.0_r20 android-13.0.0_r19 android-13.0.0_r18 android-13.0.0_r17 android-13.0.0_r16 aml_go_odp_330912000 aml_go_ads_330915100 aml_go_ads_330915000 aml_go_ads_330913000 android13-tests-release 
android13-tests-dev android13-qpr3-s9-release android13-qpr3-s8-release android13-qpr3-s7-release android13-qpr3-s6-release android13-qpr3-s5-release android13-qpr3-s4-release android13-qpr3-s3-release android13-qpr3-s2-release android13-qpr3-s14-release android13-qpr3-s13-release android13-qpr3-s12-release android13-qpr3-s11-release android13-qpr3-s10-release android13-qpr3-s1-release android13-qpr3-release android13-qpr3-c-s8-release android13-qpr3-c-s7-release android13-qpr3-c-s6-release android13-qpr3-c-s5-release android13-qpr3-c-s4-release android13-qpr3-c-s3-release android13-qpr3-c-s2-release android13-qpr3-c-s12-release android13-qpr3-c-s11-release android13-qpr3-c-s10-release android13-qpr3-c-s1-release android13-qpr2-s9-release android13-qpr2-s8-release android13-qpr2-s7-release android13-qpr2-s6-release android13-qpr2-s5-release android13-qpr2-s3-release android13-qpr2-s2-release android13-qpr2-s12-release android13-qpr2-s11-release android13-qpr2-s10-release android13-qpr2-s1-release android13-qpr2-release android13-qpr2-b-s1-release android13-qpr1-s8-release android13-qpr1-s7-release android13-qpr1-s6-release android13-qpr1-s5-release android13-qpr1-s4-release android13-qpr1-s3-release android13-qpr1-s2-release android13-qpr1-s1-release android13-qpr1-release android13-mainline-go-adservices-release android13-frc-odp-release android13-dev android13-d4-s2-release android13-d4-s1-release android13-d4-release android13-d3-s1-release android13-d2-release android-wear-13.0.0-gpl_r1

Original change: https://android-review.googlesource.com/c/platform/external/eigen/+/1999079 Change-Id: I4c76dc5ddc7fb0ae9fc42436f28bd8bf9de50a97
author: Yi Kong <yikong@google.com> 2022-02-25 16:41:05 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2022-02-25 16:41:05 +0000
commit: bc0f5df265caa21a2120c22453655a7fcc941991 (patch)
tree: fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /unsupported/test/cxx11_tensor_convolution_sycl.cpp
parent: 8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff)
parent: 7cb50013986f04dce5fac87bebf319bb8db37a36 (diff)
download: eigen-ebe162fe562066c4ede645ddecae96b7dab0987f.tar.gz
1 files changed, 469 insertions, 0 deletions
diff --git a/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/unsupported/test/cxx11_tensor_convolution_sycl.cpp
new file mode 100644
index 000000000..3954c8a28
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_convolution_sycl.cpp
@@ -0,0 +1,469 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_USE_SYCL
+
+#include <iostream>
+#include <chrono>
+#include <ctime>
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+#include <iomanip>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+static const float error_threshold =1e-4f;  // precision argument handed to Eigen::internal::isApprox by the test_larg_expr* checks
+
+
+// Large 1-D convolution check: a 4-element kernel is convolved along
+// dimension 0 of a 53x55x51 input on the SYCL device, and each element of the
+// 50x55x51 result is compared (via isApprox with error_threshold) against the
+// same expression evaluated on the host. A mismatch is printed and then trips
+// assert(false).
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device)
+{
+  typedef Tensor<DataType, 3, DataLayout, IndexType> Volume;
+  typedef Tensor<DataType, 1, DataLayout, IndexType> Filter;
+
+  // Input extents.
+  const IndexType in0 = 53;
+  const IndexType in1 = 55;
+  const IndexType in2 = 51;
+  // Output extents; only dimension 0 shrinks (53 - 4 + 1 = 50).
+  const IndexType out0 = 50;
+  const IndexType out1 = 55;
+  const IndexType out2 = 51;
+
+  Eigen::array<IndexType, 3> in_shape = {{in0, in1, in2}};
+  Eigen::array<IndexType, 1> filter_shape = {{4}};
+  Eigen::array<IndexType, 3> out_shape = {{out0, out1, out2}};
+
+  Volume input(in_shape);
+  Filter kernel(filter_shape);
+  Volume result(out_shape);
+  Volume result_host(out_shape);
+
+  // Dimensions of the input along which the kernel is applied.
+  Eigen::array<IndexType, 1> conv_axes = {{0}};
+
+  input.setRandom();
+  kernel.setRandom();
+  result.setZero();
+  result_host.setZero();
+
+  const std::size_t in_bytes = input.size() * sizeof(DataType);
+  const std::size_t filter_bytes = kernel.size() * sizeof(DataType);
+  const std::size_t out_bytes = result.size() * sizeof(DataType);
+
+  // Buffers obtained from the SYCL device; released at the end of the test.
+  DataType* dev_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
+  DataType* dev_filter = static_cast<DataType*>(sycl_device.allocate(filter_bytes));
+  DataType* dev_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
+
+  Eigen::TensorMap<Volume> gpu_in(dev_in, in_shape);
+  Eigen::TensorMap<Filter> gpu_filter(dev_filter, filter_shape);
+  Eigen::TensorMap<Volume> gpu_out(dev_out, out_shape);
+
+  sycl_device.memcpyHostToDevice(dev_in, input.data(), in_bytes);
+  sycl_device.memcpyHostToDevice(dev_filter, kernel.data(), filter_bytes);
+
+  // Evaluate on the device, fetch the result, then build the host reference.
+  gpu_out.device(sycl_device) = gpu_in.convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(result.data(), dev_out, out_bytes);
+
+  result_host = input.convolve(kernel, conv_axes);
+
+  for (IndexType i = 0; i < out0; ++i) {
+    for (IndexType j = 0; j < out1; ++j) {
+      for (IndexType k = 0; k < out2; ++k) {
+        if (Eigen::internal::isApprox(result(i, j, k), result_host(i, j, k), error_threshold)) {
+          continue;
+        }
+        std::cout << std::setprecision(16) << "mismatch detected at index  ( " << i << " , " << j << ", " << k << " ) " << " \t " << result(i, j, k) << " vs " << result_host(i, j, k) << std::endl;
+        assert(false);
+      }
+    }
+  }
+
+  sycl_device.deallocate(dev_in);
+  sycl_device.deallocate(dev_filter);
+  sycl_device.deallocate(dev_out);
+}
+
+
+// Large 2-D convolution check: a 4x5 kernel is convolved along dimensions 0
+// and 1 of a 53x55x51 input on the SYCL device, and each element of the
+// 50x51x51 result is compared (via isApprox with error_threshold) against the
+// same expression evaluated on the host. A mismatch is printed and then trips
+// assert(false).
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device)
+{
+  typedef Tensor<DataType, 3, DataLayout, IndexType> Volume;
+  typedef Tensor<DataType, 2, DataLayout, IndexType> Filter;
+
+  // Input extents.
+  const IndexType in0 = 53;
+  const IndexType in1 = 55;
+  const IndexType in2 = 51;
+  // Output extents; dimensions 0 and 1 shrink (53 - 4 + 1, 55 - 5 + 1).
+  const IndexType out0 = 50;
+  const IndexType out1 = 51;
+  const IndexType out2 = 51;
+
+  Eigen::array<IndexType, 3> in_shape = {{in0, in1, in2}};
+  Eigen::array<IndexType, 2> filter_shape = {{4, 5}};
+  Eigen::array<IndexType, 3> out_shape = {{out0, out1, out2}};
+
+  Volume input(in_shape);
+  Filter kernel(filter_shape);
+  Volume result(out_shape);
+  Volume result_host(out_shape);
+
+  // Dimensions of the input along which the kernel is applied.
+  Eigen::array<IndexType, 2> conv_axes = {{0, 1}};
+
+  input.setRandom();
+  kernel.setRandom();
+  result.setZero();
+  result_host.setZero();
+
+  const std::size_t in_bytes = input.size() * sizeof(DataType);
+  const std::size_t filter_bytes = kernel.size() * sizeof(DataType);
+  const std::size_t out_bytes = result.size() * sizeof(DataType);
+
+  // Buffers obtained from the SYCL device; released at the end of the test.
+  DataType* dev_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
+  DataType* dev_filter = static_cast<DataType*>(sycl_device.allocate(filter_bytes));
+  DataType* dev_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
+
+  Eigen::TensorMap<Volume> gpu_in(dev_in, in_shape);
+  Eigen::TensorMap<Filter> gpu_filter(dev_filter, filter_shape);
+  Eigen::TensorMap<Volume> gpu_out(dev_out, out_shape);
+
+  sycl_device.memcpyHostToDevice(dev_in, input.data(), in_bytes);
+  sycl_device.memcpyHostToDevice(dev_filter, kernel.data(), filter_bytes);
+
+  // Evaluate on the device, fetch the result, then build the host reference.
+  gpu_out.device(sycl_device) = gpu_in.convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(result.data(), dev_out, out_bytes);
+
+  result_host = input.convolve(kernel, conv_axes);
+
+  for (IndexType i = 0; i < out0; ++i) {
+    for (IndexType j = 0; j < out1; ++j) {
+      for (IndexType k = 0; k < out2; ++k) {
+        if (Eigen::internal::isApprox(result(i, j, k), result_host(i, j, k), error_threshold)) {
+          continue;
+        }
+        std::cout << std::setprecision(16) << "mismatch detected at index  ( " << i << " , " << j << ", " << k << " ) " << " \t " << result(i, j, k) << " vs " << result_host(i, j, k) << std::endl;
+        assert(false);
+      }
+    }
+  }
+
+  sycl_device.deallocate(dev_in);
+  sycl_device.deallocate(dev_filter);
+  sycl_device.deallocate(dev_out);
+}
+
+
+// Large 3-D convolution check: a 4x5x3 kernel is convolved along all three
+// dimensions of a 53x55x51 input on the SYCL device, and each element of the
+// 50x51x49 result is compared (via isApprox with error_threshold) against the
+// same expression evaluated on the host. A mismatch is printed and then trips
+// assert(false).
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device)
+{
+  typedef Tensor<DataType, 3, DataLayout, IndexType> Volume;
+
+  // Input extents.
+  const IndexType in0 = 53;
+  const IndexType in1 = 55;
+  const IndexType in2 = 51;
+  // Output extents; every dimension shrinks (53 - 4 + 1, 55 - 5 + 1, 51 - 3 + 1).
+  const IndexType out0 = 50;
+  const IndexType out1 = 51;
+  const IndexType out2 = 49;
+
+  Eigen::array<IndexType, 3> in_shape = {{in0, in1, in2}};
+  Eigen::array<IndexType, 3> filter_shape = {{4, 5, 3}};
+  Eigen::array<IndexType, 3> out_shape = {{out0, out1, out2}};
+
+  Volume input(in_shape);
+  Volume kernel(filter_shape);
+  Volume result(out_shape);
+  Volume result_host(out_shape);
+
+  // Dimensions of the input along which the kernel is applied.
+  Eigen::array<IndexType, 3> conv_axes = {{0, 1, 2}};
+
+  input.setRandom();
+  kernel.setRandom();
+  result.setZero();
+  result_host.setZero();
+
+  const std::size_t in_bytes = input.size() * sizeof(DataType);
+  const std::size_t filter_bytes = kernel.size() * sizeof(DataType);
+  const std::size_t out_bytes = result.size() * sizeof(DataType);
+
+  // Buffers obtained from the SYCL device; released at the end of the test.
+  DataType* dev_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
+  DataType* dev_filter = static_cast<DataType*>(sycl_device.allocate(filter_bytes));
+  DataType* dev_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
+
+  Eigen::TensorMap<Volume> gpu_in(dev_in, in_shape);
+  Eigen::TensorMap<Volume> gpu_filter(dev_filter, filter_shape);
+  Eigen::TensorMap<Volume> gpu_out(dev_out, out_shape);
+
+  sycl_device.memcpyHostToDevice(dev_in, input.data(), in_bytes);
+  sycl_device.memcpyHostToDevice(dev_filter, kernel.data(), filter_bytes);
+
+  // Evaluate on the device, fetch the result, then build the host reference.
+  gpu_out.device(sycl_device) = gpu_in.convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(result.data(), dev_out, out_bytes);
+
+  result_host = input.convolve(kernel, conv_axes);
+
+  for (IndexType i = 0; i < out0; ++i) {
+    for (IndexType j = 0; j < out1; ++j) {
+      for (IndexType k = 0; k < out2; ++k) {
+        if (Eigen::internal::isApprox(result(i, j, k), result_host(i, j, k), error_threshold)) {
+          continue;
+        }
+        std::cout << std::setprecision(16) << "mismatch detected at index  ( " << i << " , " << j << ", " << k << " ) " << " \t " << result(i, j, k) << " vs " << result_host(i, j, k) << std::endl;
+        assert(false);
+      }
+    }
+  }
+
+  sycl_device.deallocate(dev_in);
+  sycl_device.deallocate(dev_filter);
+  sycl_device.deallocate(dev_out);
+}
+
+
+// Small hand-checked case: a 2-element kernel is convolved along dimension 0
+// of a 3x3 input on the SYCL device, and each of the six entries of the 2x3
+// result is verified against the explicitly written two-term sum.
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_evals(const Eigen::SyclDevice& sycl_device)
+{
+  typedef Tensor<DataType, 2, DataLayout, IndexType> Matrix;
+  typedef Tensor<DataType, 1, DataLayout, IndexType> Filter;
+
+  Eigen::array<IndexType, 2> in_shape = {{3, 3}};
+  Eigen::array<IndexType, 1> filter_shape = {{2}};
+  Eigen::array<IndexType, 2> out_shape = {{2, 3}};
+
+  Matrix input(in_shape);
+  Filter kernel(filter_shape);
+  Matrix result(out_shape);
+
+  // Dimension of the input along which the kernel is applied.
+  Eigen::array<IndexType, 1> conv_axes = {{0}};
+
+  input.setRandom();
+  kernel.setRandom();
+  result.setZero();
+
+  const std::size_t in_bytes = input.size() * sizeof(DataType);
+  const std::size_t filter_bytes = kernel.size() * sizeof(DataType);
+  const std::size_t out_bytes = result.size() * sizeof(DataType);
+
+  // Buffers obtained from the SYCL device; released at the end of the test.
+  DataType* dev_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
+  DataType* dev_filter = static_cast<DataType*>(sycl_device.allocate(filter_bytes));
+  DataType* dev_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
+
+  Eigen::TensorMap<Matrix> gpu_in(dev_in, in_shape);
+  Eigen::TensorMap<Filter> gpu_filter(dev_filter, filter_shape);
+  Eigen::TensorMap<Matrix> gpu_out(dev_out, out_shape);
+
+  sycl_device.memcpyHostToDevice(dev_in, input.data(), in_bytes);
+  sycl_device.memcpyHostToDevice(dev_filter, kernel.data(), filter_bytes);
+
+  gpu_out.device(sycl_device) = gpu_in.convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(result.data(), dev_out, out_bytes);
+
+  // The trailing "index N" notes are carried over from the original test.
+  VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1));  // index 0
+  VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1));  // index 2
+  VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1));  // index 4
+  VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1));  // index 1
+  VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1));  // index 3
+  VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1));  // index 5
+
+  sycl_device.deallocate(dev_in);
+  sycl_device.deallocate(dev_filter);
+  sycl_device.deallocate(dev_out);
+}
+
+// Small hand-checked 2-D case: a 2x2 kernel is convolved along both
+// dimensions of a 3x3 input on the SYCL device, and each of the four entries
+// of the 2x2 result is verified against the explicitly written four-term sum.
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_expr(const Eigen::SyclDevice& sycl_device)
+{
+  typedef Tensor<DataType, 2, DataLayout, IndexType> Matrix;
+
+  Eigen::array<IndexType, 2> in_shape = {{3, 3}};
+  Eigen::array<IndexType, 2> filter_shape = {{2, 2}};
+  Eigen::array<IndexType, 2> out_shape = {{2, 2}};
+
+  Matrix input(in_shape);
+  Matrix kernel(filter_shape);
+  Matrix result(out_shape);
+
+  input.setRandom();
+  kernel.setRandom();
+
+  // Dimensions of the input along which the kernel is applied.
+  Eigen::array<IndexType, 2> conv_axes = {{0, 1}};
+
+  const std::size_t in_bytes = input.size() * sizeof(DataType);
+  const std::size_t filter_bytes = kernel.size() * sizeof(DataType);
+  const std::size_t out_bytes = result.size() * sizeof(DataType);
+
+  // Buffers obtained from the SYCL device; released at the end of the test.
+  DataType* dev_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
+  DataType* dev_filter = static_cast<DataType*>(sycl_device.allocate(filter_bytes));
+  DataType* dev_out = static_cast<DataType*>(sycl_device.allocate(out_bytes));
+
+  Eigen::TensorMap<Matrix> gpu_in(dev_in, in_shape);
+  Eigen::TensorMap<Matrix> gpu_filter(dev_filter, filter_shape);
+  Eigen::TensorMap<Matrix> gpu_out(dev_out, out_shape);
+
+  sycl_device.memcpyHostToDevice(dev_in, input.data(), in_bytes);
+  sycl_device.memcpyHostToDevice(dev_filter, kernel.data(), filter_bytes);
+
+  gpu_out.device(sycl_device) = gpu_in.convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(result.data(), dev_out, out_bytes);
+
+  VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
+                                input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
+  VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
+                                input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
+  VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
+                                input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
+  VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
+                                input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
+
+  sycl_device.deallocate(dev_in);
+  sycl_device.deallocate(dev_filter);
+  sycl_device.deallocate(dev_out);
+}
+
+
+// Emulates the VALID, SAME and FULL convolution modes by padding a fixed
+// 3-element input ({1, 2, 3}) with 0, 1 and 2 elements on each side before
+// convolving it with the fixed kernel {0.5, 1, 0} on the SYCL device. The
+// resulting lengths (1, 3, 5) and values are checked against hard-coded
+// expectations.
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_modes(const Eigen::SyclDevice& sycl_device){
+  typedef Tensor<DataType, 1, DataLayout, IndexType> Vector;
+  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > VectorMap;
+
+  Eigen::array<IndexType, 1> in_shape = {{3}};
+  Eigen::array<IndexType, 1> filter_shape = {{3}};
+
+  Vector input(in_shape);
+  Vector kernel(filter_shape);
+
+  // The random fill is kept from the original even though every element is
+  // overwritten just below.
+  input.setRandom();
+  kernel.setRandom();
+
+  // Dimension of the input along which the kernel is applied.
+  Eigen::array<IndexType, 1> conv_axes = {{0}};
+
+  input(0) = 1.0f;
+  input(1) = 2.0f;
+  input(2) = 3.0f;
+  kernel(0) = 0.5f;
+  kernel(1) = 1.0f;
+  kernel(2) = 0.0f;
+
+  // (before, after) padding for the single dimension; reassigned per mode.
+  Eigen::array<std::pair<IndexType, IndexType>, 1> pad_spec;
+
+  // VALID mode (as defined in
+  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html):
+  // no padding.
+  pad_spec[0] = std::make_pair(0, 0);
+  Vector valid(1);
+
+  const std::size_t in_bytes = input.size() * sizeof(DataType);
+  const std::size_t filter_bytes = kernel.size() * sizeof(DataType);
+  const std::size_t valid_bytes = valid.size() * sizeof(DataType);
+
+  DataType* dev_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
+  DataType* dev_filter = static_cast<DataType*>(sycl_device.allocate(filter_bytes));
+  DataType* dev_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes));
+
+  VectorMap gpu_in(dev_in, in_shape);
+  VectorMap gpu_filter(dev_filter, filter_shape);
+  VectorMap gpu_valid(dev_valid, valid.dimensions());
+
+  sycl_device.memcpyHostToDevice(dev_in, input.data(), in_bytes);
+  sycl_device.memcpyHostToDevice(dev_filter, kernel.data(), filter_bytes);
+
+  gpu_valid.device(sycl_device) = gpu_in.pad(pad_spec).convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(valid.data(), dev_valid, valid_bytes);
+
+  VERIFY_IS_EQUAL(valid.dimension(0), 1);
+  VERIFY_IS_APPROX(valid(0), 2.5f);
+
+  // SAME mode (same reference as above): one element of padding per side.
+  pad_spec[0] = std::make_pair(1, 1);
+  Vector same(3);
+  const std::size_t same_bytes = same.size() * sizeof(DataType);
+  DataType* dev_same = static_cast<DataType*>(sycl_device.allocate(same_bytes));
+  VectorMap gpu_same(dev_same, same.dimensions());
+
+  gpu_same.device(sycl_device) = gpu_in.pad(pad_spec).convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(same.data(), dev_same, same_bytes);
+
+  VERIFY_IS_EQUAL(same.dimension(0), 3);
+  VERIFY_IS_APPROX(same(0), 1.0f);
+  VERIFY_IS_APPROX(same(1), 2.5f);
+  VERIFY_IS_APPROX(same(2), 4.0f);
+
+  // FULL mode (same reference as above): two elements of padding per side.
+  pad_spec[0] = std::make_pair(2, 2);
+  Vector full(5);
+  const std::size_t full_bytes = full.size() * sizeof(DataType);
+  DataType* dev_full = static_cast<DataType*>(sycl_device.allocate(full_bytes));
+  VectorMap gpu_full(dev_full, full.dimensions());
+
+  gpu_full.device(sycl_device) = gpu_in.pad(pad_spec).convolve(gpu_filter, conv_axes);
+  sycl_device.memcpyDeviceToHost(full.data(), dev_full, full_bytes);
+
+  VERIFY_IS_EQUAL(full.dimension(0), 5);
+  VERIFY_IS_APPROX(full(0), 0.0f);
+  VERIFY_IS_APPROX(full(1), 1.0f);
+  VERIFY_IS_APPROX(full(2), 2.5f);
+  VERIFY_IS_APPROX(full(3), 4.0f);
+  VERIFY_IS_APPROX(full(4), 1.5f);
+
+  sycl_device.deallocate(dev_in);
+  sycl_device.deallocate(dev_filter);
+  sycl_device.deallocate(dev_valid);
+  sycl_device.deallocate(dev_same);
+  sycl_device.deallocate(dev_full);
+}
+
+// Checks convolution combined with striding on the SYCL device: a 13-element
+// input is sampled with stride 3, convolved with a 3-element kernel along
+// dimension 0, and the convolution output is sampled with stride 2. The two
+// resulting values are verified against explicitly written three-term sums
+// over input elements {0, 3, 6} and {6, 9, 12}.
+//
+// The device buffers are released at the end, matching the other tests in
+// this file (the original version never deallocated them).
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_strides(const Eigen::SyclDevice& sycl_device){
+
+  Eigen::array<IndexType, 1> input_dims = {{13}};
+  Eigen::array<IndexType, 1> kernel_dims = {{3}};
+
+  Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
+  Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
+  Tensor<DataType, 1, DataLayout, IndexType> result(2);
+
+  input.setRandom();
+  kernel.setRandom();
+  // Dimension of the input along which the kernel is applied.
+  Eigen::array<IndexType, 1> dims;
+  dims[0] = 0;
+
+  // Stride applied to the input before the convolution.
+  Eigen::array<IndexType, 1> stride_of_3;
+  stride_of_3[0] = 3;
+  // Stride applied to the convolution output.
+  Eigen::array<IndexType, 1> stride_of_2;
+  stride_of_2[0] = 2;
+
+  std::size_t input_bytes = input.size()  * sizeof(DataType);
+  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
+  std::size_t result_bytes = result.size() * sizeof(DataType);
+
+  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
+  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
+  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
+
+  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
+  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
+  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions());
+  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
+  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
+
+  gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2);
+  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
+
+  VERIFY_IS_EQUAL(result.dimension(0), 2);
+  VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
+                               input(6)*kernel(2)));
+  VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
+                               input(12)*kernel(2)));
+
+  // Release the device buffers allocated above.
+  sycl_device.deallocate(d_input);
+  sycl_device.deallocate(d_kernel);
+  sycl_device.deallocate(d_result);
+}
+
+// Builds a QueueInterface and a SyclDevice from the given device selector and
+// runs every convolution test in this file on it, each instantiated for float
+// data with int64_t indices in both storage orders.
+template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){
+  QueueInterface queue_interface(s);
+  Eigen::SyclDevice device_under_test(&queue_interface);
+
+  // Large-tensor comparisons against the host evaluation.
+  test_larg_expr1D<float, RowMajor, int64_t>(device_under_test);
+  test_larg_expr1D<float, ColMajor, int64_t>(device_under_test);
+  test_larg_expr2D<float, RowMajor, int64_t>(device_under_test);
+  test_larg_expr2D<float, ColMajor, int64_t>(device_under_test);
+  test_larg_expr3D<float, RowMajor, int64_t>(device_under_test);
+  test_larg_expr3D<float, ColMajor, int64_t>(device_under_test);
+
+  // Small cases with hand-written expected values.
+  test_evals<float, ColMajor, int64_t>(device_under_test);
+  test_evals<float, RowMajor, int64_t>(device_under_test);
+  test_expr<float, ColMajor, int64_t>(device_under_test);
+  test_expr<float, RowMajor, int64_t>(device_under_test);
+  test_modes<float, ColMajor, int64_t>(device_under_test);
+  test_modes<float, RowMajor, int64_t>(device_under_test);
+  test_strides<float, ColMajor, int64_t>(device_under_test);
+  test_strides<float, RowMajor, int64_t>(device_under_test);
+}
+
+// Test entry point: runs the full convolution suite once for each device
+// returned by Eigen::get_sycl_supported_devices().
+EIGEN_DECLARE_TEST(cxx11_tensor_convolution_sycl) {
+  const auto supported_devices = Eigen::get_sycl_supported_devices();
+  for (const auto& device : supported_devices) {
+    CALL_SUBTEST(tensorConvolutionPerDevice(device));
+  }
+}
author	Yi Kong <yikong@google.com>	2022-02-25 16:41:05 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	2022-02-25 16:41:05 +0000
commit	bc0f5df265caa21a2120c22453655a7fcc941991 (patch)
tree	fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /unsupported/test/cxx11_tensor_convolution_sycl.cpp
parent	8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff)
parent	7cb50013986f04dce5fac87bebf319bb8db37a36 (diff)
download	eigen-ebe162fe562066c4ede645ddecae96b7dab0987f.tar.gz