diff options
author | Miao Wang <miaowang@google.com> | 2015-07-14 16:18:49 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2015-07-27 13:04:36 -0700 |
commit | 6d4dabc61625fb271c5e3e10f0689b2996de3933 (patch) | |
tree | b0c40e65d0a90629dc2b0e1b6b0712bc751168cd | |
parent | 648a1c137663ef7207684d0d7009dd5518942111 (diff) | |
download | rs-6d4dabc61625fb271c5e3e10f0689b2996de3933.tar.gz |
Making libRSSupport able to optionally bundle libblas(V8) through dlopen
and dlsym.
bug: 22700067
bug: 22693954
Change-Id: I3ade3ad2802f3b8e5fc5661319b98a6212e6d8a2
(cherry picked from commit e941f18202b9c9883ff81c63710f7faec5c988e4)
-rw-r--r-- | cpu_ref/rsCpuBLAS.inc | 136 | ||||
-rw-r--r-- | cpu_ref/rsCpuBLASDispatch.h | 468 | ||||
-rw-r--r-- | cpu_ref/rsCpuIntrinsicBLAS.cpp | 16 |
3 files changed, 619 insertions, 1 deletions
diff --git a/cpu_ref/rsCpuBLAS.inc b/cpu_ref/rsCpuBLAS.inc new file mode 100644 index 00000000..17fdcd3a --- /dev/null +++ b/cpu_ref/rsCpuBLAS.inc @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This is a helper file to apply macros to different cblas routines. +// Will be include multiple times. + +#if !defined(RS_APPLY_MACRO_TO) +#error "You must define the macro RS_APPLY_MACRO_TO to include this file" +#endif + +RS_APPLY_MACRO_TO(cblas_sgemv) +RS_APPLY_MACRO_TO(cblas_sgbmv) +RS_APPLY_MACRO_TO(cblas_strmv) +RS_APPLY_MACRO_TO(cblas_stbmv) +RS_APPLY_MACRO_TO(cblas_stpmv) +RS_APPLY_MACRO_TO(cblas_strsv) +RS_APPLY_MACRO_TO(cblas_stbsv) +RS_APPLY_MACRO_TO(cblas_stpsv) + +RS_APPLY_MACRO_TO(cblas_dgemv) +RS_APPLY_MACRO_TO(cblas_dgbmv) +RS_APPLY_MACRO_TO(cblas_dtrmv) +RS_APPLY_MACRO_TO(cblas_dtbmv) +RS_APPLY_MACRO_TO(cblas_dtpmv) +RS_APPLY_MACRO_TO(cblas_dtrsv) +RS_APPLY_MACRO_TO(cblas_dtbsv) +RS_APPLY_MACRO_TO(cblas_dtpsv) + +RS_APPLY_MACRO_TO(cblas_cgemv) +RS_APPLY_MACRO_TO(cblas_cgbmv) +RS_APPLY_MACRO_TO(cblas_ctrmv) +RS_APPLY_MACRO_TO(cblas_ctbmv) +RS_APPLY_MACRO_TO(cblas_ctpmv) +RS_APPLY_MACRO_TO(cblas_ctrsv) +RS_APPLY_MACRO_TO(cblas_ctbsv) +RS_APPLY_MACRO_TO(cblas_ctpsv) + +RS_APPLY_MACRO_TO(cblas_zgemv) +RS_APPLY_MACRO_TO(cblas_zgbmv) +RS_APPLY_MACRO_TO(cblas_ztrmv) +RS_APPLY_MACRO_TO(cblas_ztbmv) +RS_APPLY_MACRO_TO(cblas_ztpmv) +RS_APPLY_MACRO_TO(cblas_ztrsv) +RS_APPLY_MACRO_TO(cblas_ztbsv) +RS_APPLY_MACRO_TO(cblas_ztpsv) + +RS_APPLY_MACRO_TO(cblas_ssymv) +RS_APPLY_MACRO_TO(cblas_ssbmv) +RS_APPLY_MACRO_TO(cblas_sspmv) +RS_APPLY_MACRO_TO(cblas_sger) +RS_APPLY_MACRO_TO(cblas_ssyr) +RS_APPLY_MACRO_TO(cblas_sspr) +RS_APPLY_MACRO_TO(cblas_ssyr2) +RS_APPLY_MACRO_TO(cblas_sspr2) + +RS_APPLY_MACRO_TO(cblas_dsymv) +RS_APPLY_MACRO_TO(cblas_dsbmv) +RS_APPLY_MACRO_TO(cblas_dspmv) +RS_APPLY_MACRO_TO(cblas_dger) +RS_APPLY_MACRO_TO(cblas_dsyr) +RS_APPLY_MACRO_TO(cblas_dspr) +RS_APPLY_MACRO_TO(cblas_dsyr2) +RS_APPLY_MACRO_TO(cblas_dspr2) + +RS_APPLY_MACRO_TO(cblas_chemv) +RS_APPLY_MACRO_TO(cblas_chbmv) +RS_APPLY_MACRO_TO(cblas_chpmv) +RS_APPLY_MACRO_TO(cblas_cgeru) +RS_APPLY_MACRO_TO(cblas_cgerc) +RS_APPLY_MACRO_TO(cblas_cher) +RS_APPLY_MACRO_TO(cblas_chpr) +RS_APPLY_MACRO_TO(cblas_cher2) +RS_APPLY_MACRO_TO(cblas_chpr2) + +RS_APPLY_MACRO_TO(cblas_zhemv) +RS_APPLY_MACRO_TO(cblas_zhbmv) +RS_APPLY_MACRO_TO(cblas_zhpmv) +RS_APPLY_MACRO_TO(cblas_zgeru) +RS_APPLY_MACRO_TO(cblas_zgerc) +RS_APPLY_MACRO_TO(cblas_zher) +RS_APPLY_MACRO_TO(cblas_zhpr) +RS_APPLY_MACRO_TO(cblas_zher2) +RS_APPLY_MACRO_TO(cblas_zhpr2) + + +RS_APPLY_MACRO_TO(cblas_sgemm) +RS_APPLY_MACRO_TO(cblas_ssymm) +RS_APPLY_MACRO_TO(cblas_ssyrk) +RS_APPLY_MACRO_TO(cblas_ssyr2k) +RS_APPLY_MACRO_TO(cblas_strmm) +RS_APPLY_MACRO_TO(cblas_strsm) + +RS_APPLY_MACRO_TO(cblas_dgemm) +RS_APPLY_MACRO_TO(cblas_dsymm) +RS_APPLY_MACRO_TO(cblas_dsyrk) +RS_APPLY_MACRO_TO(cblas_dsyr2k) +RS_APPLY_MACRO_TO(cblas_dtrmm) +RS_APPLY_MACRO_TO(cblas_dtrsm) + +RS_APPLY_MACRO_TO(cblas_cgemm) +RS_APPLY_MACRO_TO(cblas_csymm) +RS_APPLY_MACRO_TO(cblas_csyrk) +RS_APPLY_MACRO_TO(cblas_csyr2k) +RS_APPLY_MACRO_TO(cblas_ctrmm) +RS_APPLY_MACRO_TO(cblas_ctrsm) + +RS_APPLY_MACRO_TO(cblas_zgemm) +RS_APPLY_MACRO_TO(cblas_zsymm) +RS_APPLY_MACRO_TO(cblas_zsyrk) +RS_APPLY_MACRO_TO(cblas_zsyr2k) +RS_APPLY_MACRO_TO(cblas_ztrmm) +RS_APPLY_MACRO_TO(cblas_ztrsm) + +RS_APPLY_MACRO_TO(cblas_chemm) +RS_APPLY_MACRO_TO(cblas_cherk) +RS_APPLY_MACRO_TO(cblas_cher2k) + +RS_APPLY_MACRO_TO(cblas_zhemm) +RS_APPLY_MACRO_TO(cblas_zherk) +RS_APPLY_MACRO_TO(cblas_zher2k) + +// Undefine the macro so that we can include this file multiple times to generate different functionality. +#undef RS_APPLY_MACRO_TO diff --git a/cpu_ref/rsCpuBLASDispatch.h b/cpu_ref/rsCpuBLASDispatch.h new file mode 100644 index 00000000..46021358 --- /dev/null +++ b/cpu_ref/rsCpuBLASDispatch.h @@ -0,0 +1,468 @@ +#ifndef RS_COMPATIBILITY_LIB +#include "cblas.h" +#else +#include <dlfcn.h> +/* + * The following enum and function pointers are based on cblas.h + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; +enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; +enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; +enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; +enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; + +typedef void (*FnPtr_cblas_sgemv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + float *Y, const int incY); +typedef void (*FnPtr_cblas_sgbmv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const float alpha, + const float *A, const int lda, const float *X, + const int incX, const float beta, float *Y, const int incY); +typedef void (*FnPtr_cblas_strmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, + float *X, const int incX); +typedef void (*FnPtr_cblas_stbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +typedef void (*FnPtr_cblas_stpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); +typedef void (*FnPtr_cblas_strsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, float *X, + const int incX); +typedef void (*FnPtr_cblas_stbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +typedef void (*FnPtr_cblas_stpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); + +typedef void (*FnPtr_cblas_dgemv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *X, const int incX, const double beta, + double *Y, const int incY); +typedef void (*FnPtr_cblas_dgbmv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const double alpha, + const double *A, const int lda, const double *X, + const int incX, const double beta, double *Y, const int incY); +typedef void (*FnPtr_cblas_dtrmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, + double *X, const int incX); +typedef void (*FnPtr_cblas_dtbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +typedef void (*FnPtr_cblas_dtpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); +typedef void (*FnPtr_cblas_dtrsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, double *X, + const int incX); +typedef void (*FnPtr_cblas_dtbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +typedef void (*FnPtr_cblas_dtpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); + +typedef void (*FnPtr_cblas_cgemv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +typedef void (*FnPtr_cblas_cgbmv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_ctrmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +typedef void (*FnPtr_cblas_ctbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +typedef void (*FnPtr_cblas_ctpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +typedef void (*FnPtr_cblas_ctrsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +typedef void (*FnPtr_cblas_ctbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +typedef void (*FnPtr_cblas_ctpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + +typedef void (*FnPtr_cblas_zgemv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +typedef void (*FnPtr_cblas_zgbmv)(const enum CBLAS_ORDER order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_ztrmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +typedef void (*FnPtr_cblas_ztbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +typedef void (*FnPtr_cblas_ztpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +typedef void (*FnPtr_cblas_ztrsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +typedef void (*FnPtr_cblas_ztbsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +typedef void (*FnPtr_cblas_ztpsv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + + +/* + * Routines with S and D prefixes only + */ +typedef void (*FnPtr_cblas_ssymv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +typedef void (*FnPtr_cblas_ssbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +typedef void (*FnPtr_cblas_sspmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *Ap, + const float *X, const int incX, + const float beta, float *Y, const int incY); +typedef void (*FnPtr_cblas_sger)(const enum CBLAS_ORDER order, const int M, const int N, + const float alpha, const float *X, const int incX, + const float *Y, const int incY, float *A, const int lda); +typedef void (*FnPtr_cblas_ssyr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *A, const int lda); +typedef void (*FnPtr_cblas_sspr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *Ap); +typedef void (*FnPtr_cblas_ssyr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A, + const int lda); +typedef void (*FnPtr_cblas_sspr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A); + +typedef void (*FnPtr_cblas_dsymv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +typedef void (*FnPtr_cblas_dsbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +typedef void (*FnPtr_cblas_dspmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *Ap, + const double *X, const int incX, + const double beta, double *Y, const int incY); +typedef void (*FnPtr_cblas_dger)(const enum CBLAS_ORDER order, const int M, const int N, + const double alpha, const double *X, const int incX, + const double *Y, const int incY, double *A, const int lda); +typedef void (*FnPtr_cblas_dsyr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *A, const int lda); +typedef void (*FnPtr_cblas_dspr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *Ap); +typedef void (*FnPtr_cblas_dsyr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A, + const int lda); +typedef void (*FnPtr_cblas_dspr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A); + + +/* + * Routines with C and Z prefixes only + */ +typedef void (*FnPtr_cblas_chemv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_chbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_chpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_cgeru)(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +typedef void (*FnPtr_cblas_cgerc)(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +typedef void (*FnPtr_cblas_cher)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, const int incX, + void *A, const int lda); +typedef void (*FnPtr_cblas_chpr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, + const int incX, void *A); +typedef void (*FnPtr_cblas_cher2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +typedef void (*FnPtr_cblas_chpr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +typedef void (*FnPtr_cblas_zhemv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_zhbmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_zhpmv)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +typedef void (*FnPtr_cblas_zgeru)(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +typedef void (*FnPtr_cblas_zgerc)(const enum CBLAS_ORDER order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +typedef void (*FnPtr_cblas_zher)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, const int incX, + void *A, const int lda); +typedef void (*FnPtr_cblas_zhpr)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, + const int incX, void *A); +typedef void (*FnPtr_cblas_zher2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +typedef void (*FnPtr_cblas_zhpr2)(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (S, D, C, Z) + */ +typedef void (*FnPtr_cblas_sgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const float alpha, const float *A, + const int lda, const float *B, const int ldb, + const float beta, float *C, const int ldc); +typedef void (*FnPtr_cblas_ssymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +typedef void (*FnPtr_cblas_ssyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float beta, float *C, const int ldc); +typedef void (*FnPtr_cblas_ssyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +typedef void (*FnPtr_cblas_strmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); +typedef void (*FnPtr_cblas_strsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); + +typedef void (*FnPtr_cblas_dgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc); +typedef void (*FnPtr_cblas_dsymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +typedef void (*FnPtr_cblas_dsyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double beta, double *C, const int ldc); +typedef void (*FnPtr_cblas_dsyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +typedef void (*FnPtr_cblas_dtrmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); +typedef void (*FnPtr_cblas_dtrsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); + +typedef void (*FnPtr_cblas_cgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +typedef void (*FnPtr_cblas_csymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +typedef void (*FnPtr_cblas_csyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +typedef void (*FnPtr_cblas_csyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +typedef void (*FnPtr_cblas_ctrmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +typedef void (*FnPtr_cblas_ctrsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + +typedef void (*FnPtr_cblas_zgemm)(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +typedef void (*FnPtr_cblas_zsymm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +typedef void (*FnPtr_cblas_zsyrk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +typedef void (*FnPtr_cblas_zsyr2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +typedef void (*FnPtr_cblas_ztrmm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +typedef void (*FnPtr_cblas_ztrsm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + + +/* + * Routines with prefixes C and Z only + */ +typedef void (*FnPtr_cblas_chemm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +typedef void (*FnPtr_cblas_cherk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const void *A, const int lda, + const float beta, void *C, const int ldc); +typedef void (*FnPtr_cblas_cher2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const float beta, + void *C, const int ldc); + +typedef void (*FnPtr_cblas_zhemm)(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +typedef void (*FnPtr_cblas_zherk)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const void *A, const int lda, + const double beta, void *C, const int ldc); +typedef void (*FnPtr_cblas_zher2k)(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const double beta, + void *C, const int ldc); + +// Macros to help declare our function pointers for the dispatch table. +#define RS_APPLY_MACRO_TO(x) \ + FnPtr_##x x; +#include "rsCpuBLAS.inc" + +bool loadBLASLib() { + void* handle = NULL; + handle = dlopen("libblasV8.so", RTLD_LAZY | RTLD_LOCAL); + + if (handle == NULL) { + return false; + } + +// Macros to help load the function pointers. +#define RS_APPLY_MACRO_TO(x) \ + x = (FnPtr_##x)dlsym(handle, #x); \ + if (x == nullptr) { \ + ALOGE("Failed to load " #x " for RS BLAS implementation."); \ + return false; \ + } +#include "rsCpuBLAS.inc" + return true; +} + +#endif diff --git a/cpu_ref/rsCpuIntrinsicBLAS.cpp b/cpu_ref/rsCpuIntrinsicBLAS.cpp index a7705b4e..ef041351 100644 --- a/cpu_ref/rsCpuIntrinsicBLAS.cpp +++ b/cpu_ref/rsCpuIntrinsicBLAS.cpp @@ -17,7 +17,7 @@ #include "rsCpuIntrinsic.h" #include "rsCpuIntrinsicInlines.h" -#include "cblas.h" +#include "rsCpuBLASDispatch.h" using namespace android; using namespace android::renderscript; @@ -46,6 +46,9 @@ protected: uint8_t b_offset = 0; uint8_t c_offset = 0; +#ifdef RS_COMPATIBILITY_LIB + bool isBlasLibInitialized = false; +#endif static void kernelBNNM(size_t m, size_t n, size_t k, const uint8_t* a, uint8_t a_offset, size_t lda, const uint8_t* b, uint8_t b_offset, size_t ldb, @@ -111,6 +114,17 @@ void RsdCpuScriptIntrinsicBLAS::invokeForEach(uint32_t slot, int lda = 0, ldb = 0, ldc = 0; +#ifdef RS_COMPATIBILITY_LIB + // Allow BNNM even without libblas + if (call->func != RsBlas_bnnm && !isBlasLibInitialized) { + if (!loadBLASLib()) { + ALOGE("Failed to load the BLAS lib, IntrinsicBLAS NOT supported!\n"); + return; + } + isBlasLibInitialized = true; + } +#endif + switch (call->func) { // Level 1 BLAS: returns into a 1D Allocation |