From e54a7bd24d0484f56f1dec8660a401e5af964cda Mon Sep 17 00:00:00 2001 From: "andrew@webrtc.org" Date: Fri, 3 Oct 2014 22:17:50 +0000 Subject: Only try to do CPU detection on Android ARM. Previously, the code made the assumption that ARM implied Android. It uses Android-specific machinery to do run-time NEON detection. Retain this functionality, but on non-Android, don't do run-time detection; instead require arm_neon==1 in order to build the NEON bits. BUG=chromium:415393 TEST=Production code requiring openmax_dl runs correctly on an ARM Chromebook. R=rtoy@google.com Review URL: https://webrtc-codereview.appspot.com/29639004 git-svn-id: http://webrtc.googlecode.com/svn/deps/third_party/openmax@7371 4adac7df-926f-26a2-2b94-8c16560cd09d --- dl/dl.gyp | 204 +++++++++++++++++++++++++------------------- dl/sp/api/omxSP.h | 73 ++++++++-------- dl/sp/src/test/test_fft.gyp | 57 ++++++++----- 3 files changed, 189 insertions(+), 145 deletions(-) diff --git a/dl/dl.gyp b/dl/dl.gyp index d43f546..ecb3450 100644 --- a/dl/dl.gyp +++ b/dl/dl.gyp @@ -11,13 +11,35 @@ # Override this value to build with small float FFT tables 'big_float_fft%' : 1, }, + 'target_defaults': { + 'include_dirs': [ + '../', + ], + 'conditions' : [ + ['target_arch=="arm"', { + 'conditions' : [ + ['arm_neon==1', { + # Enable build-time NEON selection. + 'defines': ['DL_ARM_NEON',], + 'direct_dependent_settings': { + 'defines': ['DL_ARM_NEON',], + }, + }], + ['arm_neon==0 and OS=="android"', { + # Enable run-time NEON selection. 
+ 'defines': ['DL_ARM_NEON_OPTIONAL',], + 'direct_dependent_settings': { + 'defines': ['DL_ARM_NEON_OPTIONAL',], + }, + }], + ], + }], + ], + }, 'targets': [ { 'target_name': 'openmax_dl', 'type': 'static_library', - 'include_dirs': [ - '../', - ], 'direct_dependent_settings': { 'include_dirs': [ '../', @@ -29,7 +51,7 @@ 'sp/src/armSP_FFT_F32TwiddleTable.c', ], 'conditions' : [ - ['big_float_fft == 1', { + ['big_float_fft==1', { 'defines': [ 'BIG_FFT_TABLE', ], @@ -50,76 +72,80 @@ ], }], ['target_arch=="arm"', { - 'cflags!': [ - '-mfpu=vfpv3-d16', - ], - 'cflags': [ - # We enable Neon instructions even with arm_neon==0, to support - # runtime detection. - '-mfpu=neon', - ], - 'dependencies': [ - 'openmax_dl_armv7', - ], - 'sources': [ - # Common files that are used by both the NEON and non-NEON code. - 'api/armCOMM_s.h', - 'sp/src/arm/omxSP_FFTGetBufSize_C_SC16.c', - 'sp/src/arm/omxSP_FFTGetBufSize_R_S16.c', - 'sp/src/arm/omxSP_FFTGetBufSize_R_S16S32.c', - 'sp/src/arm/omxSP_FFTInit_C_SC16.c', - 'sp/src/arm/omxSP_FFTInit_C_SC32.c', - 'sp/src/arm/omxSP_FFTInit_R_S16.c', - 'sp/src/arm/omxSP_FFTInit_R_S16S32.c', - 'sp/src/arm/omxSP_FFTInit_R_S32.c', + 'conditions': [ + ['arm_neon==0 or OS=="android"', { + 'dependencies': [ + 'openmax_dl_armv7', + ], + }], + ['arm_neon==1 or OS=="android"', { + 'cflags!': [ + '-mfpu=vfpv3-d16', + ], + 'cflags': [ + '-mfpu=neon', + ], + 'sources': [ + # Common files that are used by both the NEON and non-NEON code. + 'api/armCOMM_s.h', + 'sp/src/arm/omxSP_FFTGetBufSize_C_SC16.c', + 'sp/src/arm/omxSP_FFTGetBufSize_R_S16.c', + 'sp/src/arm/omxSP_FFTGetBufSize_R_S16S32.c', + 'sp/src/arm/omxSP_FFTInit_C_SC16.c', + 'sp/src/arm/omxSP_FFTInit_C_SC32.c', + 'sp/src/arm/omxSP_FFTInit_R_S16.c', + 'sp/src/arm/omxSP_FFTInit_R_S16S32.c', + 'sp/src/arm/omxSP_FFTInit_R_S32.c', - # Complex 32-bit fixed-point FFT. 
- 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CToC_SC32_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTFwd_CToC_SC32_Sfs_s.S', - # Real 32-bit fixed-point FFT - 'sp/src/arm/neon/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S', - 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32_Sfs_s.S', - # Complex 16-bit fixed-point FFT - 'sp/src/arm/neon/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S', - 'sp/src/arm/neon/omxSP_FFTFwd_CToC_SC16_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CToC_SC16_Sfs_s.S', - # Real 16-bit fixed-point FFT - 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S16_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S', - # Complex floating-point FFT - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S', - 
'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S', - 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CToC_FC32_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTFwd_CToC_FC32_Sfs_s.S', - # Real floating-point FFT - 'sp/src/arm/neon/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S', - 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S', - 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_F32_Sfs_s.S', + # Complex 32-bit fixed-point FFT. + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_ls_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix2_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix4_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC32_Radix8_fs_unsafe_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CToC_SC32_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTFwd_CToC_SC32_Sfs_s.S', + # Real 32-bit fixed-point FFT + 'sp/src/arm/neon/armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S', + 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S32_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32_Sfs_s.S', + # Complex 16-bit fixed-point FFT + 'sp/src/arm/neon/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix2_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S', + 
'sp/src/arm/neon/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S', + 'sp/src/arm/neon/omxSP_FFTFwd_CToC_SC16_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CToC_SC16_Sfs_s.S', + # Real 16-bit fixed-point FFT + 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S16_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S', + # Complex floating-point FFT + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_ls_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_fs_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_ls_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix2_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix4_unsafe_s.S', + 'sp/src/arm/neon/armSP_FFT_CToC_FC32_Radix8_fs_unsafe_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CToC_FC32_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTFwd_CToC_FC32_Sfs_s.S', + # Real floating-point FFT + 'sp/src/arm/neon/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_s.S', + 'sp/src/arm/neon/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S', + 'sp/src/arm/neon/omxSP_FFTInv_CCSToR_F32_Sfs_s.S', + ], + }], ], }], ['target_arch=="ia32" or target_arch=="x64"', { @@ -210,24 +236,10 @@ # standalone. Applications must link with openmax_dl. 
'target_name': 'openmax_dl_armv7', 'type': 'static_library', - 'includes': [ - '../../../build/android/cpufeatures.gypi', - ], - 'include_dirs': [ - '../', - ], 'cflags!': [ '-mfpu=neon', ], - 'link_settings' : { - 'libraries': [ - # To get the __android_log_print routine - '-llog', - ], - }, 'sources': [ - # Detection routine - 'sp/src/arm/detect.c', # Complex floating-point FFT 'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S', 'sp/src/arm/armv7/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S', @@ -241,6 +253,24 @@ 'sp/src/arm/armv7/omxSP_FFTFwd_RToCCS_F32_Sfs_s.S', 'sp/src/arm/armv7/omxSP_FFTInv_CCSToR_F32_Sfs_s.S', ], + 'conditions': [ + ['OS=="android"', { + # We only do run-time NEON detection on Android. + 'includes': [ + '../../../build/android/cpufeatures.gypi', + ], + 'link_settings' : { + 'libraries': [ + # To get the __android_log_print routine + '-llog', + ], + }, + 'sources': [ + # Detection routine + 'sp/src/arm/detect.c', + ], + }], + ], }, ], }], diff --git a/dl/sp/api/omxSP.h b/dl/sp/api/omxSP.h index 249217b..0565fe9 100644 --- a/dl/sp/api/omxSP.h +++ b/dl/sp/api/omxSP.h @@ -2306,30 +2306,6 @@ OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs( const OMXFFTSpec_R_F32* pFFTSpec ); -#ifdef __arm__ -/* - * Non-NEON version of omxSP_FFTFwd_RToCCS_F32_Sfs - */ -OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs_vfp( - const OMX_F32* pSrc, - OMX_F32* pDst, - const OMXFFTSpec_R_F32* pFFTSpec -); - -/* - * Just like omxSP_FFTFwd_RToCCS_F32_Sfs, but automatically detects - * whether NEON is available or not and chooses the appropriate - * routine. 
- */ -extern OMXResult (*omxSP_FFTFwd_RToCCS_F32)( - const OMX_F32* pSrc, - OMX_F32* pDst, - const OMXFFTSpec_R_F32* pFFTSpec -); -#else -#define omxSP_FFTFwd_RToCCS_F32 omxSP_FFTFwd_RToCCS_F32_Sfs -#endif - /** * Function: omxSP_FFTInv_CCSToR_S32S16_Sfs (2.2.4.4.4) * @@ -2559,7 +2535,7 @@ OMXResult omxSP_FFTInv_CToC_FC32_Sfs_vfp ( * Return Value: * * OMX_Sts_NoErr - no error - + * * OMX_Sts_BadArgErr - bad arguments if one or more of the * following is true: * - pSrc, pDst, or pFFTSpec is NULL @@ -2573,28 +2549,55 @@ OMXResult omxSP_FFTInv_CCSToR_F32_Sfs( const OMXFFTSpec_R_F32* pFFTSpec ); -#ifdef __arm__ /* - * Non-NEON version of omxSP_FFTInv_CCSToR_F32_Sfs + * This block sets things up appropriately for run-time or build-time selection + * of NEON implementations. */ -OMXResult omxSP_FFTInv_CCSToR_F32_Sfs_vfp( +#if defined(__arm__) +/* + * Generic versions. Just like their *_Sfs counterparts, but automatically + * detect whether NEON is available or not and choose the appropriate routine. + */ +#if defined(DL_ARM_NEON_OPTIONAL) +extern OMXResult (*omxSP_FFTFwd_RToCCS_F32)( const OMX_F32* pSrc, OMX_F32* pDst, const OMXFFTSpec_R_F32* pFFTSpec ); - -/* - * Just like omxSP_FFTInv_CCSToR_F32_Sfs, but automatically detects - * whether NEON is available or not and chooses the appropriate - * routine. - */ extern OMXResult (*omxSP_FFTInv_CCSToR_F32)( const OMX_F32* pSrc, OMX_F32* pDst, const OMXFFTSpec_R_F32* pFFTSpec); +#elif defined(DL_ARM_NEON) +/* Build-time NEON selection. */ +#define omxSP_FFTFwd_RToCCS_F32 omxSP_FFTFwd_RToCCS_F32_Sfs +#define omxSP_FFTInv_CCSToR_F32 omxSP_FFTInv_CCSToR_F32_Sfs #else +/* Build-time non-NEON selection. */ +#define omxSP_FFTFwd_RToCCS_F32 omxSP_FFTFwd_RToCCS_F32_Sfs_vfp +#define omxSP_FFTInv_CCSToR_F32 omxSP_FFTInv_CCSToR_F32_Sfs_vfp +#endif /* defined(DL_ARM_NEON_OPTIONAL) */ + +#if defined(DL_ARM_NEON_OPTIONAL) || !defined(DL_ARM_NEON) +/* Non-NEON versions. 
*/ +OMXResult omxSP_FFTFwd_RToCCS_F32_Sfs_vfp( + const OMX_F32* pSrc, + OMX_F32* pDst, + const OMXFFTSpec_R_F32* pFFTSpec +); + +OMXResult omxSP_FFTInv_CCSToR_F32_Sfs_vfp( + const OMX_F32* pSrc, + OMX_F32* pDst, + const OMXFFTSpec_R_F32* pFFTSpec +); +#endif /* defined(DL_ARM_NEON_OPTIONAL) || !defined(DL_ARM_NEON) */ + +#else +/* Build-time non-ARM selection. */ +#define omxSP_FFTFwd_RToCCS_F32 omxSP_FFTFwd_RToCCS_F32_Sfs #define omxSP_FFTInv_CCSToR_F32 omxSP_FFTInv_CCSToR_F32_Sfs -#endif +#endif /* defined(__arm__) */ #ifdef __cplusplus } diff --git a/dl/sp/src/test/test_fft.gyp b/dl/sp/src/test/test_fft.gyp index d9764cc..859ea46 100644 --- a/dl/sp/src/test/test_fft.gyp +++ b/dl/sp/src/test/test_fft.gyp @@ -151,6 +151,11 @@ 'dependencies!' : [ 'test_utilities' ], + 'link_settings': { + 'libraries': [ + '-lm', + ], + }, 'sources' : [ 'aligned_ptr.c', 'compare.c', @@ -167,9 +172,6 @@ 'test_float_rfft.c', 'support/float_rfft_thresholds.h', ], - 'libraries': [ - '-lm', - ], 'conditions': [ ['target_arch == "arm" or target_arch == "arm64"', { 'sources': [ @@ -195,9 +197,6 @@ 'sources': [ 'test_fft_time.c', ], - 'libraries': [ - '-lm', - ], 'conditions': [ ['target_arch == "ia32" or target_arch == "x64" or target_arch == "arm64" or target_arch == "mipsel"', { 'defines': [ @@ -213,32 +212,44 @@ 'type': 'none', 'conditions' : [ ['target_arch == "arm"', { - # Supported test programs for ARM - 'dependencies': [ - 'test_fft16', - 'test_fft32', - 'test_float_fft', - 'test_float_rfft', - 'test_rfft16_s32', - 'test_rfft16_s16', - 'test_rfft32', - # Non-Neon tests - 'test_fft_time_armv7', - 'test_float_fft_armv7', - 'test_float_rfft_armv7', - # Tests with detection - 'test_float_rfft_detect', + 'conditions' : [ + ['arm_neon==1 or OS=="android"', { + # NEON tests. 
+ 'dependencies': [ + 'test_fft16', + 'test_fft32', + 'test_float_fft', + 'test_float_rfft', + 'test_rfft16_s32', + 'test_rfft16_s16', + 'test_rfft32', + ], + }], + ['arm_neon==0 or OS=="android"', { + # Non-NEON tests. + 'dependencies': [ + 'test_fft_time_armv7', + 'test_float_fft_armv7', + 'test_float_rfft_armv7', + ], + }], + ['OS=="android"', { + # Tests with detection. + 'dependencies': [ + 'test_float_rfft_detect', + ], + }], ], }], ['target_arch == "arm64"', { - # Supported test programs for ARM64 + # Supported test programs for ARM64. 'dependencies': [ 'test_float_fft', ], }], ], 'dependencies' : [ - # All architectures must support at least the float rfft test + # All architectures must support at least the float rfft test. 'test_float_rfft', 'test_fft_time', ], -- cgit v1.2.3