diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-04-06 16:00:50 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2022-04-06 16:00:50 +0000 |
commit | fbd0b66b6ad9863164b6c7f8dc6df5cc8ffc3978 (patch) | |
tree | efa48ebdb8542e1ec6490306d11e5eb32c7f8635 | |
parent | 4c2de4651f5ea4e79600cc259c3e256a5797d3d9 (diff) | |
parent | 949f27ce82a3a82e14bbe45f33cd727bc48db918 (diff) | |
download | libavc-fbd0b66b6ad9863164b6c7f8dc6df5cc8ffc3978.tar.gz |
Snap for 8413241 from 949f27ce82a3a82e14bbe45f33cd727bc48db918 to mainline-art-release (ref: android-mainline-12.0.0_r115)
Change-Id: I10b54767a92a826d81719f505268702d935dc116
-rw-r--r-- | common/x86/ih264_iquant_itrans_recon_sse42.c | 8 | ||||
-rw-r--r-- | common/x86/ih264_platform_macros.h | 14 | ||||
-rw-r--r-- | common/x86/ih264_resi_trans_quant_sse42.c | 9 |
3 files changed, 23 insertions, 8 deletions
diff --git a/common/x86/ih264_iquant_itrans_recon_sse42.c b/common/x86/ih264_iquant_itrans_recon_sse42.c index a7b9e82..3ae18a5 100644 --- a/common/x86/ih264_iquant_itrans_recon_sse42.c +++ b/common/x86/ih264_iquant_itrans_recon_sse42.c @@ -227,10 +227,10 @@ void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src, //Transform ends -- horizontal transform //Load pred buffer - pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits - pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits - pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits - pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits + pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits + pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits + pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits pred_r0 = _mm_cvtepu8_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits pred_r1 = _mm_cvtepu8_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits diff --git a/common/x86/ih264_platform_macros.h b/common/x86/ih264_platform_macros.h index 54af325..a733617 100644 --- a/common/x86/ih264_platform_macros.h +++ b/common/x86/ih264_platform_macros.h @@ -41,6 +41,20 @@ #include <stdint.h> #include <immintrin.h> +#ifndef __ANDROID__ +static __inline__ __m128i +loadu_32(void const *__a) +{ + struct __loadu_si32 { + int __v; + } __attribute__((__packed__, __may_alias__)); + int __u = ((struct __loadu_si32*)__a)->__v; + return __extension__ (__m128i)(__v4si){__u, 0, 0, 0}; +} +#else +static __inline__ __m128i loadu_32(void const *__a) { return _mm_loadu_si32(__a); }; +#endif + #define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x)) #define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x)) 
diff --git a/common/x86/ih264_resi_trans_quant_sse42.c b/common/x86/ih264_resi_trans_quant_sse42.c index f4f5cbf..232ab1a 100644 --- a/common/x86/ih264_resi_trans_quant_sse42.c +++ b/common/x86/ih264_resi_trans_quant_sse42.c @@ -46,6 +46,7 @@ #include "ih264_defs.h" #include "ih264_size_defs.h" #include "ih264_macros.h" +#include "ih264_platform_macros.h" #include "ih264_trans_macros.h" #include "ih264_trans_data.h" #include "ih264_structs.h" @@ -136,10 +137,10 @@ void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred, src_r2 = _mm_cvtepu8_epi16(src_r2); src_r3 = _mm_cvtepu8_epi16(src_r3); - pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits - pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits - pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits - pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits + pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits + pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits + pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits + pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits pred_r0 = _mm_cvtepu8_epi16(pred_r0); //p00 p01 p02 p03 -- all 16 bits pred_r1 = _mm_cvtepu8_epi16(pred_r1); //p10 p11 p12 p13 -- all 16 bits |