Merge Android 12 QPR3 ab/8391262

Bug: 226662282
Merged-In: I8e4f1f4c57fae1510835733173c505326f39e28f
Change-Id: Ie5766e7c1506152070639040b82a9d2ca66766c1
author: Xin Li <delphij@google.com> 2022-04-01 20:32:27 +0000
committer: Xin Li <delphij@google.com> 2022-04-01 20:32:27 +0000
commit: aa4f8822088582e1532f2f135a51643c89bd823d (patch)
tree: 8bfac2959f81c8dd9362fad804a4c04f8cb718d0
parent: 9a308447a4e42482f63dc540b4fb4fa135986e36 (diff)
parent: f5513f57222ba2c14b68b72f350a550481cfe433 (diff)
download: libavc-aa4f8822088582e1532f2f135a51643c89bd823d.tar.gz
3 files changed, 23 insertions, 8 deletions
diff --git a/common/x86/ih264_iquant_itrans_recon_sse42.c b/common/x86/ih264_iquant_itrans_recon_sse42.c
index a7b9e82..3ae18a5 100644
--- a/common/x86/ih264_iquant_itrans_recon_sse42.c
+++ b/common/x86/ih264_iquant_itrans_recon_sse42.c
@@ -227,10 +227,10 @@ void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src,
     //Transform ends -- horizontal transform
 
     //Load pred buffer
-    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
-    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
-    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
-    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
+    pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits
+    pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits
+    pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits
+    pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits
 
     pred_r0 = _mm_cvtepu8_epi32(pred_r0); //p00 p01 p02 p03 -- all 32 bits
     pred_r1 = _mm_cvtepu8_epi32(pred_r1); //p10 p11 p12 p13 -- all 32 bits
diff --git a/common/x86/ih264_platform_macros.h b/common/x86/ih264_platform_macros.h
index 54af325..a733617 100644
--- a/common/x86/ih264_platform_macros.h
+++ b/common/x86/ih264_platform_macros.h
@@ -41,6 +41,20 @@
 #include <stdint.h>
 #include <immintrin.h>
 
+/* loadu_32: read 32 bits from __a (no alignment required) into lane 0 of an __m128i; the fallback zeroes lanes 1-3. */
+#ifndef __ANDROID__
+static __inline__ __m128i loadu_32(void const *__a)
+{
+  struct __loadu_si32 {
+    int __v;
+  } __attribute__((__packed__, __may_alias__)); /* packed: byte alignment; may_alias: exempt from strict aliasing */
+  int __u = ((const struct __loadu_si32 *)__a)->__v; /* keep const: the source is only read */
+  return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
+}
+#else /* __ANDROID__: forward to the toolchain's own intrinsic */
+static __inline__ __m128i loadu_32(void const *__a) { return _mm_loadu_si32(__a); }
+#endif
+
 #define CLIP_U8(x) CLIP3(0, UINT8_MAX, (x))
 #define CLIP_S8(x) CLIP3(INT8_MIN, INT8_MAX, (x))
 
diff --git a/common/x86/ih264_resi_trans_quant_sse42.c b/common/x86/ih264_resi_trans_quant_sse42.c
index f4f5cbf..232ab1a 100644
--- a/common/x86/ih264_resi_trans_quant_sse42.c
+++ b/common/x86/ih264_resi_trans_quant_sse42.c
@@ -46,6 +46,7 @@
 #include "ih264_defs.h"
 #include "ih264_size_defs.h"
 #include "ih264_macros.h"
+#include "ih264_platform_macros.h"
 #include "ih264_trans_macros.h"
 #include "ih264_trans_data.h"
 #include "ih264_structs.h"
@@ -136,10 +137,10 @@ void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred,
     src_r2 = _mm_cvtepu8_epi16(src_r2);
     src_r3 = _mm_cvtepu8_epi16(src_r3);
 
-    pred_r0 = _mm_loadl_epi64((__m128i *) (&pu1_pred[0])); //p00 p01 p02 p03 0 0 0 0 0 0 0 0 -- all 8 bits
-    pred_r1 = _mm_loadl_epi64((__m128i *) (&pu1_pred[pred_strd])); //p10 p11 p12 p13 0 0 0 0 0 0 0 0 -- all 8 bits
-    pred_r2 = _mm_loadl_epi64((__m128i *) (&pu1_pred[2 * pred_strd])); //p20 p21 p22 p23 0 0 0 0 0 0 0 0 -- all 8 bits
-    pred_r3 = _mm_loadl_epi64((__m128i *) (&pu1_pred[3 * pred_strd])); //p30 p31 p32 p33 0 0 0 0 0 0 0 0 -- all 8 bits
+    pred_r0 = loadu_32(&pu1_pred[0]); //p00 p01 p02 p03 -- all 8 bits
+    pred_r1 = loadu_32(&pu1_pred[pred_strd]); //p10 p11 p12 p13 -- all 8 bits
+    pred_r2 = loadu_32(&pu1_pred[2 * pred_strd]); //p20 p21 p22 p23 -- all 8 bits
+    pred_r3 = loadu_32(&pu1_pred[3 * pred_strd]); //p30 p31 p32 p33 -- all 8 bits
 
     pred_r0 = _mm_cvtepu8_epi16(pred_r0); //p00 p01 p02 p03 -- all 16 bits
     pred_r1 = _mm_cvtepu8_epi16(pred_r1); //p10 p11 p12 p13 -- all 16 bits
author	Xin Li <delphij@google.com>	2022-04-01 20:32:27 +0000
committer	Xin Li <delphij@google.com>	2022-04-01 20:32:27 +0000
commit	aa4f8822088582e1532f2f135a51643c89bd823d (patch)
tree	8bfac2959f81c8dd9362fad804a4c04f8cb718d0
parent	9a308447a4e42482f63dc540b4fb4fa135986e36 (diff)
parent	f5513f57222ba2c14b68b72f350a550481cfe433 (diff)
download	libavc-aa4f8822088582e1532f2f135a51643c89bd823d.tar.gz