aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Lu Wang <wanglu@loongson.cn> 2023-05-08 21:13:25 +0800
committer: libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> 2023-05-10 00:25:48 +0000
commit: 1d940cc570212c8979d81e78738296fe39f9df43 (patch)
tree: 7bcaeb72858520f5bf99655359cd8e9cc723d1db
parent: b372510c5699abdde5d50b60e89daa5b71b7792c (diff)
download: libyuv-1d940cc570212c8979d81e78738296fe39f9df43.tar.gz
Optimize the following functions with LSX.
MirrorRow_LSX, MirrorUVRow_LSX, ARGBMirrorRow_LSX, I422ToYUY2Row_LSX, I422ToUYVYRow_LSX, I422ToARGBRow_LSX, I422ToRGBARow_LSX, I422AlphaToARGBRow_LSX, I422ToRGB24Row_LSX, I422ToRGB565Row_LSX, I422ToARGB4444Row_LSX, I422ToARGB1555Row_LSX, YUY2ToYRow_LSX, YUY2ToUVRow_LSX, YUY2ToUV422Row_LSX

Bug: libyuv:913
Change-Id: I46cec605001d7ddd73846eed6d0a77f936b6dc53
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4515191
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- include/libyuv/row.h | 152
-rw-r--r-- source/convert.cc | 10
-rw-r--r-- source/convert_argb.cc | 104
-rw-r--r-- source/convert_from.cc | 24
-rw-r--r-- source/convert_from_argb.cc | 16
-rw-r--r-- source/planar_functions.cc | 42
-rw-r--r-- source/rotate.cc | 8
-rw-r--r-- source/rotate_argb.cc | 8
-rw-r--r-- source/row_any.cc | 35
-rw-r--r-- source/row_lsx.cc | 559
-rw-r--r-- source/scale_argb.cc | 8
11 files changed, 966 insertions, 0 deletions
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 6e973bcf..9a9d1b38 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -657,13 +657,24 @@ extern "C" {
#define HAS_ARGBSETROW_LSX
#define HAS_ARGBTOUVJROW_LSX
#define HAS_ARGBTOYJROW_LSX
+#define HAS_ARGBMIRRORROW_LSX
#define HAS_BGRATOUVROW_LSX
#define HAS_BGRATOYROW_LSX
#define HAS_I400TOARGBROW_LSX
#define HAS_I444TOARGBROW_LSX
#define HAS_INTERPOLATEROW_LSX
+#define HAS_I422ALPHATOARGBROW_LSX
+#define HAS_I422TOARGB1555ROW_LSX
+#define HAS_I422TOARGB4444ROW_LSX
+#define HAS_I422TORGB24ROW_LSX
+#define HAS_I422TORGB565ROW_LSX
+#define HAS_I422TORGBAROW_LSX
+#define HAS_I422TOUYVYROW_LSX
+#define HAS_I422TOYUY2ROW_LSX
#define HAS_J400TOARGBROW_LSX
#define HAS_MERGEUVROW_LSX
+#define HAS_MIRRORROW_LSX
+#define HAS_MIRRORUVROW_LSX
#define HAS_MIRRORSPLITUVROW_LSX
#define HAS_NV12TOARGBROW_LSX
#define HAS_NV12TORGB565ROW_LSX
@@ -687,6 +698,9 @@ extern "C" {
#define HAS_SPLITUVROW_LSX
#define HAS_UYVYTOARGBROW_LSX
#define HAS_YUY2TOARGBROW_LSX
+#define HAS_YUY2TOUVROW_LSX
+#define HAS_YUY2TOUV422ROW_LSX
+#define HAS_YUY2TOYROW_LSX
#define HAS_ARGBTOYROW_LSX
#define HAS_ABGRTOYJROW_LSX
#define HAS_RGBATOYJROW_LSX
@@ -694,6 +708,10 @@ extern "C" {
#define HAS_RAWTOYJROW_LSX
#endif
+#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx)
+#define HAS_I422TOARGBROW_LSX
+#endif
+
#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx)
#define HAS_ARGB1555TOARGBROW_LASX
#define HAS_ARGB1555TOUVROW_LASX
@@ -1060,6 +1078,12 @@ void I422ToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1072,6 +1096,12 @@ void I422ToRGBARow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGBARow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1085,6 +1115,13 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422AlphaToARGBRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1098,6 +1135,12 @@ void I422ToRGB24Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB24Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1110,6 +1153,12 @@ void I422ToRGB565Row_MSA(const uint8_t* src_y,
uint8_t* dst_rgb565,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB565Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_rgb565,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1122,6 +1171,12 @@ void I422ToARGB4444Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb4444,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb4444,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB4444Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1134,6 +1189,12 @@ void I422ToARGB1555Row_MSA(const uint8_t* src_y,
uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_argb1555,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -1958,6 +2019,7 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -1965,17 +2027,20 @@ void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width);
+void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width);
void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void MirrorSplitUVRow_SSSE3(const uint8_t* src,
@@ -2005,6 +2070,7 @@ void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);
+void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,
@@ -2017,6 +2083,9 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void ARGBMirrorRow_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ int width);
void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -4731,6 +4800,12 @@ void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGBRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4743,6 +4818,12 @@ void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGBARow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGBARow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4756,6 +4837,13 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422AlphaToARGBRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ const uint8_t* a_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4769,6 +4857,12 @@ void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB24Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4781,6 +4875,12 @@ void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToRGB565Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4793,6 +4893,12 @@ void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB4444Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4805,6 +4911,12 @@ void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
+void I422ToARGB1555Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ const struct YuvConstants* yuvconstants,
+ int width);
void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -4917,12 +5029,18 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
uint8_t* dst_v,
int width);
void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
+void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
+ int src_stride_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUVRow_LASX(const uint8_t* src_yuy2,
int src_stride_yuy2,
uint8_t* dst_u,
@@ -4932,6 +5050,10 @@ void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -4993,12 +5115,18 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_v,
int width);
void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
+void YUY2ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUVRow_Any_LSX(const uint8_t* src_ptr,
+ int src_stride_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr,
int src_stride_ptr,
uint8_t* dst_u,
@@ -5008,6 +5136,10 @@ void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
+void YUY2ToUV422Row_Any_LSX(const uint8_t* src_ptr,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width);
void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -5243,6 +5375,11 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y,
const uint8_t* src_v,
uint8_t* dst_yuy2,
int width);
+void I422ToYUY2Row_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_yuy2,
+ int width);
void I422ToYUY2Row_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -5253,6 +5390,11 @@ void I422ToUYVYRow_MSA(const uint8_t* src_y,
const uint8_t* src_v,
uint8_t* dst_uyvy,
int width);
+void I422ToUYVYRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ uint8_t* dst_uyvy,
+ int width);
void I422ToUYVYRow_LASX(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -5263,6 +5405,11 @@ void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void I422ToYUY2Row_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
@@ -5273,6 +5420,11 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
+void I422ToUYVYRow_Any_LSX(const uint8_t* y_buf,
+ const uint8_t* u_buf,
+ const uint8_t* v_buf,
+ uint8_t* dst_ptr,
+ int width);
void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
diff --git a/source/convert.cc b/source/convert.cc
index 0bcfbf20..ad679c59 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -1457,6 +1457,16 @@ int YUY2ToI420(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUVRow = YUY2ToUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUVRow = YUY2ToUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index c797a756..691208fd 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -120,6 +120,14 @@ int I420ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -361,6 +369,14 @@ int I422ToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
@@ -2007,6 +2023,14 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422ALPHATOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX;
@@ -2132,6 +2156,14 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422ALPHATOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422AlphaToARGBRow = I422AlphaToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422ALPHATOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422AlphaToARGBRow = I422AlphaToARGBRow_Any_LASX;
@@ -4463,6 +4495,14 @@ int I422ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGBARow = I422ToRGBARow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGBAROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGBARow = I422ToRGBARow_Any_LASX;
@@ -4678,6 +4718,14 @@ int I420ToRGBAMatrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGBAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGBARow = I422ToRGBARow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGBARow = I422ToRGBARow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGBAROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGBARow = I422ToRGBARow_Any_LASX;
@@ -4795,6 +4843,14 @@ int I420ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB24Row = I422ToRGB24Row_Any_LASX;
@@ -4984,6 +5040,14 @@ int I422ToRGB24Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB24ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB24Row = I422ToRGB24Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB24Row = I422ToRGB24Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB24ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB24Row = I422ToRGB24Row_Any_LASX;
@@ -5098,6 +5162,14 @@ int I420ToARGB1555(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGB1555ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB1555Row = I422ToARGB1555Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGB1555ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGB1555Row = I422ToARGB1555Row_Any_LASX;
@@ -5179,6 +5251,14 @@ int I420ToARGB4444(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGB4444ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGB4444Row = I422ToARGB4444Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGB4444ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGB4444Row = I422ToARGB4444Row_Any_LASX;
@@ -5261,6 +5341,14 @@ int I420ToRGB565Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB565Row = I422ToRGB565Row_Any_LASX;
@@ -5393,6 +5481,14 @@ int I422ToRGB565Matrix(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TORGB565ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToRGB565Row = I422ToRGB565Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToRGB565Row = I422ToRGB565Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TORGB565ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToRGB565Row = I422ToRGB565Row_Any_LASX;
@@ -5508,6 +5604,14 @@ int I420ToRGB565Dither(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 8bd07e4c..4102d610 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -446,6 +446,14 @@ int I420ToYUY2(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
@@ -533,6 +541,14 @@ int I422ToUYVY(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
@@ -608,6 +624,14 @@ int I420ToUYVY(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index 47ee3437..7e6d8647 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -1117,6 +1117,14 @@ int ARGBToYUY2(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_I422TOYUY2ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToYUY2Row = I422ToYUY2Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToYUY2Row = I422ToYUY2Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOYUY2ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToYUY2Row = I422ToYUY2Row_Any_LASX;
@@ -1288,6 +1296,14 @@ int ARGBToUYVY(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_I422TOUYVYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToUYVYRow = I422ToUYVYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ I422ToUYVYRow = I422ToUYVYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOUYVYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToUYVYRow = I422ToUYVYRow_Any_LASX;
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index c6f9d5c7..b0dc2f43 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -2113,6 +2113,16 @@ int YUY2ToI422(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
@@ -2414,6 +2424,14 @@ void MirrorPlane(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
@@ -2480,6 +2498,14 @@ void MirrorUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_MIRRORUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorUVRow = MirrorUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ MirrorUVRow = MirrorUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorUVRow = MirrorUVRow_Any_LASX;
@@ -2652,6 +2678,14 @@ int ARGBMirror(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
@@ -5314,6 +5348,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
diff --git a/source/rotate.cc b/source/rotate.cc
index b1b4458e..6797ff02 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -178,6 +178,14 @@ void RotatePlane180(const uint8_t* src,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc
index 28226210..9667f34c 100644
--- a/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -156,6 +156,14 @@ static int ARGBRotate180(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
diff --git a/source/row_any.cc b/source/row_any.cc
index 37aa1fba..27b12a7a 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -113,6 +113,9 @@ ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
+#ifdef HAS_I422ALPHATOARGBROW_LSX
+ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15)
+#endif
#ifdef HAS_I422ALPHATOARGBROW_LASX
ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
#endif
@@ -284,6 +287,9 @@ ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
+#ifdef HAS_I422TOYUY2ROW_LSX
+ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15)
+#endif
#ifdef HAS_I422TOYUY2ROW_LASX
ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31)
#endif
@@ -293,6 +299,9 @@ ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
+#ifdef HAS_I422TOUYVYROW_LSX
+ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15)
+#endif
#ifdef HAS_I422TOUYVYROW_LASX
ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31)
#endif
@@ -408,6 +417,14 @@ ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
+#ifdef HAS_I422TOARGBROW_LSX
+ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15)
+ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15)
+ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15)
+ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15)
+ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15)
+ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15)
+#endif
#ifdef HAS_I422TOARGBROW_LASX
ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31)
ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31)
@@ -1204,6 +1221,9 @@ ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_YUY2TOYROW_LSX
+ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_YUY2TOYROW_LASX
ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31)
#endif
@@ -1842,6 +1862,9 @@ ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
#ifdef HAS_MIRRORROW_MSA
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
#endif
+#ifdef HAS_MIRRORROW_LSX
+ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31)
+#endif
#ifdef HAS_MIRRORROW_LASX
ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
#endif
@@ -1857,6 +1880,9 @@ ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#ifdef HAS_MIRRORUVROW_MSA
ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7)
#endif
+#ifdef HAS_MIRRORUVROW_LSX
+ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7)
+#endif
#ifdef HAS_MIRRORUVROW_LASX
ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15)
#endif
@@ -1872,6 +1898,9 @@ ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7)
#ifdef HAS_ARGBMIRRORROW_MSA
ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#endif
+#ifdef HAS_ARGBMIRRORROW_LSX
+ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
+#endif
#ifdef HAS_ARGBMIRRORROW_LASX
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
#endif
@@ -1970,6 +1999,9 @@ ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15)
ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31)
ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#endif
+#ifdef HAS_YUY2TOUV422ROW_LSX
+ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15)
+#endif
#ifdef HAS_YUY2TOUV422ROW_LASX
ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31)
ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31)
@@ -2251,6 +2283,9 @@ ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
+#ifdef HAS_YUY2TOUVROW_LSX
+ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15)
+#endif
#ifdef HAS_YUY2TOUVROW_LASX
ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31)
#endif
diff --git a/source/row_lsx.cc b/source/row_lsx.cc
index 9c1e16f2..48baafb7 100644
--- a/source/row_lsx.cc
+++ b/source/row_lsx.cc
@@ -31,6 +31,91 @@ extern "C" {
yb = __lsx_vreplgr2vr_w(yuvconst->kYBiasToRgb[0]); \
}
+// Load 16 pixels of YUV422 data: 16 Y bytes plus 8 U and 8 V bytes.
+// The chroma bytes are biased by the caller's `const_80` (which must be in
+// scope at the expansion site), sign-extended to 16 bits and interleaved so
+// that V sits in the even halfword lanes and U in the odd ones.
+// uv_l carries the chroma for pixels 0..7 and uv_h for pixels 8..15.
+// Exactly 8 bytes are read from each chroma pointer (the amount the callers
+// advance them by), so the load cannot run past the end of the U/V rows.
+#define READYUV422_D(psrc_y, psrc_u, psrc_v, out_y, uv_l, uv_h) \
+  {                                                             \
+    __m128i temp0, temp1;                                       \
+                                                                \
+    out_y = __lsx_vld(psrc_y, 0);                               \
+    temp0 = __lsx_vldrepl_d(psrc_u, 0);                         \
+    temp1 = __lsx_vldrepl_d(psrc_v, 0);                         \
+    temp0 = __lsx_vsub_b(temp0, const_80);                      \
+    temp1 = __lsx_vsub_b(temp1, const_80);                      \
+    temp0 = __lsx_vsllwil_h_b(temp0, 0);                        \
+    temp1 = __lsx_vsllwil_h_b(temp1, 0);                        \
+    uv_l = __lsx_vilvl_h(temp0, temp1);                         \
+    uv_h = __lsx_vilvh_h(temp0, temp1);                         \
+  }
+
+// Load YUV422 data for the 8-pixel conversion path.
+// out_y receives 16 Y bytes, but uv only ends up with 4 V/U pairs (8 pixels'
+// worth of chroma): 8 U and 8 V bytes are loaded and byte-interleaved, biased
+// by the caller's `const_80` (must be in scope at the expansion site), and
+// only the low 8 interleaved bytes are widened to signed halfwords.
+#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, uv) \
+ { \
+ __m128i temp0, temp1; \
+ \
+ out_y = __lsx_vld(psrc_y, 0); \
+ temp0 = __lsx_vldrepl_d(psrc_u, 0); \
+ temp1 = __lsx_vldrepl_d(psrc_v, 0); \
+ uv = __lsx_vilvl_b(temp0, temp1); \
+ uv = __lsx_vsub_b(uv, const_80); \
+ uv = __lsx_vsllwil_h_b(uv, 0); \
+ }
+
+// Convert 16 pixels of YUV420 to RGB.
+// in_y holds 16 Y bytes. in_uvl / in_uvh hold signed 16-bit chroma with V in
+// the even halfword lanes and U in the odd ones (the even-lane products are
+// named v_*, the odd-lane products u_*). ubvr and ugvg are the correspondingly
+// interleaved coefficient vectors, yg scales Y and yb is added after the
+// (Y * yg) >> 16 step.
+// Blue adds the U product, red adds the V product and green subtracts the
+// dot product of chroma with ugvg. Each channel is computed in 32-bit lanes,
+// shifted right by 6, clamped with vclip255 and repacked into halfword lanes:
+// the *_l outputs come from the low 8 Y bytes, the *_h outputs from the high 8.
+#define YUVTORGB_D(in_y, in_uvl, in_uvh, ubvr, ugvg, yg, yb, b_l, \
+ b_h, g_l,g_h, r_l, r_h) \
+ { \
+ __m128i u_l, u_h, v_l, v_h; \
+ __m128i yl_ev, yl_od, yh_ev, yh_od; \
+ __m128i temp0, temp1, temp2, temp3; \
+ \
+ temp0 = __lsx_vilvl_b(in_y, in_y); \
+ temp1 = __lsx_vilvh_b(in_y, in_y); \
+ yl_ev = __lsx_vmulwev_w_hu_h(temp0, yg); \
+ yl_od = __lsx_vmulwod_w_hu_h(temp0, yg); \
+ yh_ev = __lsx_vmulwev_w_hu_h(temp1, yg); \
+ yh_od = __lsx_vmulwod_w_hu_h(temp1, yg); \
+ DUP4_ARG2(__lsx_vsrai_w, yl_ev, 16, yl_od, 16, yh_ev, 16, yh_od, 16, \
+ yl_ev, yl_od, yh_ev, yh_od); \
+ yl_ev = __lsx_vadd_w(yl_ev, yb); \
+ yl_od = __lsx_vadd_w(yl_od, yb); \
+ yh_ev = __lsx_vadd_w(yh_ev, yb); \
+ yh_od = __lsx_vadd_w(yh_od, yb); \
+ v_l = __lsx_vmulwev_w_h(in_uvl, ubvr); \
+ u_l = __lsx_vmulwod_w_h(in_uvl, ubvr); \
+ v_h = __lsx_vmulwev_w_h(in_uvh, ubvr); \
+ u_h = __lsx_vmulwod_w_h(in_uvh, ubvr); \
+ temp0 = __lsx_vadd_w(yl_ev, u_l); \
+ temp1 = __lsx_vadd_w(yl_od, u_l); \
+ temp2 = __lsx_vadd_w(yh_ev, u_h); \
+ temp3 = __lsx_vadd_w(yh_od, u_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ b_l = __lsx_vpackev_h(temp1, temp0); \
+ b_h = __lsx_vpackev_h(temp3, temp2); \
+ temp0 = __lsx_vadd_w(yl_ev, v_l); \
+ temp1 = __lsx_vadd_w(yl_od, v_l); \
+ temp2 = __lsx_vadd_w(yh_ev, v_h); \
+ temp3 = __lsx_vadd_w(yh_od, v_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ r_l = __lsx_vpackev_h(temp1, temp0); \
+ r_h = __lsx_vpackev_h(temp3, temp2); \
+ DUP2_ARG2(__lsx_vdp2_w_h, in_uvl, ugvg, in_uvh, ugvg, u_l, u_h); \
+ temp0 = __lsx_vsub_w(yl_ev, u_l); \
+ temp1 = __lsx_vsub_w(yl_od, u_l); \
+ temp2 = __lsx_vsub_w(yh_ev, u_h); \
+ temp3 = __lsx_vsub_w(yh_od, u_h); \
+ DUP4_ARG2(__lsx_vsrai_w, temp0, 6, temp1, 6, temp2, 6, temp3, 6, temp0, \
+ temp1, temp2, temp3); \
+ DUP4_ARG1(__lsx_vclip255_w, temp0, temp1, temp2, temp3, temp0, temp1, \
+ temp2, temp3); \
+ g_l = __lsx_vpackev_h(temp1, temp0); \
+ g_h = __lsx_vpackev_h(temp3, temp2); \
+ }
+
// Convert 8 pixels of YUV420 to RGB.
#define YUVTORGB(in_y, in_vu, vrub, vgug, yg, yb, out_b, out_g, out_r) \
{ \
@@ -118,6 +203,26 @@ extern "C" {
out_g = __lsx_vpackev_h(tmp1, tmp0); \
}
+// Pack and Store 16 ARGB values.
+// Each input is a vector of halfwords whose low byte carries the channel
+// value. The even bytes are interleaved into (b, g, r, a) byte order and
+// written as four 16-byte stores.
+// Side effects: the arguments passed as r_l, r_h, g_l and g_h are overwritten
+// (they are reused as scratch for the packed output), and pdst_argb is
+// advanced by 64 bytes. The a_* and b_* arguments are only read.
+#define STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, pdst_argb) \
+ { \
+ \
+ __m128i temp0, temp1, temp2, temp3; \
+ temp0 = __lsx_vpackev_b(g_l, b_l); \
+ temp1 = __lsx_vpackev_b(a_l, r_l); \
+ temp2 = __lsx_vpackev_b(g_h, b_h); \
+ temp3 = __lsx_vpackev_b(a_h, r_h); \
+ r_l = __lsx_vilvl_h(temp1, temp0); \
+ r_h = __lsx_vilvh_h(temp1, temp0); \
+ g_l = __lsx_vilvl_h(temp3, temp2); \
+ g_h = __lsx_vilvh_h(temp3, temp2); \
+ __lsx_vst(r_l, pdst_argb, 0); \
+ __lsx_vst(r_h, pdst_argb, 16); \
+ __lsx_vst(g_l, pdst_argb, 32); \
+ __lsx_vst(g_h, pdst_argb, 48); \
+ pdst_argb += 64; \
+ }
+
// Pack and Store 8 ARGB values.
#define STOREARGB(in_a, in_r, in_g, in_b, pdst_argb) \
{ \
@@ -155,6 +260,460 @@ extern "C" {
_dst0 = __lsx_vpickod_b(_reg1, _reg0); \
}
+// Reverse a row of bytes so that dst[0] receives src[width - 1].
+// The source is walked backwards from its tail in 32-byte chunks; any
+// width % 32 leftover bytes are not processed here.
+void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) {
+  // Byte indices 15..0: reverses all 16 bytes of a vector.
+  const __m128i reverse_bytes = {0x08090A0B0C0D0E0F, 0x0001020304050607};
+  const uint8_t* tail = src + (width - 32);
+  int chunks = width / 32;
+
+  while (chunks-- > 0) {
+    __m128i lo = __lsx_vld(tail, 0);
+    __m128i hi = __lsx_vld(tail, 16);
+    lo = __lsx_vshuf_b(lo, lo, reverse_bytes);
+    hi = __lsx_vshuf_b(hi, hi, reverse_bytes);
+    // The two halves swap places as well as being reversed internally.
+    __lsx_vst(hi, dst, 0);
+    __lsx_vst(lo, dst, 16);
+    dst += 32;
+    tail -= 32;
+  }
+}
+
+// Reverse the order of interleaved UV pairs in a row, keeping each pair's
+// internal byte order. Handles 8 pairs (16 bytes) per iteration; any
+// width % 8 leftover pairs are not processed here.
+void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
+  // Halfword indices 7..0: reverses the 8 UV pairs of a vector.
+  const __m128i reverse_pairs = {0x0004000500060007, 0x0000000100020003};
+  const uint8_t* tail = src_uv + ((width - 8) << 1);
+  const int chunks = width / 8;
+
+  for (int i = 0; i < chunks; ++i) {
+    __m128i pairs = __lsx_vld(tail, 0);
+    __m128i mirrored = __lsx_vshuf_h(reverse_pairs, pairs, pairs);
+    __lsx_vst(mirrored, dst_uv, 0);
+    dst_uv += 16;
+    tail -= 16;
+  }
+}
+
+// Reverse the order of 4-byte ARGB pixels in a row, keeping the channel order
+// inside each pixel. Handles 8 pixels (32 bytes) per iteration; any
+// width % 8 leftover pixels are not processed here.
+void ARGBMirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width) {
+  // Reverses the four 32-bit lanes of a vector without touching their bytes.
+  const __m128i reverse_pixels = {0x0B0A09080F0E0D0C, 0x0302010007060504};
+  const uint8_t* tail = src + ((width * 4) - 32);
+  int chunks = width / 8;
+
+  while (chunks-- > 0) {
+    __m128i lo = __lsx_vld(tail, 0);
+    __m128i hi = __lsx_vld(tail, 16);
+    lo = __lsx_vshuf_b(lo, lo, reverse_pixels);
+    hi = __lsx_vshuf_b(hi, hi, reverse_pixels);
+    __lsx_vst(hi, dst, 0);
+    __lsx_vst(lo, dst, 16);
+    dst += 32;
+    tail -= 32;
+  }
+}
+
+// Interleave planar I422 (Y, U, V) into packed YUY2 (Y0 U0 Y1 V0 ...).
+// Each iteration consumes 16 Y, 8 U and 8 V bytes and writes 32 bytes; any
+// width % 16 leftover pixels are not processed here.
+void I422ToYUY2Row_LSX(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_yuy2,
+                       int width) {
+  int x;
+  int len = width / 16;
+  __m128i src_u0, src_v0, src_y0, vec_uv0;
+  __m128i vec_yuy2_0, vec_yuy2_1;
+
+  for (x = 0; x < len; x++) {
+    // Only the low 8 chroma bytes are used below, so load exactly 8 bytes
+    // (replicated) rather than 16 to avoid reading past the end of the U/V
+    // rows on the last iteration.
+    src_u0 = __lsx_vldrepl_d(src_u, 0);
+    src_v0 = __lsx_vldrepl_d(src_v, 0);
+    src_y0 = __lsx_vld(src_y, 0);
+    vec_uv0 = __lsx_vilvl_b(src_v0, src_u0);  // U0 V0 U1 V1 ...
+    vec_yuy2_0 = __lsx_vilvl_b(vec_uv0, src_y0);
+    vec_yuy2_1 = __lsx_vilvh_b(vec_uv0, src_y0);
+    __lsx_vst(vec_yuy2_0, dst_yuy2, 0);
+    __lsx_vst(vec_yuy2_1, dst_yuy2, 16);
+    src_u += 8;
+    src_v += 8;
+    src_y += 16;
+    dst_yuy2 += 32;
+  }
+}
+
+// Interleave planar I422 (Y, U, V) into packed UYVY (U0 Y0 V0 Y1 ...).
+// Each iteration consumes 16 Y, 8 U and 8 V bytes and writes 32 bytes; any
+// width % 16 leftover pixels are not processed here.
+void I422ToUYVYRow_LSX(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_uyvy,
+                       int width) {
+  int x;
+  int len = width / 16;
+  __m128i src_u0, src_v0, src_y0, vec_uv0;
+  __m128i vec_uyvy0, vec_uyvy1;
+
+  for (x = 0; x < len; x++) {
+    // Only the low 8 chroma bytes are used below, so load exactly 8 bytes
+    // (replicated) rather than 16 to avoid reading past the end of the U/V
+    // rows on the last iteration.
+    src_u0 = __lsx_vldrepl_d(src_u, 0);
+    src_v0 = __lsx_vldrepl_d(src_v, 0);
+    src_y0 = __lsx_vld(src_y, 0);
+    vec_uv0 = __lsx_vilvl_b(src_v0, src_u0);  // U0 V0 U1 V1 ...
+    vec_uyvy0 = __lsx_vilvl_b(src_y0, vec_uv0);
+    vec_uyvy1 = __lsx_vilvh_b(src_y0, vec_uv0);
+    __lsx_vst(vec_uyvy0, dst_uyvy, 0);
+    __lsx_vst(vec_uyvy1, dst_uyvy, 16);
+    src_u += 8;
+    src_v += 8;
+    src_y += 16;
+    dst_uyvy += 32;
+  }
+}
+
+// Convert a row of I422 to ARGB with a constant alpha vector, 16 pixels per
+// iteration. STOREARGB_D advances dst_argb by 64 bytes itself. Any
+// width % 16 leftover pixels are not processed here.
+void I422ToARGBRow_LSX(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  __m128i vec_yb, vec_yg, vec_ub, vec_ug, vec_vr, vec_vg;
+  // READYUV422_D refers to `const_80` by name, so this local must keep it.
+  __m128i const_80 = __lsx_vldi(0x80);
+  __m128i opaque = __lsx_vldi(0xFF);
+
+  YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+  __m128i vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+  __m128i vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i luma, chroma_lo, chroma_hi;
+    __m128i blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi;
+
+    READYUV422_D(src_y, src_u, src_v, luma, chroma_lo, chroma_hi);
+    YUVTORGB_D(luma, chroma_lo, chroma_hi, vec_ubvr, vec_ugvg, vec_yg, vec_yb,
+               blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi);
+    STOREARGB_D(opaque, opaque, red_lo, red_hi, green_lo, green_hi, blue_lo,
+                blue_hi, dst_argb);
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+  }
+}
+
+// Convert a row of I422 to RGBA, 16 pixels per iteration.
+// The channel vectors are handed to STOREARGB_D shifted by one slot (red in
+// the alpha slot, green in red, blue in green, the constant alpha vector in
+// blue), which yields A, B, G, R byte order in memory. STOREARGB_D advances
+// dst_argb itself. Any width % 16 leftover pixels are not processed here.
+void I422ToRGBARow_LSX(const uint8_t* src_y,
+                       const uint8_t* src_u,
+                       const uint8_t* src_v,
+                       uint8_t* dst_argb,
+                       const struct YuvConstants* yuvconstants,
+                       int width) {
+  __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+  // READYUV422_D refers to `const_80` by name, so this local must keep it.
+  __m128i const_80 = __lsx_vldi(0x80);
+  __m128i opaque = __lsx_vldi(0xFF);
+
+  YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+  __m128i vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+  __m128i vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i luma, chroma_lo, chroma_hi;
+    __m128i blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi;
+
+    READYUV422_D(src_y, src_u, src_v, luma, chroma_lo, chroma_hi);
+    YUVTORGB_D(luma, chroma_lo, chroma_hi, vec_ubvr, vec_ugvg, vec_yg, vec_yb,
+               blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi);
+    STOREARGB_D(red_lo, red_hi, green_lo, green_hi, blue_lo, blue_hi, opaque,
+                opaque, dst_argb);
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+  }
+}
+
+// Convert a row of I422 plus a separate alpha plane to ARGB.
+// The main loop handles 16 pixels per iteration; a single extra group is
+// processed afterwards when width is not a multiple of 16.
+void I422AlphaToARGBRow_LSX(const uint8_t* src_y,
+ const uint8_t* src_u,
+ const uint8_t* src_v,
+ const uint8_t* src_a,
+ uint8_t* dst_argb,
+ const struct YuvConstants* yuvconstants,
+ int width) {
+ int x;
+ int len = width / 16;
+ int res = width & 15;
+ __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+ __m128i vec_ubvr, vec_ugvg;
+ __m128i zero = __lsx_vldi(0);
+ // Referenced by name inside the READYUV422* macros.
+ __m128i const_80 = __lsx_vldi(0x80);
+
+ YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+ vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+ vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+ for (x = 0; x < len; x++) {
+ __m128i y, uv_l, uv_h, b_l, b_h, g_l, g_h, r_l, r_h, a_l, a_h;
+
+ // `y` is borrowed as a scratch register for the 16 alpha bytes, which are
+ // zero-extended to halfwords before `y` is reloaded with luma.
+ y = __lsx_vld(src_a, 0);
+ a_l = __lsx_vilvl_b(zero, y);
+ a_h = __lsx_vilvh_b(zero, y);
+ READYUV422_D(src_y, src_u, src_v, y, uv_l, uv_h);
+ YUVTORGB_D(y, uv_l, uv_h, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b_l, b_h, g_l,
+ g_h, r_l, r_h);
+ STOREARGB_D(a_l, a_h, r_l, r_h, g_l, g_h, b_l, b_h, dst_argb);
+ src_y += 16;
+ src_u += 8;
+ src_v += 8;
+ src_a += 16;
+ }
+ // NOTE(review): this tail runs once for any res in 1..15 and goes through
+ // the macros documented as 8-pixel (YUVTORGB / STOREARGB), so it appears to
+ // convert a fixed group rather than exactly `res` pixels -- confirm that
+ // writes stay inside the row and that res > 8 is covered by the caller.
+ // NOTE(review): YUVTORGB names its coefficient parameters (vrub, vgug) while
+ // vec_ubvr / vec_ugvg are passed here, and READYUV422 has already biased and
+ // widened the chroma -- verify against the YUVTORGB body, which is not
+ // visible in this hunk.
+ if (res) {
+ __m128i y, uv, r, g, b, a;
+ a = __lsx_vld(src_a, 0);
+ a = __lsx_vsllwil_hu_bu(a, 0);
+ READYUV422(src_y, src_u, src_v, y, uv);
+ YUVTORGB(y, uv, vec_ubvr, vec_ugvg, vec_yg, vec_yb, b, g, r);
+ STOREARGB(a, r, g, b, dst_argb);
+ }
+}
+
+// Convert a row of I422 to packed 24-bit RGB (B, G, R byte order in memory),
+// 16 pixels (48 output bytes) per iteration. Any width % 16 leftover pixels
+// are not processed here.
+void I422ToRGB24Row_LSX(const uint8_t* src_y,
+                        const uint8_t* src_u,
+                        const uint8_t* src_v,
+                        uint8_t* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int32_t width) {
+  __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+  // READYUV422_D refers to `const_80` by name, so this local must keep it.
+  __m128i const_80 = __lsx_vldi(0x80);
+  // Indices 0x00-0x0F pick from the interleaved B/G vector, 0x10-0x1F from
+  // the red vector. pick_head builds the first 16 output bytes of an 8-pixel
+  // group, pick_tail the remaining 8 (duplicated in both halves).
+  const __m128i pick_head = {0x0504120302100100, 0x0A18090816070614};
+  const __m128i pick_tail = {0x1E0F0E1C0D0C1A0B, 0x1E0F0E1C0D0C1A0B};
+
+  YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+  __m128i vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+  __m128i vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i luma, chroma_lo, chroma_hi;
+    __m128i blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi;
+
+    READYUV422_D(src_y, src_u, src_v, luma, chroma_lo, chroma_hi);
+    YUVTORGB_D(luma, chroma_lo, chroma_hi, vec_ubvr, vec_ugvg, vec_yg, vec_yb,
+               blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi);
+
+    __m128i bg_lo = __lsx_vpackev_b(green_lo, blue_lo);
+    __m128i bg_hi = __lsx_vpackev_b(green_hi, blue_hi);
+    __m128i tail_lo = __lsx_vshuf_b(red_lo, bg_lo, pick_tail);
+    __m128i tail_hi = __lsx_vshuf_b(red_hi, bg_hi, pick_tail);
+    __m128i head_lo = __lsx_vshuf_b(red_lo, bg_lo, pick_head);
+    __m128i head_hi = __lsx_vshuf_b(red_hi, bg_hi, pick_head);
+
+    // Stitch the 24-byte groups together into three full vectors.
+    __m128i middle = __lsx_vilvl_d(head_hi, tail_lo);
+    __m128i last = __lsx_vilvh_d(tail_hi, head_hi);
+    __lsx_vst(head_lo, dst_argb, 0);
+    __lsx_vst(middle, dst_argb, 16);
+    __lsx_vst(last, dst_argb, 32);
+    dst_argb += 48;
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+  }
+}
+
+// Convert a row of I422 to RGB565 (red in bits 15..11, green in 10..5, blue in
+// 4..0 of each 16-bit pixel), 16 pixels per iteration. Any width % 16
+// leftover pixels are not processed here.
+// TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
+void I422ToRGB565Row_LSX(const uint8_t* src_y,
+                         const uint8_t* src_u,
+                         const uint8_t* src_v,
+                         uint8_t* dst_rgb565,
+                         const struct YuvConstants* yuvconstants,
+                         int width) {
+  __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+  // READYUV422_D refers to `const_80` by name, so this local must keep it.
+  __m128i const_80 = __lsx_vldi(0x80);
+
+  YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+  __m128i vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+  __m128i vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i luma, chroma_lo, chroma_hi;
+    __m128i blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi;
+
+    READYUV422_D(src_y, src_u, src_v, luma, chroma_lo, chroma_hi);
+    YUVTORGB_D(luma, chroma_lo, chroma_hi, vec_ubvr, vec_ugvg, vec_yg, vec_yb,
+               blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi);
+
+    // Keep the top 5/6/5 bits of each channel and move them into place.
+    blue_lo = __lsx_vsrli_h(blue_lo, 3);
+    blue_hi = __lsx_vsrli_h(blue_hi, 3);
+    green_lo = __lsx_vslli_h(__lsx_vsrli_h(green_lo, 2), 5);
+    green_hi = __lsx_vslli_h(__lsx_vsrli_h(green_hi, 2), 5);
+    red_lo = __lsx_vslli_h(__lsx_vsrli_h(red_lo, 3), 11);
+    red_hi = __lsx_vslli_h(__lsx_vsrli_h(red_hi, 3), 11);
+
+    __m128i packed_lo = __lsx_vor_v(__lsx_vor_v(red_lo, green_lo), blue_lo);
+    __m128i packed_hi = __lsx_vor_v(__lsx_vor_v(red_hi, green_hi), blue_hi);
+    __lsx_vst(packed_lo, dst_rgb565, 0);
+    __lsx_vst(packed_hi, dst_rgb565, 16);
+    dst_rgb565 += 32;
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+  }
+}
+
+// Convert a row of I422 to ARGB4444 (alpha nibble forced to 0xF, then red,
+// green, blue nibbles from high to low in each 16-bit pixel), 16 pixels per
+// iteration. Any width % 16 leftover pixels are not processed here.
+// TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G.
+void I422ToARGB4444Row_LSX(const uint8_t* src_y,
+                           const uint8_t* src_u,
+                           const uint8_t* src_v,
+                           uint8_t* dst_argb4444,
+                           const struct YuvConstants* yuvconstants,
+                           int width) {
+  __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+  // READYUV422_D refers to `const_80` by name, so this local must keep it.
+  __m128i const_80 = __lsx_vldi(0x80);
+  const __m128i alpha_bits = {0xF000F000F000F000, 0xF000F000F000F000};
+  const __m128i green_mask = {0x00F000F000F000F0, 0x00F000F000F000F0};
+
+  YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+  __m128i vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+  __m128i vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i luma, chroma_lo, chroma_hi;
+    __m128i blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi;
+
+    READYUV422_D(src_y, src_u, src_v, luma, chroma_lo, chroma_hi);
+    YUVTORGB_D(luma, chroma_lo, chroma_hi, vec_ubvr, vec_ugvg, vec_yg, vec_yb,
+               blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi);
+
+    // Keep the top nibble of each channel; green is already in bits 7..4.
+    blue_lo = __lsx_vsrli_h(blue_lo, 4);
+    blue_hi = __lsx_vsrli_h(blue_hi, 4);
+    green_lo = __lsx_vand_v(green_lo, green_mask);
+    green_hi = __lsx_vand_v(green_hi, green_mask);
+    red_lo = __lsx_vslli_h(__lsx_vsrli_h(red_lo, 4), 8);
+    red_hi = __lsx_vslli_h(__lsx_vsrli_h(red_hi, 4), 8);
+
+    __m128i packed_lo = __lsx_vor_v(red_lo, alpha_bits);
+    __m128i packed_hi = __lsx_vor_v(red_hi, alpha_bits);
+    packed_lo = __lsx_vor_v(packed_lo, green_lo);
+    packed_hi = __lsx_vor_v(packed_hi, green_hi);
+    packed_lo = __lsx_vor_v(packed_lo, blue_lo);
+    packed_hi = __lsx_vor_v(packed_hi, blue_hi);
+    __lsx_vst(packed_lo, dst_argb4444, 0);
+    __lsx_vst(packed_hi, dst_argb4444, 16);
+    dst_argb4444 += 32;
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+  }
+}
+
+// Convert a row of I422 to ARGB1555 (alpha bit 15 forced on, red in bits
+// 14..10, green in 9..5, blue in 4..0), 16 pixels per iteration. Any
+// width % 16 leftover pixels are not processed here.
+void I422ToARGB1555Row_LSX(const uint8_t* src_y,
+                           const uint8_t* src_u,
+                           const uint8_t* src_v,
+                           uint8_t* dst_argb1555,
+                           const struct YuvConstants* yuvconstants,
+                           int width) {
+  __m128i vec_yb, vec_yg, vec_ub, vec_vr, vec_ug, vec_vg;
+  // READYUV422_D refers to `const_80` by name, so this local must keep it.
+  __m128i const_80 = __lsx_vldi(0x80);
+  const __m128i alpha_bit = {0x8000800080008000, 0x8000800080008000};
+
+  YUVTORGB_SETUP(yuvconstants, vec_vr, vec_ub, vec_vg, vec_ug, vec_yg, vec_yb);
+  __m128i vec_ubvr = __lsx_vilvl_h(vec_ub, vec_vr);
+  __m128i vec_ugvg = __lsx_vilvl_h(vec_ug, vec_vg);
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i luma, chroma_lo, chroma_hi;
+    __m128i blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi;
+
+    READYUV422_D(src_y, src_u, src_v, luma, chroma_lo, chroma_hi);
+    YUVTORGB_D(luma, chroma_lo, chroma_hi, vec_ubvr, vec_ugvg, vec_yg, vec_yb,
+               blue_lo, blue_hi, green_lo, green_hi, red_lo, red_hi);
+
+    // Keep the top 5 bits of each channel and move them into place.
+    blue_lo = __lsx_vsrli_h(blue_lo, 3);
+    blue_hi = __lsx_vsrli_h(blue_hi, 3);
+    green_lo = __lsx_vslli_h(__lsx_vsrli_h(green_lo, 3), 5);
+    green_hi = __lsx_vslli_h(__lsx_vsrli_h(green_hi, 3), 5);
+    red_lo = __lsx_vslli_h(__lsx_vsrli_h(red_lo, 3), 10);
+    red_hi = __lsx_vslli_h(__lsx_vsrli_h(red_hi, 3), 10);
+
+    __m128i packed_lo = __lsx_vor_v(red_lo, alpha_bit);
+    __m128i packed_hi = __lsx_vor_v(red_hi, alpha_bit);
+    packed_lo = __lsx_vor_v(packed_lo, green_lo);
+    packed_hi = __lsx_vor_v(packed_hi, green_hi);
+    packed_lo = __lsx_vor_v(packed_lo, blue_lo);
+    packed_hi = __lsx_vor_v(packed_hi, blue_hi);
+    __lsx_vst(packed_lo, dst_argb1555, 0);
+    __lsx_vst(packed_hi, dst_argb1555, 16);
+    dst_argb1555 += 32;
+    src_y += 16;
+    src_u += 8;
+    src_v += 8;
+  }
+}
+
+// Extract the luma bytes (the even bytes) from a packed YUY2 row.
+// Each iteration reads 32 source bytes and writes 16 Y bytes; any
+// width % 16 leftover pixels are not processed here.
+void YUY2ToYRow_LSX(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i first = __lsx_vld(src_yuy2, 0);
+    __m128i second = __lsx_vld(src_yuy2, 16);
+    __m128i luma = __lsx_vpickev_b(second, first);
+    __lsx_vst(luma, dst_y, 0);
+    dst_y += 16;
+    src_yuy2 += 32;
+  }
+}
+
+// Extract U and V from two vertically adjacent YUY2 rows, averaging the
+// chroma of the row at src_yuy2 with the row src_stride_yuy2 bytes after it
+// (rounding average). Each iteration covers 16 pixels and writes 8 U and 8 V
+// bytes; any width % 16 leftover pixels are not processed here.
+void YUY2ToUVRow_LSX(const uint8_t* src_yuy2,
+                     int src_stride_yuy2,
+                     uint8_t* dst_u,
+                     uint8_t* dst_v,
+                     int width) {
+  const uint8_t* next_row = src_yuy2 + src_stride_yuy2;
+
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i top_a = __lsx_vld(src_yuy2, 0);
+    __m128i top_b = __lsx_vld(src_yuy2, 16);
+    __m128i bottom_a = __lsx_vld(next_row, 0);
+    __m128i bottom_b = __lsx_vld(next_row, 16);
+
+    // The odd bytes of YUY2 are the interleaved chroma samples (U V U V ...).
+    __m128i chroma_top = __lsx_vpickod_b(top_b, top_a);
+    __m128i chroma_bottom = __lsx_vpickod_b(bottom_b, bottom_a);
+    __m128i chroma = __lsx_vavgr_bu(chroma_bottom, chroma_top);
+
+    __m128i u_bytes = __lsx_vpickev_b(chroma, chroma);
+    __m128i v_bytes = __lsx_vpickod_b(chroma, chroma);
+    __lsx_vstelm_d(u_bytes, dst_u, 0, 0);
+    __lsx_vstelm_d(v_bytes, dst_v, 0, 0);
+
+    src_yuy2 += 32;
+    next_row += 32;
+    dst_u += 8;
+    dst_v += 8;
+  }
+}
+
+// Split the chroma of a single packed YUY2 row into separate U and V rows
+// (no vertical averaging). Each iteration covers 16 pixels and writes 8 U and
+// 8 V bytes; any width % 16 leftover pixels are not processed here.
+void YUY2ToUV422Row_LSX(const uint8_t* src_yuy2,
+                        uint8_t* dst_u,
+                        uint8_t* dst_v,
+                        int width) {
+  for (int chunks = width / 16; chunks > 0; --chunks) {
+    __m128i first = __lsx_vld(src_yuy2, 0);
+    __m128i second = __lsx_vld(src_yuy2, 16);
+
+    // The odd bytes of YUY2 are the interleaved chroma samples (U V U V ...).
+    __m128i chroma = __lsx_vpickod_b(second, first);
+    __m128i u_bytes = __lsx_vpickev_b(chroma, chroma);
+    __m128i v_bytes = __lsx_vpickod_b(chroma, chroma);
+    __lsx_vstelm_d(u_bytes, dst_u, 0, 0);
+    __lsx_vstelm_d(v_bytes, dst_v, 0, 0);
+
+    src_yuy2 += 32;
+    dst_u += 8;
+    dst_v += 8;
+  }
+}
+
void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444,
uint8_t* dst_argb,
int width) {
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 3e6f5477..7e78cc1b 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -659,6 +659,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
}
}
#endif
+#if defined(HAS_I422TOARGBROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ I422ToARGBRow = I422ToARGBRow_Any_LSX;
+ if (IS_ALIGNED(src_width, 16)) {
+ I422ToARGBRow = I422ToARGBRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_I422TOARGBROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
I422ToARGBRow = I422ToARGBRow_Any_LASX;