diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:41:11 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:41:11 +0000 |
commit | c7328b4bdec80237c84f23470d1e95eb352d632d (patch) | |
tree | 25cbeae94c26cad28a13bb089a662291b4b5d905 /files/include/libyuv/row.h | |
parent | 6d2ffb9ab7f8de0fc513aebaeff15cbec1354240 (diff) | |
parent | 3c4c137522d3a6759f2d96aee67149410ca2c1b9 (diff) | |
download | libyuv-c7328b4bdec80237c84f23470d1e95eb352d632d.tar.gz |
Snap for 10453563 from 3c4c137522d3a6759f2d96aee67149410ca2c1b9 to mainline-adservices-releaseaml_ads_341027030aml_ads_340915050
Change-Id: Iad33de02eee422466c2bdbddf87f82ec5897883e
Diffstat (limited to 'files/include/libyuv/row.h')
-rw-r--r-- | files/include/libyuv/row.h | 2705 |
1 files changed, 2101 insertions, 604 deletions
diff --git a/files/include/libyuv/row.h b/files/include/libyuv/row.h index 9bb48850..1a1cf4b6 100644 --- a/files/include/libyuv/row.h +++ b/files/include/libyuv/row.h @@ -74,8 +74,10 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) // Conversions: -#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVROW_SSSE3 +#endif #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBEXTRACTALPHAROW_SSE2 @@ -87,18 +89,19 @@ extern "C" { #define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565ROW_SSE2 +#define HAS_ARGBTOYJROW_SSSE3 +#define HAS_ARGBTOYROW_SSSE3 +#define HAS_BGRATOYROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) #define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 -#define HAS_ARGBTOYJROW_SSSE3 -#define HAS_ARGBTOYROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 -#define HAS_BGRATOYROW_SSSE3 +#endif #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_H422TOARGBROW_SSSE3 #define HAS_HALFFLOATROW_SSE2 -#define HAS_I400TOARGBROW_SSE2 #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 @@ -108,11 +111,12 @@ extern "C" { #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 +#define HAS_INTERPOLATEROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 #define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORUVROW_SSSE3 +#define HAS_MIRRORSPLITUVROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV12TORGB24ROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 @@ -124,8 +128,12 @@ extern "C" { #define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 -#define HAS_RGBATOUVROW_SSSE3 #define HAS_RGBATOYROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_RGB24TOYJROW_SSSE3 +#define HAS_RAWTOYJROW_SSSE3 +#define HAS_RGBATOUVROW_SSSE3 +#endif #define HAS_SETROW_ERMS #define HAS_SETROW_X86 #define HAS_SPLITUVROW_SSE2 @@ -141,7 +149,9 @@ extern "C" { // Effects: #define HAS_ARGBADDROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2 +#if !defined(LIBYUV_BIT_EXACT) #define HAS_ARGBATTENUATEROW_SSSE3 +#endif #define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORTABLEROW_X86 @@ -160,7 +170,6 @@ extern "C" { #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -#define HAS_INTERPOLATEROW_SSSE3 #define HAS_RGBCOLORTABLEROW_X86 #define HAS_SOBELROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2 @@ -175,6 +184,7 @@ extern "C" { // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_SSSE3 +#define HAS_I444ALPHATOARGBROW_SSSE3 #endif #endif @@ -190,15 +200,18 @@ extern "C" { #define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBTORGB565DITHERROW_AVX2 -#define HAS_ARGBTOUVJROW_AVX2 -#define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 +#define HAS_RAWTOYJROW_AVX2 +#define HAS_RGB24TOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ARGBTOUVJROW_AVX2 +#define HAS_ARGBTOUVROW_AVX2 +#endif #define HAS_COPYROW_AVX #define HAS_H422TOARGBROW_AVX2 #define HAS_HALFFLOATROW_AVX2 // #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast -#define HAS_I400TOARGBROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 @@ -227,7 +240,9 @@ extern "C" { // Effects: #define HAS_ARGBADDROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) #define HAS_ARGBATTENUATEROW_AVX2 +#endif #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 @@ -238,13 +253,14 @@ extern "C" { // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_AVX2 +#define HAS_I444ALPHATOARGBROW_AVX2 #endif #endif -// The following are available for AVX2 Visual C and clangcl 32 bit: +// The following are available for AVX2 Visual C 32 bit: // TODO(fbarchard): Port to gcc. #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ - (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) + !defined(__clang__) && defined(VISUALC_HAS_AVX2) #define HAS_ARGB1555TOARGBROW_AVX2 #define HAS_ARGB4444TOARGBROW_AVX2 #define HAS_ARGBTOARGB1555ROW_AVX2 @@ -257,62 +273,148 @@ extern "C" { // The following are also available on x64 Visual C. #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \ (!defined(__clang__) || defined(__SSSE3__)) +#define HAS_I444ALPHATOARGBROW_SSSE3 +#define HAS_I444TOARGBROW_SSSE3 #define HAS_I422ALPHATOARGBROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 #endif // The following are available for gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) #define HAS_ABGRTOAR30ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3 +#define HAS_ARGBTOAR64ROW_SSSE3 +#define HAS_ARGBTOAB64ROW_SSSE3 +#define HAS_AR64TOARGBROW_SSSE3 +#define HAS_AB64TOARGBROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 -// I210 is for H010. 2 = 422. I for 601 vs H for 709. +#define HAS_DETILEROW_SSE2 +#define HAS_DETILESPLITUVROW_SSSE3 +#define HAS_HALFMERGEUVROW_SSSE3 #define HAS_I210TOAR30ROW_SSSE3 #define HAS_I210TOARGBROW_SSSE3 +#define HAS_I212TOAR30ROW_SSSE3 +#define HAS_I212TOARGBROW_SSSE3 +#define HAS_I400TOARGBROW_SSE2 #define HAS_I422TOAR30ROW_SSSE3 +#define HAS_I410TOAR30ROW_SSSE3 +#define HAS_I410TOARGBROW_SSSE3 +#define HAS_MERGEARGBROW_SSE2 +#define HAS_MERGEXRGBROW_SSE2 #define HAS_MERGERGBROW_SSSE3 +#define HAS_MIRRORUVROW_SSSE3 +#define HAS_NV21TOYUV24ROW_SSSE3 +#define HAS_P210TOAR30ROW_SSSE3 +#define HAS_P210TOARGBROW_SSSE3 +#define HAS_P410TOAR30ROW_SSSE3 +#define HAS_P410TOARGBROW_SSSE3 +#define HAS_RAWTORGBAROW_SSSE3 +#define HAS_RGB24MIRRORROW_SSSE3 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_RGBATOYJROW_SSSE3 +#endif +#define HAS_SPLITARGBROW_SSE2 +#define HAS_SPLITARGBROW_SSSE3 +#define HAS_SPLITXRGBROW_SSE2 +#define HAS_SPLITXRGBROW_SSSE3 #define HAS_SPLITRGBROW_SSSE3 +#define HAS_SWAPUVROW_SSSE3 + +#if defined(__x86_64__) || !defined(__pic__) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I210ALPHATOARGBROW_SSSE3 +#define HAS_I410ALPHATOARGBROW_SSSE3 +#endif #endif // The following are available for AVX2 gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__)) && \ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) #define HAS_ABGRTOAR30ROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_ABGRTOUVROW_AVX2 +#define HAS_ABGRTOYROW_AVX2 +#endif #define HAS_ARGBTOAR30ROW_AVX2 #define HAS_ARGBTORAWROW_AVX2 #define HAS_ARGBTORGB24ROW_AVX2 +#define HAS_ARGBTOAR64ROW_AVX2 +#define HAS_ARGBTOAB64ROW_AVX2 +#define HAS_AR64TOARGBROW_AVX2 +#define HAS_AB64TOARGBROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 +#define HAS_INTERPOLATEROW_16TO8_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 +#define HAS_DIVIDEROW_16_AVX2 +#define HAS_HALFMERGEUVROW_AVX2 +#define HAS_MERGEAR64ROW_AVX2 +#define HAS_MERGEARGB16TO8ROW_AVX2 +#define HAS_MERGEARGBROW_AVX2 +#define HAS_MERGEXR30ROW_AVX2 +#define HAS_MERGEXR64ROW_AVX2 +#define HAS_MERGEXRGB16TO8ROW_AVX2 +#define HAS_MERGEXRGBROW_AVX2 +#define HAS_NV21TOYUV24ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2 #define HAS_I210TOARGBROW_AVX2 +#define HAS_I212TOAR30ROW_AVX2 +#define HAS_I212TOARGBROW_AVX2 +#define HAS_I400TOARGBROW_AVX2 +#define HAS_I410TOAR30ROW_AVX2 +#define HAS_I410TOARGBROW_AVX2 +#define HAS_P210TOAR30ROW_AVX2 +#define HAS_P210TOARGBROW_AVX2 +#define HAS_P410TOAR30ROW_AVX2 +#define HAS_P410TOARGBROW_AVX2 #define HAS_I422TOAR30ROW_AVX2 #define HAS_I422TOUYVYROW_AVX2 #define HAS_I422TOYUY2ROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 +#define HAS_MIRRORUVROW_AVX2 #define HAS_MULTIPLYROW_16_AVX2 -// TODO(fbarchard): Fix AVX2 version of YUV24 -// #define HAS_NV21TOYUV24ROW_AVX2 +#if !defined(LIBYUV_BIT_EXACT) +#define HAS_RGBATOYJROW_AVX2 +#endif +#define HAS_SPLITARGBROW_AVX2 +#define HAS_SPLITXRGBROW_AVX2 +#define HAS_SPLITUVROW_16_AVX2 +#define HAS_SWAPUVROW_AVX2 + +#if defined(__x86_64__) || !defined(__pic__) +// TODO(fbarchard): fix build error on android_full_debug=1 +// https://code.google.com/p/libyuv/issues/detail?id=517 +#define HAS_I210ALPHATOARGBROW_AVX2 +#define HAS_I410ALPHATOARGBROW_AVX2 +#endif #endif // The following are available for AVX512 clang x86 platforms: // TODO(fbarchard): Port to GCC and Visual C // TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789 -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ - (defined(CLANG_HAS_AVX512)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__)) && (defined(CLANG_HAS_AVX512)) #define HAS_ARGBTORGB24ROW_AVX512VBMI #endif +// The following are available for AVX512 clang x64 platforms: +// TODO(fbarchard): Port to x86 +#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \ + (defined(CLANG_HAS_AVX512)) +#define HAS_I422TOARGBROW_AVX512BW +#endif + // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) +#define HAS_AB64TOARGBROW_NEON #define HAS_ABGRTOUVROW_NEON #define HAS_ABGRTOYROW_NEON +#define HAS_AR64TOARGBROW_NEON #define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB1555TOUVROW_NEON #define HAS_ARGB1555TOYROW_NEON @@ -321,6 +423,8 @@ extern "C" { #define HAS_ARGB4444TOYROW_NEON #define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON +#define HAS_ARGBTOAB64ROW_NEON +#define HAS_ARGBTOAR64ROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTORAWROW_NEON @@ -338,8 +442,13 @@ extern "C" { #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON #define HAS_BYTETOFLOATROW_NEON +#define HAS_CONVERT16TO8ROW_NEON #define HAS_COPYROW_NEON +#define HAS_DETILEROW_NEON +#define HAS_DETILESPLITUVROW_NEON +#define HAS_DIVIDEROW_16_NEON #define HAS_HALFFLOATROW_NEON +#define HAS_HALFMERGEUVROW_NEON #define HAS_I400TOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON @@ -350,11 +459,24 @@ extern "C" { #define HAS_I422TORGBAROW_NEON #define HAS_I422TOUYVYROW_NEON #define HAS_I422TOYUY2ROW_NEON +#define HAS_I444ALPHATOARGBROW_NEON #define HAS_I444TOARGBROW_NEON +#define HAS_INTERPOLATEROW_16_NEON +#define HAS_INTERPOLATEROW_NEON #define HAS_J400TOARGBROW_NEON +#define HAS_MERGEAR64ROW_NEON +#define HAS_MERGEARGB16TO8ROW_NEON +#define HAS_MERGEARGBROW_NEON +#define HAS_MERGEUVROW_16_NEON #define HAS_MERGEUVROW_NEON +#define HAS_MERGEXR30ROW_NEON +#define HAS_MERGEXR64ROW_NEON +#define HAS_MERGEXRGB16TO8ROW_NEON +#define HAS_MERGEXRGBROW_NEON #define HAS_MIRRORROW_NEON +#define HAS_MIRRORSPLITUVROW_NEON #define HAS_MIRRORUVROW_NEON +#define HAS_MULTIPLYROW_16_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV12TORGB24ROW_NEON #define HAS_NV12TORGB565ROW_NEON @@ -363,20 +485,29 @@ extern "C" { #define HAS_NV21TOYUV24ROW_NEON #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON +#define HAS_RAWTORGBAROW_NEON +#define HAS_RAWTOUVJROW_NEON #define HAS_RAWTOUVROW_NEON +#define HAS_RAWTOYJROW_NEON #define HAS_RAWTOYROW_NEON #define HAS_RGB24TOARGBROW_NEON +#define HAS_RGB24TOUVJROW_NEON #define HAS_RGB24TOUVROW_NEON +#define HAS_RGB24TOYJROW_NEON #define HAS_RGB24TOYROW_NEON #define HAS_RGB565TOARGBROW_NEON #define HAS_RGB565TOUVROW_NEON #define HAS_RGB565TOYROW_NEON #define HAS_RGBATOUVROW_NEON +#define HAS_RGBATOYJROW_NEON #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON +#define HAS_SPLITARGBROW_NEON #define HAS_SPLITRGBROW_NEON +#define HAS_SPLITUVROW_16_NEON #define HAS_SPLITUVROW_NEON -#define HAS_UVToVUROW_NEON +#define HAS_SPLITXRGBROW_NEON +#define HAS_SWAPUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON @@ -393,13 +524,13 @@ extern "C" { #define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBGRAYROW_NEON #define HAS_ARGBMIRRORROW_NEON +#define HAS_RGB24MIRRORROW_NEON #define HAS_ARGBMULTIPLYROW_NEON #define HAS_ARGBQUANTIZEROW_NEON #define HAS_ARGBSEPIAROW_NEON #define HAS_ARGBSHADEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON -#define HAS_INTERPOLATEROW_NEON #define HAS_SOBELROW_NEON #define HAS_SOBELTOPLANEROW_NEON #define HAS_SOBELXROW_NEON @@ -409,8 +540,10 @@ extern "C" { // The following are available on AArch64 platforms: #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) -#define HAS_FLOATDIVTOBYTEROW_NEON +#define HAS_INTERPOLATEROW_16TO8_NEON #define HAS_SCALESUMSAMPLES_NEON +#define HAS_GAUSSROW_F32_NEON +#define HAS_GAUSSCOL_F32_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_ABGRTOUVROW_MSA @@ -448,21 +581,27 @@ extern "C" { #define HAS_BGRATOYROW_MSA #define HAS_HALFFLOATROW_MSA #define HAS_I400TOARGBROW_MSA -#define HAS_I422ALPHATOARGBROW_MSA -#define HAS_I422TOARGBROW_MSA -#define HAS_I422TORGB24ROW_MSA -#define HAS_I422TORGBAROW_MSA #define HAS_I422TOUYVYROW_MSA #define HAS_I422TOYUY2ROW_MSA +#define HAS_I422TOARGBROW_MSA +#define HAS_I422TORGBAROW_MSA +#define HAS_I422ALPHATOARGBROW_MSA +#define HAS_I422TORGB24ROW_MSA +#define HAS_I422TORGB565ROW_MSA +#define HAS_I422TOARGB4444ROW_MSA +#define HAS_I422TOARGB1555ROW_MSA +#define HAS_NV12TOARGBROW_MSA +#define HAS_NV12TORGB565ROW_MSA +#define HAS_NV21TOARGBROW_MSA +#define HAS_YUY2TOARGBROW_MSA +#define HAS_UYVYTOARGBROW_MSA #define HAS_I444TOARGBROW_MSA #define HAS_INTERPOLATEROW_MSA #define HAS_J400TOARGBROW_MSA #define HAS_MERGEUVROW_MSA #define HAS_MIRRORROW_MSA #define HAS_MIRRORUVROW_MSA -#define HAS_NV12TOARGBROW_MSA -#define HAS_NV12TORGB565ROW_MSA -#define HAS_NV21TOARGBROW_MSA +#define HAS_MIRRORSPLITUVROW_MSA #define HAS_RAWTOARGBROW_MSA #define HAS_RAWTORGB24ROW_MSA #define HAS_RAWTOUVROW_MSA @@ -482,88 +621,113 @@ extern "C" { #define HAS_SOBELXYROW_MSA #define HAS_SOBELYROW_MSA #define HAS_SPLITUVROW_MSA -#define HAS_UYVYTOARGBROW_MSA #define HAS_UYVYTOUVROW_MSA #define HAS_UYVYTOYROW_MSA -#define HAS_YUY2TOARGBROW_MSA #define HAS_YUY2TOUV422ROW_MSA #define HAS_YUY2TOUVROW_MSA #define HAS_YUY2TOYROW_MSA #endif -#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A) -#define HAS_ABGRTOUVROW_MMI -#define HAS_ABGRTOYROW_MMI -#define HAS_ARGB1555TOARGBROW_MMI -#define HAS_ARGB1555TOUVROW_MMI -#define HAS_ARGB1555TOYROW_MMI -#define HAS_ARGB4444TOARGBROW_MMI -#define HAS_ARGB4444TOUVROW_MMI -#define HAS_ARGB4444TOYROW_MMI -#define HAS_ARGBADDROW_MMI -#define HAS_ARGBATTENUATEROW_MMI -#define HAS_ARGBBLENDROW_MMI -#define HAS_ARGBCOLORMATRIXROW_MMI -#define HAS_ARGBCOPYALPHAROW_MMI -#define HAS_ARGBCOPYYTOALPHAROW_MMI -#define HAS_ARGBEXTRACTALPHAROW_MMI -#define HAS_ARGBGRAYROW_MMI -#define HAS_ARGBMIRRORROW_MMI -#define HAS_ARGBMULTIPLYROW_MMI -#define HAS_ARGBSEPIAROW_MMI -#define HAS_ARGBSHADEROW_MMI -#define HAS_ARGBSHUFFLEROW_MMI -#define HAS_ARGBSUBTRACTROW_MMI -#define HAS_ARGBTOARGB1555ROW_MMI -#define HAS_ARGBTOARGB4444ROW_MMI -#define HAS_ARGBTORAWROW_MMI -#define HAS_ARGBTORGB24ROW_MMI -#define HAS_ARGBTORGB565DITHERROW_MMI -#define HAS_ARGBTORGB565ROW_MMI -#define HAS_ARGBTOUV444ROW_MMI -#define HAS_ARGBTOUVJROW_MMI -#define HAS_ARGBTOUVROW_MMI -#define HAS_ARGBTOYJROW_MMI -#define HAS_ARGBTOYROW_MMI -#define HAS_BGRATOUVROW_MMI -#define HAS_BGRATOYROW_MMI -#define HAS_BLENDPLANEROW_MMI -#define HAS_COMPUTECUMULATIVESUMROW_MMI -#define HAS_CUMULATIVESUMTOAVERAGEROW_MMI -#define HAS_HALFFLOATROW_MMI -#define HAS_I400TOARGBROW_MMI -#define HAS_I422TOUYVYROW_MMI -#define HAS_I422TOYUY2ROW_MMI -#define HAS_INTERPOLATEROW_MMI -#define HAS_J400TOARGBROW_MMI -#define HAS_MERGERGBROW_MMI -#define HAS_MERGEUVROW_MMI -#define HAS_MIRRORROW_MMI -#define HAS_MIRRORUVROW_MMI -#define HAS_RAWTOARGBROW_MMI -#define HAS_RAWTORGB24ROW_MMI -#define HAS_RAWTOUVROW_MMI -#define HAS_RAWTOYROW_MMI -#define HAS_RGB24TOARGBROW_MMI -#define HAS_RGB24TOUVROW_MMI -#define HAS_RGB24TOYROW_MMI -#define HAS_RGB565TOARGBROW_MMI -#define HAS_RGB565TOUVROW_MMI -#define HAS_RGB565TOYROW_MMI -#define HAS_RGBATOUVROW_MMI -#define HAS_RGBATOYROW_MMI -#define HAS_SOBELROW_MMI -#define HAS_SOBELTOPLANEROW_MMI -#define HAS_SOBELXROW_MMI -#define HAS_SOBELXYROW_MMI -#define HAS_SOBELYROW_MMI -#define HAS_SPLITRGBROW_MMI -#define HAS_SPLITUVROW_MMI -#define HAS_UYVYTOUVROW_MMI -#define HAS_UYVYTOYROW_MMI -#define HAS_YUY2TOUV422ROW_MMI -#define HAS_YUY2TOUVROW_MMI -#define HAS_YUY2TOYROW_MMI +#if !defined(LIBYUV_DISABLE_LSX) && defined(__loongarch_sx) +#define HAS_ARGB4444TOARGBROW_LSX +#define HAS_ARGB1555TOARGBROW_LSX +#define HAS_RGB565TOARGBROW_LSX +#define HAS_RGB24TOARGBROW_LSX +#define HAS_RAWTOARGBROW_LSX +#define HAS_ARGB1555TOYROW_LSX +#define HAS_ARGB1555TOUVROW_LSX +#define HAS_RGB565TOYROW_LSX +#define HAS_RGB565TOUVROW_LSX +#define HAS_RGB24TOYROW_LSX +#define HAS_RGB24TOUVROW_LSX +#define HAS_RAWTOYROW_LSX +#define HAS_RAWTOUVROW_LSX +#define HAS_NV12TOARGBROW_LSX +#define HAS_NV12TORGB565ROW_LSX +#define HAS_NV21TOARGBROW_LSX +#define HAS_SOBELROW_LSX +#define HAS_SOBELTOPLANEROW_LSX +#define HAS_SOBELXYROW_LSX +#define HAS_ARGBTOYJROW_LSX +#define HAS_BGRATOYROW_LSX +#define HAS_BGRATOUVROW_LSX +#define HAS_ABGRTOYROW_LSX +#define HAS_ABGRTOUVROW_LSX +#define HAS_RGBATOYROW_LSX +#define HAS_RGBATOUVROW_LSX +#define HAS_ARGBTOUVJROW_LSX +#define HAS_I444TOARGBROW_LSX +#define HAS_I400TOARGBROW_LSX +#define HAS_J400TOARGBROW_LSX +#define HAS_YUY2TOARGBROW_LSX +#define HAS_UYVYTOARGBROW_LSX +#define HAS_INTERPOLATEROW_LSX +#define HAS_ARGBSETROW_LSX +#define HAS_RAWTORGB24ROW_LSX +#define HAS_MERGEUVROW_LSX +#define HAS_ARGBEXTRACTALPHAROW_LSX +#define HAS_ARGBBLENDROW_LSX +#define HAS_ARGBQUANTIZEROW_LSX +#define HAS_ARGBCOLORMATRIXROW_LSX +#define HAS_SPLITUVROW_LSX +#define HAS_SETROW_LSX +#define HAS_MIRRORSPLITUVROW_LSX +#endif + +#if !defined(LIBYUV_DISABLE_LASX) && defined(__loongarch_asx) +#define HAS_I422TOARGBROW_LASX +#define HAS_I422TORGBAROW_LASX +#define HAS_I422ALPHATOARGBROW_LASX +#define HAS_I422TOYUY2ROW_LASX +#define HAS_I422TOUYVYROW_LASX +#define HAS_MIRRORROW_LASX +#define HAS_MIRRORUVROW_LASX +#define HAS_ARGBMIRRORROW_LASX +#define HAS_I422TORGB24ROW_LASX +#define HAS_I422TORGB565ROW_LASX +#define HAS_I422TOARGB4444ROW_LASX +#define HAS_I422TOARGB1555ROW_LASX +#define HAS_YUY2TOUVROW_LASX +#define HAS_YUY2TOYROW_LASX +#define HAS_YUY2TOUV422ROW_LASX +#define HAS_UYVYTOYROW_LASX +#define HAS_UYVYTOUVROW_LASX +#define HAS_UYVYTOUV422ROW_LASX +#define HAS_ARGBTOYROW_LASX +#define HAS_ARGBTOUVROW_LASX +#define HAS_ARGBTORGB24ROW_LASX +#define HAS_ARGBTORAWROW_LASX +#define HAS_ARGBTORGB565ROW_LASX +#define HAS_ARGBTOARGB1555ROW_LASX +#define HAS_ARGBTOARGB4444ROW_LASX +#define HAS_ARGBTOUV444ROW_LASX +#define HAS_ARGBMULTIPLYROW_LASX +#define HAS_ARGBADDROW_LASX +#define HAS_ARGBSUBTRACTROW_LASX +#define HAS_ARGBATTENUATEROW_LASX +#define HAS_ARGBTORGB565DITHERROW_LASX +#define HAS_ARGBSHUFFLEROW_LASX +#define HAS_ARGBSHADEROW_LASX +#define HAS_ARGBGRAYROW_LASX +#define HAS_ARGBSEPIAROW_LASX +#define HAS_ARGB4444TOARGBROW_LASX +#define HAS_ARGB1555TOARGBROW_LASX +#define HAS_RGB565TOARGBROW_LASX +#define HAS_RGB24TOARGBROW_LASX +#define HAS_RAWTOARGBROW_LASX +#define HAS_ARGB1555TOYROW_LASX +#define HAS_ARGB1555TOUVROW_LASX +#define HAS_RGB565TOYROW_LASX +#define HAS_RGB565TOUVROW_LASX +#define HAS_RGB24TOYROW_LASX +#define HAS_RGB24TOUVROW_LASX +#define HAS_RAWTOYROW_LASX +#define HAS_RAWTOUVROW_LASX +#define HAS_NV12TOARGBROW_LASX +#define HAS_NV12TORGB565ROW_LASX +#define HAS_NV21TOARGBROW_LASX +#define HAS_ARGBTOYJROW_LASX +#define HAS_ARGBTOUVJROW_LASX #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) @@ -572,8 +736,10 @@ extern "C" { #else #define SIMD_ALIGNED(var) __declspec(align(16)) var #endif +#define LIBYUV_NOINLINE __declspec(noinline) typedef __declspec(align(16)) int16_t vec16[8]; typedef __declspec(align(16)) int32_t vec32[4]; +typedef __declspec(align(16)) float vecf32[4]; typedef __declspec(align(16)) int8_t vec8[16]; typedef __declspec(align(16)) uint16_t uvec16[8]; typedef __declspec(align(16)) uint32_t uvec32[4]; @@ -591,8 +757,10 @@ typedef __declspec(align(32)) uint8_t ulvec8[32]; #else #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #endif +#define LIBYUV_NOINLINE __attribute__((noinline)) typedef int16_t __attribute__((vector_size(16))) vec16; typedef int32_t __attribute__((vector_size(16))) vec32; +typedef float __attribute__((vector_size(16))) vecf32; typedef int8_t __attribute__((vector_size(16))) vec8; typedef uint16_t __attribute__((vector_size(16))) uvec16; typedef uint32_t __attribute__((vector_size(16))) uvec32; @@ -605,8 +773,10 @@ typedef uint32_t __attribute__((vector_size(32))) ulvec32; typedef uint8_t __attribute__((vector_size(32))) ulvec8; #else #define SIMD_ALIGNED(var) var +#define LIBYUV_NOINLINE typedef int16_t vec16[8]; typedef int32_t vec32[4]; +typedef float vecf32[4]; typedef int8_t vec8[16]; typedef uint16_t uvec16[8]; typedef uint32_t uvec32[4]; @@ -619,55 +789,30 @@ typedef uint32_t ulvec32[8]; typedef uint8_t ulvec8[32]; #endif -#if defined(__aarch64__) -// This struct is for Arm64 color conversion. -struct YuvConstants { - uvec16 kUVToRB; - uvec16 kUVToRB2; - uvec16 kUVToG; - uvec16 kUVToG2; - vec16 kUVBiasBGR; - vec32 kYToRgb; -}; -#elif defined(__arm__) -// This struct is for ArmV7 color conversion. +#if defined(__aarch64__) || defined(__arm__) +// This struct is for ARM color conversion. struct YuvConstants { - uvec8 kUVToRB; - uvec8 kUVToG; - vec16 kUVBiasBGR; - vec32 kYToRgb; + uvec8 kUVCoeff; + vec16 kRGBCoeffBias; }; #else // This struct is for Intel color conversion. struct YuvConstants { - int8_t kUVToB[32]; - int8_t kUVToG[32]; - int8_t kUVToR[32]; - int16_t kUVBiasB[16]; - int16_t kUVBiasG[16]; - int16_t kUVBiasR[16]; + uint8_t kUVToB[32]; + uint8_t kUVToG[32]; + uint8_t kUVToR[32]; int16_t kYToRgb[16]; + int16_t kYBiasToRgb[16]; }; // Offsets into YuvConstants structure #define KUVTOB 0 #define KUVTOG 32 #define KUVTOR 64 -#define KUVBIASB 96 -#define KUVBIASG 128 -#define KUVBIASR 160 -#define KYTORGB 192 -#endif - -// Conversion matrix for YUV to RGB -extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants); // BT.601 -extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants); // JPeg -extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants); // BT.709 +#define KYTORGB 96 +#define KYBIASTORGB 128 -// Conversion matrix for YVU to BGR -extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 -extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg -extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 +#endif #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) @@ -755,6 +900,13 @@ void I422ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444AlphaToARGBRow_NEON(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -762,12 +914,6 @@ void I422AlphaToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I422ToARGBRow_NEON(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToRGBARow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -841,6 +987,12 @@ void I444ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I444ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, @@ -848,12 +1000,24 @@ void I422ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToRGBARow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -861,30 +1025,61 @@ void I422AlphaToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB24Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB24Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB565Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB565Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB4444Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB4444Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB1555Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB1555Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); void NV12ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, @@ -909,23 +1104,73 @@ void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, const struct YuvConstants* yuvconstants, int width); +void NV12ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_LSX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_LASX(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_LSX(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_LASX(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_LSX(const uint8_t* src_yuy2, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_LSX(const uint8_t* src_uyvy, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); + void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void RGBAToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width); +void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); +void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width); void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_LSX(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void ARGBToYJRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -939,20 +1184,20 @@ void ARGBToUV444Row_MSA(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVRow_MSA(const uint8_t* src_argb0, - int src_stride_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGBToUV444Row_MMI(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGBToUVRow_MMI(const uint8_t* src_argb0, +void ARGBToUVRow_MSA(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); +void ARGBToUVRow_LASX(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_LASX(const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -983,6 +1228,16 @@ void RAWToUVRow_NEON(const uint8_t* src_raw, uint8_t* dst_u, uint8_t* dst_v, int width); +void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVJRow_NEON(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, @@ -998,32 +1253,32 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, +void ARGBToUVJRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void BGRAToUVRow_MSA(const uint8_t* src_rgb0, +void BGRAToUVRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ABGRToUVRow_MSA(const uint8_t* src_rgb0, +void ABGRToUVRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGBAToUVRow_MSA(const uint8_t* src_rgb0, +void RGBAToUVRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, +void RGB24ToUVRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RAWToUVRow_MSA(const uint8_t* src_rgb0, +void RAWToUVRow_MSA(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, @@ -1038,56 +1293,78 @@ void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_MMI(const uint8_t* src_rgb0, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void BGRAToUVRow_MMI(const uint8_t* src_rgb0, - int src_stride_rgb, +void BGRAToUVRow_LSX(const uint8_t* src_bgra, + int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width); -void ABGRToUVRow_MMI(const uint8_t* src_rgb0, - int src_stride_rgb, +void ABGRToUVRow_LSX(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGBAToUVRow_MMI(const uint8_t* src_rgb0, - int src_stride_rgb, +void RGBAToUVRow_LSX(const uint8_t* src_rgba, + int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGB24ToUVRow_MMI(const uint8_t* src_rgb0, - int src_stride_rgb, +void ARGBToUVJRow_LSX(const uint8_t* src_argb, + int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RAWToUVRow_MMI(const uint8_t* src_rgb0, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGB565ToUVRow_MMI(const uint8_t* src_rgb565, - int src_stride_rgb565, +void ARGBToUVJRow_LASX(const uint8_t* src_argb, + int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555, +void ARGB1555ToUVRow_LSX(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void ARGB1555ToUVRow_LASX(const uint8_t* src_argb1555, + int src_stride_argb1555, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_LSX(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_LASX(const uint8_t* src_rgb565, + int src_stride_rgb565, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_LSX(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_LASX(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_LSX(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_LASX(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width); void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width); void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, @@ -1095,46 +1372,65 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width); -void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); +void BGRAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void ABGRToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGBAToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RGB24ToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); +void RAWToYRow_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width); void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); -void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width); -void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555, uint8_t* dst_y, int width); -void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444, uint8_t* dst_y, int width); - -void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); -void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); + +void BGRAToYRow_LSX(const uint8_t* src_bgra, uint8_t* dst_y, int width); +void ABGRToYRow_LSX(const uint8_t* src_abgr, uint8_t* dst_y, int width); +void RGBAToYRow_LSX(const uint8_t* src_rgba, uint8_t* dst_y, int width); +void ARGB1555ToYRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_y, int width); +void ARGB1555ToYRow_LASX(const uint8_t* src_argb1555, + uint8_t* dst_y, + int width); +void RGB565ToYRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void RGB565ToYRow_LASX(const uint8_t* src_rgb565, uint8_t* dst_y, int width); +void RGB24ToYRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RGB24ToYRow_LASX(const uint8_t* src_rgb24, uint8_t* dst_y, int width); +void RAWToYRow_LSX(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width); + +void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGBAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGB24ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RGB24ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RAWToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); +void RAWToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); -void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); +void RGB24ToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RAWToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -1153,88 +1449,104 @@ void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void BGRAToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ABGRToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGBAToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToYJRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RAWToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB565ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB1555ToYRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGB4444ToYRow_Any_MMI(const uint8_t* src_ptr, + +void BGRAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToUVRow_AVX2(const uint8_t* src_argb0, +void RGB565ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB1555ToYRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void ARGBToUVRow_AVX2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, +void ABGRToUVRow_AVX2(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVJRow_AVX2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, +void ARGBToUVRow_SSSE3(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, +void ARGBToUVJRow_SSSE3(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, +void BGRAToUVRow_SSSE3(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, +void ABGRToUVRow_SSSE3(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0, +void RGBAToUVRow_SSSE3(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ABGRToUVRow_Any_AVX2(const uint8_t* src_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -1243,7 +1555,7 @@ void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_v, int width); void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -1256,57 +1568,67 @@ void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUV444Row_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGBToUVRow_Any_MMI(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void ARGBToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUV444Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void RAWToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); +void RGB24ToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVJRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -1350,96 +1672,126 @@ void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_Any_MMI(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void BGRAToUVRow_Any_MMI(const uint8_t* src_ptr, +void ABGRToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ABGRToUVRow_Any_MMI(const uint8_t* src_ptr, +void BGRAToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGBAToUVRow_Any_MMI(const uint8_t* src_ptr, +void RGBAToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGB24ToUVRow_Any_MMI(const uint8_t* src_ptr, +void ARGBToUVJRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void RAWToUVRow_Any_MMI(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void RGB565ToUVRow_Any_MMI(const uint8_t* src_ptr, +void ARGBToUVJRow_Any_LASX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGB1555ToUVRow_Any_MMI(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void ARGB4444ToUVRow_Any_MMI(const uint8_t* src_ptr, +void ARGB1555ToUVRow_Any_LSX(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVRow_C(const uint8_t* src_rgb0, +void ARGB1555ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB565ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RGB24ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_Any_LSX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void ARGBToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_C(const uint8_t* src_rgb0, +void ARGBToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVRow_C(const uint8_t* src_rgb0, +void ARGBToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ARGBToUVJRow_C(const uint8_t* src_rgb0, +void ARGBToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void BGRAToUVRow_C(const uint8_t* src_rgb0, +void BGRAToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void ABGRToUVRow_C(const uint8_t* src_rgb0, +void ABGRToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGBAToUVRow_C(const uint8_t* src_rgb0, +void RGBAToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RGB24ToUVRow_C(const uint8_t* src_rgb0, +void RGB24ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); -void RAWToUVRow_C(const uint8_t* src_rgb0, +void RAWToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); +void RGB24ToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void RAWToUVJRow_C(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void RGB565ToUVRow_C(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, @@ -1474,41 +1826,52 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); -void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width); +void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void MirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); +void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void MirrorUVRow_SSSE3(const uint8_t* src, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void MirrorUVRow_NEON(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void MirrorUVRow_MSA(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void MirrorUVRow_MMI(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void MirrorUVRow_C(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void MirrorSplitUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_NEON(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_MSA(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_LSX(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void MirrorSplitUVRow_C(const uint8_t* src_uv, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); -void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); -void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width); +void ARGBMirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -1520,7 +1883,23 @@ void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBMirrorRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); + +void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_rgb24, + int width); +void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, + uint8_t* dst_rgb24, + int width); +void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width); +void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void SplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, @@ -1542,7 +1921,7 @@ void SplitUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); -void SplitUVRow_MMI(const uint8_t* src_uv, +void SplitUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -1562,11 +1941,56 @@ void SplitUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void SplitUVRow_Any_MMI(const uint8_t* src_ptr, +void SplitUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); +void DetileRow_C(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_NEON(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_Any_NEON(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_SSE2(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileRow_Any_SSE2(const uint8_t* src, + ptrdiff_t src_tile_stride, + uint8_t* dst, + int width); +void DetileSplitUVRow_C(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_SSSE3(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_Any_SSSE3(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_NEON(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv, + ptrdiff_t src_tile_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void MergeUVRow_C(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, @@ -1587,7 +2011,7 @@ void MergeUVRow_MSA(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); -void MergeUVRow_MMI(const uint8_t* src_u, +void MergeUVRow_LSX(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); @@ -1607,11 +2031,39 @@ void MergeUVRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void MergeUVRow_Any_MMI(const uint8_t* y_buf, +void MergeUVRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); +void HalfMergeUVRow_C(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void HalfMergeUVRow_NEON(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void HalfMergeUVRow_SSSE3(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + +void HalfMergeUVRow_AVX2(const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_uv, + int width); + void SplitRGBRow_C(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, @@ -1627,11 +2079,6 @@ void SplitRGBRow_NEON(const uint8_t* src_rgb, uint8_t* dst_g, uint8_t* dst_b, int width); -void SplitRGBRow_MMI(const uint8_t* src_rgb, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - int width); void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, @@ -1642,11 +2089,6 @@ void SplitRGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_g, uint8_t* dst_b, int width); -void SplitRGBRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_r, - uint8_t* dst_g, - uint8_t* dst_b, - int width); void MergeRGBRow_C(const uint8_t* src_r, const uint8_t* src_g, @@ -1663,11 +2105,6 @@ void MergeRGBRow_NEON(const uint8_t* src_r, const uint8_t* src_b, uint8_t* dst_rgb, int width); -void MergeRGBRow_MMI(const uint8_t* src_r, - const uint8_t* src_g, - const uint8_t* src_b, - uint8_t* dst_rgb, - int width); void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -1678,31 +2115,449 @@ void MergeRGBRow_Any_NEON(const uint8_t* src_r, const uint8_t* src_b, uint8_t* dst_rgb, int width); -void MergeRGBRow_Any_MMI(const uint8_t* src_r, - const uint8_t* src_g, - const uint8_t* src_b, - uint8_t* dst_rgb, +void MergeARGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_SSE2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_AVX2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + const uint8_t* src_a, + uint8_t* dst_argb, + int width); +void MergeARGBRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + int width); +void MergeARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + int width); +void MergeARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + int width); +void SplitARGBRow_C(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_NEON(const uint8_t* src_rgba, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void SplitARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + uint8_t* dst_a, + int width); +void MergeXRGBRow_C(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_SSE2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_AVX2(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_NEON(const uint8_t* src_r, + const uint8_t* src_g, + const uint8_t* src_b, + uint8_t* dst_argb, + int width); +void MergeXRGBRow_Any_SSE2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void MergeXRGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void MergeXRGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); +void SplitXRGBRow_C(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_SSE2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_SSSE3(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_AVX2(const uint8_t* src_argb, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_NEON(const uint8_t* src_rgba, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_AVX2(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); +void SplitXRGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_r, + uint8_t* dst_g, + uint8_t* dst_b, + int width); + +void MergeXR30Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int depth, + int width); +void MergeAR64Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint16_t* dst_ar64, + int depth, + int width); +void MergeARGB16To8Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR64Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint16_t* dst_ar64, + int depth, + int width); +void MergeXRGB16To8Row_C(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_argb, + int depth, int width); +void MergeXR30Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int depth, + int width); +void MergeAR64Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint16_t* dst_ar64, + int depth, + int width); +void MergeARGB16To8Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR64Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint16_t* dst_ar64, + int depth, + int width); +void MergeXRGB16To8Row_AVX2(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR30Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int depth, + int width); +void MergeXR30Row_10_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_ar30, + int /* depth */, + int width); +void MergeAR64Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint16_t* dst_ar64, + int depth, + int width); +void MergeARGB16To8Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + const uint16_t* src_a, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR64Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint16_t* dst_ar64, + int depth, + int width); +void MergeXRGB16To8Row_NEON(const uint16_t* src_r, + const uint16_t* src_g, + const uint16_t* src_b, + uint8_t* dst_argb, + int depth, + int width); +void MergeXR30Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeAR64Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeXR64Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeARGB16To8Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXRGB16To8Row_Any_AVX2(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXR30Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXR30Row_10_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeAR64Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeARGB16To8Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + int depth, + int width); +void MergeXR64Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint16_t* dst_ptr, + int depth, + int width); +void MergeXRGB16To8Row_Any_NEON(const uint16_t* r_buf, + const uint16_t* g_buf, + const uint16_t* b_buf, + uint8_t* dst_ptr, + int depth, + int width); void MergeUVRow_16_C(const uint16_t* src_u, const uint16_t* src_v, uint16_t* dst_uv, - int scale, /* 64 for 10 bit */ + int depth, int width); void MergeUVRow_16_AVX2(const uint16_t* src_u, const uint16_t* src_v, uint16_t* dst_uv, - int scale, + int depth, + int width); +void MergeUVRow_16_Any_AVX2(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); +void MergeUVRow_16_NEON(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, int width); +void MergeUVRow_16_Any_NEON(const uint16_t* src_u, + const uint16_t* src_v, + uint16_t* dst_uv, + int depth, + int width); + +void SplitUVRow_16_C(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_AVX2(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_Any_AVX2(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_NEON(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); +void SplitUVRow_16_Any_NEON(const uint16_t* src_uv, + uint16_t* dst_u, + uint16_t* dst_v, + int depth, + int width); -void MultiplyRow_16_AVX2(const uint16_t* src_y, - uint16_t* dst_y, - int scale, - int width); void MultiplyRow_16_C(const uint16_t* src_y, uint16_t* dst_y, int scale, int width); +void MultiplyRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void MultiplyRow_16_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); +void MultiplyRow_16_NEON(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void MultiplyRow_16_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); + +void DivideRow_16_C(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void DivideRow_16_AVX2(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void DivideRow_16_Any_AVX2(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); +void DivideRow_16_NEON(const uint16_t* src_y, + uint16_t* dst_y, + int scale, + int width); +void DivideRow_16_Any_NEON(const uint16_t* src_ptr, + uint16_t* dst_ptr, + int scale, + int width); void Convert8To16Row_C(const uint8_t* src_y, uint16_t* dst_y, @@ -1745,6 +2600,14 @@ void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr, uint8_t* dst_ptr, int scale, int width); +void Convert16To8Row_NEON(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width); +void Convert16To8Row_Any_NEON(const uint16_t* src_y, + uint8_t* dst_y, + int scale, + int width); void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width); @@ -1761,16 +2624,12 @@ void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count); void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); -void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBCopyAlphaRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, @@ -1785,7 +2644,7 @@ void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, uint8_t* dst_a, int width); -void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb, +void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, @@ -1800,31 +2659,29 @@ void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr, void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBExtractAlphaRow_Any_MMI(const uint8_t* src_ptr, +void ARGBExtractAlphaRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); -void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBCopyYToAlphaRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void SetRow_C(uint8_t* dst, uint8_t v8, int width); void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); void SetRow_X86(uint8_t* dst, uint8_t v8, int width); void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width); void SetRow_NEON(uint8_t* dst, uint8_t v8, int width); +void SetRow_LSX(uint8_t* dst, uint8_t v8, int width); void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width); void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width); +void SetRow_Any_LSX(uint8_t* dst_ptr, uint8_t v32, int width); void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width); @@ -1832,6 +2689,8 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width); void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width); void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width); +void ARGBSetRow_LSX(uint8_t* dst_argb, uint32_t v32, int width); +void ARGBSetRow_Any_LSX(uint8_t* dst_ptr, uint32_t v32, int width); // ARGBShufflers for BGRAToARGB etc. void ARGBShuffleRow_C(const uint8_t* src_argb, @@ -1854,10 +2713,10 @@ void ARGBShuffleRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); -void ARGBShuffleRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width); +void ARGBShuffleRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + const uint8_t* shuffler, + int width); void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, @@ -1874,15 +2733,16 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); -void ARGBShuffleRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const uint8_t* param, - int width); +void ARGBShuffleRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint8_t* param, + int width); void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); @@ -1901,42 +2761,57 @@ void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); -void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RGB24ToARGBRow_LSX(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RGB24ToARGBRow_LASX(const uint8_t* src_rgb24, + uint8_t* dst_argb, + int width); void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_NEON(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); -void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_LSX(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToARGBRow_LASX(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); -void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); +void RAWToRGB24Row_LSX(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); -void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565, +void RGB565ToARGBRow_LSX(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); +void RGB565ToARGBRow_LASX(const uint8_t* src_rgb565, + uint8_t* dst_argb, + int width); void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); -void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555, +void ARGB1555ToARGBRow_LSX(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); +void ARGB1555ToARGBRow_LASX(const uint8_t* src_argb1555, + uint8_t* dst_argb, + int width); void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); -void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444, +void ARGB4444ToARGBRow_LSX(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); +void ARGB4444ToARGBRow_LASX(const uint8_t* src_argb4444, + uint8_t* dst_argb, + int width); void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); +void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, @@ -1956,6 +2831,9 @@ void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGBARow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -1985,45 +2863,59 @@ void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr, void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24ToARGBRow_Any_MMI(const uint8_t* src_ptr, +void RGB24ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGBARow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RAWToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToARGBRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RAWToRGB24Row_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RAWToRGB24Row_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB565ToARGBRow_Any_MMI(const uint8_t* src_ptr, +void RGB565ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB1555ToARGBRow_Any_MMI(const uint8_t* src_ptr, +void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, +void ARGB1555ToARGBRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGB4444ToARGBRow_Any_MMI(const uint8_t* src_ptr, +void ARGB4444ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGB4444ToARGBRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); @@ -2091,20 +2983,20 @@ void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width); +void ARGBToRGB565DitherRow_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + const uint32_t dither4, + int width); -void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); -void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); -void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width); -void ARGBToARGB1555Row_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width); -void ARGBToARGB4444Row_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - int width); -void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb, - uint8_t* dst_rgb, - const uint32_t dither4, - int width); +void ARGBToRGB24Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRAWRow_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToRGB565Row_LASX(const uint8_t* src_argb, uint8_t* dst_rgb, int width); +void ARGBToARGB1555Row_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); +void ARGBToARGB4444Row_LASX(const uint8_t* src_argb, + uint8_t* dst_rgb, + int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); @@ -2115,11 +3007,76 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width); void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width); +void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void AR64ShuffleRow_C(const uint8_t* src_ar64, + uint8_t* dst_ar64, + const uint8_t* shuffler, + int width); +void ARGBToAR64Row_SSSE3(const uint8_t* src_argb, + uint16_t* dst_ar64, + int width); +void ARGBToAB64Row_SSSE3(const uint8_t* src_argb, + uint16_t* dst_ab64, + int width); +void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64, + uint8_t* dst_argb, + int width); +void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64, + uint8_t* dst_argb, + int width); +void ARGBToAR64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_AVX2(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_AVX2(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void ARGBToAR64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ar64, int width); +void ARGBToAB64Row_NEON(const uint8_t* src_argb, uint16_t* dst_ab64, int width); +void AR64ToARGBRow_NEON(const uint16_t* src_ar64, uint8_t* dst_argb, int width); +void AB64ToARGBRow_NEON(const uint16_t* src_ab64, uint8_t* dst_argb, int width); +void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void ARGBToAB64Row_Any_SSSE3(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void AR64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void AB64ToARGBRow_Any_SSSE3(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR64Row_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void ARGBToAB64Row_Any_AVX2(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void AR64ToARGBRow_Any_AVX2(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void AB64ToARGBRow_Any_AVX2(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToAR64Row_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void ARGBToAB64Row_Any_NEON(const uint8_t* src_ptr, + uint16_t* dst_ptr, + int width); +void AR64ToARGBRow_Any_NEON(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); +void AB64ToARGBRow_Any_NEON(const uint16_t* src_ptr, + uint8_t* dst_ptr, + int width); + void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); -void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -2131,7 +3088,7 @@ void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void J400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void J400ToARGBRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I444ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, @@ -2163,6 +3120,51 @@ void I210ToARGBRow_C(const uint16_t* src_y, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); +void I212ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + const uint16_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_u, + const uint16_t* src_v, + const uint16_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_C(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + const uint8_t* src_a, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -2207,6 +3209,27 @@ void UYVYToARGBRow_C(const uint8_t* src_uyvy, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); +void P210ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_C(const uint16_t* src_y, + const uint16_t* src_uv, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); + void I422ToRGBARow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -2243,6 +3266,12 @@ void I422ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I422ToARGBRow_AVX512BW(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2261,18 +3290,6 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_SSSE3(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); -void I444ToARGBRow_AVX2(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2298,6 +3315,44 @@ void I210ToARGBRow_SSSE3(const uint16_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); +void I212ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422ToAR30Row_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2316,6 +3371,58 @@ void I210ToAR30Row_AVX2(const uint16_t* y_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width); +void I212ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2365,6 +3472,10 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void NV21ToYUV24Row_AVX2(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_yuv24, @@ -2400,6 +3511,48 @@ void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); + +void P210ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); + void I422ToRGBARow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2460,6 +3613,12 @@ void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGBRow_Any_AVX512BW(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2502,6 +3661,44 @@ void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I212ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I212ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2520,6 +3717,58 @@ void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I212ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I212ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I210AlphaToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I410AlphaToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* u_buf, + const uint16_t* v_buf, + const uint16_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I444AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2574,6 +3823,10 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void NV21ToYUV24Row_Any_SSSE3(const uint8_t* src_y, + const uint8_t* src_vu, + uint8_t* dst_yuv24, + int width); void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_yuv24, @@ -2604,6 +3857,46 @@ void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void P210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_Any_SSSE3(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_Any_AVX2(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -2659,30 +3952,57 @@ void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width); -void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width); -void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width); -void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width); -void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); -void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); -void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width); +void I400ToARGBRow_C(const uint8_t* src_y, + uint8_t* rgb_buf, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_SSE2(const uint8_t* y_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_AVX2(const uint8_t* y_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_NEON(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_MSA(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_LSX(const uint8_t* src_y, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, + const struct YuvConstants* param, int width); void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, + const struct YuvConstants* param, int width); void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, + const struct YuvConstants* param, int width); -void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void I400ToARGBRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I400ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); // ARGB preattenuated alpha blend. -void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, +void ARGBBlendRow_SSSE3(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBBlendRow_NEON(const uint8_t* src_argb0, +void ARGBBlendRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2690,11 +4010,11 @@ void ARGBBlendRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBBlendRow_MMI(const uint8_t* src_argb0, +void ARGBBlendRow_LSX(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBBlendRow_C(const uint8_t* src_argb0, +void ARGBBlendRow_C(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2720,16 +4040,6 @@ void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); -void BlendPlaneRow_MMI(const uint8_t* src0, - const uint8_t* src1, - const uint8_t* alpha, - uint8_t* dst, - int width); -void BlendPlaneRow_Any_MMI(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - int width); void BlendPlaneRow_C(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, @@ -2738,11 +4048,11 @@ void BlendPlaneRow_C(const uint8_t* src0, // ARGB multiply images. Same API as Blend, but these require // pointer and width alignment for SSE2. -void ARGBMultiplyRow_C(const uint8_t* src_argb0, +void ARGBMultiplyRow_C(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, +void ARGBMultiplyRow_SSE2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2750,7 +4060,7 @@ void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, +void ARGBMultiplyRow_AVX2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2758,7 +4068,7 @@ void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, +void ARGBMultiplyRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2774,21 +4084,21 @@ void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBMultiplyRow_MMI(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); -void ARGBMultiplyRow_Any_MMI(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); +void ARGBMultiplyRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBMultiplyRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB add images. -void ARGBAddRow_C(const uint8_t* src_argb0, +void ARGBAddRow_C(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBAddRow_SSE2(const uint8_t* src_argb0, +void ARGBAddRow_SSE2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2796,7 +4106,7 @@ void ARGBAddRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBAddRow_AVX2(const uint8_t* src_argb0, +void ARGBAddRow_AVX2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2804,7 +4114,7 @@ void ARGBAddRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBAddRow_NEON(const uint8_t* src_argb0, +void ARGBAddRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2820,22 +4130,22 @@ void ARGBAddRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBAddRow_MMI(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); -void ARGBAddRow_Any_MMI(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); +void ARGBAddRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBAddRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); // ARGB subtract images. Same API as Blend, but these require // pointer and width alignment for SSE2. -void ARGBSubtractRow_C(const uint8_t* src_argb0, +void ARGBSubtractRow_C(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); -void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, +void ARGBSubtractRow_SSE2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2843,7 +4153,7 @@ void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, +void ARGBSubtractRow_AVX2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2851,7 +4161,7 @@ void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBSubtractRow_NEON(const uint8_t* src_argb0, +void ARGBSubtractRow_NEON(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width); @@ -2867,14 +4177,14 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void ARGBSubtractRow_MMI(const uint8_t* src_argb0, - const uint8_t* src_argb1, - uint8_t* dst_argb, - int width); -void ARGBSubtractRow_Any_MMI(const uint8_t* y_buf, - const uint8_t* uv_buf, - uint8_t* dst_ptr, - int width); +void ARGBSubtractRow_LASX(const uint8_t* src_argb0, + const uint8_t* src_argb1, + uint8_t* dst_argb, + int width); +void ARGBSubtractRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + int width); void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, @@ -2963,24 +4273,24 @@ void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); +void ARGBToRGB565DitherRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); -void ARGBToRGB24Row_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToRAWRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToRGB565Row_Any_MMI(const uint8_t* src_ptr, +void ARGBToRGB24Row_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBToARGB1555Row_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToARGB4444Row_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToRGB565DitherRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const uint32_t param, - int width); +void ARGBToRAWRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToRGB565Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, @@ -2994,6 +4304,13 @@ void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -3051,9 +4368,9 @@ void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void NV21ToYUV24Row_Any_NEON(const uint8_t* src_y, - const uint8_t* src_vu, - uint8_t* dst_yuv24, +void NV21ToYUV24Row_Any_NEON(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, int width); void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, @@ -3068,24 +4385,82 @@ void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void P210ToARGBRow_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P210ToARGBRow_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P410ToARGBRow_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_argb, + const struct YuvConstants* yuvconstants, + int width); +void P210ToAR30Row_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); +void P410ToAR30Row_Any_NEON(const uint16_t* y_buf, + const uint16_t* uv_buf, + uint8_t* dst_ar30, + const struct YuvConstants* yuvconstants, + int width); void I444ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I444ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGBARow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -3093,30 +4468,61 @@ void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422AlphaToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + const uint8_t* a_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB24Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToRGB565Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB4444Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); +void I422ToARGB1555Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, @@ -3141,6 +4547,45 @@ void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr, const struct YuvConstants* yuvconstants, int width); +void NV12ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_Any_LSX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV21ToARGBRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void YUY2ToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void UYVYToARGBRow_Any_LSX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); + void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, @@ -3172,25 +4617,25 @@ void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, uint8_t* dst_v, int width); void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); -void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width); +void YUY2ToYRow_LASX(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToUVRow_MMI(const uint8_t* src_yuy2, - int src_stride_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void YUY2ToUVRow_LASX(const uint8_t* src_yuy2, + int src_stride_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void YUY2ToUV422Row_LASX(const uint8_t* src_yuy2, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_C(const uint8_t* src_yuy2, int src_stride_yuy2, @@ -3203,7 +4648,7 @@ void YUY2ToUV422Row_C(const uint8_t* src_yuy2, int width); void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -3213,7 +4658,7 @@ void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr, int width); void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -3223,7 +4668,7 @@ void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr, int width); void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -3232,25 +4677,25 @@ void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_v, int width); void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void YUY2ToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void YUY2ToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToUVRow_Any_MMI(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void YUY2ToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void YUY2ToUV422Row_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void YUY2ToUV422Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, @@ -3292,25 +4737,25 @@ void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, uint8_t* dst_v, int width); void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); -void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width); +void UYVYToYRow_LASX(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_MSA(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToUVRow_MMI(const uint8_t* src_uyvy, - int src_stride_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void UYVYToUVRow_LASX(const uint8_t* src_uyvy, + int src_stride_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToUV422Row_MMI(const uint8_t* src_uyvy, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void UYVYToUV422Row_LASX(const uint8_t* src_uyvy, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_C(const uint8_t* src_uyvy, @@ -3324,7 +4769,7 @@ void UYVYToUV422Row_C(const uint8_t* src_uyvy, int width); void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -3334,7 +4779,7 @@ void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr, int width); void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -3344,7 +4789,7 @@ void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr, int width); void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr, - int src_stride_ptr, + int src_stride, uint8_t* dst_u, uint8_t* dst_v, int width); @@ -3353,53 +4798,57 @@ void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_v, int width); void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void UYVYToYRow_Any_MMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void UYVYToYRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToUVRow_Any_MMI(const uint8_t* src_ptr, - int src_stride_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void UYVYToUVRow_Any_LASX(const uint8_t* src_ptr, + int src_stride_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); -void UYVYToUV422Row_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void UVToVURow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width); -void UVToVURow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); -void UVToVURow_Any_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void UYVYToUV422Row_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_u, + uint8_t* dst_v, + int width); +void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width); +void SwapUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width); void AYUVToUVRow_C(const uint8_t* src_ayuv, - int stride_ayuv, + int src_stride_ayuv, uint8_t* dst_uv, int width); void AYUVToVURow_C(const uint8_t* src_ayuv, - int stride_ayuv, + int src_stride_ayuv, uint8_t* dst_vu, int width); void AYUVToYRow_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width); void AYUVToUVRow_NEON(const uint8_t* src_ayuv, - int stride_ayuv, + int src_stride_ayuv, uint8_t* dst_uv, int width); void AYUVToVURow_NEON(const uint8_t* src_ayuv, - int stride_ayuv, + int src_stride_ayuv, uint8_t* dst_vu, int width); -void AYUVToYRow_Any_NEON(const uint8_t* src_ayuv, uint8_t* dst_y, int width); -void AYUVToUVRow_Any_NEON(const uint8_t* src_ayuv, - int stride_ayuv, - uint8_t* dst_uv, +void AYUVToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void AYUVToUVRow_Any_NEON(const uint8_t* src_ptr, + int src_stride, + uint8_t* dst_vu, int width); -void AYUVToVURow_Any_NEON(const uint8_t* src_ayuv, - int stride_ayuv, +void AYUVToVURow_Any_NEON(const uint8_t* src_ptr, + int src_stride, uint8_t* dst_vu, int width); @@ -3478,41 +4927,41 @@ void I422ToYUY2Row_MSA(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_yuy2, int width); -void I422ToYUY2Row_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_yuy2, - int width); +void I422ToYUY2Row_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_yuy2, + int width); void I422ToUYVYRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width); -void I422ToUYVYRow_MMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_uyvy, - int width); +void I422ToUYVYRow_LASX(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_uyvy, + int width); void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); -void I422ToYUY2Row_Any_MMI(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - int width); +void I422ToYUY2Row_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); -void I422ToUYVYRow_Any_MMI(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - int width); +void I422ToUYVYRow_Any_LASX(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + int width); // Effects related row functions. void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); @@ -3528,9 +4977,9 @@ void ARGBAttenuateRow_NEON(const uint8_t* src_argb, void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBAttenuateRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - int width); +void ARGBAttenuateRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width); void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -3543,9 +4992,9 @@ void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void ARGBAttenuateRow_Any_MMI(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void ARGBAttenuateRow_Any_LASX(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); // Inverse table for unattenuate, shared by C and SSE2. extern const uint32_t fixed_invtbl8[256]; @@ -3569,13 +5018,13 @@ void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); -void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width); +void ARGBGrayRow_LASX(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); -void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width); +void ARGBSepiaRow_LASX(uint8_t* dst_argb, int width); void ARGBColorMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_argb, @@ -3593,7 +5042,7 @@ void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); -void ARGBColorMatrixRow_MMI(const uint8_t* src_argb, +void ARGBColorMatrixRow_LSX(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); @@ -3632,6 +5081,11 @@ void ARGBQuantizeRow_MSA(uint8_t* dst_argb, int interval_size, int interval_offset, int width); +void ARGBQuantizeRow_LSX(uint8_t* dst_argb, + int scale, + int interval_size, + int interval_offset, + int width); void ARGBShadeRow_C(const uint8_t* src_argb, uint8_t* dst_argb, @@ -3649,10 +5103,10 @@ void ARGBShadeRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); -void ARGBShadeRow_MMI(const uint8_t* src_argb, - uint8_t* dst_argb, - int width, - uint32_t value); +void ARGBShadeRow_LASX(const uint8_t* src_argb, + uint8_t* dst_argb, + int width, + uint32_t value); // Used for blur. void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, @@ -3666,11 +5120,6 @@ void ComputeCumulativeSumRow_SSE2(const uint8_t* row, const int32_t* previous_cumsum, int width); -void ComputeCumulativeSumRow_MMI(const uint8_t* row, - int32_t* cumsum, - const int32_t* previous_cumsum, - int width); - void CumulativeSumToAverageRow_C(const int32_t* tl, const int32_t* bl, int w, @@ -3721,7 +5170,7 @@ void InterpolateRow_MSA(uint8_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); -void InterpolateRow_MMI(uint8_t* dst_ptr, +void InterpolateRow_LSX(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int width, @@ -3746,7 +5195,7 @@ void InterpolateRow_Any_MSA(uint8_t* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); -void InterpolateRow_Any_MMI(uint8_t* dst_ptr, +void InterpolateRow_Any_LSX(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, @@ -3757,6 +5206,47 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); +void InterpolateRow_16_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); +void InterpolateRow_16_Any_NEON(uint16_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction); + +void InterpolateRow_16To8_C(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); +void InterpolateRow_16To8_NEON(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); +void InterpolateRow_16To8_Any_NEON(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); +void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); +void InterpolateRow_16To8_Any_AVX2(uint8_t* dst_ptr, + const uint16_t* src_ptr, + ptrdiff_t src_stride, + int scale, + int width, + int source_y_fraction); // Sobel images. void SobelXRow_C(const uint8_t* src_y0, @@ -3779,11 +5269,6 @@ void SobelXRow_MSA(const uint8_t* src_y0, const uint8_t* src_y2, uint8_t* dst_sobelx, int width); -void SobelXRow_MMI(const uint8_t* src_y0, - const uint8_t* src_y1, - const uint8_t* src_y2, - uint8_t* dst_sobelx, - int width); void SobelYRow_C(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, @@ -3800,10 +5285,6 @@ void SobelYRow_MSA(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width); -void SobelYRow_MMI(const uint8_t* src_y0, - const uint8_t* src_y1, - uint8_t* dst_sobely, - int width); void SobelRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, @@ -3820,7 +5301,7 @@ void SobelRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); -void SobelRow_MMI(const uint8_t* src_sobelx, +void SobelRow_LSX(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); @@ -3840,7 +5321,7 @@ void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); -void SobelToPlaneRow_MMI(const uint8_t* src_sobelx, +void SobelToPlaneRow_LSX(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); @@ -3860,7 +5341,7 @@ void SobelXYRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); -void SobelXYRow_MMI(const uint8_t* src_sobelx, +void SobelXYRow_LSX(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); @@ -3876,7 +5357,7 @@ void SobelRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void SobelRow_Any_MMI(const uint8_t* y_buf, +void SobelRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); @@ -3892,7 +5373,7 @@ void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void SobelToPlaneRow_Any_MMI(const uint8_t* y_buf, +void SobelToPlaneRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); @@ -3908,7 +5389,7 @@ void SobelXYRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); -void SobelXYRow_Any_MMI(const uint8_t* y_buf, +void SobelXYRow_Any_LSX(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); @@ -3984,6 +5465,14 @@ void HalfFloatRow_Any_MSA(const uint16_t* src_ptr, uint16_t* dst_ptr, float param, int width); +void HalfFloatRow_LSX(const uint16_t* src, + uint16_t* dst, + float scale, + int width); +void HalfFloatRow_Any_LSX(const uint16_t* src_ptr, + uint16_t* dst_ptr, + float param, + int width); void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width); void ByteToFloatRow_NEON(const uint8_t* src, float* dst, @@ -4018,16 +5507,24 @@ float ScaleSumSamples_NEON(const float* src, void ScaleSamples_C(const float* src, float* dst, float scale, int width); void ScaleSamples_NEON(const float* src, float* dst, float scale, int width); -void FloatDivToByteRow_C(const float* src_weights, - const float* src_values, - uint8_t* dst_out, - uint8_t* dst_mask, - int width); -void FloatDivToByteRow_NEON(const float* src_weights, - const float* src_values, - uint8_t* dst_out, - uint8_t* dst_mask, - int width); +void GaussRow_F32_NEON(const float* src, float* dst, int width); +void GaussRow_F32_C(const float* src, float* dst, int width); + +void GaussCol_F32_NEON(const float* src0, + const float* src1, + const float* src2, + const float* src3, + const float* src4, + float* dst, + int width); + +void GaussCol_F32_C(const float* src0, + const float* src1, + const float* src2, + const float* src3, + const float* src4, + float* dst, + int width); #ifdef __cplusplus } // extern "C" |