diff options
Diffstat (limited to 'files/source/convert_from_argb.cc')
-rw-r--r-- | files/source/convert_from_argb.cc | 1061 |
1 files changed, 901 insertions, 160 deletions
diff --git a/files/source/convert_from_argb.cc b/files/source/convert_from_argb.cc index e50c2af3..c3d037c4 100644 --- a/files/source/convert_from_argb.cc +++ b/files/source/convert_from_argb.cc @@ -76,6 +76,14 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUV444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUV444Row = ARGBToUV444Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444Row = ARGBToUV444Row_LSX; + } + } +#endif #if defined(HAS_ARGBTOUV444ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToUV444Row = ARGBToUV444Row_Any_LASX; @@ -116,6 +124,14 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -124,6 +140,11 @@ int ARGBToI444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToUV444Row(src_argb, dst_u, dst_v, width); @@ -230,7 +251,24 @@ int ARGBToI422(const uint8_t* src_argb, } } #endif - +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -241,6 +279,11 @@ int ARGBToI422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); @@ -340,6 +383,14 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -350,6 +401,11 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -361,11 +417,19 @@ int ARGBToNV12(const uint8_t* src_argb, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -390,6 +454,11 @@ int ARGBToNV12(const uint8_t* src_argb, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); @@ -502,6 +571,24 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -512,6 +599,11 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -523,11 +615,19 @@ int ARGBToNV21(const uint8_t* src_argb, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -552,6 +652,11 @@ int ARGBToNV21(const uint8_t* src_argb, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); @@ -663,6 +768,27 @@ int ABGRToNV12(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -674,11 +800,19 @@ int ABGRToNV12(const uint8_t* src_abgr, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -703,6 +837,11 @@ int ABGRToNV12(const uint8_t* src_abgr, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); @@ -815,6 +954,27 @@ int ABGRToNV21(const uint8_t* src_abgr, } } #endif +#if defined(HAS_ABGRTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; @@ -826,11 +986,19 @@ int ABGRToNV21(const uint8_t* src_abgr, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -855,6 +1023,11 @@ int ABGRToNV21(const uint8_t* src_abgr, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); @@ -972,6 +1145,24 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -982,6 +1173,11 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; @@ -1014,6 +1210,14 @@ int ARGBToYUY2(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_LSX; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToYUY2Row = I422ToYUY2Row_Any_LASX; @@ -1135,6 +1339,24 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LSX) && defined(HAS_ARGBTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + ARGBToUVRow = ARGBToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + ARGBToUVRow = ARGBToUVRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -1145,6 +1367,11 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; @@ -1177,6 +1404,14 @@ int ARGBToUYVY(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_LSX; + } + } +#endif #if defined(HAS_I422TOUYVYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { I422ToUYVYRow = I422ToUYVYRow_Any_LASX; @@ -1262,6 +1497,14 @@ int ARGBToI400(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYRow = ARGBToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYRow = ARGBToYRow_LSX; + } + } +#endif #if defined(HAS_ARGBTOYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToYRow = ARGBToYRow_Any_LASX; @@ -1270,6 +1513,11 @@ int ARGBToI400(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYRow = ARGBToYRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToYRow(src_argb, dst_y, width); @@ -1360,6 +1608,14 @@ int ARGBToRGB24(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB24Row = ARGBToRGB24Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToRGB24Row = ARGBToRGB24Row_LSX; + } + } +#endif #if defined(HAS_ARGBTORGB24ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_LASX; @@ -1368,6 +1624,11 @@ int ARGBToRGB24(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRGB24Row = ARGBToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToRGB24Row(src_argb, dst_rgb24, width); @@ -1434,6 +1695,14 @@ int ARGBToRAW(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRAWRow = ARGBToRAWRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToRAWRow = ARGBToRAWRow_LSX; + } + } +#endif #if defined(HAS_ARGBTORAWROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRAWRow = ARGBToRAWRow_Any_LASX; @@ -1442,6 +1711,11 @@ int ARGBToRAW(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORAWROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToRAWRow = ARGBToRAWRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBToRAWRow(src_argb, dst_raw, width); @@ -1467,7 +1741,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, int height) { int y; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, - const uint32_t dither4, int width) = + uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; @@ -1512,6 +1786,14 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB565DITHERROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_LSX; + } + } +#endif #if defined(HAS_ARGBTORGB565DITHERROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_LASX; @@ -1589,6 +1871,15 @@ int ARGBToRGB565(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTORGB565ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToRGB565Row = ARGBToRGB565Row_LSX; + } + } +#endif + #if defined(HAS_ARGBTORGB565ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_LASX; @@ -1663,6 +1954,14 @@ int ARGBToARGB1555(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOARGB1555ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_LSX; + } + } +#endif #if defined(HAS_ARGBTOARGB1555ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_LASX; @@ -1737,6 +2036,14 @@ int ARGBToARGB4444(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOARGB4444ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_LSX; + } + } +#endif #if defined(HAS_ARGBTOARGB4444ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_LASX; @@ -1858,19 +2165,19 @@ int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1879,6 +2186,22 @@ int ARGBToJ420(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } +#if defined(HAS_ARGBTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVJRow = ARGBToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_NEON; + } + } +#endif #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYJRow = ARGBToYJRow_Any_SSSE3; @@ -1903,19 +2226,11 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; } } #endif @@ -1951,18 +2266,23 @@ int ARGBToJ420(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif for (y = 0; y < height - 1; y += 2) { - ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, src_stride_argb, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width); src_argb += src_stride_argb * 2; dst_yj += dst_stride_yj * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } if (height & 1) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); } return 0; @@ -1974,19 +2294,19 @@ int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; - if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { + if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. @@ -1997,10 +2317,10 @@ int ARGBToJ422(const uint8_t* src_argb, } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_yj == width && - dst_stride_u * 2 == width && dst_stride_v * 2 == width) { + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; + src_stride_argb = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; } #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -2026,6 +2346,14 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; @@ -2074,270 +2402,649 @@ int ARGBToJ422(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); + ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width); ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; dst_yj += dst_stride_yj; - dst_u += dst_stride_u; - dst_v += dst_stride_v; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } return 0; } -// Convert ARGB to AR64. +// Convert ARGB to J400. LIBYUV_API -int ARGBToAR64(const uint8_t* src_argb, +int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, - uint16_t* dst_ar64, - int dst_stride_ar64, + uint8_t* dst_yj, + int dst_stride_yj, int width, int height) { int y; - void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, - int width) = ARGBToAR64Row_C; - if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = + ARGBToYJRow_C; + if (!src_argb || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { + if (src_stride_argb == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_argb = dst_stride_ar64 = 0; + src_stride_argb = dst_stride_yj = 0; } -#if defined(HAS_ARGBTOAR64ROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAR64Row = ARGBToAR64Row_SSSE3; + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOAR64ROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAR64Row = ARGBToAR64Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAR64Row = ARGBToAR64Row_AVX2; + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOAR64ROW_NEON) +#if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToAR64Row = ARGBToAR64Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToAR64Row = ARGBToAR64Row_NEON; + ARGBToYJRow = ARGBToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_NEON; } } #endif +#if defined(HAS_ARGBTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYJRow = ARGBToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToAR64Row(src_argb, dst_ar64, width); + ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; - dst_ar64 += dst_stride_ar64; + dst_yj += dst_stride_yj; } return 0; } -// Convert ARGB to AB64. +// Convert RGBA to J400. LIBYUV_API -int ARGBToAB64(const uint8_t* src_argb, - int src_stride_argb, - uint16_t* dst_ab64, - int dst_stride_ab64, +int RGBAToJ400(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_yj, + int dst_stride_yj, int width, int height) { int y; - void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, - int width) = ARGBToAB64Row_C; - if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { + void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = + RGBAToYJRow_C; + if (!src_rgba || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { + if (src_stride_rgba == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_argb = dst_stride_ab64 = 0; + src_stride_rgba = dst_stride_yj = 0; } -#if defined(HAS_ARGBTOAB64ROW_SSSE3) +#if defined(HAS_RGBATOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBToAB64Row = ARGBToAB64Row_SSSE3; + RGBAToYJRow = RGBAToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOAB64ROW_AVX2) +#if defined(HAS_RGBATOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToAB64Row = ARGBToAB64Row_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBToAB64Row = ARGBToAB64Row_AVX2; + RGBAToYJRow = RGBAToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGBAToYJRow = RGBAToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOAB64ROW_NEON) +#if defined(HAS_RGBATOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToAB64Row = ARGBToAB64Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToAB64Row = ARGBToAB64Row_NEON; + RGBAToYJRow = RGBAToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_NEON; } } #endif +#if defined(HAS_RGBATOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RGBAToYJRow = RGBAToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_MSA; + } + } +#endif +#if defined(HAS_RGBATOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGBAToYJRow = RGBAToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGBAToYJRow = RGBAToYJRow_LSX; + } + } +#endif +#if defined(HAS_RGBATOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGBAToYJRow = RGBAToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGBAToYJRow = RGBAToYJRow_LASX; + } + } +#endif +#if defined(HAS_RGBATOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToYJRow = RGBAToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToAB64Row(src_argb, dst_ab64, width); - src_argb += src_stride_argb; - dst_ab64 += dst_stride_ab64; + RGBAToYJRow(src_rgba, dst_yj, width); + src_rgba += src_stride_rgba; + dst_yj += dst_stride_yj; } return 0; } -// Convert ARGB to J400. +// Convert ABGR to J420. (JPeg full range I420). LIBYUV_API -int ARGBToJ400(const uint8_t* src_argb, - int src_stride_argb, +int ABGRToJ420(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_yj, int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { int y; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = - ARGBToYJRow_C; - if (!src_argb || !dst_yj || width <= 0 || height == 0) { + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ABGRToYJRow = ABGRToYJRow_Any_MSA; + ABGRToUVJRow = ABGRToUVJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_MSA; + ABGRToUVJRow = ABGRToUVJRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width); + src_abgr += src_stride_abgr * 2; + dst_yj += dst_stride_yj * 2; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; + } + if (height & 1) { + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + } + return 0; +} + +// Convert ABGR to J422. (JPeg full range I422). +LIBYUV_API +int ABGRToJ422(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height) { + int y; + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_yj == width) { + if (src_stride_abgr == width * 4 && dst_stride_yj == width && + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { width *= height; height = 1; - src_stride_argb = dst_stride_yj = 0; + src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; } -#if defined(HAS_ARGBTOYJROW_SSSE3) +#if defined(HAS_ABGRTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; + ABGRToYJRow = ABGRToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; + ABGRToYJRow = ABGRToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; + ABGRToYJRow = ABGRToYJRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; + ABGRToYJRow = ABGRToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; + ABGRToYJRow = ABGRToYJRow_NEON; } } #endif -#if defined(HAS_ARGBTOYJROW_MSA) +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYJRow = ARGBToYJRow_Any_MSA; + ABGRToYJRow = ABGRToYJRow_Any_MSA; + ABGRToUVJRow = ABGRToUVJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_MSA; + ABGRToYJRow = ABGRToYJRow_MSA; + } + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_MSA; } } #endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - ARGBToYJRow(src_argb, dst_yj, width); - src_argb += src_stride_argb; + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; dst_yj += dst_stride_yj; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; } return 0; } -// Convert RGBA to J400. +// Convert ABGR to J400. LIBYUV_API -int RGBAToJ400(const uint8_t* src_rgba, - int src_stride_rgba, +int ABGRToJ400(const uint8_t* src_abgr, + int src_stride_abgr, uint8_t* dst_yj, int dst_stride_yj, int width, int height) { int y; - void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = - RGBAToYJRow_C; - if (!src_rgba || !dst_yj || width <= 0 || height == 0) { + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_rgba = src_rgba + (height - 1) * src_stride_rgba; - src_stride_rgba = -src_stride_rgba; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; } // Coalesce rows. - if (src_stride_rgba == width * 4 && dst_stride_yj == width) { + if (src_stride_abgr == width * 4 && dst_stride_yj == width) { width *= height; height = 1; - src_stride_rgba = dst_stride_yj = 0; + src_stride_abgr = dst_stride_yj = 0; } -#if defined(HAS_RGBATOYJROW_SSSE3) +#if defined(HAS_ABGRTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToYJRow = RGBAToYJRow_Any_SSSE3; + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_SSSE3; + ABGRToYJRow = ABGRToYJRow_SSSE3; } } #endif -#if defined(HAS_RGBATOYJROW_AVX2) +#if defined(HAS_ABGRTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - RGBAToYJRow = RGBAToYJRow_Any_AVX2; + ABGRToYJRow = ABGRToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - RGBAToYJRow = RGBAToYJRow_AVX2; + ABGRToYJRow = ABGRToYJRow_AVX2; } } #endif -#if defined(HAS_RGBATOYJROW_NEON) +#if defined(HAS_ABGRTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - RGBAToYJRow = RGBAToYJRow_Any_NEON; + ABGRToYJRow = ABGRToYJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_NEON; + ABGRToYJRow = ABGRToYJRow_NEON; } } #endif -#if defined(HAS_RGBATOYJROW_MSA) +#if defined(HAS_ABGRTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - RGBAToYJRow = RGBAToYJRow_Any_MSA; + ABGRToYJRow = ABGRToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - RGBAToYJRow = RGBAToYJRow_MSA; + ABGRToYJRow = ABGRToYJRow_MSA; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; } } #endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif for (y = 0; y < height; ++y) { - RGBAToYJRow(src_rgba, dst_yj, width); - src_rgba += src_stride_rgba; + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; dst_yj += dst_stride_yj; } return 0; } +// Convert ARGB to AR64. +LIBYUV_API +int ARGBToAR64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ar64, + int dst_stride_ar64, + int width, + int height) { + int y; + void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, + int width) = ARGBToAR64Row_C; + if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ar64 = 0; + } +#if defined(HAS_ARGBTOAR64ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAR64Row = ARGBToAR64Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAR64Row = ARGBToAR64Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAR64Row = ARGBToAR64Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToAR64Row = ARGBToAR64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToAR64Row = ARGBToAR64Row_NEON; + } + } +#endif +#if defined(HAS_ARGBTOAR64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToAR64Row = ARGBToAR64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToAR64Row(src_argb, dst_ar64, width); + src_argb += src_stride_argb; + dst_ar64 += dst_stride_ar64; + } + return 0; +} + +// Convert ARGB to AB64. +LIBYUV_API +int ARGBToAB64(const uint8_t* src_argb, + int src_stride_argb, + uint16_t* dst_ab64, + int dst_stride_ab64, + int width, + int height) { + int y; + void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, + int width) = ARGBToAB64Row_C; + if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { + width *= height; + height = 1; + src_stride_argb = dst_stride_ab64 = 0; + } +#if defined(HAS_ARGBTOAB64ROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3; + if (IS_ALIGNED(width, 4)) { + ARGBToAB64Row = ARGBToAB64Row_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToAB64Row = ARGBToAB64Row_Any_AVX2; + if (IS_ALIGNED(width, 8)) { + ARGBToAB64Row = ARGBToAB64Row_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToAB64Row = ARGBToAB64Row_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToAB64Row = ARGBToAB64Row_NEON; + } + } +#endif +#if defined(HAS_ARGBTOAB64ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToAB64Row = ARGBToAB64Row_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToAB64Row(src_argb, dst_ab64, width); + src_argb += src_stride_argb; + dst_ab64 += dst_stride_ab64; + } + return 0; +} + // Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \ + defined(HAS_RAWTOYJROW_RVV) #define HAS_RAWTOYJROW #endif @@ -2355,7 +3062,7 @@ int RAWToJNV21(const uint8_t* src_raw, int halfwidth = (width + 1) >> 1; #if defined(HAS_RAWTOYJROW) void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = RAWToUVJRow_C; void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYJRow_C; @@ -2363,12 +3070,12 @@ int RAWToJNV21(const uint8_t* src_raw, void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = + uint8_t* dst_uj, uint8_t* dst_vj, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYJRow_C; #endif - void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + void (*MergeUVRow_)(const uint8_t* src_uj, const uint8_t* src_vj, uint8_t* dst_vu, int width) = MergeUVRow_C; if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; @@ -2403,6 +3110,27 @@ int RAWToJNV21(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToYJRow = RAWToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToYJRow = RAWToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYJRow = RAWToYJRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToYJRow = RAWToYJRow_RVV; + } +#endif // Other platforms do intermediate conversion from RAW to ARGB. #else // HAS_RAWTOYJROW @@ -2459,11 +3187,19 @@ int RAWToJNV21(const uint8_t* src_raw, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { + if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow_ = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow_ = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; @@ -2488,29 +3224,34 @@ int RAWToJNV21(const uint8_t* src_raw, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow_ = MergeUVRow_RVV; + } +#endif { // Allocate a row of uv. - align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31); #if !defined(HAS_RAWTOYJROW) // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); + const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); #endif for (y = 0; y < height - 1; y += 2) { #if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); RAWToYJRow(src_raw, dst_y, width); RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVJRow(row, kRowSize, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); + ARGBToUVJRow(row, row_size, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); + ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; @@ -2518,20 +3259,20 @@ int RAWToJNV21(const uint8_t* src_raw, } if (height & 1) { #if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToUVJRow(src_raw, 0, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); RAWToYJRow(src_raw, dst_y, width); #else RAWToARGBRow(src_raw, row, width); - ARGBToUVJRow(row, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + ARGBToUVJRow(row, 0, row_uj, row_vj, width); + MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth); ARGBToYJRow(row, dst_y, width); #endif } #if !defined(HAS_RAWTOYJROW) free_aligned_buffer_64(row); #endif - free_aligned_buffer_64(row_u); + free_aligned_buffer_64(row_uj); } return 0; } |