diff options
Diffstat (limited to 'files/source/planar_functions.cc')
-rw-r--r-- | files/source/planar_functions.cc | 659 |
1 files changed, 510 insertions, 149 deletions
diff --git a/files/source/planar_functions.cc b/files/source/planar_functions.cc index 169d4a8f..d115a2a1 100644 --- a/files/source/planar_functions.cc +++ b/files/source/planar_functions.cc @@ -75,6 +75,11 @@ void CopyPlane(const uint8_t* src_y, CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif +#if defined(HAS_COPYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + CopyRow = CopyRow_RVV; + } +#endif // Copy plane for (y = 0; y < height; ++y) { @@ -162,7 +167,7 @@ void Convert8To16Plane(const uint8_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, - int scale, // 16384 for 10 bits + int scale, // 1024 for 10 bits int width, int height) { int y; @@ -333,6 +338,45 @@ int I210Copy(const uint16_t* src_y, return 0; } +// Copy I410. +LIBYUV_API +int I410Copy(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + if (dst_y) { + CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height); + CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height); + return 0; +} + // Copy I400. LIBYUV_API int I400ToI400(const uint8_t* src_y, @@ -385,6 +429,7 @@ int I420ToI400(const uint8_t* src_y, } // Copy NV12. Supports inverting. +LIBYUV_API int NV12Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, @@ -418,6 +463,7 @@ int NV12Copy(const uint8_t* src_y, } // Copy NV21. Supports inverting. +LIBYUV_API int NV21Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, @@ -504,6 +550,11 @@ void SplitUVPlane(const uint8_t* src_uv, } } #endif +#if defined(HAS_SPLITUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitUVRow = SplitUVRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Copy a row of UV. @@ -553,11 +604,19 @@ void MergeUVPlane(const uint8_t* src_u, #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { + if (IS_ALIGNED(width, 16)) { MergeUVRow = MergeUVRow_AVX2; } } #endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 32)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow = MergeUVRow_Any_NEON; @@ -582,6 +641,11 @@ void MergeUVPlane(const uint8_t* src_u, } } #endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of UV. @@ -687,7 +751,7 @@ void MergeUVPlane_16(const uint16_t* src_u, #if defined(HAS_MERGEUVROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_16 = MergeUVRow_16_Any_AVX2; - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { MergeUVRow_16 = MergeUVRow_16_AVX2; } } @@ -911,31 +975,31 @@ int NV21ToNV12(const uint8_t* src_y, return 0; } +// Test if tile_height is a power of 2 (16 or 32) +#define IS_POWEROFTWO(x) (!((x) & ((x)-1))) + // Detile a plane of data // tile width is 16 and assumed. // tile_height is 16 or 32 for MM21. // src_stride_y is bytes per row of source ignoring tiling. e.g. 640 // TODO: More detile row functions. - LIBYUV_API -void DetilePlane(const uint8_t* src_y, - int src_stride_y, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height, - int tile_height) { +int DetilePlane(const uint8_t* src_y, + int src_stride_y, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { const ptrdiff_t src_tile_stride = 16 * tile_height; int y; void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width) = DetileRow_C; - assert(src_stride_y >= 0); - assert(tile_height > 0); - assert(src_stride_y > 0); - - if (width <= 0 || height == 0) { - return; + if (!src_y || !dst_y || width <= 0 || height == 0 || + !IS_POWEROFTWO(tile_height)) { + return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; @@ -970,6 +1034,72 @@ void DetilePlane(const uint8_t* src_y, src_y = src_y - src_tile_stride + src_stride_y * tile_height; } } + return 0; +} + +// Convert a plane of 16 bit tiles of 16 x H to linear. +// tile width is 16 and assumed. +// tile_height is 16 or 32 for MT2T. +LIBYUV_API +int DetilePlane_16(const uint16_t* src_y, + int src_stride_y, + uint16_t* dst_y, + int dst_stride_y, + int width, + int height, + int tile_height) { + const ptrdiff_t src_tile_stride = 16 * tile_height; + int y; + void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride, + uint16_t* dst, int width) = DetileRow_16_C; + if (!src_y || !dst_y || width <= 0 || height == 0 || + !IS_POWEROFTWO(tile_height)) { + return -1; + } + + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_stride_y = -dst_stride_y; + } + +#if defined(HAS_DETILEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + DetileRow_16 = DetileRow_16_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_SSE2; + } + } +#endif +#if defined(HAS_DETILEROW_16_AVX) + if (TestCpuFlag(kCpuHasAVX)) { + DetileRow_16 = DetileRow_16_Any_AVX; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_AVX; + } + } +#endif +#if defined(HAS_DETILEROW_16_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DetileRow_16 = DetileRow_16_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileRow_16 = DetileRow_16_NEON; + } + } +#endif + + // Detile plane + for (y = 0; y < height; ++y) { + DetileRow_16(src_y, src_tile_stride, dst_y, width); + dst_y += dst_stride_y; + src_y += 16; + // Advance to next row of tiles. + if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_y = src_y - src_tile_stride + src_stride_y * tile_height; + } + } + return 0; } LIBYUV_API @@ -1033,6 +1163,74 @@ void DetileSplitUVPlane(const uint8_t* src_uv, } } +LIBYUV_API +void DetileToYUY2(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height, + int tile_height) { + const ptrdiff_t src_y_tile_stride = 16 * tile_height; + const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2; + int y; + void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, + const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, + uint8_t* dst_yuy2, int width) = DetileToYUY2_C; + assert(src_stride_y >= 0); + assert(src_stride_y > 0); + assert(src_stride_uv >= 0); + assert(src_stride_uv > 0); + assert(tile_height > 0); + + if (width <= 0 || height == 0 || tile_height <= 0) { + return; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; + } + +#if defined(HAS_DETILETOYUY2_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + DetileToYUY2 = DetileToYUY2_Any_NEON; + if (IS_ALIGNED(width, 16)) { + DetileToYUY2 = DetileToYUY2_NEON; + } + } +#endif + +#if defined(HAS_DETILETOYUY2_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + DetileToYUY2 = DetileToYUY2_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + DetileToYUY2 = DetileToYUY2_SSE2; + } + } +#endif + + // Detile plane + for (y = 0; y < height; ++y) { + DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, + width); + dst_yuy2 += dst_stride_yuy2; + src_y += 16; + + if (y & 0x1) + src_uv += 16; + + // Advance to next row of tiles. + if ((y & (tile_height - 1)) == (tile_height - 1)) { + src_y = src_y - src_y_tile_stride + src_stride_y * tile_height; + src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2); + } + } +} + // Support function for NV12 etc RGB channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API @@ -1085,6 +1283,11 @@ void SplitRGBPlane(const uint8_t* src_rgb, } } #endif +#if defined(HAS_SPLITRGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitRGBRow = SplitRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Copy a row of RGB. @@ -1144,6 +1347,11 @@ void MergeRGBPlane(const uint8_t* src_r, } } #endif +#if defined(HAS_MERGERGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeRGBRow = MergeRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of RGB. @@ -1156,18 +1364,18 @@ void MergeRGBPlane(const uint8_t* src_r, } LIBYUV_NOINLINE -void SplitARGBPlaneAlpha(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - uint8_t* dst_a, - int dst_stride_a, - int width, - int height) { +static void SplitARGBPlaneAlpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { int y; void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, uint8_t* dst_a, int width) = @@ -1175,6 +1383,9 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_argb == width * 4 && dst_stride_r == width && dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) { width *= height; @@ -1215,6 +1426,11 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, } } #endif +#if defined(HAS_SPLITARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitARGBRow = SplitARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width); @@ -1227,21 +1443,24 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb, } LIBYUV_NOINLINE -void SplitARGBPlaneOpaque(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_r, - int dst_stride_r, - uint8_t* dst_g, - int dst_stride_g, - uint8_t* dst_b, - int dst_stride_b, - int width, - int height) { +static void SplitARGBPlaneOpaque(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_r, + int dst_stride_r, + uint8_t* dst_g, + int dst_stride_g, + uint8_t* dst_b, + int dst_stride_b, + int width, + int height) { int y; void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitXRGBRow_C; assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_argb == width * 4 && dst_stride_r == width && dst_stride_g == width && dst_stride_b == width) { width *= height; @@ -1281,6 +1500,11 @@ void SplitARGBPlaneOpaque(const uint8_t* src_argb, } } #endif +#if defined(HAS_SPLITXRGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitXRGBRow = SplitXRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width); @@ -1328,18 +1552,18 @@ void SplitARGBPlane(const uint8_t* src_argb, } LIBYUV_NOINLINE -void MergeARGBPlaneAlpha(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - const uint8_t* src_a, - int src_stride_a, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneAlpha(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + const uint8_t* src_a, + int src_stride_a, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, const uint8_t* src_a, @@ -1347,6 +1571,9 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && src_stride_a == width && dst_stride_argb == width * 4) { width *= height; @@ -1378,6 +1605,11 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, } } #endif +#if defined(HAS_MERGEARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeARGBRow = MergeARGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width); @@ -1390,16 +1622,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r, } LIBYUV_NOINLINE -void MergeARGBPlaneOpaque(const uint8_t* src_r, - int src_stride_r, - const uint8_t* src_g, - int src_stride_g, - const uint8_t* src_b, - int src_stride_b, - uint8_t* dst_argb, - int dst_stride_argb, - int width, - int height) { +static void MergeARGBPlaneOpaque(const uint8_t* src_r, + int src_stride_r, + const uint8_t* src_g, + int src_stride_g, + const uint8_t* src_b, + int src_stride_b, + uint8_t* dst_argb, + int dst_stride_argb, + int width, + int height) { int y; void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_argb, int width) = @@ -1407,6 +1639,9 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r, assert(height > 0); + if (width <= 0 || height == 0) { + return; + } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && dst_stride_argb == width * 4) { width *= height; @@ -1437,6 +1672,11 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r, } } #endif +#if defined(HAS_MERGEXRGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeXRGBRow = MergeXRGBRow_RVV; + } +#endif for (y = 0; y < height; ++y) { MergeXRGBRow(src_r, src_g, src_b, dst_argb, width); @@ -1888,6 +2128,16 @@ int YUY2ToI422(const uint8_t* src_yuy2, } } #endif +#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + YUY2ToYRow = YUY2ToYRow_Any_LSX; + YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + YUY2ToYRow = YUY2ToYRow_LSX; + YUY2ToUV422Row = YUY2ToUV422Row_LSX; + } + } +#endif #if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { YUY2ToYRow = YUY2ToYRow_Any_LASX; @@ -1984,6 +2234,16 @@ int UYVYToI422(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + UYVYToYRow = UYVYToYRow_Any_LSX; + UYVYToUV422Row = UYVYToUV422Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_LSX; + UYVYToUV422Row = UYVYToUV422Row_LSX; + } + } +#endif #if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { UYVYToYRow = UYVYToYRow_Any_LASX; @@ -2131,6 +2391,14 @@ int UYVYToY(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_UYVYTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + UYVYToYRow = UYVYToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_LSX; + } + } +#endif for (y = 0; y < height; ++y) { UYVYToYRow(src_uyvy, dst_y, width); @@ -2189,6 +2457,14 @@ void MirrorPlane(const uint8_t* src_y, } } #endif +#if defined(HAS_MIRRORROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MirrorRow = MirrorRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + MirrorRow = MirrorRow_LSX; + } + } +#endif #if defined(HAS_MIRRORROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { MirrorRow = MirrorRow_Any_LASX; @@ -2255,6 +2531,14 @@ void MirrorUVPlane(const uint8_t* src_uv, } } #endif +#if defined(HAS_MIRRORUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MirrorUVRow = MirrorUVRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + MirrorUVRow = MirrorUVRow_LSX; + } + } +#endif #if defined(HAS_MIRRORUVROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { MirrorUVRow = MirrorUVRow_Any_LASX; @@ -2427,6 +2711,14 @@ int ARGBMirror(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBMIRRORROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBMirrorRow = ARGBMirrorRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBMirrorRow = ARGBMirrorRow_LSX; + } + } +#endif #if defined(HAS_ARGBMIRRORROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBMirrorRow = ARGBMirrorRow_Any_LASX; @@ -2809,6 +3101,14 @@ int ARGBMultiply(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBMULTIPLYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBMultiplyRow = ARGBMultiplyRow_LSX; + } + } +#endif #if defined(HAS_ARGBMULTIPLYROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX; @@ -2894,6 +3194,14 @@ int ARGBAdd(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBADDROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBAddRow = ARGBAddRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBAddRow = ARGBAddRow_LSX; + } + } +#endif #if defined(HAS_ARGBADDROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBAddRow = ARGBAddRow_Any_LASX; @@ -2974,6 +3282,14 @@ int ARGBSubtract(const uint8_t* src_argb0, } } #endif +#if defined(HAS_ARGBSUBTRACTROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBSubtractRow = ARGBSubtractRow_Any_LSX; + if (IS_ALIGNED(width, 4)) { + ARGBSubtractRow = ARGBSubtractRow_LSX; + } + } +#endif #if defined(HAS_ARGBSUBTRACTROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBSubtractRow = ARGBSubtractRow_Any_LASX; @@ -3051,6 +3367,11 @@ int RAWToRGB24(const uint8_t* src_raw, } } #endif +#if defined(HAS_RAWTORGB24ROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToRGB24Row = RAWToRGB24Row_RVV; + } +#endif for (y = 0; y < height; ++y) { RAWToRGB24Row(src_raw, dst_rgb24, width); @@ -3060,6 +3381,7 @@ int RAWToRGB24(const uint8_t* src_raw, return 0; } +// TODO(fbarchard): Consider uint8_t value LIBYUV_API void SetPlane(uint8_t* dst_y, int dst_stride_y, @@ -3067,7 +3389,7 @@ void SetPlane(uint8_t* dst_y, int height, uint32_t value) { int y; - void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; + void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C; if (width <= 0 || height == 0) { return; @@ -3120,7 +3442,7 @@ void SetPlane(uint8_t* dst_y, // Set plane for (y = 0; y < height; ++y) { - SetRow(dst_y, value, width); + SetRow(dst_y, (uint8_t)value, width); dst_y += dst_stride_y; } } @@ -3168,7 +3490,7 @@ int ARGBRect(uint8_t* dst_argb, int height, uint32_t value) { int y; - void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) = + void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) = ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -3293,6 +3615,14 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBATTENUATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBAttenuateRow = ARGBAttenuateRow_LSX; + } + } +#endif #if defined(HAS_ARGBATTENUATEROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX; @@ -3301,6 +3631,11 @@ int ARGBAttenuate(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBATTENUATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBAttenuateRow = ARGBAttenuateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { ARGBAttenuateRow(src_argb, dst_argb, width); @@ -3401,6 +3736,11 @@ int ARGBGrayTo(const uint8_t* src_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif +#if defined(HAS_ARGBGRAYROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBGrayRow = ARGBGrayRow_LSX; + } +#endif #if defined(HAS_ARGBGRAYROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { ARGBGrayRow = ARGBGrayRow_LASX; @@ -3451,6 +3791,11 @@ int ARGBGray(uint8_t* dst_argb, ARGBGrayRow = ARGBGrayRow_MSA; } #endif +#if defined(HAS_ARGBGRAYROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBGrayRow = ARGBGrayRow_LSX; + } +#endif #if defined(HAS_ARGBGRAYROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { ARGBGrayRow = ARGBGrayRow_LASX; @@ -3473,7 +3818,7 @@ int ARGBSepia(uint8_t* dst_argb, int width, int height) { int y; - void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C; + void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -3499,6 +3844,11 @@ int ARGBSepia(uint8_t* dst_argb, ARGBSepiaRow = ARGBSepiaRow_MSA; } #endif +#if defined(HAS_ARGBSEPIAROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) { + ARGBSepiaRow = ARGBSepiaRow_LSX; + } +#endif #if defined(HAS_ARGBSEPIAROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) { ARGBSepiaRow = ARGBSepiaRow_LASX; @@ -3616,7 +3966,7 @@ int ARGBColorTable(uint8_t* dst_argb, int width, int height) { int y; - void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, + void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb, int width) = ARGBColorTableRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || @@ -3652,7 +4002,7 @@ int RGBColorTable(uint8_t* dst_argb, int width, int height) { int y; - void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, + void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb, int width) = RGBColorTableRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || @@ -3697,7 +4047,7 @@ int ARGBQuantize(uint8_t* dst_argb, int width, int height) { int y; - void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size, + void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) = ARGBQuantizeRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || @@ -3924,6 +4274,11 @@ int ARGBShade(const uint8_t* src_argb, ARGBShadeRow = ARGBShadeRow_MSA; } #endif +#if defined(HAS_ARGBSHADEROW_LSX) + if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) { + ARGBShadeRow = ARGBShadeRow_LSX; + } +#endif #if defined(HAS_ARGBSHADEROW_LASX) if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) { ARGBShadeRow = ARGBShadeRow_LASX; @@ -3950,7 +4305,7 @@ int InterpolatePlane(const uint8_t* src0, int height, int interpolation) { int y; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { @@ -4008,6 +4363,11 @@ int InterpolatePlane(const uint8_t* src0, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif for (y = 0; y < height; ++y) { InterpolateRow(dst, src0, src1 - src0, width, interpolation); @@ -4030,7 +4390,7 @@ int InterpolatePlane_16(const uint16_t* src0, int height, int interpolation) { int y; - void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr, + void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { @@ -4213,6 +4573,14 @@ int ARGBShuffle(const uint8_t* src_bgra, } } #endif +#if defined(HAS_ARGBSHUFFLEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBShuffleRow = ARGBShuffleRow_Any_LSX; + if (IS_ALIGNED(width, 8)) { + ARGBShuffleRow = ARGBShuffleRow_LSX; + } + } +#endif #if defined(HAS_ARGBSHUFFLEROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { ARGBShuffleRow = ARGBShuffleRow_Any_LASX; @@ -4444,6 +4812,11 @@ static int ARGBSobelize(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYJRow = ARGBToYJRow_RVV; + } +#endif #if defined(HAS_SOBELYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { @@ -4477,16 +4850,16 @@ static int ARGBSobelize(const uint8_t* src_argb, #endif { // 3 rows with edges before/after. - const int kRowSize = (width + kEdge + 31) & ~31; - align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); + const int row_size = (width + kEdge + 31) & ~31; + align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge)); uint8_t* row_sobelx = rows; - uint8_t* row_sobely = rows + kRowSize; - uint8_t* row_y = rows + kRowSize * 2; + uint8_t* row_sobely = rows + row_size; + uint8_t* row_y = rows + row_size * 2; // Convert first row. uint8_t* row_y0 = row_y + kEdge; - uint8_t* row_y1 = row_y0 + kRowSize; - uint8_t* row_y2 = row_y1 + kRowSize; + uint8_t* row_y1 = row_y0 + row_size; + uint8_t* row_y2 = row_y1 + row_size; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. @@ -5027,9 +5400,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, return 0; } -// TODO(fbarchard): Consider if width is even Y channel can be split -// directly. A SplitUVRow_Odd function could copy the remaining chroma. - LIBYUV_API int YUY2ToNV12(const uint8_t* src_yuy2, int src_stride_yuy2, @@ -5040,13 +5410,10 @@ int YUY2ToNV12(const uint8_t* src_yuy2, int width, int height) { int y; - int halfwidth = (width + 1) >> 1; - void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, - int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) = InterpolateRow_C; - + void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = + YUY2ToYRow_C; + void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2, + uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C; if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -5057,109 +5424,91 @@ int YUY2ToNV12(const uint8_t* src_yuy2, src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } -#if defined(HAS_SPLITUVROW_SSE2) +#if defined(HAS_YUY2TOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { - SplitUVRow = SplitUVRow_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_SSE2; + YUY2ToYRow = YUY2ToYRow_SSE2; } } #endif -#if defined(HAS_SPLITUVROW_AVX2) +#if defined(HAS_YUY2TOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - SplitUVRow = SplitUVRow_Any_AVX2; + YUY2ToYRow = YUY2ToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_AVX2; + YUY2ToYRow = YUY2ToYRow_AVX2; } } #endif -#if defined(HAS_SPLITUVROW_NEON) +#if defined(HAS_YUY2TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - SplitUVRow = SplitUVRow_Any_NEON; + YUY2ToYRow = YUY2ToYRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - SplitUVRow = SplitUVRow_NEON; + YUY2ToYRow = YUY2ToYRow_NEON; } } #endif -#if defined(HAS_SPLITUVROW_MSA) +#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { - SplitUVRow = SplitUVRow_Any_MSA; + YUY2ToYRow = YUY2ToYRow_Any_MSA; if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_MSA; + YUY2ToYRow = YUY2ToYRow_MSA; } } #endif -#if defined(HAS_SPLITUVROW_LSX) +#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { - SplitUVRow = SplitUVRow_Any_LSX; - if (IS_ALIGNED(width, 32)) { - SplitUVRow = SplitUVRow_LSX; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_SSSE3; + YUY2ToYRow = YUY2ToYRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - InterpolateRow = InterpolateRow_SSSE3; + YUY2ToYRow = YUY2ToYRow_LSX; } } #endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_AVX2; +#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + YUY2ToYRow = YUY2ToYRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_AVX2; + YUY2ToYRow = YUY2ToYRow_LASX; } } #endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_NEON; + +#if defined(HAS_YUY2TONVUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { - InterpolateRow = InterpolateRow_NEON; + YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2; } } #endif -#if defined(HAS_INTERPOLATEROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - InterpolateRow = InterpolateRow_Any_MSA; +#if defined(HAS_YUY2TONVUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_MSA; + YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2; } } #endif -#if defined(HAS_INTERPOLATEROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - InterpolateRow = InterpolateRow_Any_LSX; - if (IS_ALIGNED(width, 32)) { - InterpolateRow = InterpolateRow_LSX; +#if defined(HAS_YUY2TONVUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + YUY2ToNVUVRow = YUY2ToNVUVRow_NEON; } } #endif - { - int awidth = halfwidth * 2; - // row of y and 2 rows of uv - align_buffer_64(rows, awidth * 3); - - for (y = 0; y < height - 1; y += 2) { - // Split Y from UV. - SplitUVRow(src_yuy2, rows, rows + awidth, awidth); - memcpy(dst_y, rows, width); - SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth); - memcpy(dst_y + dst_stride_y, rows, width); - InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128); - src_yuy2 += src_stride_yuy2 * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; - } - if (height & 1) { - // Split Y from UV. - SplitUVRow(src_yuy2, rows, dst_uv, awidth); - memcpy(dst_y, rows, width); - } - free_aligned_buffer_64(rows); + for (y = 0; y < height - 1; y += 2) { + YUY2ToYRow(src_yuy2, dst_y, width); + YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width); + YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width); + src_yuy2 += src_stride_yuy2 * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + YUY2ToYRow(src_yuy2, dst_y, width); + YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width); } return 0; } @@ -5177,7 +5526,7 @@ int UYVYToNV12(const uint8_t* src_uyvy, int halfwidth = (width + 1) >> 1; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, + void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; @@ -5231,6 +5580,12 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_SPLITUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + SplitUVRow = SplitUVRow_RVV; + } +#endif + #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; @@ -5271,6 +5626,11 @@ int UYVYToNV12(const uint8_t* src_uyvy, } } #endif +#if defined(HAS_INTERPOLATEROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif { int awidth = halfwidth * 2; @@ -5336,6 +5696,7 @@ void HalfMergeUVPlane(const uint8_t* src_u, HalfMergeUVRow = HalfMergeUVRow_AVX2; } #endif + for (y = 0; y < height - 1; y += 2) { // Merge a row of U and V into a row of UV. HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width); |