diff options
Diffstat (limited to 'files/source/convert.cc')
-rw-r--r-- | files/source/convert.cc | 1863 |
1 files changed, 1280 insertions, 583 deletions
diff --git a/files/source/convert.cc b/files/source/convert.cc index 614fa482..7178580f 100644 --- a/files/source/convert.cc +++ b/files/source/convert.cc @@ -15,7 +15,9 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/row.h" -#include "libyuv/scale.h" // For ScalePlane() +#include "libyuv/scale.h" // For ScalePlane() +#include "libyuv/scale_row.h" // For FixedDiv +#include "libyuv/scale_uv.h" // For UVScale() #ifdef __cplusplus namespace libyuv { @@ -48,7 +50,7 @@ static int I4xxToI420(const uint8_t* src_y, const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); - if (src_uv_width == 0 || src_uv_height == 0) { + if (src_uv_width <= 0 || src_uv_height == 0) { return -1; } if (dst_y) { @@ -82,7 +84,8 @@ int I420Copy(const uint8_t* src_y, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -124,7 +127,8 @@ int I010Copy(const uint16_t* src_y, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { return -1; } // Negative height means invert the image. @@ -148,6 +152,53 @@ int I010Copy(const uint16_t* src_y, return 0; } +static int Planar16bitTo8bit(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + int subsample_x, + int subsample_y, + int depth) { + int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); + int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); + int scale = 1 << (24 - depth); + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + uv_height = -uv_height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (uv_height - 1) * src_stride_u; + src_v = src_v + (uv_height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, + height); + // Convert UV planes. + Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, scale, uv_width, + uv_height); + Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, scale, uv_width, + uv_height); + return 0; +} + // Convert 10 bit YUV to 8 bit. LIBYUV_API int I010ToI420(const uint16_t* src_y, @@ -164,34 +215,344 @@ int I010ToI420(const uint16_t* src_y, int dst_stride_v, int width, int height) { - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { + return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, 1, + 1, 10); +} + +LIBYUV_API +int I210ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + const int depth = 10; + const int scale = 1 << (24 - depth); + + if (width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } - // Convert Y plane. - Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width, - height); - // Convert UV planes. - Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth, - halfheight); - Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth, - halfheight); + { + const int uv_width = SUBSAMPLE(width, 1, 1); + const int uv_height = SUBSAMPLE(height, 1, 1); + const int dy = FixedDiv(height, uv_height); + + Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width, + height); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u, + dst_stride_u, src_u, dst_u, 0, 32768, dy, + /*bpp=*/1, scale, kFilterBilinear); + ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v, + dst_stride_v, src_v, dst_v, 0, 32768, dy, + /*bpp=*/1, scale, kFilterBilinear); + } return 0; } +LIBYUV_API +int I210ToI422(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, 1, + 0, 10); +} + +LIBYUV_API +int I410ToI444(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, 0, + 0, 10); +} + +LIBYUV_API +int I012ToI420(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, 1, + 1, 12); +} + +LIBYUV_API +int I212ToI422(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, 1, + 0, 12); +} + +LIBYUV_API +int I412ToI444(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Planar16bitTo8bit(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, 0, + 0, 12); +} + +// Any Ix10 To I010 format with mirroring. +static int Ix10ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height, + int subsample_x, + int subsample_y) { + const int dst_y_width = Abs(width); + const int dst_y_height = Abs(height); + const int src_uv_width = SUBSAMPLE(width, subsample_x, subsample_x); + const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y); + const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); + const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + if (width <= 0 || height == 0) { + return -1; + } + if (dst_y) { + ScalePlane_12(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + dst_y_width, dst_y_height, kFilterBilinear); + } + ScalePlane_12(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, + dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); + ScalePlane_12(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, + dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); + return 0; +} + +LIBYUV_API +int I410ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Ix10ToI010(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, 0, 0); +} + +LIBYUV_API +int I210ToI010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + return Ix10ToI010(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, 1, 0); +} + +// Any I[420]1[02] to P[420]1[02] format with mirroring. +static int IxxxToPxxx(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height, + int subsample_x, + int subsample_y, + int depth) { + const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); + const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); + if (width <= 0 || height == 0) { + return -1; + } + + ConvertToMSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height, + depth); + MergeUVPlane_16(src_u, src_stride_u, src_v, src_stride_v, dst_uv, + dst_stride_uv, uv_width, uv_height, depth); + return 0; +} + +LIBYUV_API +int I010ToP010(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv, + width, height, 1, 1, 10); +} + +LIBYUV_API +int I210ToP210(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv, + width, height, 1, 0, 10); +} + +LIBYUV_API +int I012ToP012(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv, + width, height, 1, 1, 12); +} + +LIBYUV_API +int I212ToP212(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_u, + int src_stride_u, + const uint16_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + return IxxxToPxxx(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv, + width, height, 1, 0, 12); +} + // 422 chroma is 1/2 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API @@ -215,6 +576,48 @@ int I422ToI420(const uint8_t* src_y, dst_v, dst_stride_v, width, height, src_uv_width, height); } +LIBYUV_API +int I422ToI210(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_u, + int dst_stride_u, + uint16_t* dst_v, + int dst_stride_v, + int width, + int height) { + int halfwidth = (width + 1) >> 1; + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + + // Convert Y plane. + Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 1024, width, + height); + // Convert UV planes. + Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 1024, halfwidth, + height); + Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 1024, halfwidth, + height); + return 0; +} + // TODO(fbarchard): Implement row conversion. LIBYUV_API int I422ToNV21(const uint8_t* src_y, @@ -256,6 +659,60 @@ int I422ToNV21(const uint8_t* src_y, return 0; } +LIBYUV_API +int MM21ToNV12(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + if (!src_uv || !dst_uv || width <= 0) { + return -1; + } + + int sign = height < 0 ? -1 : 1; + + if (dst_y) { + DetilePlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height, 32); + } + DetilePlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, (width + 1) & ~1, + (height + sign) / 2, 16); + + return 0; +} + +LIBYUV_API +int MM21ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int sign = height < 0 ? -1 : 1; + + if (!src_uv || !dst_u || !dst_v || width <= 0) { + return -1; + } + + if (dst_y) { + DetilePlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height, 32); + } + DetileSplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, + dst_stride_v, (width + 1) & ~1, (height + sign) / 2, 16); + + return 0; +} + #ifdef I422TONV21_ROW_VERSION // Unittest fails for this version. // 422 chroma is 1/2 width, 1x height @@ -328,11 +785,11 @@ int I422ToNV21(const uint8_t* src_y, } } #endif -#if defined(HAS_MERGEUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - MergeUVRow = MergeUVRow_Any_MMI; - if (IS_ALIGNED(halfwidth, 8)) { - MergeUVRow = MergeUVRow_MMI; +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; } } #endif @@ -368,11 +825,11 @@ int I422ToNV21(const uint8_t* src_y, } } #endif -#if defined(HAS_INTERPOLATEROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - InterpolateRow = InterpolateRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - InterpolateRow = InterpolateRow_MMI; +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(width, 32)) { + InterpolateRow = InterpolateRow_LSX; } } #endif @@ -426,9 +883,8 @@ int I444ToI420(const uint8_t* src_y, dst_v, dst_stride_v, width, height, width, height); } -// TODO(fbarchard): Implement row conversion. LIBYUV_API -int I444ToNV21(const uint8_t* src_y, +int I444ToNV12(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, @@ -436,16 +892,16 @@ int I444ToNV21(const uint8_t* src_y, int src_stride_v, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, + uint8_t* dst_uv, + int dst_stride_uv, int width, int height) { - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; + if (!src_y || !src_u || !src_v || !dst_uv || width <= 0 || height == 0) { + return -1; + } // Negative height means invert the image. if (height < 0) { height = -height; - halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (height - 1) * src_stride_u; src_v = src_v + (height - 1) * src_stride_v; @@ -453,19 +909,32 @@ int I444ToNV21(const uint8_t* src_y, src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } - // Allocate u and v buffers - align_buffer_64(plane_u, halfwidth * halfheight * 2); - uint8_t* plane_v = plane_u + halfwidth * halfheight; - - I444ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, - dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, - height); - MergeUVPlane(plane_v, halfwidth, plane_u, halfwidth, dst_vu, dst_stride_vu, - halfwidth, halfheight); - free_aligned_buffer_64(plane_u); + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + HalfMergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv, + dst_stride_uv, width, height); return 0; } +LIBYUV_API +int I444ToNV21(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_u, + int src_stride_u, + const uint8_t* src_v, + int src_stride_v, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + return I444ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u, + src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu, + width, height); +} + // I400 is greyscale typically used in MJPG LIBYUV_API int I400ToI420(const uint8_t* src_y, @@ -527,70 +996,21 @@ int I400ToNV21(const uint8_t* src_y, return 0; } -static void CopyPlane2(const uint8_t* src, - int src_stride_0, - int src_stride_1, - uint8_t* dst, - int dst_stride, - int width, - int height) { - int y; - void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; - } -#endif -#if defined(HAS_COPYROW_AVX) - if (TestCpuFlag(kCpuHasAVX)) { - CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; - } -#endif - - // Copy plane - for (y = 0; y < height - 1; y += 2) { - CopyRow(src, dst, width); - CopyRow(src + src_stride_0, dst + dst_stride, width); - src += src_stride_0 + src_stride_1; - dst += dst_stride * 2; - } - if (height & 1) { - CopyRow(src, dst, width); - } -} - -// Support converting from FOURCC_M420 -// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for -// easy conversion to I420. -// M420 format description: -// M420 is row biplanar 420: 2 rows of Y and 1 row of UV. -// Chroma is half width / half height. (420) -// src_stride_m420 is row planar. Normally this will be the width in pixels. -// The UV plane is half width, but 2 values, so src_stride_m420 applies to -// this as well as the two Y planes. -static int X420ToI420(const uint8_t* src_y, - int src_stride_y0, - int src_stride_y1, - const uint8_t* src_uv, - int src_stride_uv, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { +// Convert NV12 to I420. +// TODO(fbarchard): Consider inverting destination. Faster on ARM with prfm. +LIBYUV_API +int NV12ToI420(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_uv || !dst_u || !dst_v || width <= 0 || height == 0) { @@ -600,21 +1020,16 @@ static int X420ToI420(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - if (dst_y) { - dst_y = dst_y + (height - 1) * dst_stride_y; - } - dst_u = dst_u + (halfheight - 1) * dst_stride_u; - dst_v = dst_v + (halfheight - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; + src_stride_y = -src_stride_y; + src_stride_uv = -src_stride_uv; } // Coalesce rows. - if (src_stride_y0 == width && src_stride_y1 == width && - dst_stride_y == width) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; - src_stride_y0 = src_stride_y1 = dst_stride_y = 0; + src_stride_y = dst_stride_y = 0; } // Coalesce rows. if (src_stride_uv == halfwidth * 2 && dst_stride_u == halfwidth && @@ -625,12 +1040,7 @@ static int X420ToI420(const uint8_t* src_y, } if (dst_y) { - if (src_stride_y0 == src_stride_y1) { - CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height); - } else { - CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, - width, height); - } + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } // Split UV plane - NV12 / NV21 @@ -640,12 +1050,12 @@ static int X420ToI420(const uint8_t* src_y, return 0; } -// Convert NV12 to I420. +// Convert NV21 to I420. Same as NV12 but u and v pointers swapped. LIBYUV_API -int NV12ToI420(const uint8_t* src_y, +int NV21ToI420(const uint8_t* src_y, int src_stride_y, - const uint8_t* src_uv, - int src_stride_uv, + const uint8_t* src_vu, + int src_stride_vu, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, @@ -654,46 +1064,107 @@ int NV12ToI420(const uint8_t* src_y, int dst_stride_v, int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, src_uv, src_stride_uv, - dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, - dst_stride_v, width, height); + return NV12ToI420(src_y, src_stride_y, src_vu, src_stride_vu, dst_y, + dst_stride_y, dst_v, dst_stride_v, dst_u, dst_stride_u, + width, height); } -// Convert NV21 to I420. Same as NV12 but u and v pointers swapped. LIBYUV_API -int NV21ToI420(const uint8_t* src_y, +int NV12ToNV24(const uint8_t* src_y, int src_stride_y, - const uint8_t* src_vu, - int src_stride_vu, + const uint8_t* src_uv, + int src_stride_uv, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, src_vu, src_stride_vu, - dst_y, dst_stride_y, dst_v, dst_stride_v, dst_u, - dst_stride_u, width, height); + if (width <= 0 || height == 0) { + return -1; + } + + if (dst_y) { + ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + } + UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return 0; } -// Convert M420 to I420. LIBYUV_API -int M420ToI420(const uint8_t* src_m420, - int src_stride_m420, +int NV16ToNV24(const uint8_t* src_y, + int src_stride_y, + const uint8_t* src_uv, + int src_stride_uv, uint8_t* dst_y, int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_uv, + int dst_stride_uv, int width, int height) { - return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, - src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, dst_y, - dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, - width, height); + if (width <= 0 || height == 0) { + return -1; + } + + if (dst_y) { + ScalePlane(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + } + UVScale(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return 0; +} + +LIBYUV_API +int P010ToP410(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + if (width <= 0 || height == 0) { + return -1; + } + + if (dst_y) { + ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + } + UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), + SUBSAMPLE(height, 1, 1), dst_uv, dst_stride_uv, Abs(width), + Abs(height), kFilterBilinear); + return 0; +} + +LIBYUV_API +int P210ToP410(const uint16_t* src_y, + int src_stride_y, + const uint16_t* src_uv, + int src_stride_uv, + uint16_t* dst_y, + int dst_stride_y, + uint16_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + if (width <= 0 || height == 0) { + return -1; + } + + if (dst_y) { + ScalePlane_16(src_y, src_stride_y, width, height, dst_y, dst_stride_y, + Abs(width), Abs(height), kFilterBilinear); + } + UVScale_16(src_uv, src_stride_uv, SUBSAMPLE(width, 1, 1), height, dst_uv, + dst_stride_uv, Abs(width), Abs(height), kFilterBilinear); + return 0; } // Convert YUY2 to I420. @@ -750,7 +1221,7 @@ int YUY2ToI420(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MSA) +#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { YUY2ToYRow = YUY2ToYRow_Any_MSA; YUY2ToUVRow = YUY2ToUVRow_Any_MSA; @@ -760,15 +1231,13 @@ int YUY2ToI420(const uint8_t* src_yuy2, } } #endif -#if defined(HAS_YUY2TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - YUY2ToYRow = YUY2ToYRow_Any_MMI; - YUY2ToUVRow = YUY2ToUVRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - YUY2ToYRow = YUY2ToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - YUY2ToUVRow = YUY2ToUVRow_MMI; - } +#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + YUY2ToYRow = YUY2ToYRow_Any_LASX; + YUY2ToUVRow = YUY2ToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + YUY2ToYRow = YUY2ToYRow_LASX; + YUY2ToUVRow = YUY2ToUVRow_LASX; } } #endif @@ -853,13 +1322,13 @@ int UYVYToI420(const uint8_t* src_uyvy, } } #endif -#if defined(HAS_UYVYTOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - UYVYToYRow = UYVYToYRow_Any_MMI; - UYVYToUVRow = UYVYToUVRow_Any_MMI; - if (IS_ALIGNED(width, 16)) { - UYVYToYRow = UYVYToYRow_MMI; - UYVYToUVRow = UYVYToUVRow_MMI; +#if defined(HAS_UYVYTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + UYVYToYRow = UYVYToYRow_Any_LASX; + UYVYToUVRow = UYVYToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + UYVYToYRow = UYVYToYRow_LASX; + UYVYToUVRow = UYVYToUVRow_LASX; } } #endif @@ -1045,30 +1514,10 @@ int ARGBToI420(const uint8_t* src_argb, src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_NEON; } } @@ -1081,35 +1530,57 @@ int ARGBToI420(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBTOYROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToYRow = ARGBToYRow_Any_MSA; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_MSA; + ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { - ARGBToUVRow = ARGBToUVRow_Any_MSA; +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_MSA; + ARGBToYRow = ARGBToYRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBToYRow = ARGBToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_MMI; +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; } } #endif -#if defined(HAS_ARGBTOUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBToUVRow = ARGBToUVRow_Any_MMI; +#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + ARGBToYRow = ARGBToYRow_Any_MSA; + ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_MMI; + ARGBToYRow = ARGBToYRow_MSA; + } + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_MSA; + } + } +#endif +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; } } #endif @@ -1170,7 +1641,7 @@ int BGRAToI420(const uint8_t* src_bgra, #if defined(HAS_BGRATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { BGRAToYRow = BGRAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { BGRAToYRow = BGRAToYRow_NEON; } } @@ -1183,35 +1654,23 @@ int BGRAToI420(const uint8_t* src_bgra, } } #endif -#if defined(HAS_BGRATOYROW_MSA) +#if defined(HAS_BGRATOYROW_MSA) && defined(HAS_BGRATOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToYRow = BGRAToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - BGRAToYRow = BGRAToYRow_MSA; - } - } -#endif -#if defined(HAS_BGRATOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { BGRAToUVRow = BGRAToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_MSA; BGRAToUVRow = BGRAToUVRow_MSA; } } #endif -#if defined(HAS_BGRATOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - BGRAToYRow = BGRAToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - BGRAToYRow = BGRAToYRow_MMI; - } - } -#endif -#if defined(HAS_BGRATOUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - BGRAToUVRow = BGRAToUVRow_Any_MMI; +#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + BGRAToYRow = BGRAToYRow_Any_LSX; + BGRAToUVRow = BGRAToUVRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_MMI; + BGRAToYRow = BGRAToYRow_LSX; + BGRAToUVRow = BGRAToUVRow_LSX; } } #endif @@ -1259,20 +1718,42 @@ int ABGRToI420(const uint8_t* src_abgr, src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } -#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3) +#if defined(HAS_ABGRTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; ABGRToYRow = ABGRToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_SSSE3; ABGRToYRow = ABGRToYRow_SSSE3; } } #endif +#if defined(HAS_ABGRTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYRow = ABGRToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVRow = ABGRToUVRow_AVX2; + } + } +#endif #if defined(HAS_ABGRTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ABGRToYRow = ABGRToYRow_NEON; } } @@ -1285,35 +1766,23 @@ int ABGRToI420(const uint8_t* src_abgr, } } #endif -#if defined(HAS_ABGRTOYROW_MSA) +#if defined(HAS_ABGRTOYROW_MSA) && defined(HAS_ABGRTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ABGRToYRow = ABGRToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - ABGRToYRow = ABGRToYRow_MSA; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { ABGRToUVRow = ABGRToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_MSA; ABGRToUVRow = ABGRToUVRow_MSA; } } #endif -#if defined(HAS_ABGRTOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ABGRToYRow = ABGRToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ABGRToYRow = ABGRToYRow_MMI; - } - } -#endif -#if defined(HAS_ABGRTOUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ABGRToUVRow = ABGRToUVRow_Any_MMI; +#if defined(HAS_ABGRTOYROW_LSX) && defined(HAS_ABGRTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + ABGRToUVRow = ABGRToUVRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_MMI; + ABGRToYRow = ABGRToYRow_LSX; + ABGRToUVRow = ABGRToUVRow_LSX; } } #endif @@ -1361,20 +1830,26 @@ int RGBAToI420(const uint8_t* src_rgba, src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } -#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3) +#if defined(HAS_RGBATOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; RGBAToYRow = RGBAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_SSSE3; RGBAToYRow = RGBAToYRow_SSSE3; } } #endif +#if defined(HAS_RGBATOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGBAToUVRow = RGBAToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToUVRow = RGBAToUVRow_SSSE3; + } + } +#endif #if defined(HAS_RGBATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGBAToYRow = RGBAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGBAToYRow = RGBAToYRow_NEON; } } @@ -1387,35 +1862,23 @@ int RGBAToI420(const uint8_t* src_rgba, } } #endif -#if defined(HAS_RGBATOYROW_MSA) +#if defined(HAS_RGBATOYROW_MSA) && defined(HAS_RGBATOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGBAToYRow = RGBAToYRow_Any_MSA; - if (IS_ALIGNED(width, 16)) { - RGBAToYRow = RGBAToYRow_MSA; - } - } -#endif -#if defined(HAS_RGBATOUVROW_MSA) - if (TestCpuFlag(kCpuHasMSA)) { RGBAToUVRow = RGBAToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_MSA; RGBAToUVRow = RGBAToUVRow_MSA; } } #endif -#if defined(HAS_RGBATOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RGBAToYRow = RGBAToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - RGBAToYRow = RGBAToYRow_MMI; - } - } -#endif -#if defined(HAS_RGBATOUVROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RGBAToUVRow = RGBAToUVRow_Any_MMI; +#if defined(HAS_RGBATOYROW_LSX) && defined(HAS_RGBATOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGBAToYRow = RGBAToYRow_Any_LSX; + RGBAToUVRow = RGBAToUVRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_MMI; + RGBAToYRow = RGBAToYRow_LSX; + RGBAToUVRow = RGBAToUVRow_LSX; } } #endif @@ -1436,6 +1899,12 @@ int RGBAToI420(const uint8_t* src_rgba, return 0; } +// Enabled if 1 pass is available +#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ + defined(HAS_RGB24TOYROW_LSX)) +#define HAS_RGB24TOYROW +#endif + // Convert RGB24 to I420. LIBYUV_API int RGB24ToI420(const uint8_t* src_rgb24, @@ -1449,8 +1918,7 @@ int RGB24ToI420(const uint8_t* src_rgb24, int width, int height) { int y; -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if defined(HAS_RGB24TOYROW) void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVRow_C; @@ -1475,19 +1943,20 @@ int RGB24ToI420(const uint8_t* src_rgb24, src_stride_rgb24 = -src_stride_rgb24; } +#if defined(HAS_RGB24TOYROW) + // Neon version does direct RGB24 to YUV. -#if defined(HAS_RGB24TOYROW_NEON) +#if defined(HAS_RGB24TOYROW_NEON) && defined(HAS_RGB24TOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToUVRow = RGB24ToUVRow_Any_NEON; RGB24ToYRow = RGB24ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGB24ToYRow = RGB24ToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVRow = RGB24ToUVRow_NEON; - } + RGB24ToUVRow = RGB24ToUVRow_NEON; } } -#elif defined(HAS_RGB24TOYROW_MSA) +#endif +#if defined(HAS_RGB24TOYROW_MSA) && defined(HAS_RGB24TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToUVRow = RGB24ToUVRow_Any_MSA; RGB24ToYRow = RGB24ToYRow_Any_MSA; @@ -1496,19 +1965,31 @@ int RGB24ToI420(const uint8_t* src_rgb24, RGB24ToUVRow = RGB24ToUVRow_MSA; } } -#elif defined(HAS_RGB24TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RGB24ToUVRow = RGB24ToUVRow_Any_MMI; - RGB24ToYRow = RGB24ToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - RGB24ToYRow = RGB24ToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVRow = RGB24ToUVRow_MMI; - } +#endif +#if defined(HAS_RGB24TOYROW_LSX) && defined(HAS_RGB24TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToUVRow = RGB24ToUVRow_Any_LSX; + RGB24ToYRow = RGB24ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToYRow = RGB24ToYRow_LSX; + RGB24ToUVRow = RGB24ToUVRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOYROW_LASX) && defined(HAS_RGB24TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToUVRow = RGB24ToUVRow_Any_LASX; + RGB24ToYRow = RGB24ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToYRow = RGB24ToYRow_LASX; + RGB24ToUVRow = RGB24ToUVRow_LASX; } } +#endif + // Other platforms do intermediate conversion from RGB24 to ARGB. -#else +#else // HAS_RGB24TOYROW + #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; @@ -1517,39 +1998,49 @@ int RGB24ToI420(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } #endif +#endif // HAS_RGB24TOYROW { -#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if !defined(HAS_RGB24TOYROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if defined(HAS_RGB24TOYROW) RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); @@ -1566,8 +2057,7 @@ int RGB24ToI420(const uint8_t* src_rgb24, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if defined(HAS_RGB24TOYROW) RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); #else @@ -1576,15 +2066,19 @@ int RGB24ToI420(const uint8_t* src_rgb24, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA) || \ - defined(HAS_RGB24TOYROW_MMI)) +#if !defined(HAS_RGB24TOYROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RGB24TOYROW + +// Enabled if 1 pass is available +#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) +#define HAS_RGB24TOYJROW +#endif -// TODO(fbarchard): Use Matrix version to implement I420 and J420. // Convert RGB24 to J420. LIBYUV_API int RGB24ToJ420(const uint8_t* src_rgb24, @@ -1598,8 +2092,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, int width, int height) { int y; -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) +#if defined(HAS_RGB24TOYJROW) void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVJRow_C; @@ -1624,19 +2117,20 @@ int RGB24ToJ420(const uint8_t* src_rgb24, src_stride_rgb24 = -src_stride_rgb24; } +#if defined(HAS_RGB24TOYJROW) + // Neon version does direct RGB24 to YUV. -#if defined(HAS_RGB24TOYJROW_NEON) +#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; RGB24ToYJRow = RGB24ToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGB24ToYJRow = RGB24ToYJRow_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVJRow = RGB24ToUVJRow_NEON; - } + RGB24ToUVJRow = RGB24ToUVJRow_NEON; } } -#elif defined(HAS_RGB24TOYJROW_MSA) +#endif +#if defined(HAS_RGB24TOYJROW_MSA) && defined(HAS_RGB24TOUVJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToUVJRow = RGB24ToUVJRow_Any_MSA; RGB24ToYJRow = RGB24ToYJRow_Any_MSA; @@ -1645,19 +2139,11 @@ int RGB24ToJ420(const uint8_t* src_rgb24, RGB24ToUVJRow = RGB24ToUVJRow_MSA; } } -#elif defined(HAS_RGB24TOYJROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RGB24ToUVJRow = RGB24ToUVJRow_Any_MMI; - RGB24ToYJRow = RGB24ToYJRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - RGB24ToYJRow = RGB24ToYJRow_MMI; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVJRow = RGB24ToUVJRow_MMI; - } - } - } +#endif + // Other platforms do intermediate conversion from RGB24 to ARGB. -#else +#else // HAS_RGB24TOYJROW + #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; @@ -1666,39 +2152,49 @@ int RGB24ToJ420(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) +#if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) +#if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } #endif +#endif // HAS_RGB24TOYJROW { -#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) +#if !defined(HAS_RGB24TOYJROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) +#if defined(HAS_RGB24TOYJROW) RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToYJRow(src_rgb24, dst_y, width); RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); @@ -1715,8 +2211,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) +#if defined(HAS_RGB24TOYJROW) RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToYJRow(src_rgb24, dst_y, width); #else @@ -1725,13 +2220,19 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ARGBToYJRow(row, dst_y, width); #endif } -#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) +#if !defined(HAS_RGB24TOYJROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RGB24TOYJROW + +// Enabled if 1 pass is available +#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ + defined(HAS_RAWTOYROW_LSX)) +#define HAS_RAWTOYROW +#endif // Convert RAW to I420. LIBYUV_API @@ -1746,8 +2247,7 @@ int RAWToI420(const uint8_t* src_raw, int width, int height) { int y; -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if defined(HAS_RAWTOYROW) void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, uint8_t* dst_v, int width) = RAWToUVRow_C; void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = @@ -1771,19 +2271,20 @@ int RAWToI420(const uint8_t* src_raw, src_stride_raw = -src_stride_raw; } +#if defined(HAS_RAWTOYROW) + // Neon version does direct RAW to YUV. -#if defined(HAS_RAWTOYROW_NEON) +#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RAWToUVRow = RAWToUVRow_Any_NEON; RAWToYRow = RAWToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RAWToYRow = RAWToYRow_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToUVRow = RAWToUVRow_NEON; - } + RAWToUVRow = RAWToUVRow_NEON; } } -#elif defined(HAS_RAWTOYROW_MSA) +#endif +#if defined(HAS_RAWTOYROW_MSA) && defined(HAS_RAWTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToUVRow = RAWToUVRow_Any_MSA; RAWToYRow = RAWToYRow_Any_MSA; @@ -1792,19 +2293,31 @@ int RAWToI420(const uint8_t* src_raw, RAWToUVRow = RAWToUVRow_MSA; } } -#elif defined(HAS_RAWTOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RAWToUVRow = RAWToUVRow_Any_MMI; - RAWToYRow = RAWToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - RAWToYRow = RAWToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - RAWToUVRow = RAWToUVRow_MMI; - } +#endif +#if defined(HAS_RAWTOYROW_LSX) && defined(HAS_RAWTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RAWToUVRow = RAWToUVRow_Any_LSX; + RAWToYRow = RAWToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToYRow = RAWToYRow_LSX; + RAWToUVRow = RAWToUVRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOYROW_LASX) && defined(HAS_RAWTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToUVRow = RAWToUVRow_Any_LASX; + RAWToYRow = RAWToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToYRow = RAWToYRow_LASX; + RAWToUVRow = RAWToUVRow_LASX; } } +#endif + // Other platforms do intermediate conversion from RAW to ARGB. -#else +#else // HAS_RAWTOYROW + #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -1813,39 +2326,49 @@ int RAWToI420(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#endif // HAS_RAWTOYROW { -#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if !defined(HAS_RAWTOYROW) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if defined(HAS_RAWTOYROW) RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); @@ -1862,8 +2385,7 @@ int RAWToI420(const uint8_t* src_raw, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if defined(HAS_RAWTOYROW) RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); #else @@ -1872,13 +2394,167 @@ int RAWToI420(const uint8_t* src_raw, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ - defined(HAS_RAWTOYROW_MMI)) +#if !defined(HAS_RAWTOYROW) free_aligned_buffer_64(row); #endif } return 0; } +#undef HAS_RAWTOYROW + +// Enabled if 1 pass is available +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) +#define HAS_RAWTOYJROW +#endif + +// Convert RAW to J420. +LIBYUV_API +int RAWToJ420(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + int y; +#if defined(HAS_RAWTOYJROW) + void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RAWToUVJRow_C; + void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = + RAWToYJRow_C; +#else + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYJRow_C; +#endif + if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + +#if defined(HAS_RAWTOYJROW) + +// Neon version does direct RAW to YUV. +#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToUVJRow = RAWToUVJRow_Any_NEON; + RAWToYJRow = RAWToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_NEON; + RAWToUVJRow = RAWToUVJRow_NEON; + } + } +#endif +#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RAWToUVJRow = RAWToUVJRow_Any_MSA; + RAWToYJRow = RAWToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_MSA; + RAWToUVJRow = RAWToUVJRow_MSA; + } + } +#endif + +// Other platforms do intermediate conversion from RAW to ARGB. +#else // HAS_RAWTOYJROW + +#if defined(HAS_RAWTOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif +#endif // HAS_RAWTOYJROW + + { +#if !defined(HAS_RAWTOYJROW) + // Allocate 2 rows of ARGB. + const int kRowSize = (width * 4 + 31) & ~31; + align_buffer_64(row, kRowSize * 2); +#endif + + for (y = 0; y < height - 1; y += 2) { +#if defined(HAS_RAWTOYJROW) + RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width); + RAWToYJRow(src_raw, dst_y, width); + RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); +#else + RAWToARGBRow(src_raw, row, width); + RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); + ARGBToUVJRow(row, kRowSize, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); + ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); +#endif + src_raw += src_stride_raw * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { +#if defined(HAS_RAWTOYJROW) + RAWToUVJRow(src_raw, 0, dst_u, dst_v, width); + RAWToYJRow(src_raw, dst_y, width); +#else + RAWToARGBRow(src_raw, row, width); + ARGBToUVJRow(row, 0, dst_u, dst_v, width); + ARGBToYJRow(row, dst_y, width); +#endif + } +#if !defined(HAS_RAWTOYJROW) + free_aligned_buffer_64(row); +#endif + } + return 0; +} +#undef HAS_RAWTOYJROW // Convert RGB565 to I420. LIBYUV_API @@ -1894,7 +2570,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, int height) { int y; #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB565ToUVRow_C; @@ -1931,7 +2607,10 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } } -#elif defined(HAS_RGB565TOYROW_MSA) +// MSA version does direct RGB565 to YUV. +#elif (defined(HAS_RGB565TOYROW_MSA) || defined(HAS_RGB565TOYROW_LSX) || \ + defined(HAS_RGB565TOYROW_LASX)) +#if defined(HAS_RGB565TOYROW_MSA) && defined(HAS_RGB565TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToUVRow = RGB565ToUVRow_Any_MSA; RGB565ToYRow = RGB565ToYRow_Any_MSA; @@ -1940,17 +2619,27 @@ int RGB565ToI420(const uint8_t* src_rgb565, RGB565ToUVRow = RGB565ToUVRow_MSA; } } -#elif defined(HAS_RGB565TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RGB565ToUVRow = RGB565ToUVRow_Any_MMI; - RGB565ToYRow = RGB565ToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - RGB565ToYRow = RGB565ToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - RGB565ToUVRow = RGB565ToUVRow_MMI; - } +#endif +#if defined(HAS_RGB565TOYROW_LSX) && defined(HAS_RGB565TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB565ToUVRow = RGB565ToUVRow_Any_LSX; + RGB565ToYRow = RGB565ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB565ToYRow = RGB565ToYRow_LSX; + RGB565ToUVRow = RGB565ToUVRow_LSX; + } + } +#endif +#if defined(HAS_RGB565TOYROW_LASX) && defined(HAS_RGB565TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB565ToUVRow = RGB565ToUVRow_Any_LASX; + RGB565ToYRow = RGB565ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB565ToYRow = RGB565ToYRow_LASX; + RGB565ToUVRow = RGB565ToUVRow_LASX; } } +#endif // Other platforms do intermediate conversion from RGB565 to ARGB. #else #if defined(HAS_RGB565TOARGBROW_SSE2) @@ -1969,37 +2658,49 @@ int RGB565ToI420(const uint8_t* src_rgb565, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); @@ -2017,7 +2718,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, } if (height & 1) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); #else @@ -2027,7 +2728,7 @@ int RGB565ToI420(const uint8_t* src_rgb565, #endif } #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA) || \ - defined(HAS_RGB565TOYROW_MMI)) + defined(HAS_RGB565TOYROW_LSX) || defined(HAS_RGB565TOYROW_LASX)) free_aligned_buffer_64(row); #endif } @@ -2048,7 +2749,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, int height) { int y; #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB1555ToUVRow_C; @@ -2086,7 +2787,10 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } } -#elif defined(HAS_ARGB1555TOYROW_MSA) +// MSA version does direct ARGB1555 to YUV. +#elif (defined(HAS_ARGB1555TOYROW_MSA) || defined(HAS_ARGB1555TOYROW_LSX) || \ + defined(HAS_ARGB1555TOYROW_LASX)) +#if defined(HAS_ARGB1555TOYROW_MSA) && defined(HAS_ARGB1555TOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA; ARGB1555ToYRow = ARGB1555ToYRow_Any_MSA; @@ -2095,17 +2799,27 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, ARGB1555ToUVRow = ARGB1555ToUVRow_MSA; } } -#elif defined(HAS_ARGB1555TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MMI; - ARGB1555ToYRow = ARGB1555ToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToYRow = ARGB1555ToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToUVRow = ARGB1555ToUVRow_MMI; - } +#endif +#if defined(HAS_ARGB1555TOYROW_LSX) && defined(HAS_ARGB1555TOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LSX; + ARGB1555ToYRow = ARGB1555ToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGB1555ToYRow = ARGB1555ToYRow_LSX; + ARGB1555ToUVRow = ARGB1555ToUVRow_LSX; } } +#endif +#if defined(HAS_ARGB1555TOYROW_LASX) && defined(HAS_ARGB1555TOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGB1555ToUVRow = ARGB1555ToUVRow_Any_LASX; + ARGB1555ToYRow = ARGB1555ToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGB1555ToYRow = ARGB1555ToYRow_LASX; + ARGB1555ToUVRow = ARGB1555ToUVRow_LASX; + } + } +#endif // Other platforms do intermediate conversion from ARGB1555 to ARGB. #else #if defined(HAS_ARGB1555TOARGBROW_SSE2) @@ -2124,30 +2838,42 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif #endif { #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); @@ -2155,7 +2881,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, @@ -2175,7 +2901,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, } if (height & 1) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); #else @@ -2185,7 +2911,7 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, #endif } #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA) || \ - defined(HAS_ARGB1555TOYROW_MMI)) + defined(HAS_ARGB1555TOYROW_LSX) || defined(HAS_ARGB1555TOYROW_LASX)) free_aligned_buffer_64(row); #endif } @@ -2205,7 +2931,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, int width, int height) { int y; -#if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) +#if defined(HAS_ARGB4444TOYROW_NEON) void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB4444ToUVRow_C; @@ -2243,17 +2969,6 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } } -#elif defined(HAS_ARGB4444TOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGB4444ToUVRow = ARGB4444ToUVRow_Any_MMI; - ARGB4444ToYRow = ARGB4444ToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToYRow = ARGB4444ToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToUVRow = ARGB4444ToUVRow_MMI; - } - } - } // Other platforms do intermediate conversion from ARGB4444 to ARGB. #else #if defined(HAS_ARGB4444TOARGBROW_SSE2) @@ -2280,27 +2995,55 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) +#if defined(HAS_ARGB4444TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_ARGB4444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) +#if defined(HAS_ARGBTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVRow = ARGBToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYROW_MSA) +#if defined(HAS_ARGBTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVRow = ARGBToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVRow = ARGBToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_MSA) && defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; ARGBToYRow = ARGBToYRow_Any_MSA; @@ -2312,29 +3055,27 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, } } #endif -#if defined(HAS_ARGBTOYROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - ARGBToUVRow = ARGBToUVRow_Any_MMI; - ARGBToYRow = ARGBToYRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_MMI; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_MMI; - } +#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYRow = ARGBToYRow_Any_LASX; + ARGBToUVRow = ARGBToUVRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYRow = ARGBToYRow_LASX; + ARGBToUVRow = ARGBToUVRow_LASX; } } #endif #endif { -#if !(defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) +#if !(defined(HAS_ARGB4444TOYROW_NEON)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) +#if defined(HAS_ARGB4444TOYROW_NEON) ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); ARGB4444ToYRow(src_argb4444, dst_y, width); ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, @@ -2353,7 +3094,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, dst_v += dst_stride_v; } if (height & 1) { -#if (defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) +#if defined(HAS_ARGB4444TOYROW_NEON) ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); ARGB4444ToYRow(src_argb4444, dst_y, width); #else @@ -2362,7 +3103,7 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, ARGBToYRow(row, dst_y, width); #endif } -#if !(defined(HAS_ARGB4444TOYROW_NEON) || defined(HAS_ARGB4444TOYROW_MMI)) +#if !(defined(HAS_ARGB4444TOYROW_NEON)) free_aligned_buffer_64(row); #endif } @@ -2378,125 +3119,129 @@ int RGB24ToJ400(const uint8_t* src_rgb24, int width, int height) { int y; -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) = RGB24ToYJRow_C; -#else - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RGB24ToARGBRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = - ARGBToYJRow_C; -#endif if (!src_rgb24 || !dst_yj || width <= 0 || height == 0) { return -1; } - // Negative height means invert the image. if (height < 0) { height = -height; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } - -// Neon version does direct RGB24 to YUV. + // Coalesce rows. + if (src_stride_rgb24 == width * 3 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_rgb24 = dst_stride_yj = 0; + } +#if defined(HAS_RGB24TOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToYJRow = RGB24ToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToYJRow = RGB24ToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_RGB24TOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToYJRow = RGB24ToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToYJRow = RGB24ToYJRow_AVX2; + } + } +#endif #if defined(HAS_RGB24TOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToYJRow = RGB24ToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { RGB24ToYJRow = RGB24ToYJRow_NEON; } } -#elif defined(HAS_RGB24TOYJROW_MSA) +#endif +#if defined(HAS_RGB24TOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToYJRow = RGB24ToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGB24ToYJRow = RGB24ToYJRow_MSA; } } -#elif defined(HAS_RGB24TOYJROW_MMI) - if (TestCpuFlag(kCpuHasMMI)) { - RGB24ToYJRow = RGB24ToYJRow_Any_MMI; - if (IS_ALIGNED(width, 8)) { - RGB24ToYJRow = RGB24ToYJRow_MMI; - } +#endif + + for (y = 0; y < height; ++y) { + RGB24ToYJRow(src_rgb24, dst_yj, width); + src_rgb24 += src_stride_rgb24; + dst_yj += dst_stride_yj; } -// Other platforms do intermediate conversion from RGB24 to ARGB. -#else -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } + return 0; +} + +// Convert RAW to J400. +LIBYUV_API +int RAWToJ400(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height) { + int y; + void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_yj, int width) = + RAWToYJRow_C; + if (!src_raw || !dst_yj || width <= 0 || height == 0) { + return -1; } -#endif -#if defined(HAS_ARGBTOYJROW_SSSE3) + + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + // Coalesce rows. + if (src_stride_raw == width * 3 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_raw = dst_stride_yj = 0; + } + +#if defined(HAS_RAWTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + RAWToYJRow = RAWToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; + RAWToYJRow = RAWToYJRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) +#if defined(HAS_RAWTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; + RAWToYJRow = RAWToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; + RAWToYJRow = RAWToYJRow_AVX2; } } #endif -#endif - - { -#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 31) & ~31; - align_buffer_64(row, kRowSize * 2); -#endif - - for (y = 0; y < height - 1; y += 2) { -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - RGB24ToYJRow(src_rgb24, dst_yj, width); - RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_yj + dst_stride_yj, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToYJRow(row, dst_yj, width); - ARGBToYJRow(row + kRowSize, dst_yj + dst_stride_yj, width); -#endif - src_rgb24 += src_stride_rgb24 * 2; - dst_yj += dst_stride_yj * 2; +#if defined(HAS_RAWTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToYJRow = RAWToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_NEON; } - if (height & 1) { -#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - RGB24ToYJRow(src_rgb24, dst_yj, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - ARGBToYJRow(row, dst_yj, width); + } #endif +#if defined(HAS_RAWTOYJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RAWToYJRow = RAWToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_MSA; } -#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ - defined(HAS_RGB24TOYJROW_MMI)) - free_aligned_buffer_64(row); -#endif } - return 0; -} +#endif -static void SplitPixels(const uint8_t* src_u, - int src_pixel_stride_uv, - uint8_t* dst_u, - int width) { - int i; - for (i = 0; i < width; ++i) { - *dst_u = *src_u; - ++dst_u; - src_u += src_pixel_stride_uv; + for (y = 0; y < height; ++y) { + RAWToYJRow(src_raw, dst_yj, width); + src_raw += src_stride_raw; + dst_yj += dst_stride_yj; } + return 0; } // Convert Android420 to I420. @@ -2516,58 +3261,10 @@ int Android420ToI420(const uint8_t* src_y, int dst_stride_v, int width, int height) { - int y; - const ptrdiff_t vu_off = src_v - src_u; - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - - // Copy UV planes as is - I420 - if (src_pixel_stride_uv == 1) { - CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); - CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); - return 0; - // Split UV planes - NV21 - } - if (src_pixel_stride_uv == 2 && vu_off == -1 && - src_stride_u == src_stride_v) { - SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u, - halfwidth, halfheight); - return 0; - // Split UV planes - NV12 - } - if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { - SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - } - - for (y = 0; y < halfheight; ++y) { - SplitPixels(src_u, src_pixel_stride_uv, dst_u, halfwidth); - SplitPixels(src_v, src_pixel_stride_uv, dst_v, halfwidth); - src_u += src_stride_u; - src_v += src_stride_v; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; + return Android420ToI420Rotate(src_y, src_stride_y, src_u, src_stride_u, src_v, + src_stride_v, src_pixel_stride_uv, dst_y, + dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, width, height, kRotate0); } #ifdef __cplusplus |