aboutsummaryrefslogtreecommitdiff
path: root/files/source/planar_functions.cc
diff options
context:
space:
mode:
Diffstat (limited to 'files/source/planar_functions.cc')
-rw-r--r--files/source/planar_functions.cc659
1 files changed, 510 insertions, 149 deletions
diff --git a/files/source/planar_functions.cc b/files/source/planar_functions.cc
index 169d4a8f..d115a2a1 100644
--- a/files/source/planar_functions.cc
+++ b/files/source/planar_functions.cc
@@ -75,6 +75,11 @@ void CopyPlane(const uint8_t* src_y,
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
}
#endif
+#if defined(HAS_COPYROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ CopyRow = CopyRow_RVV;
+ }
+#endif
// Copy plane
for (y = 0; y < height; ++y) {
@@ -162,7 +167,7 @@ void Convert8To16Plane(const uint8_t* src_y,
int src_stride_y,
uint16_t* dst_y,
int dst_stride_y,
- int scale, // 16384 for 10 bits
+ int scale, // 1024 for 10 bits
int width,
int height) {
int y;
@@ -333,6 +338,45 @@ int I210Copy(const uint16_t* src_y,
return 0;
}
+// Copy I410.
+// Copies the U and V planes, and the Y plane when dst_y is non-NULL, by
+// calling CopyPlane_16 with the same width and height for every plane.
+// Strides offset uint16_t pointers, so they are presumably counted in
+// uint16_t elements rather than bytes (TODO confirm against the header).
+// A negative height inverts the image: the sources are read bottom-up.
+// Returns 0 on success, or -1 if a required pointer is NULL, width <= 0 or
+// height == 0.
+LIBYUV_API
+int I410Copy(const uint16_t* src_y,
+ int src_stride_y,
+ const uint16_t* src_u,
+ int src_stride_u,
+ const uint16_t* src_v,
+ int src_stride_v,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ uint16_t* dst_u,
+ int dst_stride_u,
+ uint16_t* dst_v,
+ int dst_stride_v,
+ int width,
+ int height) {
+ // src_y may be NULL only when dst_y is NULL too; the Y copy is then skipped.
+ if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
+ height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ // Each source pointer is moved to its last row and its stride negated, so
+ // the copies below walk the source upwards while the destination goes down.
+ // NOTE(review): src_y is offset here even when it is NULL (dst_y == NULL);
+ // the result is never dereferenced, but confirm sanitizers accept it.
+ if (height < 0) {
+ height = -height;
+ src_y = src_y + (height - 1) * src_stride_y;
+ src_u = src_u + (height - 1) * src_stride_u;
+ src_v = src_v + (height - 1) * src_stride_v;
+ src_stride_y = -src_stride_y;
+ src_stride_u = -src_stride_u;
+ src_stride_v = -src_stride_v;
+ }
+
+ // The Y plane is optional; U and V are always copied.
+ if (dst_y) {
+ CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+ }
+ CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
+ CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
+ return 0;
+}
+
// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
@@ -385,6 +429,7 @@ int I420ToI400(const uint8_t* src_y,
}
// Copy NV12. Supports inverting.
+LIBYUV_API
int NV12Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
@@ -418,6 +463,7 @@ int NV12Copy(const uint8_t* src_y,
}
// Copy NV21. Supports inverting.
+LIBYUV_API
int NV21Copy(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_vu,
@@ -504,6 +550,11 @@ void SplitUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_SPLITUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitUVRow = SplitUVRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of UV.
@@ -553,11 +604,19 @@ void MergeUVPlane(const uint8_t* src_u,
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow = MergeUVRow_Any_AVX2;
- if (IS_ALIGNED(width, 32)) {
+ if (IS_ALIGNED(width, 16)) {
MergeUVRow = MergeUVRow_AVX2;
}
}
#endif
+#if defined(HAS_MERGEUVROW_AVX512BW)
+ if (TestCpuFlag(kCpuHasAVX512BW)) {
+ MergeUVRow = MergeUVRow_Any_AVX512BW;
+ if (IS_ALIGNED(width, 32)) {
+ MergeUVRow = MergeUVRow_AVX512BW;
+ }
+ }
+#endif
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
MergeUVRow = MergeUVRow_Any_NEON;
@@ -582,6 +641,11 @@ void MergeUVPlane(const uint8_t* src_u,
}
}
#endif
+#if defined(HAS_MERGEUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeUVRow = MergeUVRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of UV.
@@ -687,7 +751,7 @@ void MergeUVPlane_16(const uint16_t* src_u,
#if defined(HAS_MERGEUVROW_16_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
- if (IS_ALIGNED(width, 16)) {
+ if (IS_ALIGNED(width, 8)) {
MergeUVRow_16 = MergeUVRow_16_AVX2;
}
}
@@ -911,31 +975,31 @@ int NV21ToNV12(const uint8_t* src_y,
return 0;
}
+// Test if tile_height is a power of 2 (16 or 32).
+// Zero and negative values are rejected: without the (x) > 0 guard the bit
+// trick reports 0 as a power of two, and a zero tile_height would make the
+// "y & (tile_height - 1)" tile-row test in the detile loops never fire.
+// The argument is evaluated more than once, so pass a side-effect-free value.
+#define IS_POWEROFTWO(x) ((x) > 0 && !((x) & ((x)-1)))
+
// Detile a plane of data
// tile width is 16 and assumed.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of source ignoring tiling. e.g. 640
// TODO: More detile row functions.
-
LIBYUV_API
-void DetilePlane(const uint8_t* src_y,
- int src_stride_y,
- uint8_t* dst_y,
- int dst_stride_y,
- int width,
- int height,
- int tile_height) {
+int DetilePlane(const uint8_t* src_y,
+ int src_stride_y,
+ uint8_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
const ptrdiff_t src_tile_stride = 16 * tile_height;
int y;
void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
int width) = DetileRow_C;
- assert(src_stride_y >= 0);
- assert(tile_height > 0);
- assert(src_stride_y > 0);
-
- if (width <= 0 || height == 0) {
- return;
+ if (!src_y || !dst_y || width <= 0 || height == 0 ||
+ !IS_POWEROFTWO(tile_height)) {
+ return -1;
}
+
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -970,6 +1034,72 @@ void DetilePlane(const uint8_t* src_y,
src_y = src_y - src_tile_stride + src_stride_y * tile_height;
}
}
+ return 0;
+}
+
+// Convert a plane of 16 bit tiles of 16 x H to linear.
+// tile width is 16 and assumed.
+// tile_height is 16 or 32 for MT2T.
+// src_stride_y and dst_stride_y offset uint16_t pointers, so they are
+// presumably counted in uint16_t elements (TODO confirm against the header).
+// A negative height inverts the image by writing the destination bottom-up.
+// Returns 0 on success, or -1 if a pointer is NULL, width <= 0, height == 0
+// or tile_height fails IS_POWEROFTWO.
+LIBYUV_API
+int DetilePlane_16(const uint16_t* src_y,
+ int src_stride_y,
+ uint16_t* dst_y,
+ int dst_stride_y,
+ int width,
+ int height,
+ int tile_height) {
+ // Elements in one 16 x tile_height tile; handed to the row function and
+ // used below to rewind to the top of the current tile.
+ const ptrdiff_t src_tile_stride = 16 * tile_height;
+ int y;
+ void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
+ uint16_t* dst, int width) = DetileRow_16_C;
+ if (!src_y || !dst_y || width <= 0 || height == 0 ||
+ !IS_POWEROFTWO(tile_height)) {
+ return -1;
+ }
+
+ // Negative height means invert the image.
+ // Only the destination is flipped: start at its last row and step upwards.
+ if (height < 0) {
+ height = -height;
+ dst_y = dst_y + (height - 1) * dst_stride_y;
+ dst_stride_y = -dst_stride_y;
+ }
+
+ // Row function dispatch: start from the C version and let each available
+ // CPU feature override it. The non-Any variants are chosen only when
+ // IS_ALIGNED(width, 16) holds.
+#if defined(HAS_DETILEROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ DetileRow_16 = DetileRow_16_Any_SSE2;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_DETILEROW_16_AVX)
+ if (TestCpuFlag(kCpuHasAVX)) {
+ DetileRow_16 = DetileRow_16_Any_AVX;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_AVX;
+ }
+ }
+#endif
+#if defined(HAS_DETILEROW_16_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ DetileRow_16 = DetileRow_16_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ DetileRow_16 = DetileRow_16_NEON;
+ }
+ }
+#endif
+
+ // Detile plane
+ for (y = 0; y < height; ++y) {
+ DetileRow_16(src_y, src_tile_stride, dst_y, width);
+ dst_y += dst_stride_y;
+ // Consecutive rows of the same tile are 16 elements apart.
+ src_y += 16;
+ // Advance to next row of tiles.
+ // After the last row of a tile, undo the 16 * tile_height advance made
+ // inside the tile and move down by src_stride_y * tile_height. The mask
+ // test relies on tile_height being a power of two.
+ if ((y & (tile_height - 1)) == (tile_height - 1)) {
+ src_y = src_y - src_tile_stride + src_stride_y * tile_height;
+ }
+ }
+ return 0;
}
LIBYUV_API
@@ -1033,6 +1163,74 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
}
}
+// Convert a tiled Y plane and a tiled UV plane to a linear YUY2 image.
+// Tile width is 16 and assumed; tile_height must be a positive power of two
+// because the end of a tile is detected by masking y with tile_height - 1.
+// One UV row is consumed for every two Y rows.
+// A negative height inverts the image by writing the destination bottom-up.
+// Returns nothing; the call is a no-op when width <= 0, height == 0 or
+// tile_height is not a positive power of two.
+LIBYUV_API
+void DetileToYUY2(const uint8_t* src_y,
+                  int src_stride_y,
+                  const uint8_t* src_uv,
+                  int src_stride_uv,
+                  uint8_t* dst_yuy2,
+                  int dst_stride_yuy2,
+                  int width,
+                  int height,
+                  int tile_height) {
+  // Bytes in one 16 x tile_height Y tile; the UV tile is half of that.
+  const ptrdiff_t src_y_tile_stride = 16 * tile_height;
+  const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
+  int y;
+  // Named differently from the enclosing function so it does not shadow it.
+  void (*DetileToYUY2Row)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
+                          const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
+                          uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
+  assert(src_stride_y > 0);
+  assert(src_stride_uv > 0);
+  assert(tile_height > 0);
+
+  if (width <= 0 || height == 0 || tile_height <= 0 ||
+      !IS_POWEROFTWO(tile_height)) {
+    return;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
+    dst_stride_yuy2 = -dst_stride_yuy2;
+  }
+
+  // Row function dispatch: each available CPU feature overrides the C
+  // version; the non-Any variants need IS_ALIGNED(width, 16).
+#if defined(HAS_DETILETOYUY2_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    DetileToYUY2Row = DetileToYUY2_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      DetileToYUY2Row = DetileToYUY2_NEON;
+    }
+  }
+#endif
+
+#if defined(HAS_DETILETOYUY2_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    DetileToYUY2Row = DetileToYUY2_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      DetileToYUY2Row = DetileToYUY2_SSE2;
+    }
+  }
+#endif
+
+  // Detile plane
+  for (y = 0; y < height; ++y) {
+    DetileToYUY2Row(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride,
+                    dst_yuy2, width);
+    dst_yuy2 += dst_stride_yuy2;
+    src_y += 16;
+
+    // The same UV row serves two Y rows, so step UV only after odd rows.
+    if (y & 0x1) {
+      src_uv += 16;
+    }
+
+    // Advance to next row of tiles.
+    if ((y & (tile_height - 1)) == (tile_height - 1)) {
+      src_y = src_y - src_y_tile_stride + src_stride_y * tile_height;
+      src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2);
+    }
+  }
+}
+
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
@@ -1085,6 +1283,11 @@ void SplitRGBPlane(const uint8_t* src_rgb,
}
}
#endif
+#if defined(HAS_SPLITRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitRGBRow = SplitRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Copy a row of RGB.
@@ -1144,6 +1347,11 @@ void MergeRGBPlane(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGERGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeRGBRow = MergeRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
// Merge a row of U and V into a row of RGB.
@@ -1156,18 +1364,18 @@ void MergeRGBPlane(const uint8_t* src_r,
}
LIBYUV_NOINLINE
-void SplitARGBPlaneAlpha(const uint8_t* src_argb,
- int src_stride_argb,
- uint8_t* dst_r,
- int dst_stride_r,
- uint8_t* dst_g,
- int dst_stride_g,
- uint8_t* dst_b,
- int dst_stride_b,
- uint8_t* dst_a,
- int dst_stride_a,
- int width,
- int height) {
+static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ uint8_t* dst_a,
+ int dst_stride_a,
+ int width,
+ int height) {
int y;
void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, uint8_t* dst_a, int width) =
@@ -1175,6 +1383,9 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
width *= height;
@@ -1215,6 +1426,11 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SPLITARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitARGBRow = SplitARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
@@ -1227,21 +1443,24 @@ void SplitARGBPlaneAlpha(const uint8_t* src_argb,
}
LIBYUV_NOINLINE
-void SplitARGBPlaneOpaque(const uint8_t* src_argb,
- int src_stride_argb,
- uint8_t* dst_r,
- int dst_stride_r,
- uint8_t* dst_g,
- int dst_stride_g,
- uint8_t* dst_b,
- int dst_stride_b,
- int width,
- int height) {
+static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_r,
+ int dst_stride_r,
+ uint8_t* dst_g,
+ int dst_stride_g,
+ uint8_t* dst_b,
+ int dst_stride_b,
+ int width,
+ int height) {
int y;
void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
uint8_t* dst_b, int width) = SplitXRGBRow_C;
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_argb == width * 4 && dst_stride_r == width &&
dst_stride_g == width && dst_stride_b == width) {
width *= height;
@@ -1281,6 +1500,11 @@ void SplitARGBPlaneOpaque(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_SPLITXRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitXRGBRow = SplitXRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
@@ -1328,18 +1552,18 @@ void SplitARGBPlane(const uint8_t* src_argb,
}
LIBYUV_NOINLINE
-void MergeARGBPlaneAlpha(const uint8_t* src_r,
- int src_stride_r,
- const uint8_t* src_g,
- int src_stride_g,
- const uint8_t* src_b,
- int src_stride_b,
- const uint8_t* src_a,
- int src_stride_a,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
+static void MergeARGBPlaneAlpha(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ const uint8_t* src_a,
+ int src_stride_a,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
int y;
void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, const uint8_t* src_a,
@@ -1347,6 +1571,9 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
src_stride_a == width && dst_stride_argb == width * 4) {
width *= height;
@@ -1378,6 +1605,11 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGEARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeARGBRow = MergeARGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
@@ -1390,16 +1622,16 @@ void MergeARGBPlaneAlpha(const uint8_t* src_r,
}
LIBYUV_NOINLINE
-void MergeARGBPlaneOpaque(const uint8_t* src_r,
- int src_stride_r,
- const uint8_t* src_g,
- int src_stride_g,
- const uint8_t* src_b,
- int src_stride_b,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
+static void MergeARGBPlaneOpaque(const uint8_t* src_r,
+ int src_stride_r,
+ const uint8_t* src_g,
+ int src_stride_g,
+ const uint8_t* src_b,
+ int src_stride_b,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
int y;
void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
const uint8_t* src_b, uint8_t* dst_argb, int width) =
@@ -1407,6 +1639,9 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r,
assert(height > 0);
+ if (width <= 0 || height == 0) {
+ return;
+ }
if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
dst_stride_argb == width * 4) {
width *= height;
@@ -1437,6 +1672,11 @@ void MergeARGBPlaneOpaque(const uint8_t* src_r,
}
}
#endif
+#if defined(HAS_MERGEXRGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ MergeXRGBRow = MergeXRGBRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
@@ -1888,6 +2128,16 @@ int YUY2ToI422(const uint8_t* src_yuy2,
}
}
#endif
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToYRow = YUY2ToYRow_LSX;
+ YUY2ToUV422Row = YUY2ToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
YUY2ToYRow = YUY2ToYRow_Any_LASX;
@@ -1984,6 +2234,16 @@ int UYVYToI422(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ UYVYToUV422Row = UYVYToUV422Row_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ UYVYToUV422Row = UYVYToUV422Row_LSX;
+ }
+ }
+#endif
#if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
UYVYToYRow = UYVYToYRow_Any_LASX;
@@ -2131,6 +2391,14 @@ int UYVYToY(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_UYVYTOYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ UYVYToYRow = UYVYToYRow_Any_LSX;
+ if (IS_ALIGNED(width, 16)) {
+ UYVYToYRow = UYVYToYRow_LSX;
+ }
+ }
+#endif
for (y = 0; y < height; ++y) {
UYVYToYRow(src_uyvy, dst_y, width);
@@ -2189,6 +2457,14 @@ void MirrorPlane(const uint8_t* src_y,
}
}
#endif
+#if defined(HAS_MIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorRow = MirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 32)) {
+ MirrorRow = MirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorRow = MirrorRow_Any_LASX;
@@ -2255,6 +2531,14 @@ void MirrorUVPlane(const uint8_t* src_uv,
}
}
#endif
+#if defined(HAS_MIRRORUVROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ MirrorUVRow = MirrorUVRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ MirrorUVRow = MirrorUVRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_MIRRORUVROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
MirrorUVRow = MirrorUVRow_Any_LASX;
@@ -2427,6 +2711,14 @@ int ARGBMirror(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBMIRRORROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBMirrorRow = ARGBMirrorRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMIRRORROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
@@ -2809,6 +3101,14 @@ int ARGBMultiply(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBMULTIPLYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBMultiplyRow = ARGBMultiplyRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBMULTIPLYROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX;
@@ -2894,6 +3194,14 @@ int ARGBAdd(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBADDROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBAddRow = ARGBAddRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBAddRow = ARGBAddRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBADDROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBAddRow = ARGBAddRow_Any_LASX;
@@ -2974,6 +3282,14 @@ int ARGBSubtract(const uint8_t* src_argb0,
}
}
#endif
+#if defined(HAS_ARGBSUBTRACTROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBSubtractRow = ARGBSubtractRow_Any_LSX;
+ if (IS_ALIGNED(width, 4)) {
+ ARGBSubtractRow = ARGBSubtractRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBSUBTRACTROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBSubtractRow = ARGBSubtractRow_Any_LASX;
@@ -3051,6 +3367,11 @@ int RAWToRGB24(const uint8_t* src_raw,
}
}
#endif
+#if defined(HAS_RAWTORGB24ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RAWToRGB24Row = RAWToRGB24Row_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
RAWToRGB24Row(src_raw, dst_rgb24, width);
@@ -3060,6 +3381,7 @@ int RAWToRGB24(const uint8_t* src_raw,
return 0;
}
+// TODO(fbarchard): Consider uint8_t value
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
@@ -3067,7 +3389,7 @@ void SetPlane(uint8_t* dst_y,
int height,
uint32_t value) {
int y;
- void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
+ void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
if (width <= 0 || height == 0) {
return;
@@ -3120,7 +3442,7 @@ void SetPlane(uint8_t* dst_y,
// Set plane
for (y = 0; y < height; ++y) {
- SetRow(dst_y, value, width);
+ SetRow(dst_y, (uint8_t)value, width);
dst_y += dst_stride_y;
}
}
@@ -3168,7 +3490,7 @@ int ARGBRect(uint8_t* dst_argb,
int height,
uint32_t value) {
int y;
- void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
+ void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1;
@@ -3293,6 +3615,14 @@ int ARGBAttenuate(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBATTENUATEROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX;
@@ -3301,6 +3631,11 @@ int ARGBAttenuate(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBATTENUATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBAttenuateRow = ARGBAttenuateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
ARGBAttenuateRow(src_argb, dst_argb, width);
@@ -3401,6 +3736,11 @@ int ARGBGrayTo(const uint8_t* src_argb,
ARGBGrayRow = ARGBGrayRow_MSA;
}
#endif
+#if defined(HAS_ARGBGRAYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBGRAYROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBGrayRow = ARGBGrayRow_LASX;
@@ -3451,6 +3791,11 @@ int ARGBGray(uint8_t* dst_argb,
ARGBGrayRow = ARGBGrayRow_MSA;
}
#endif
+#if defined(HAS_ARGBGRAYROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBGrayRow = ARGBGrayRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBGRAYROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBGrayRow = ARGBGrayRow_LASX;
@@ -3473,7 +3818,7 @@ int ARGBSepia(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
+ void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
@@ -3499,6 +3844,11 @@ int ARGBSepia(uint8_t* dst_argb,
ARGBSepiaRow = ARGBSepiaRow_MSA;
}
#endif
+#if defined(HAS_ARGBSEPIAROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
+ ARGBSepiaRow = ARGBSepiaRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBSEPIAROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
ARGBSepiaRow = ARGBSepiaRow_LASX;
@@ -3616,7 +3966,7 @@ int ARGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = ARGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@@ -3652,7 +4002,7 @@ int RGBColorTable(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
+ void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
int width) = RGBColorTableRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
@@ -3697,7 +4047,7 @@ int ARGBQuantize(uint8_t* dst_argb,
int width,
int height) {
int y;
- void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
+ void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
int interval_offset, int width) = ARGBQuantizeRow_C;
uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
@@ -3924,6 +4274,11 @@ int ARGBShade(const uint8_t* src_argb,
ARGBShadeRow = ARGBShadeRow_MSA;
}
#endif
+#if defined(HAS_ARGBSHADEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) {
+ ARGBShadeRow = ARGBShadeRow_LSX;
+ }
+#endif
#if defined(HAS_ARGBSHADEROW_LASX)
if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) {
ARGBShadeRow = ARGBShadeRow_LASX;
@@ -3950,7 +4305,7 @@ int InterpolatePlane(const uint8_t* src0,
int height,
int interpolation) {
int y;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -4008,6 +4363,11 @@ int InterpolatePlane(const uint8_t* src0,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
for (y = 0; y < height; ++y) {
InterpolateRow(dst, src0, src1 - src0, width, interpolation);
@@ -4030,7 +4390,7 @@ int InterpolatePlane_16(const uint16_t* src0,
int height,
int interpolation) {
int y;
- void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
+ void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_16_C;
if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
@@ -4213,6 +4573,14 @@ int ARGBShuffle(const uint8_t* src_bgra,
}
}
#endif
+#if defined(HAS_ARGBSHUFFLEROW_LSX)
+ if (TestCpuFlag(kCpuHasLSX)) {
+ ARGBShuffleRow = ARGBShuffleRow_Any_LSX;
+ if (IS_ALIGNED(width, 8)) {
+ ARGBShuffleRow = ARGBShuffleRow_LSX;
+ }
+ }
+#endif
#if defined(HAS_ARGBSHUFFLEROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBShuffleRow = ARGBShuffleRow_Any_LASX;
@@ -4444,6 +4812,11 @@ static int ARGBSobelize(const uint8_t* src_argb,
}
}
#endif
+#if defined(HAS_ARGBTOYJROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToYJRow = ARGBToYJRow_RVV;
+ }
+#endif
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
@@ -4477,16 +4850,16 @@ static int ARGBSobelize(const uint8_t* src_argb,
#endif
{
// 3 rows with edges before/after.
- const int kRowSize = (width + kEdge + 31) & ~31;
- align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
+ const int row_size = (width + kEdge + 31) & ~31;
+ align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge));
uint8_t* row_sobelx = rows;
- uint8_t* row_sobely = rows + kRowSize;
- uint8_t* row_y = rows + kRowSize * 2;
+ uint8_t* row_sobely = rows + row_size;
+ uint8_t* row_y = rows + row_size * 2;
// Convert first row.
uint8_t* row_y0 = row_y + kEdge;
- uint8_t* row_y1 = row_y0 + kRowSize;
- uint8_t* row_y2 = row_y1 + kRowSize;
+ uint8_t* row_y1 = row_y0 + row_size;
+ uint8_t* row_y2 = row_y1 + row_size;
ARGBToYJRow(src_argb, row_y0, width);
row_y0[-1] = row_y0[0];
memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
@@ -5027,9 +5400,6 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
return 0;
}
-// TODO(fbarchard): Consider if width is even Y channel can be split
-// directly. A SplitUVRow_Odd function could copy the remaining chroma.
-
LIBYUV_API
int YUY2ToNV12(const uint8_t* src_yuy2,
int src_stride_yuy2,
@@ -5040,13 +5410,10 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
int width,
int height) {
int y;
- int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
- int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
- ptrdiff_t src_stride, int dst_width,
- int source_y_fraction) = InterpolateRow_C;
-
+ void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
+ YUY2ToYRow_C;
+ void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2,
+ uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C;
if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1;
}
@@ -5057,109 +5424,91 @@ int YUY2ToNV12(const uint8_t* src_yuy2,
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2;
}
-#if defined(HAS_SPLITUVROW_SSE2)
+#if defined(HAS_YUY2TOYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
- SplitUVRow = SplitUVRow_Any_SSE2;
+ YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_SSE2;
+ YUY2ToYRow = YUY2ToYRow_SSE2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_AVX2)
+#if defined(HAS_YUY2TOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- SplitUVRow = SplitUVRow_Any_AVX2;
+ YUY2ToYRow = YUY2ToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_AVX2;
}
}
#endif
-#if defined(HAS_SPLITUVROW_NEON)
+#if defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- SplitUVRow = SplitUVRow_Any_NEON;
+ YUY2ToYRow = YUY2ToYRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
- SplitUVRow = SplitUVRow_NEON;
+ YUY2ToYRow = YUY2ToYRow_NEON;
}
}
#endif
-#if defined(HAS_SPLITUVROW_MSA)
+#if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
- SplitUVRow = SplitUVRow_Any_MSA;
+ YUY2ToYRow = YUY2ToYRow_Any_MSA;
if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_MSA;
+ YUY2ToYRow = YUY2ToYRow_MSA;
}
}
#endif
-#if defined(HAS_SPLITUVROW_LSX)
+#if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
- SplitUVRow = SplitUVRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- SplitUVRow = SplitUVRow_LSX;
- }
- }
-#endif
-#if defined(HAS_INTERPOLATEROW_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3)) {
- InterpolateRow = InterpolateRow_Any_SSSE3;
+ YUY2ToYRow = YUY2ToYRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_SSSE3;
+ YUY2ToYRow = YUY2ToYRow_LSX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_AVX2)
- if (TestCpuFlag(kCpuHasAVX2)) {
- InterpolateRow = InterpolateRow_Any_AVX2;
+#if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
+ if (TestCpuFlag(kCpuHasLASX)) {
+ YUY2ToYRow = YUY2ToYRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_AVX2;
+ YUY2ToYRow = YUY2ToYRow_LASX;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_NEON)
- if (TestCpuFlag(kCpuHasNEON)) {
- InterpolateRow = InterpolateRow_Any_NEON;
+
+#if defined(HAS_YUY2TONVUVROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
- InterpolateRow = InterpolateRow_NEON;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_MSA)
- if (TestCpuFlag(kCpuHasMSA)) {
- InterpolateRow = InterpolateRow_Any_MSA;
+#if defined(HAS_YUY2TONVUVROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_MSA;
+ YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2;
}
}
#endif
-#if defined(HAS_INTERPOLATEROW_LSX)
- if (TestCpuFlag(kCpuHasLSX)) {
- InterpolateRow = InterpolateRow_Any_LSX;
- if (IS_ALIGNED(width, 32)) {
- InterpolateRow = InterpolateRow_LSX;
+#if defined(HAS_YUY2TONVUVROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON;
+ if (IS_ALIGNED(width, 16)) {
+ YUY2ToNVUVRow = YUY2ToNVUVRow_NEON;
}
}
#endif
- {
- int awidth = halfwidth * 2;
- // row of y and 2 rows of uv
- align_buffer_64(rows, awidth * 3);
-
- for (y = 0; y < height - 1; y += 2) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
- memcpy(dst_y, rows, width);
- SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
- memcpy(dst_y + dst_stride_y, rows, width);
- InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
- src_yuy2 += src_stride_yuy2 * 2;
- dst_y += dst_stride_y * 2;
- dst_uv += dst_stride_uv;
- }
- if (height & 1) {
- // Split Y from UV.
- SplitUVRow(src_yuy2, rows, dst_uv, awidth);
- memcpy(dst_y, rows, width);
- }
- free_aligned_buffer_64(rows);
+ for (y = 0; y < height - 1; y += 2) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
+ YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width);
+ src_yuy2 += src_stride_yuy2 * 2;
+ dst_y += dst_stride_y * 2;
+ dst_uv += dst_stride_uv;
+ }
+ if (height & 1) {
+ YUY2ToYRow(src_yuy2, dst_y, width);
+ YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width);
}
return 0;
}
@@ -5177,7 +5526,7 @@ int UYVYToNV12(const uint8_t* src_uyvy,
int halfwidth = (width + 1) >> 1;
void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
int width) = SplitUVRow_C;
- void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
+ void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
@@ -5231,6 +5580,12 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_SPLITUVROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ SplitUVRow = SplitUVRow_RVV;
+ }
+#endif
+
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
@@ -5271,6 +5626,11 @@ int UYVYToNV12(const uint8_t* src_uyvy,
}
}
#endif
+#if defined(HAS_INTERPOLATEROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ InterpolateRow = InterpolateRow_RVV;
+ }
+#endif
{
int awidth = halfwidth * 2;
@@ -5336,6 +5696,7 @@ void HalfMergeUVPlane(const uint8_t* src_u,
HalfMergeUVRow = HalfMergeUVRow_AVX2;
}
#endif
+
for (y = 0; y < height - 1; y += 2) {
// Merge a row of U and V into a row of UV.
HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);