aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Lai <bruce.lai@sifive.com>2023-08-18 01:50:49 -0700
committerFrank Barchard <fbarchard@chromium.org>2023-09-05 22:44:48 +0000
commitec2e9ca0007df3ac1caae5c6c1fdddcbbe07a842 (patch)
tree9f2f5ab149455da52a0ee7cbef23ca800c7a856d
parentf0921806a293e3e008e6325a51d4ea760c39d2c1 (diff)
downloadlibyuv-ec2e9ca0007df3ac1caae5c6c1fdddcbbe07a842.tar.gz
[RVV] Support AR64ToAB64 and RGBA-family color conversions
Add scalar code for AR64ToAB64, ARGBToRGBA, ARGBToBGRA, ARGBToABGR, RGBAToARGB, BGRAToARGB, and ABGRToARGB. They are originally implemented by ARGBShffle. This CL independetly implements them, and only enables for risc-v now. This CL also add RVV implementation for `RGBA-family <-> RGBA-family` color conversions. * Run on SiFive internal FPGA(VLEN=128): Test Case Speedup AR64ToAB64_Opt x4.6 ARGBToRGBA_Opt x6 ARGBToBGRA_Opt x6 ARGBToABGR_Opt x6 RGBAToARGB_Opt x6 Change-Id: Ie0630901046084aa259699fcdeccc64170d7103f Signed-off-by: Bruce Lai <bruce.lai@sifive.com> Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4797451 Reviewed-by: Frank Barchard <fbarchard@chromium.org>
-rw-r--r--include/libyuv/row.h14
-rw-r--r--source/convert_argb.cc190
-rw-r--r--source/convert_from_argb.cc42
-rw-r--r--source/row_common.cc80
-rw-r--r--source/row_rvv.cc77
5 files changed, 403 insertions, 0 deletions
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 0455b4cc..36561d6e 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -800,14 +800,18 @@ extern "C" {
#define HAS_ABGRTOYJROW_RVV
#define HAS_ABGRTOYROW_RVV
#define HAS_AR64TOARGBROW_RVV
+#define HAS_AR64TOAB64ROW_RVV
#define HAS_ARGBATTENUATEROW_RVV
#define HAS_ARGBBLENDROW_RVV
#define HAS_ARGBCOPYYTOALPHAROW_RVV
#define HAS_ARGBEXTRACTALPHAROW_RVV
#define HAS_ARGBTOAB64ROW_RVV
+#define HAS_ARGBTOABGRROW_RVV
#define HAS_ARGBTOAR64ROW_RVV
+#define HAS_ARGBTOBGRAROW_RVV
#define HAS_ARGBTORAWROW_RVV
#define HAS_ARGBTORGB24ROW_RVV
+#define HAS_ARGBTORGBAROW_RVV
#define HAS_ARGBTOYJROW_RVV
#define HAS_ARGBTOYMATRIXROW_RVV
#define HAS_ARGBTOYROW_RVV
@@ -839,6 +843,7 @@ extern "C" {
#define HAS_RGB24TOARGBROW_RVV
#define HAS_RGB24TOYJROW_RVV
#define HAS_RGB24TOYROW_RVV
+#define HAS_RGBATOARGBROW_RVV
#define HAS_RGBATOYJROW_RVV
#define HAS_RGBATOYMATRIXROW_RVV
#define HAS_RGBATOYROW_RVV
@@ -3494,8 +3499,13 @@ void ARGBToARGB4444Row_LASX(const uint8_t* src_argb,
int width);
void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width);
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width);
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_RVV(const uint8_t* src_argb, uint8_t* dst_rgb24, int width);
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width);
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width);
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
@@ -3509,6 +3519,8 @@ void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
const uint8_t* shuffler,
@@ -3537,6 +3549,8 @@ void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width);
void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width);
void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width);
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width);
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64, uint16_t* dst_ab64, int width);
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width);
void ARGBToAR64Row_Any_SSSE3(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int width);
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index f6ab0784..5c946c3a 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -3003,6 +3003,7 @@ int J400ToARGB(const uint8_t* src_y,
return 0;
}
+#ifndef __riscv
// Shuffle table for converting BGRA to ARGB.
static const uvec8 kShuffleMaskBGRAToARGB = {
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
@@ -3090,6 +3091,195 @@ int AR64ToAB64(const uint16_t* src_ar64,
return AR64Shuffle(src_ar64, src_stride_ar64, dst_ab64, dst_stride_ab64,
(const uint8_t*)&kShuffleMaskAR64ToAB64, width, height);
}
+#else
+// Convert BGRA to ARGB (same as ARGBToBGRA).
+LIBYUV_API
+int BGRAToARGB(const uint8_t* src_bgra,
+ int src_stride_bgra,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBToBGRA(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, width,
+ height);
+}
+
+// Convert ARGB to BGRA.
+LIBYUV_API
+int ARGBToBGRA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_bgra,
+ int dst_stride_bgra,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) =
+ ARGBToBGRARow_C;
+ if (!src_argb || !dst_bgra || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_bgra = 0;
+ }
+
+#if defined(HAS_ARGBTOBGRAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToBGRARow = ARGBToBGRARow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToBGRARow(src_argb, dst_bgra, width);
+ src_argb += src_stride_argb;
+ dst_bgra += dst_stride_bgra;
+ }
+ return 0;
+}
+
+// Convert ARGB to ABGR.
+LIBYUV_API
+int ARGBToABGR(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_abgr,
+ int dst_stride_abgr,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) =
+ ARGBToABGRRow_C;
+ if (!src_argb || !dst_abgr || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_abgr = 0;
+ }
+
+#if defined(HAS_ARGBTOABGRROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToABGRRow = ARGBToABGRRow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToABGRRow(src_argb, dst_abgr, width);
+ src_argb += src_stride_argb;
+ dst_abgr += dst_stride_abgr;
+ }
+ return 0;
+}
+
+// Convert ABGR to ARGB (same as ARGBToABGR).
+LIBYUV_API
+int ABGRToARGB(const uint8_t* src_abgr,
+ int src_stride_abgr,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ return ARGBToABGR(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, width,
+ height);
+}
+
+// Convert RGBA to ARGB.
+LIBYUV_API
+int RGBAToARGB(const uint8_t* src_rgba,
+ int src_stride_rgba,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) =
+ RGBAToARGBRow_C;
+ if (!src_rgba || !dst_argb || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_rgba = src_rgba + (height - 1) * src_stride_rgba;
+ src_stride_rgba = -src_stride_rgba;
+ }
+ // Coalesce rows.
+ if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_rgba = dst_stride_argb = 0;
+ }
+
+#if defined(HAS_RGBATOARGBROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ RGBAToARGBRow = RGBAToARGBRow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ RGBAToARGBRow(src_rgba, dst_argb, width);
+ src_rgba += src_stride_rgba;
+ dst_argb += dst_stride_argb;
+ }
+ return 0;
+}
+
+// Convert AR64 To AB64.
+LIBYUV_API
+int AR64ToAB64(const uint16_t* src_ar64,
+ int src_stride_ar64,
+ uint16_t* dst_ab64,
+ int dst_stride_ab64,
+ int width,
+ int height) {
+ int y;
+ void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64,
+ int width) = AR64ToAB64Row_C;
+ if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) {
+ return -1;
+ }
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
+ src_stride_ar64 = -src_stride_ar64;
+ }
+ // Coalesce rows.
+ if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_ar64 = dst_stride_ab64 = 0;
+ }
+
+#if defined(HAS_AR64TOAB64ROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ AR64ToAB64Row = AR64ToAB64Row_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ AR64ToAB64Row(src_ar64, dst_ab64, width);
+ src_ar64 += src_stride_ar64;
+ dst_ab64 += dst_stride_ab64;
+ }
+ return 0;
+}
+#endif
// Convert RGB24 to ARGB.
LIBYUV_API
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index c3d037c4..96129d84 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -1527,6 +1527,7 @@ int ARGBToI400(const uint8_t* src_argb,
return 0;
}
+#ifndef __riscv
// Shuffle table for converting ARGB to RGBA.
static const uvec8 kShuffleMaskARGBToRGBA = {
3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u};
@@ -1542,6 +1543,47 @@ int ARGBToRGBA(const uint8_t* src_argb,
return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba,
(const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height);
}
+#else
+// Convert ARGB to RGBA.
+LIBYUV_API
+int ARGBToRGBA(const uint8_t* src_argb,
+ int src_stride_argb,
+ uint8_t* dst_rgba,
+ int dst_stride_rgba,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) =
+ ARGBToRGBARow_C;
+ if (!src_argb || !dst_rgba || width <= 0 || height == 0) {
+ return -1;
+ }
+ if (height < 0) {
+ height = -height;
+ src_argb = src_argb + (height - 1) * src_stride_argb;
+ src_stride_argb = -src_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb = dst_stride_rgba = 0;
+ }
+
+#if defined(HAS_ARGBTORGBAROW_RVV)
+ if (TestCpuFlag(kCpuHasRVV)) {
+ ARGBToRGBARow = ARGBToRGBARow_RVV;
+ }
+#endif
+
+ for (y = 0; y < height; ++y) {
+ ARGBToRGBARow(src_argb, dst_rgba, width);
+ src_argb += src_stride_argb;
+ dst_rgba += dst_stride_rgba;
+ }
+ return 0;
+}
+#endif
// Convert ARGB To RGB24.
LIBYUV_API
diff --git a/source/row_common.cc b/source/row_common.cc
index 7591c6b6..3afc4b4d 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -281,6 +281,54 @@ void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
}
}
+void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ uint8_t a = src_argb[3];
+ dst_abgr[0] = r;
+ dst_abgr[1] = g;
+ dst_abgr[2] = b;
+ dst_abgr[3] = a;
+ dst_abgr += 4;
+ src_argb += 4;
+ }
+}
+
+void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ uint8_t a = src_argb[3];
+ dst_bgra[0] = a;
+ dst_bgra[1] = r;
+ dst_bgra[2] = g;
+ dst_bgra[3] = b;
+ dst_bgra += 4;
+ src_argb += 4;
+ }
+}
+
+void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t b = src_argb[0];
+ uint8_t g = src_argb[1];
+ uint8_t r = src_argb[2];
+ uint8_t a = src_argb[3];
+ dst_rgba[0] = a;
+ dst_rgba[1] = b;
+ dst_rgba[2] = g;
+ dst_rgba[3] = r;
+ dst_rgba += 4;
+ src_argb += 4;
+ }
+}
+
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width; ++x) {
@@ -309,6 +357,22 @@ void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
}
}
+void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint8_t a = src_rgba[0];
+ uint8_t b = src_rgba[1];
+ uint8_t g = src_rgba[2];
+ uint8_t r = src_rgba[3];
+ dst_argb[0] = b;
+ dst_argb[1] = g;
+ dst_argb[2] = r;
+ dst_argb[3] = a;
+ dst_argb += 4;
+ src_rgba += 4;
+ }
+}
+
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
@@ -517,6 +581,22 @@ void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
}
}
+void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
+ int x;
+ for (x = 0; x < width; ++x) {
+ uint16_t b = src_ar64[0];
+ uint16_t g = src_ar64[1];
+ uint16_t r = src_ar64[2];
+ uint16_t a = src_ar64[3];
+ dst_ab64[0] = r;
+ dst_ab64[1] = g;
+ dst_ab64[2] = b;
+ dst_ab64[3] = a;
+ dst_ab64 += 4;
+ src_ar64 += 4;
+ }
+}
+
// TODO(fbarchard): Make shuffle compatible with SIMD versions
void AR64ShuffleRow_C(const uint8_t* src_ar64,
uint8_t* dst_ar64,
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
index c875be2f..0bf2bef6 100644
--- a/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -200,6 +200,23 @@ void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
}
#endif
+#ifdef HAS_AR64TOAB64ROW_RVV
+void AR64ToAB64Row_RVV(const uint16_t* src_ar64,
+ uint16_t* dst_ab64,
+ int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e16m2(w);
+ vuint16m2_t v_b, v_g, v_r, v_a;
+ __riscv_vlseg4e16_v_u16m2(&v_b, &v_g, &v_r, &v_a, src_ar64, vl);
+ __riscv_vsseg4e16_v_u16m2(dst_ab64, v_r, v_g, v_b, v_a, vl);
+ w -= vl;
+ src_ar64 += vl * 4;
+ dst_ab64 += vl * 4;
+ } while (w > 0);
+}
+#endif
+
#ifdef HAS_AB64TOARGBROW_RVV
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
size_t avl = (size_t)width;
@@ -301,6 +318,66 @@ void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
}
#endif
+#ifdef HAS_ARGBTOABGRROW_RVV
+void ARGBToABGRRow_RVV(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_abgr, v_r, v_g, v_b, v_a, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_abgr += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTOBGRAROW_RVV
+void ARGBToBGRARow_RVV(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_bgra, v_a, v_r, v_g, v_b, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_bgra += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_ARGBTORGBAROW_RVV
+void ARGBToRGBARow_RVV(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_rgba, v_a, v_b, v_g, v_r, vl);
+ w -= vl;
+ src_argb += vl * 4;
+ dst_rgba += vl * 4;
+ } while (w > 0);
+}
+#endif
+
+#ifdef HAS_RGBATOARGBROW_RVV
+void RGBAToARGBRow_RVV(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
+ size_t w = (size_t)width;
+ do {
+ size_t vl = __riscv_vsetvl_e8m2(w);
+ vuint8m2_t v_a, v_r, v_g, v_b;
+ __riscv_vlseg4e8_v_u8m2(&v_a, &v_b, &v_g, &v_r, src_rgba, vl);
+ __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
+ w -= vl;
+ src_rgba += vl * 4;
+ dst_argb += vl * 4;
+ } while (w > 0);
+}
+#endif
+
#ifdef HAS_RGB24TOARGBROW_RVV
void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
uint8_t* dst_argb,