diff options
author | Bruce Lai <bruce.lai@sifive.com> | 2023-06-16 19:09:17 -0700 |
---|---|---|
committer | libyuv LUCI CQ <libyuv-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-06-17 15:50:34 +0000 |
commit | 4472b5b8491189ca173b5eb771a25855cf5f6ed9 (patch) | |
tree | ed4fae9082a25a679a8ebcd600dbf76528fa5de3 | |
parent | 7939e039e76072bc687128fc5c1f10fe04f7858c (diff) | |
download | libyuv-4472b5b8491189ca173b5eb771a25855cf5f6ed9.tar.gz |
[RVV] Update ARGBAttenuateRow_RVV implementation
Bug: libyuv:956
Change-Id: Ib539c2196767e88fa6e419ed2f22d95b6deaf406
Signed-off-by: Bruce Lai <bruce.lai@sifive.com>
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4623172
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
-rw-r--r-- | source/row_rvv.cc | 17 |
1 file changed, 12 insertions, 5 deletions
diff --git a/source/row_rvv.cc b/source/row_rvv.cc index 29422574..bbb54a1b 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -924,24 +924,31 @@ void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) { RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants); } +// Attenuate: (f * a + 255) >> 8 void ARGBAttenuateRow_RVV(const uint8_t* src_argb, uint8_t* dst_argb, int width) { size_t w = (size_t)width; // To match behavior on other platforms, vxrm (fixed-point rounding mode - // register) is set to round-to-nearest-up(0). - asm volatile("csrwi vxrm, 0"); + // register) is set to round-down(2). + asm volatile("csrwi vxrm, 2"); do { vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_ba_16, v_ga_16, v_ra_16; size_t vl = __riscv_vsetvl_e8m2(w); __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + // f * a v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl); v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl); v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl); - v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl); - v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl); - v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl); + // f * a + 255 + v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl); + v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl); + v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl); + // (f * a + 255) >> 8 + v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl); + v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl); + v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl); __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); w -= vl; src_argb += vl * 4; |