From 4472b5b8491189ca173b5eb771a25855cf5f6ed9 Mon Sep 17 00:00:00 2001 From: Bruce Lai Date: Fri, 16 Jun 2023 19:09:17 -0700 Subject: [RVV] Update ARGBAttenuateRow_RVV implementation Bug: libyuv:956 Change-Id: Ib539c2196767e88fa6e419ed2f22d95b6deaf406 Signed-off-by: Bruce Lai Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4623172 Reviewed-by: Frank Barchard Commit-Queue: Frank Barchard --- source/row_rvv.cc | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/source/row_rvv.cc b/source/row_rvv.cc index 29422574..bbb54a1b 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -924,24 +924,31 @@ void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) { RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants); } +// Attenuate: (f * a + 255) >> 8 void ARGBAttenuateRow_RVV(const uint8_t* src_argb, uint8_t* dst_argb, int width) { size_t w = (size_t)width; // To match behavior on other platforms, vxrm (fixed-point rounding mode - // register) is set to round-to-nearest-up(0). - asm volatile("csrwi vxrm, 0"); + // register) is set to round-down(2). + asm volatile("csrwi vxrm, 2"); do { vuint8m2_t v_b, v_g, v_r, v_a; vuint16m4_t v_ba_16, v_ga_16, v_ra_16; size_t vl = __riscv_vsetvl_e8m2(w); __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl); + // f * a v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl); v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl); v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl); - v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl); - v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl); - v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl); + // f * a + 255 + v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl); + v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl); + v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl); + // (f * a + 255) >> 8 + v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl); + v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl); + v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl); __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl); w -= vl; src_argb += vl * 4; -- cgit v1.2.3