diff options
author | Neelkamal Semwal <neelkamal.semwal@ittiam.com> | 2021-03-01 23:27:25 +0530 |
---|---|---|
committer | Ray Essick <essick@google.com> | 2021-03-26 21:24:54 -0700 |
commit | d4db570ab39053ad9427bb5fca1671a33b3cb1a7 (patch) | |
tree | 2ebb40271ab922db3d4b4382cbb86c05853ad526 | |
parent | a341d8615aef944802185a86b1d25bd07e1acfa1 (diff) | |
download | libhevc-d4db570ab39053ad9427bb5fca1671a33b3cb1a7.tar.gz |
encoder: Update chroma modules to avoid reading an extra byte
When processing the V plane, an extra byte was being read in some
of the NEON modules.
The modules are now updated to avoid that extra byte read.
Bug: 177433559
Bug: 183012467
Test: poc in bug
Test: atest CtsMediaTestCases:VideoEncoderTest
Test: atest CtsMediaV2TestCases:CodecEncoderTest
Test: atest VtsHalMediaC2V1_0TargetVideoEncTest
Change-Id: I598c50f727b4d62f19523cbb008482e27de5e3cc
-rw-r--r-- | common/arm/ihevc_resi_trans_neon.c | 93 | ||||
-rw-r--r-- | common/arm/ihevc_resi_trans_neon_32x32.c | 12 | ||||
-rw-r--r-- | common/ihevc_defs.h | 7 | ||||
-rw-r--r-- | common/ihevc_resi_trans.c | 95 | ||||
-rw-r--r-- | common/ihevc_resi_trans.h | 24 | ||||
-rw-r--r-- | encoder/arm/ihevce_ssd_calculator_neon.c | 87 | ||||
-rw-r--r-- | encoder/ihevce_cmn_utils_instr_set_router.h | 4 | ||||
-rw-r--r-- | encoder/ihevce_common_utils.c | 10 | ||||
-rw-r--r-- | encoder/ihevce_defs.h | 7 | ||||
-rw-r--r-- | encoder/ihevce_enc_loop_structs.h | 3 | ||||
-rw-r--r-- | encoder/ihevce_enc_loop_utils.c | 46 | ||||
-rw-r--r-- | encoder/ihevce_enc_structs.h | 3 | ||||
-rw-r--r-- | encoder/ihevce_recur_bracketing.c | 2 | ||||
-rw-r--r-- | encoder/ihevce_sao.c | 8 |
14 files changed, 231 insertions, 170 deletions
diff --git a/common/arm/ihevc_resi_trans_neon.c b/common/arm/ihevc_resi_trans_neon.c index 280b8e9..bf9c058 100644 --- a/common/arm/ihevc_resi_trans_neon.c +++ b/common/arm/ihevc_resi_trans_neon.c @@ -66,10 +66,9 @@ UWORD32 ihevc_resi_trans_4x4_neon( WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { - WORD32 chroma_flag = dst_strd_chr_flag & 1; - WORD32 dst_strd = dst_strd_chr_flag >> 16; UWORD32 sad; uint8x16_t inp_buf, pred_buf; int16x8_t diff_1, diff_2; @@ -86,15 +85,15 @@ UWORD32 ihevc_resi_trans_4x4_neon( uint64x2_t c; (void)pi4_temp; - if(chroma_flag == 0) + if(e_chroma_plane == NULL_PLANE) { inp_buf = load_unaligned_u8q(pu1_src, src_strd); pred_buf = load_unaligned_u8q(pu1_pred, pred_strd); } else { - inp_buf = load_unaligned_u8qi(pu1_src, src_strd); - pred_buf = load_unaligned_u8qi(pu1_pred, pred_strd); + inp_buf = load_unaligned_u8qi(pu1_src + e_chroma_plane, src_strd); + pred_buf = load_unaligned_u8qi(pu1_pred + e_chroma_plane, pred_strd); } abs = vabdl_u8(vget_low_u8(inp_buf), vget_low_u8(pred_buf)); @@ -198,9 +197,11 @@ UWORD32 ihevc_resi_trans_4x4_neon( * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit - * 0 - luma transform, 1 - chroma transform. 
Not used for 4x4ttyppe1 + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns block sad * @@ -216,9 +217,9 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon( WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { - WORD32 dst_strd; UWORD32 sad; int16x4_t src0_4x16b; int16x4_t src1_4x16b; @@ -242,7 +243,7 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon( uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(pred_u8)); uint32x4_t b; uint64x2_t c; - + UNUSED(e_chroma_plane); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(pred_u8)); b = vpaddlq_u16(abs); c = vpaddlq_u32(b); @@ -251,7 +252,6 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon( 0); (void)pi4_temp; - dst_strd = dst_strd_chr_flag >> 16; /************************* 4x4 16bit Transpose ***********************/ src0_4x16b = vget_low_s16(src_reg0); @@ -379,8 +379,11 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon( * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -396,7 +399,8 @@ UWORD32 ihevc_resi_trans_8x8_neon( WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { int16x8_t diff_16[8]; int16x8_t abs = vdupq_n_s16(0); @@ -404,13 +408,11 @@ UWORD32 ihevc_resi_trans_8x8_neon( int64x2_t tmp_b; int32x2_t sad_v; int32x4x2_t a0, a1, a2, a3, a4, a5, a6, a7; - int chroma_flag = dst_strd_chr_flag & 1; - int dst_strd = dst_strd_chr_flag >> 16; UWORD32 sad; (void)pi4_temp; -#define RESIDUE(k, is_chroma) \ - if(!is_chroma) \ +#define RESIDUE(k) \ + if(NULL_PLANE == e_chroma_plane) \ { \ const uint8x8_t s##k = vld1_u8(pu1_src); \ const uint8x8_t p##k = vld1_u8(pu1_pred); \ @@ -421,8 
+423,8 @@ UWORD32 ihevc_resi_trans_8x8_neon( } \ else \ { \ - const uint8x8_t s##k = vld2_u8(pu1_src).val[0]; \ - const uint8x8_t p##k = vld2_u8(pu1_pred).val[0]; \ + const uint8x8_t s##k = vld2_u8(pu1_src).val[e_chroma_plane]; \ + const uint8x8_t p##k = vld2_u8(pu1_pred).val[e_chroma_plane]; \ diff_16[k] = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k)); \ pu1_src += src_strd; \ pu1_pred += pred_strd; \ @@ -430,14 +432,14 @@ UWORD32 ihevc_resi_trans_8x8_neon( } // stage 1 - RESIDUE(0, chroma_flag); - RESIDUE(1, chroma_flag); - RESIDUE(2, chroma_flag); - RESIDUE(3, chroma_flag); - RESIDUE(4, chroma_flag); - RESIDUE(5, chroma_flag); - RESIDUE(6, chroma_flag); - RESIDUE(7, chroma_flag); + RESIDUE(0); + RESIDUE(1); + RESIDUE(2); + RESIDUE(3); + RESIDUE(4); + RESIDUE(5); + RESIDUE(6); + RESIDUE(7); tmp_a = vpaddlq_s16(abs); tmp_b = vpaddlq_s32(tmp_a); @@ -792,11 +794,12 @@ UWORD32 ihevc_resi_trans_8x8_neon( return sad; } -static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, int is_chroma) +static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, + CHROMA_PLANE_ID_T e_chroma_plane) { int i; - if(is_chroma == 0) + if(e_chroma_plane == NULL_PLANE) { for (i = 0; i < 16; i++) { @@ -808,7 +811,7 @@ static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, int is_chrom { for (i = 0; i < 16; i++) { - b[i] = vld2_u8(a).val[0]; + b[i] = vld2_u8(a).val[e_chroma_plane]; a += stride; } } @@ -1261,8 +1264,11 @@ static void dct_body_32_32(int32x4x2_t *in /*[16]*/, int32x4x2_t *out /*[16]*/) * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -1278,12 +1284,11 @@ UWORD32 ihevc_resi_trans_16x16_neon( WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { 
UWORD32 u4_blk_sad = 0; WORD32 chroma_flag; - WORD32 dst_strd; - uint8x8_t temp0[16], temp1[16]; int16x8_t temp2[16], temp3[16]; int32x4_t tmp_a, tmp_b; @@ -1292,21 +1297,19 @@ UWORD32 ihevc_resi_trans_16x16_neon( int32x4x2_t out0[16], out1[16], temp4[16], temp5[16]; (void)pi4_temp; - chroma_flag = dst_strd_chr_flag & 1; - dst_strd = dst_strd_chr_flag >> 16; - + chroma_flag = e_chroma_plane != NULL_PLANE; /* Residue + Forward Transform 1st stage */ // Left half. - load(pu1_src, src_strd, temp0, chroma_flag); - load(pu1_pred, pred_strd, temp1, chroma_flag); + load(pu1_src, src_strd, temp0, e_chroma_plane); + load(pu1_pred, pred_strd, temp1, e_chroma_plane); tmp_a = diff(temp0, temp1, temp2); cross_input_16(temp2, temp3); dct_body_16_32(temp3, out0); // Right half. - load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, chroma_flag); - load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, chroma_flag); + load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, e_chroma_plane); + load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, e_chroma_plane); tmp_b = diff(temp0, temp1, temp2); cross_input_16(temp2, temp3); diff --git a/common/arm/ihevc_resi_trans_neon_32x32.c b/common/arm/ihevc_resi_trans_neon_32x32.c index 5270f80..67f742c 100644 --- a/common/arm/ihevc_resi_trans_neon_32x32.c +++ b/common/arm/ihevc_resi_trans_neon_32x32.c @@ -86,8 +86,11 @@ * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -98,18 +101,17 @@ */ UWORD32 ihevc_resi_trans_32x32_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 *pi4_temp, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, CHROMA_PLANE_ID_T e_chroma_plane) { int16x8_t diff_16[4][2]; WORD32 i; int32x2_t sad; int64x2_t tmp_a; UWORD32 u4_blk_sad = 0; - WORD32 
dst_strd = dst_strd_chr_flag >> 16; WORD32 *pi4_temp_orig = pi4_temp; int16x8_t abs = vdupq_n_s16(0); int32x4_t sum_val = vdupq_n_s32(0); - + UNUSED(e_chroma_plane); // Stage 1 for(i = 0; i < 16; i++) diff --git a/common/ihevc_defs.h b/common/ihevc_defs.h index 58761eb..faa3704 100644 --- a/common/ihevc_defs.h +++ b/common/ihevc_defs.h @@ -133,6 +133,13 @@ enum CHROMA_FMT_IDC_YUV444_PLANES = 4, }; +typedef enum +{ + NULL_PLANE = -1, + U_PLANE = 0, + V_PLANE = 1 +} CHROMA_PLANE_ID_T; + /* Pred Modes */ /* Do not change enum values */ enum diff --git a/common/ihevc_resi_trans.c b/common/ihevc_resi_trans.c index e1537cc..d1c2470 100644 --- a/common/ihevc_resi_trans.c +++ b/common/ihevc_resi_trans.c @@ -81,8 +81,11 @@ * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * * @returns Void @@ -99,7 +102,8 @@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { WORD32 i, c[4]; WORD32 add, shift; @@ -107,11 +111,7 @@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src, WORD32 *pi4_tmp_orig; WORD16 *pi2_dst_orig; UWORD32 u4_blk_sad = 0; - // WORD32 chroma_flag; - WORD32 dst_strd; - - // chroma_flag = dst_strd_chr_flag & 1; - dst_strd = dst_strd_chr_flag >> 16; + UNUSED(e_chroma_plane); pi2_dst_orig = pi2_dst; pi4_tmp_orig = pi4_temp; @@ -216,8 +216,11 @@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src, * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -233,7 +236,8 @@ UWORD32 ihevc_resi_trans_4x4(UWORD8 *pu1_src, 
WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { WORD32 i; WORD32 e[2], o[2]; @@ -242,11 +246,14 @@ UWORD32 ihevc_resi_trans_4x4(UWORD8 *pu1_src, WORD32 *pi4_tmp_orig; WORD16 *pi2_dst_orig; UWORD32 u4_blk_sad=0; - WORD32 chroma_flag; - WORD32 dst_strd; + WORD32 chroma_flag = 0; - chroma_flag = dst_strd_chr_flag & 1; - dst_strd = dst_strd_chr_flag >> 16; + if (e_chroma_plane != NULL_PLANE) + { + chroma_flag = 1; + pu1_src += e_chroma_plane; + pu1_pred += e_chroma_plane; + } pi2_dst_orig = pi2_dst; pi4_tmp_orig = pi4_temp; @@ -427,8 +434,11 @@ void ihevc_resi_trans_4x4_16bit(WORD16 *pi2_src, * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -444,7 +454,8 @@ UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { WORD32 i, k; WORD32 e[4], o[4]; @@ -455,11 +466,14 @@ UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src, // WORD16 *pi2_tmp; WORD16 *pi2_dst_orig; UWORD32 u4_blk_sad=0; - WORD32 chroma_flag; - WORD32 dst_strd; + WORD32 chroma_flag = 0; - chroma_flag = dst_strd_chr_flag & 1; - dst_strd = dst_strd_chr_flag >> 16; + if (e_chroma_plane != NULL_PLANE) + { + chroma_flag = 1; + pu1_src += e_chroma_plane; + pu1_pred += e_chroma_plane; + } pi2_dst_orig = pi2_dst; pi4_tmp_orig = pi4_temp; @@ -724,8 +738,11 @@ void ihevc_resi_trans_8x8_16bit(WORD16 *pi2_src, * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -741,7 +758,8 
@@ UWORD32 ihevc_resi_trans_16x16(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { WORD32 i, k; WORD32 e[8], o[8]; @@ -752,11 +770,14 @@ UWORD32 ihevc_resi_trans_16x16(UWORD8 *pu1_src, WORD32 *pi4_tmp_orig; WORD16 *pi2_dst_orig; UWORD32 u4_blk_sad = 0; - WORD32 chroma_flag; - WORD32 dst_strd; + WORD32 chroma_flag = 0; - chroma_flag = dst_strd_chr_flag & 1; - dst_strd = dst_strd_chr_flag >> 16; + if (e_chroma_plane != NULL_PLANE) + { + chroma_flag = 1; + pu1_src += e_chroma_plane; + pu1_pred += e_chroma_plane; + } pi2_dst_orig = pi2_dst; pi4_tmp_orig = pi4_temp; @@ -1056,8 +1077,11 @@ void ihevc_resi_trans_16x16_16bit(WORD16 *pi2_src, * @param[in] pred_strd * Prediction Stride * - * @param[in] dst_strd_chr_flag - * Output Stride and Chroma Flag packed in the MS and LS 16-bit + * @param[in] dst_strd + * Output Stride + * + * @param[in] e_chroma_plane + * Enum singalling chroma plane * * @returns Void * @@ -1073,7 +1097,8 @@ UWORD32 ihevc_resi_trans_32x32(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag) + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane) { WORD32 i, k; WORD32 e[16], o[16]; @@ -1085,11 +1110,7 @@ UWORD32 ihevc_resi_trans_32x32(UWORD8 *pu1_src, WORD32 *pi4_tmp_orig; WORD16 *pi2_dst_orig; UWORD32 u4_blk_sad = 0 ; - WORD32 chroma_flag; - WORD32 dst_strd; - - chroma_flag = dst_strd_chr_flag & 1; - dst_strd = dst_strd_chr_flag >> 16; + UNUSED(e_chroma_plane); pi2_dst_orig = pi2_dst; pi4_tmp_orig = pi4_temp; diff --git a/common/ihevc_resi_trans.h b/common/ihevc_resi_trans.h index 3ca184b..9c40fa1 100644 --- a/common/ihevc_resi_trans.h +++ b/common/ihevc_resi_trans.h @@ -42,7 +42,8 @@ typedef UWORD32 ihevc_resi_trans_4x4_ttype1_ft(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); typedef UWORD32 
ihevc_hbd_resi_trans_4x4_ttype1_ft(UWORD16 *pu2_src, UWORD16 *pu2_pred, @@ -50,7 +51,8 @@ typedef UWORD32 ihevc_hbd_resi_trans_4x4_ttype1_ft(UWORD16 *pu2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag, + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane, UWORD8 bit_depth); typedef UWORD32 ihevc_resi_trans_4x4_ft(UWORD8 *pu1_src, @@ -59,7 +61,8 @@ typedef UWORD32 ihevc_resi_trans_4x4_ft(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); typedef UWORD32 ihevc_hbd_resi_trans_4x4_ft ( @@ -79,7 +82,8 @@ typedef UWORD32 ihevc_resi_trans_8x8_ft(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); typedef UWORD32 ihevc_hbd_resi_trans_8x8_ft ( @@ -100,7 +104,8 @@ typedef UWORD32 ihevc_resi_trans_16x16_ft(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); typedef UWORD32 ihevc_hbd_resi_trans_16x16_ft(UWORD16 *pu2_src, UWORD16 *pu2_pred, @@ -108,7 +113,8 @@ typedef UWORD32 ihevc_hbd_resi_trans_16x16_ft(UWORD16 *pu2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag, + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane, UWORD8 bit_depth); typedef UWORD32 ihevc_resi_trans_32x32_ft(UWORD8 *pu1_src, @@ -117,7 +123,8 @@ typedef UWORD32 ihevc_resi_trans_32x32_ft(UWORD8 *pu1_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); typedef UWORD32 ihevc_hbd_resi_trans_32x32_ft(UWORD16 *pu2_src, UWORD16 *pu2_pred, @@ -125,7 +132,8 @@ typedef UWORD32 ihevc_hbd_resi_trans_32x32_ft(UWORD16 *pu2_src, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag, + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane, 
UWORD8 bit_depth); diff --git a/encoder/arm/ihevce_ssd_calculator_neon.c b/encoder/arm/ihevce_ssd_calculator_neon.c index d62bcfc..9cb659c 100644 --- a/encoder/arm/ihevce_ssd_calculator_neon.c +++ b/encoder/arm/ihevce_ssd_calculator_neon.c @@ -54,21 +54,22 @@ /* Function Definitions */ /*****************************************************************************/ static INLINE uint32x4_t ihevce_4x4_ssd_computer_neon( - UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 is_chroma) + UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, + CHROMA_PLANE_ID_T chroma_plane) { uint32x4_t ssd_low, ssd_high; uint8x16_t src, pred, abs; uint16x8_t sqabs_low, sqabs_high; - if(!is_chroma) + if(chroma_plane == NULL_PLANE) { src = load_unaligned_u8q(pu1_src, src_strd); pred = load_unaligned_u8q(pu1_pred, pred_strd); } else { - src = load_unaligned_u8qi(pu1_src, src_strd); - pred = load_unaligned_u8qi(pu1_pred, pred_strd); + src = load_unaligned_u8qi(pu1_src + chroma_plane, src_strd); + pred = load_unaligned_u8qi(pu1_pred + chroma_plane, pred_strd); } abs = vabdq_u8(src, pred); sqabs_low = vmull_u8(vget_low_u8(abs), vget_low_u8(abs)); @@ -80,21 +81,22 @@ static INLINE uint32x4_t ihevce_4x4_ssd_computer_neon( } static INLINE uint32x4_t - ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma) + ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, + CHROMA_PLANE_ID_T chroma_plane) { uint32x4_t ssd_val; uint8x8_t src, pred, abs; uint16x8_t sqabs; - if(!is_chroma) + if(chroma_plane == NULL_PLANE) { src = vld1_u8(pu1_src); pred = vld1_u8(pu1_pred); } else { - src = vld2_u8(pu1_src).val[0]; - pred = vld2_u8(pu1_pred).val[0]; + src = vld2_u8(pu1_src).val[chroma_plane]; + pred = vld2_u8(pu1_pred).val[chroma_plane]; } abs = vabd_u8(src, pred); sqabs = vmull_u8(abs, abs); @@ -104,21 +106,22 @@ static INLINE uint32x4_t } static INLINE uint32x4_t - ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, 
WORD32 is_chroma) + ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, + CHROMA_PLANE_ID_T chroma_plane) { uint32x4_t ssd_low, ssd_high; uint8x16_t src, pred, abs; uint16x8_t sqabs_low, sqabs_high; - if(!is_chroma) + if(chroma_plane == NULL_PLANE) { src = vld1q_u8(pu1_src); pred = vld1q_u8(pu1_pred); } else { - src = vld2q_u8(pu1_src).val[0]; - pred = vld2q_u8(pu1_pred).val[0]; + src = vld2q_u8(pu1_src).val[chroma_plane]; + pred = vld2q_u8(pu1_pred).val[chroma_plane]; } abs = vabdq_u8(src, pred); sqabs_low = vmull_u8(vget_low_u8(abs), vget_low_u8(abs)); @@ -130,13 +133,14 @@ static INLINE uint32x4_t } static INLINE uint32x4_t - ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma) + ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, + CHROMA_PLANE_ID_T chroma_plane) { uint32x4_t ssd_0, ssd_1, ssd_2, ssd_3; uint8x16_t src_0, pred_0, src_1, pred_1, abs_0, abs_1; uint16x8_t sqabs_0, sqabs_1, sqabs_2, sqabs_3; - if(!is_chroma) + if(chroma_plane == NULL_PLANE) { src_0 = vld1q_u8(pu1_src); pred_0 = vld1q_u8(pu1_pred); @@ -145,10 +149,10 @@ static INLINE uint32x4_t } else { - src_0 = vld2q_u8(pu1_src).val[0]; - pred_0 = vld2q_u8(pu1_pred).val[0]; - src_1 = vld2q_u8(pu1_src + 32).val[0]; - pred_1 = vld2q_u8(pu1_pred + 32).val[0]; + src_0 = vld2q_u8(pu1_src).val[chroma_plane]; + pred_0 = vld2q_u8(pu1_pred).val[chroma_plane]; + src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane]; + pred_1 = vld2q_u8(pu1_pred + 32).val[chroma_plane]; } abs_0 = vabdq_u8(src_0, pred_0); abs_1 = vabdq_u8(src_1, pred_1); @@ -167,7 +171,8 @@ static INLINE uint32x4_t } static INLINE uint32x4_t - ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma) + ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, + CHROMA_PLANE_ID_T chroma_plane) { uint32x4_t ssd_0, ssd_1, ssd_2, ssd_3; uint32x4_t ssd_4, ssd_5, ssd_6, ssd_7; @@ -177,7 +182,7 @@ static INLINE uint32x4_t uint16x8_t sqabs_0, sqabs_1, sqabs_2, sqabs_3; 
uint16x8_t sqabs_4, sqabs_5, sqabs_6, sqabs_7; - if(!is_chroma) + if(chroma_plane == NULL_PLANE) { src_0 = vld1q_u8(pu1_src); pred_0 = vld1q_u8(pu1_pred); @@ -190,14 +195,14 @@ static INLINE uint32x4_t } else { - src_0 = vld2q_u8(pu1_src).val[0]; - pred_0 = vld2q_u8(pu1_pred).val[0]; - src_1 = vld2q_u8(pu1_src + 32).val[0]; - pred_1 = vld2q_u8(pu1_pred + 32).val[0]; - src_2 = vld2q_u8(pu1_src + 64).val[0]; - pred_2 = vld2q_u8(pu1_pred + 64).val[0]; - src_3 = vld2q_u8(pu1_src + 96).val[0]; - pred_3 = vld2q_u8(pu1_pred + 96).val[0]; + src_0 = vld2q_u8(pu1_src).val[chroma_plane]; + pred_0 = vld2q_u8(pu1_pred).val[chroma_plane]; + src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane]; + pred_1 = vld2q_u8(pu1_pred + 32).val[chroma_plane]; + src_2 = vld2q_u8(pu1_src + 64).val[chroma_plane]; + pred_2 = vld2q_u8(pu1_pred + 64).val[chroma_plane]; + src_3 = vld2q_u8(pu1_src + 96).val[chroma_plane]; + pred_3 = vld2q_u8(pu1_pred + 96).val[chroma_plane]; } abs_0 = vabdq_u8(src_0, pred_0); abs_1 = vabdq_u8(src_1, pred_1); @@ -236,7 +241,7 @@ static LWORD64 ihevce_ssd_calculator_plane_neon( UWORD32 ref_stride, UWORD32 wd, UWORD32 ht, - WORD32 is_chroma) + CHROMA_PLANE_ID_T chroma_plane) { uint32x4_t ssd = vdupq_n_u32(0); uint32x2_t sum; @@ -248,13 +253,13 @@ static LWORD64 ihevce_ssd_calculator_plane_neon( for(row = ht; row > 0; row--) { if(wd == 8) - ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma)); + ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane)); else if(wd == 16) - ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma)); + ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane)); else if(wd == 32) - ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma)); + ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane)); else if(wd == 64) - ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, 
is_chroma)); + ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane)); else if(wd % 8 == 0) { UWORD32 col; @@ -262,7 +267,7 @@ static LWORD64 ihevce_ssd_calculator_plane_neon( for(col = 0; col < wd; col += 8) { - ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, is_chroma)); + ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, chroma_plane)); ref = ref + 8; inp = inp + 8; } @@ -275,7 +280,7 @@ static LWORD64 ihevce_ssd_calculator_plane_neon( else if(wd == 4) { assert(ht == 4); - ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, is_chroma); + ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, chroma_plane); } sum = vadd_u32(vget_low_u32(ssd), vget_high_u32(ssd)); @@ -283,13 +288,17 @@ static LWORD64 ihevce_ssd_calculator_plane_neon( } LWORD64 ihevce_ssd_calculator_neon( - UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht) + UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, + UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane) { - return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, 0); + return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, + chroma_plane); } LWORD64 ihevce_chroma_interleave_ssd_calculator_neon( - UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht) + UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, + UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane) { - return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, 1); + return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, + chroma_plane); } diff --git a/encoder/ihevce_cmn_utils_instr_set_router.h b/encoder/ihevce_cmn_utils_instr_set_router.h index be7554f..f385372 100644 --- 
a/encoder/ihevce_cmn_utils_instr_set_router.h +++ b/encoder/ihevce_cmn_utils_instr_set_router.h @@ -40,6 +40,7 @@ #define __IHEVCE_CMN_UTILS_INSTR_SET_ROUTER_H_ #include "ihevc_typedefs.h" +#include "ihevc_defs.h" #include "ihevce_defs.h" /*****************************************************************************/ @@ -47,7 +48,8 @@ /*****************************************************************************/ typedef UWORD32 FT_CALC_HAD_SATD_8BIT(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD16 *, WORD32); -typedef LWORD64 FT_SSD_CALCULATOR(UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32); +typedef LWORD64 FT_SSD_CALCULATOR( + UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32, CHROMA_PLANE_ID_T); typedef LWORD64 FT_SSD_AND_SAD_CALCULATOR(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, UWORD32 *); diff --git a/encoder/ihevce_common_utils.c b/encoder/ihevce_common_utils.c index dd99132..e7fb036 100644 --- a/encoder/ihevce_common_utils.c +++ b/encoder/ihevce_common_utils.c @@ -858,11 +858,12 @@ WORD32 ihevce_osal_delete(void *pv_hle_ctxt) ******************************************************************************* */ LWORD64 ihevce_ssd_calculator( - UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht) + UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, + UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane) { UWORD32 i, j; LWORD64 ssd = 0; - + UNUSED(chroma_plane); for(i = 0; i < ht; i++) { for(j = 0; j < wd; j++) @@ -910,10 +911,13 @@ LWORD64 ihevce_ssd_calculator( ******************************************************************************* */ LWORD64 ihevce_chroma_interleave_ssd_calculator( - UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht) + UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, + UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane) { UWORD32 i, j; LWORD64 ssd = 0; + pu1_inp += chroma_plane; + 
pu1_ref += chroma_plane; /* run a loop and find the ssd by doing diff followed by square */ for(i = 0; i < ht; i++) diff --git a/encoder/ihevce_defs.h b/encoder/ihevce_defs.h index ffc35e3..23871ea 100644 --- a/encoder/ihevce_defs.h +++ b/encoder/ihevce_defs.h @@ -967,13 +967,6 @@ typedef enum } REF_LISTS_t; -typedef enum -{ - NULL_PLANE = -1, - U_PLANE = 0, - V_PLANE = 1 -} CHROMA_PLANE_ID_T; - typedef enum SSD_TYPE_T { NULL_TYPE = -1, diff --git a/encoder/ihevce_enc_loop_structs.h b/encoder/ihevce_enc_loop_structs.h index 74417c7..0768871 100644 --- a/encoder/ihevce_enc_loop_structs.h +++ b/encoder/ihevce_enc_loop_structs.h @@ -215,7 +215,8 @@ typedef UWORD32 (*pf_res_trans_chroma)( WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); /** \breif function pointer prototype for quantization and inv Quant for ssd calc. for all transform sizes */ diff --git a/encoder/ihevce_enc_loop_utils.c b/encoder/ihevce_enc_loop_utils.c index 9aa7bc8..4665cd0 100644 --- a/encoder/ihevce_enc_loop_utils.c +++ b/encoder/ihevce_enc_loop_utils.c @@ -2188,7 +2188,8 @@ WORD32 ihevce_t_q_iq_ssd_scan_fxn( pi2_trans_values, src_strd, pred_strd, - ((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */ + trans_size, + NULL_PLANE); cbf = ps_ctxt->apf_quant_iquant_ssd [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]( @@ -2297,7 +2298,7 @@ WORD32 ihevce_t_q_iq_ssd_scan_fxn( zero_cbf_cost = ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( - pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size); + pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE); } /************************************************************************/ @@ -7601,7 +7602,8 @@ LWORD64 ihevce_chroma_cu_prcs_rdopt( pred_strd, chrm_src_stride, trans_size, - trans_size); + trans_size, + U_PLANE); if(u1_compute_spatial_ssd) { @@ -7861,12 +7863,13 @@ LWORD64 
ihevce_chroma_cu_prcs_rdopt( curr_cr_cod_cost = trans_ssd_v = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( - pu1_cur_pred + 1, - pu1_cur_src + 1, + pu1_cur_pred, + pu1_cur_src, pred_strd, chrm_src_stride, trans_size, - trans_size); + trans_size, + V_PLANE); if(u1_compute_spatial_ssd) { @@ -10487,7 +10490,8 @@ LWORD64 ihevce_it_recon_ssd( i4_zero_row); return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( - pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size); + pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size, + e_chroma_plane); } else { @@ -10507,12 +10511,13 @@ LWORD64 ihevce_it_recon_ssd( e_chroma_plane); return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( - pu1_recon + (e_chroma_plane == V_PLANE), - pu1_src + (e_chroma_plane == V_PLANE), + pu1_recon, + pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, - u1_trans_size); + u1_trans_size, + e_chroma_plane); } } @@ -10628,12 +10633,13 @@ WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn( if(u1_is_skip) { pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( - pu1_pred + e_chroma_plane, - pu1_src + e_chroma_plane, + pu1_pred, + pu1_src, pred_strd, src_strd, trans_size, - trans_size); + trans_size, + e_chroma_plane); if(e_ssd_type == SPATIAL_DOMAIN_SSD) { @@ -10735,13 +10741,14 @@ WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn( /* ---------- call residue and transform block ------- */ u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1]( - pu1_src + (e_chroma_plane == V_PLANE), - pu1_pred + (e_chroma_plane == V_PLANE), + pu1_src, + pu1_pred, pi4_trans_scratch, pi2_trans_values, src_strd, pred_strd, - ((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */ + trans_size, + e_chroma_plane); (void)u4_blk_sad; /* -------- calculate SSD calculation in Transform Domain ------ */ @@ -10855,12 +10862,13 @@ WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn( zero_cbf_cost_u = 
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( - pu1_pred + (e_chroma_plane == V_PLANE), - pu1_src + (e_chroma_plane == V_PLANE), + pu1_pred, + pu1_src, pred_strd, src_strd, trans_size, - trans_size); + trans_size, + e_chroma_plane); } /************************************************************************/ diff --git a/encoder/ihevce_enc_structs.h b/encoder/ihevce_enc_structs.h index 2c442b9..ac39673 100644 --- a/encoder/ihevce_enc_structs.h +++ b/encoder/ihevce_enc_structs.h @@ -221,7 +221,8 @@ typedef UWORD32 (*pf_res_trans_luma)( WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd, - WORD32 dst_strd_chr_flag); + WORD32 dst_strd, + CHROMA_PLANE_ID_T e_chroma_plane); typedef WORD32 (*pf_quant)( WORD16 *pi2_coeffs, diff --git a/encoder/ihevce_recur_bracketing.c b/encoder/ihevce_recur_bracketing.c index 2967b37..15d09b4 100644 --- a/encoder/ihevce_recur_bracketing.c +++ b/encoder/ihevce_recur_bracketing.c @@ -451,7 +451,7 @@ void ihevce_pu_calc_4x4_blk( if(u1_use_satd) { ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr( - pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0); + pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, 4, NULL_PLANE); sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4); } diff --git a/encoder/ihevce_sao.c b/encoder/ihevce_sao.c index 4e7ea0a..d658535 100644 --- a/encoder/ihevce_sao.c +++ b/encoder/ihevce_sao.c @@ -779,7 +779,7 @@ void ihevce_sao_analyse( distortion = ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, - s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht); + s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE); // clang-format on ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; @@ -1145,7 +1145,8 @@ void ihevce_sao_analyse( ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, s_sao_ctxt.pu1_cur_luma_recon_buf, 
luma_src_stride, s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, - ctb_ht); + ctb_ht, + NULL_PLANE); } // clang-format on if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) @@ -1155,7 +1156,8 @@ void ihevce_sao_analyse( s_sao_ctxt.pu1_cur_chroma_recon_buf, chroma_src_stride, s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd, - (ctb_ht >> !u1_is_422)); + (ctb_ht >> !u1_is_422), + NULL_PLANE); } // clang-format on /*chroma distortion is added after correction because of lambda difference*/ |