/****************************************************************************** * * Copyright (C) 2022 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************************** * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ /** ******************************************************************************* * @file * isvce_residual_pred.c * * @brief * Contains functions used for SVC residual prediction * ******************************************************************************* */ #include #include #include "ih264_typedefs.h" #include "iv2.h" #include "isvc_macros.h" #include "ih264_debug.h" #include "isvc_defs.h" #include "isvc_structs.h" #include "isvce_defs.h" #include "isvce_structs.h" #include "isvce_res_pred_private_defs.h" #include "isvce_residual_pred.h" #include "isvce_utils.h" #include "isvc_defs.h" void isvce_chroma_residual_sampler_2x(coordinates_t *ps_ref_array_positions, coordinates_t *ps_ref_array_phases, buffer_container_t *ps_inp, buffer_container_t *ps_out, buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz, UWORD8 u1_ref_tx_size) { WORD32 i4_i; WORD16 *pi2_ref_data_byte; WORD32 *pi4_ref_array; WORD32 i4_phase1, i4_phase2; WORD16 *pi2_inp_data = ps_inp->pv_data; WORD16 *pi2_out_res = ps_out->pv_data; WORD32 i4_inp_data_stride = ps_inp->i4_data_stride; WORD32 i4_out_res_stride = ps_out->i4_data_stride; UNUSED(u4_ref_nnz); 
UNUSED(ps_ref_array_positions); UNUSED(u1_ref_tx_size); /* For 2x scaling, offsets always point to TL pixel outside MB */ /* Hence, refTransBlkIdc will be different and since phase */ /* for first refArray pos for horiz filtering samples > 8, */ /* first row and first column from the refArray is never used */ pi2_inp_data += 2 + i4_inp_data_stride; pi2_ref_data_byte = pi2_inp_data; i4_phase1 = ps_ref_array_phases[0].i4_abscissa; i4_phase2 = ps_ref_array_phases[1].i4_abscissa; ASSERT(i4_phase1 >= 8); pi4_ref_array = (WORD32 *) ps_scratch->pv_data; for(i4_i = 0; i4_i < BLK_SIZE; i4_i++) { WORD16 i2_coeff1, i2_coeff2; i2_coeff1 = (WORD16) (pi2_ref_data_byte[0]); /* populate the first inter sample */ *pi4_ref_array++ = i2_coeff1 << 4; { /* unroll count 1 */ i2_coeff2 = (WORD16) (pi2_ref_data_byte[2]); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff1 + i4_phase2 * i2_coeff2); /* unroll count 2 */ *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff1 + i4_phase1 * i2_coeff2); /* unroll count 3 */ i2_coeff1 = (WORD16) (pi2_ref_data_byte[4]); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff2 + i4_phase2 * i2_coeff1); /* unroll count 4 */ *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff2 + i4_phase1 * i2_coeff1); /* unroll count 5 */ i2_coeff2 = (WORD16) (pi2_ref_data_byte[6]); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((16 - i4_phase2) * i2_coeff1 + i4_phase2 * i2_coeff2); /* unroll count 6 */ *pi4_ref_array++ = ((16 - i4_phase1) * i2_coeff1 + i4_phase1 * i2_coeff2); } /* populate the last inter sample */ *pi4_ref_array++ = i2_coeff2 << 4; /* vertical loop uopdates */ pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride); } /* ----------- Vertical Interpolation ---------------- */ pi4_ref_array = (WORD32 *) ps_scratch->pv_data; i4_phase1 = ps_ref_array_phases[0].i4_ordinate; i4_phase2 = ps_ref_array_phases[2].i4_ordinate; for(i4_i = 0; i4_i < 
BLK8x8SIZE; i4_i++) { WORD16 *pi2_out; WORD32 *pi4_ref_array_temp; WORD32 i4_horz_samp_1, i4_horz_samp_2; pi2_out = pi2_out_res; pi4_ref_array_temp = pi4_ref_array; /* populate the first inter sample */ i4_horz_samp_1 = *pi4_ref_array_temp; pi4_ref_array_temp += BLK8x8SIZE; *pi2_out = (i4_horz_samp_1 + 8) >> 4; pi2_out += i4_out_res_stride; { /* unroll count 1 */ i4_horz_samp_2 = *pi4_ref_array_temp; pi4_ref_array_temp += BLK8x8SIZE; /* populate 2 samples based on current coeffs */ *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8; pi2_out += i4_out_res_stride; /* unroll count 2 */ *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8; pi2_out += i4_out_res_stride; /* unroll count 3 */ i4_horz_samp_1 = *pi4_ref_array_temp; pi4_ref_array_temp += BLK8x8SIZE; /* populate 2 samples based on current coeffs */ *pi2_out = ((16 - i4_phase2) * i4_horz_samp_2 + i4_phase2 * i4_horz_samp_1 + 128) >> 8; pi2_out += i4_out_res_stride; /* unroll count 4 */ *pi2_out = ((16 - i4_phase1) * i4_horz_samp_2 + i4_phase1 * i4_horz_samp_1 + 128) >> 8; pi2_out += i4_out_res_stride; /* unroll count 5 */ i4_horz_samp_2 = *pi4_ref_array_temp; /* populate 2 samples based on current coeffs */ *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8; pi2_out += i4_out_res_stride; /* unroll count 6 */ *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8; pi2_out += i4_out_res_stride; } /* populate the last inter sample */ *pi2_out = (i4_horz_samp_2 + 8) >> 4; /* horizontal loop updates */ pi4_ref_array++; pi2_out_res += 2; } } void isvce_luma_residual_sampler_2x(coordinates_t *ps_ref_array_positions, coordinates_t *ps_ref_array_phases, buffer_container_t *ps_inp, buffer_container_t *ps_out, buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz, UWORD8 u1_ref_tx_size) { WORD16 *pi2_inp_data = ps_inp->pv_data; WORD16 *pi2_out_res = ps_out->pv_data; WORD32 
i4_inp_data_stride = ps_inp->i4_data_stride; WORD32 i4_out_res_stride = ps_out->i4_data_stride; WORD16 *pi2_refarray_buffer = ps_scratch->pv_data; WORD32 i4_blk_ctr; UNUSED(ps_ref_array_positions); UNUSED(ps_ref_array_phases); /* For 2x scaling, offsets always point to TL pixel outside MB */ /* Hence, refTransBlkIdc will be different and since phase */ /* for first refArray pos for horiz filtering samples > 8, */ /* first row and first column from the refArray is never used */ pi2_inp_data += 1 + i4_inp_data_stride; if((u1_ref_tx_size) && (0 != u4_ref_nnz)) { WORD16 *pi2_ref_data_byte; WORD32 *pi4_ref_array; WORD32 i4_i, i4_j; pi2_ref_data_byte = pi2_inp_data; /* ----------- Horizontal Interpolation ---------------- */ pi4_ref_array = (WORD32 *) pi2_refarray_buffer; for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++) { WORD16 i2_coeff1, i2_coeff2; i2_coeff1 = (WORD16) (*pi2_ref_data_byte++); /* populate the first inter sample */ *pi4_ref_array++ = i2_coeff1 << 2; for(i4_j = 0; i4_j < 14; i4_j += 2) { i2_coeff2 = (WORD16) (*pi2_ref_data_byte++); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); /* store the coeff 2 to coeff 1 */ /* (used in next iteration) */ i2_coeff1 = i2_coeff2; } /* populate the last inter sample */ *pi4_ref_array++ = i2_coeff1 << 2; /* vertical loop uopdates */ pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride); } /* ----------- Vertical Interpolation ---------------- */ pi4_ref_array = (WORD32 *) pi2_refarray_buffer; for(i4_i = 0; i4_i < MB_SIZE; i4_i++) { WORD32 *pi4_ref_array_temp; WORD16 *pi2_out; WORD32 i4_horz_samp_1, i4_horz_samp_2; pi4_ref_array_temp = pi4_ref_array; pi2_out = pi2_out_res; i4_horz_samp_1 = *pi4_ref_array_temp; /* populate the first inter sample */ *pi2_out = (i4_horz_samp_1 + 2) >> 2; pi2_out += i4_out_res_stride; for(i4_j = 0; i4_j < 14; i4_j += 2) { pi4_ref_array_temp += MB_SIZE; 
i4_horz_samp_2 = *pi4_ref_array_temp; /* populate 2 samples based on current coeffs */ *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; pi2_out += i4_out_res_stride; *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; pi2_out += i4_out_res_stride; /* store the coeff 2 to coeff 1 */ /* (used in next iteration) */ i4_horz_samp_1 = i4_horz_samp_2; } /* populate the first inter sample */ *pi2_out = (i4_horz_samp_1 + 2) >> 2; /* horizontal loop updates */ pi4_ref_array++; pi2_out_res++; } } else { /* ----------------------------------------------------------------- */ /* LOOP over number of blocks */ /* ----------------------------------------------------------------- */ for(i4_blk_ctr = 0; i4_blk_ctr < BLK_SIZE; i4_blk_ctr++) { WORD16 *pi2_ref_data_byte; WORD32 *pi4_ref_array; WORD32 i4_i; /* if reference layer is not coded then no processing */ if(0 != (u4_ref_nnz & 0x1)) { pi2_ref_data_byte = pi2_inp_data; /* ----------- Horizontal Interpolation ---------------- */ pi4_ref_array = (WORD32 *) pi2_refarray_buffer; for(i4_i = 0; i4_i < BLK_SIZE; i4_i++) { WORD16 i2_coeff1, i2_coeff2; i2_coeff1 = (WORD16) (*pi2_ref_data_byte++); /* populate the first inter sample */ *pi4_ref_array++ = i2_coeff1 << 2; { i2_coeff2 = (WORD16) (*pi2_ref_data_byte++); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); i2_coeff1 = (WORD16) (*pi2_ref_data_byte++); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); i2_coeff2 = (WORD16) (*pi2_ref_data_byte++); /* populate 2 samples based on current coeffs */ *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2)); *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1)); } /* populate the last inter 
sample */ *pi4_ref_array++ = i2_coeff2 << 2; /* vertical loop uopdates */ pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride); } /* ----------- Vertical Interpolation ---------------- */ pi4_ref_array = (WORD32 *) pi2_refarray_buffer; for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++) { WORD32 *pi4_ref_array_temp; WORD16 *pi2_out; WORD32 i4_horz_samp_1, i4_horz_samp_2; pi4_ref_array_temp = pi4_ref_array; pi2_out = pi2_out_res; i4_horz_samp_1 = *pi4_ref_array_temp; /* populate the first inter sample */ *pi2_out = (i4_horz_samp_1 + 2) >> 2; pi2_out += i4_out_res_stride; { /* unroll loop count 1 */ pi4_ref_array_temp += BLK8x8SIZE; i4_horz_samp_2 = *pi4_ref_array_temp; /* populate 2 samples based on current coeffs */ *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; pi2_out += i4_out_res_stride; *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; pi2_out += i4_out_res_stride; /* unroll loop count 2 */ pi4_ref_array_temp += BLK8x8SIZE; i4_horz_samp_1 = *pi4_ref_array_temp; /* populate 2 samples based on current coeffs */ *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; pi2_out += i4_out_res_stride; *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; pi2_out += i4_out_res_stride; /* unroll loop count 3 */ pi4_ref_array_temp += BLK8x8SIZE; i4_horz_samp_2 = *pi4_ref_array_temp; /* populate 2 samples based on current coeffs */ *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4; pi2_out += i4_out_res_stride; *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4; pi2_out += i4_out_res_stride; } /* populate the last inter sample */ *pi2_out = (i4_horz_samp_2 + 2) >> 2; /* horizontal loop updates */ pi4_ref_array++; pi2_out_res++; } } else { pi2_out_res += BLK8x8SIZE; } if(1 == i4_blk_ctr) { pi2_inp_data -= BLK_SIZE; pi2_inp_data += (i4_inp_data_stride * BLK_SIZE); pi2_out_res -= 
MB_SIZE; pi2_out_res += (i4_out_res_stride * BLK8x8SIZE); u4_ref_nnz >>= 2; } else { pi2_inp_data += BLK_SIZE; } u4_ref_nnz >>= 1; } } } /** ******************************************************************************* * * @brief * Returns size of buffers for storing residual pred ctxt * * @param[in] u1_num_spatial_layers * Num Spatial Layers * * @param[in] d_spatial_res_ratio * Resolution Ratio b/w spatial layers * * @param[in] u4_wd * Input Width * * @param[in] u4_ht * Input Height * * @returns Size of buffers * ******************************************************************************* */ UWORD32 isvce_get_svc_res_pred_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht) { UWORD32 u4_size = 0; if(u1_num_spatial_layers > 1) { WORD32 i; u4_size += MAX_PROCESS_CTXT * sizeof(svc_res_pred_ctxt_t); u4_size += MAX_PROCESS_CTXT * sizeof(res_pred_state_t); /* Mem for storing pred */ u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16); u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16); /* Mem for storing intermediates */ u4_size += MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16); /* Mem for pu1_ref_x_ptr_incr and pu1_ref_y_ptr_incr*/ u4_size += 2 * MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8); u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(res_pred_layer_state_t); for(i = u1_num_spatial_layers - 1; i >= 1; i--) { WORD32 i4_layer_luma_wd = (WORD32) ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE); WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99; WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99; WORD32 i4_layer_u_mbs = (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE 
/ 2)); /* ps_luma_mb_states */ { u4_size += i4_layer_luma_mbs * sizeof(res_pred_mb_state_t); /* ps_ref_array_positions */ u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * MB_SIZE * MB_SIZE) : 0) * sizeof(coordinates_t); /* ps_ref_array_phases */ u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * 5) : 0) * sizeof(coordinates_t); } /* ps_chroma_mb_states */ { u4_size += i4_layer_u_mbs * sizeof(res_pred_mb_state_t); /* ps_ref_array_positions */ u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * (MB_SIZE / 2) * (MB_SIZE / 2)) : 0) * sizeof(coordinates_t); /* ps_ref_array_phases */ u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * 5) : 3) * sizeof(coordinates_t); } } for(i = u1_num_spatial_layers - 1; i >= 0; i--) { WORD32 i4_layer_luma_wd = (WORD32) ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99; WORD32 i4_layer_luma_mbs = ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); /* pi1_mb_mode */ u4_size += i4_layer_luma_mbs * sizeof(WORD8); } } else { u4_size += MAX_PROCESS_CTXT * sizeof(yuv_buf_props_t); /* Mem for storing pred */ u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16); u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16); } return u4_size; } static FORCEINLINE WORD32 isvce_get_scaled_pixel_pos(layer_resampler_props_t *ps_layer_props, WORD32 i4_pixel_pos, UWORD8 u1_dim_id) { if(1 == u1_dim_id) { return (((i4_pixel_pos - ps_layer_props->i4_offset_y) * ((WORD64) ps_layer_props->u4_scale_y) + ps_layer_props->i4_add_y) >> (ps_layer_props->u4_shift_y - 4)) - ps_layer_props->i4_delta_y; } else { return (((i4_pixel_pos - ps_layer_props->i4_offset_x) * ((WORD64) ps_layer_props->u4_scale_x) + ps_layer_props->i4_add_x) >> (ps_layer_props->u4_shift_x - 4)) - ps_layer_props->i4_delta_x; } } static FORCEINLINE void 
isvce_ref_array_pos_and_phase_init_dyadic( layer_resampler_props_t *ps_layer_props, res_pred_mb_state_t *ps_mb_state, coordinates_t *ps_mb_pos, UWORD8 u1_frame_mbs_only_flag, UWORD8 u1_field_mb_flag, UWORD8 u1_ref_layer_frame_mbs_only_flag) { UWORD32 i, j; coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases; WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; for(i = 0; i < 2; i++) { WORD32 i4_y_ref16; WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) { i4_yc = i4_yc >> (1 - u1_field_mb_flag); } i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); for(j = 0; j < ((0 == i) ? 2 : 1); j++) { WORD32 i4_x_ref16; WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); ps_ref_array_phases[j + i * 2].i4_abscissa = (i4_x_ref16 - (16 * i4_x_offset)) & 15; ps_ref_array_phases[j + i * 2].i4_ordinate = (i4_y_ref16 - (16 * i4_y_offset)) & 15; } } } static FORCEINLINE void isvce_ref_array_pos_and_phase_init(layer_resampler_props_t *ps_layer_props, res_pred_mb_state_t *ps_mb_state, coordinates_t *ps_mb_pos, UWORD8 u1_frame_mbs_only_flag, UWORD8 u1_field_mb_flag, UWORD8 u1_ref_layer_frame_mbs_only_flag) { UWORD32 i, j; coordinates_t *ps_ref_array_positions = ps_mb_state->ps_ref_array_positions; coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases; WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa; WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate; UWORD32 u4_phase_array_idx = 0; for(i = 0; i < ps_layer_props->u4_mb_ht; i++) { WORD32 i4_y_ref16; WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i; if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag)) { i4_yc = i4_yc >> (1 - u1_field_mb_flag); } i4_y_ref16 = 
isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1); for(j = 0; j < ps_layer_props->u4_mb_wd; j++) { WORD32 i4_x_ref16; WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j; i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0); ps_ref_array_positions[j + i * ps_layer_props->u4_mb_wd].i4_abscissa = (i4_x_ref16 >> 4) - i4_x_offset; ps_ref_array_positions[j + i * ps_layer_props->u4_mb_wd].i4_ordinate = (i4_y_ref16 >> 4) - i4_y_offset; if(((0 == i) && (j < 3)) || ((0 == j) && (i < 3))) { ps_ref_array_phases[u4_phase_array_idx].i4_abscissa = (i4_x_ref16 - (16 * i4_x_offset)) & 15; ps_ref_array_phases[u4_phase_array_idx].i4_ordinate = (i4_y_ref16 - (16 * i4_y_offset)) & 15; u4_phase_array_idx++; } } } } static void isvce_res_pred_layer_state_init(res_pred_layer_state_t *ps_layer_state, DOUBLE d_spatial_res_ratio, UWORD32 u4_wd, UWORD32 u4_ht, IV_COLOR_FORMAT_T e_color_format) { UWORD32 i, j, k; const UWORD8 u1_ref_layer_field_pic_flag = 0; const UWORD8 u1_field_pic_flag = 0; const UWORD8 u1_frame_mbs_only_flag = 1; const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1; const UWORD8 u1_field_mb_flag = 0; ASSERT((IV_YUV_420P == e_color_format) || (IV_YUV_420SP_UV == e_color_format)); UNUSED(e_color_format); for(i = 0; i < 2; i++) { res_pred_mb_state_t *ps_mb_states; layer_resampler_props_t *ps_layer_props; UWORD32 u4_wd_in_mbs; UWORD32 u4_ht_in_mbs; UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i)); UWORD32 u4_ref_wd = (u4_wd / d_spatial_res_ratio); UWORD32 u4_ref_ht = (u4_ht / d_spatial_res_ratio) * (1 + u1_ref_layer_field_pic_flag); UWORD32 u4_scaled_wd = u4_wd; UWORD32 u4_scaled_ht = u4_ht * (1 + u1_field_pic_flag); ps_mb_states = u1_is_chroma ? ps_layer_state->ps_chroma_mb_states : ps_layer_state->ps_luma_mb_states; ps_layer_props = u1_is_chroma ? 
ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props; u4_ref_wd = u4_ref_wd >> u1_is_chroma; u4_ref_ht = u4_ref_ht >> u1_is_chroma; u4_scaled_wd = u4_scaled_wd >> u1_is_chroma; u4_scaled_ht = u4_scaled_ht >> u1_is_chroma; u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd; u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht; for(j = 0; j < u4_ht_in_mbs; j++) { WORD32 i4_y_refmin16; WORD32 i4_y_refmax16; WORD32 i4_y_offset; i4_y_refmin16 = isvce_get_scaled_pixel_pos(ps_layer_props, j * ps_layer_props->u4_mb_ht, 1); i4_y_refmax16 = isvce_get_scaled_pixel_pos( ps_layer_props, j * ps_layer_props->u4_mb_ht + ps_layer_props->u4_mb_ht - 1, 1); i4_y_offset = i4_y_refmin16 >> 4; for(k = 0; k < u4_wd_in_mbs; k++) { WORD32 i4_x_refmin16; WORD32 i4_x_refmax16; WORD32 i4_x_offset; coordinates_t s_mb_pos = {k, j}; i4_x_refmin16 = isvce_get_scaled_pixel_pos(ps_layer_props, k * ps_layer_props->u4_mb_wd, 0); i4_x_refmax16 = isvce_get_scaled_pixel_pos( ps_layer_props, k * ps_layer_props->u4_mb_wd + ps_layer_props->u4_mb_wd - 1, 0); i4_x_offset = i4_x_refmin16 >> 4; ps_mb_states[k + j * u4_wd_in_mbs].s_offsets.i4_abscissa = i4_x_offset; ps_mb_states[k + j * u4_wd_in_mbs].s_offsets.i4_ordinate = i4_y_offset; ps_mb_states[k + j * u4_wd_in_mbs].s_ref_array_dims.i4_abscissa = (i4_x_refmax16 >> 4) - i4_x_offset + 2; ps_mb_states[k + j * u4_wd_in_mbs].s_ref_array_dims.i4_ordinate = (i4_y_refmax16 >> 4) - i4_y_offset + 2; if((0 == k) && (0 == j) && (2 == d_spatial_res_ratio) && u1_is_chroma) { isvce_ref_array_pos_and_phase_init_dyadic( ps_layer_props, &ps_mb_states[k + j * u4_wd_in_mbs], &s_mb_pos, u1_frame_mbs_only_flag, u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag); } else if(1.5 == d_spatial_res_ratio) { isvce_ref_array_pos_and_phase_init( ps_layer_props, &ps_mb_states[k + j * u4_wd_in_mbs], &s_mb_pos, u1_frame_mbs_only_flag, u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag); } } } } } void isvce_svc_residual_sampling_function_selector(res_pred_state_t 
*ps_res_pred_state, DOUBLE d_spatial_res_ratio, IV_ARCH_T e_arch) { if(2. == d_spatial_res_ratio) { ps_res_pred_state->apf_residual_samplers[U] = isvce_chroma_residual_sampler_2x; ps_res_pred_state->apf_residual_samplers[V] = isvce_chroma_residual_sampler_2x; switch(e_arch) { #if defined(X86) case ARCH_X86_SSE42: { ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_sse42; break; } #elif defined(ARMV8) case ARCH_ARM_A53: case ARCH_ARM_A57: case ARCH_ARM_V8_NEON: { ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon; break; } #elif defined(ARM) && !defined(DISABLE_NEON) case ARCH_ARM_A9Q: case ARCH_ARM_A9A: case ARCH_ARM_A9: case ARCH_ARM_A7: case ARCH_ARM_A5: case ARCH_ARM_A15: { ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon; break; } #endif default: { ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x; break; } } } switch(e_arch) { #if defined(X86) case ARCH_X86_SSE42: { ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred_sse42; break; } #elif defined(ARMV8) case ARCH_ARM_A53: case ARCH_ARM_A57: case ARCH_ARM_V8_NEON: { ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred_neon; break; } #elif defined(ARM) && !defined(DISABLE_NEON) case ARCH_ARM_A9Q: case ARCH_ARM_A9A: case ARCH_ARM_A9: case ARCH_ARM_A7: case ARCH_ARM_A5: case ARCH_ARM_A15: { ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred_neon; break; } #endif default: { ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred; break; } } } /** ******************************************************************************* * * @brief * Function to initialize svc ilp buffers * * @param[in] ps_codec * Pointer to codec context * * @param[in] ps_mem_rec * Pointer to memory allocated for input buffers * 
******************************************************************************* */ void isvce_svc_res_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec) { WORD32 i, j, k; const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]); UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers; DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio; UWORD32 u4_wd = ps_codec->s_cfg.u4_wd; UWORD32 u4_ht = ps_codec->s_cfg.u4_ht; UWORD8 *pu1_buf = ps_mem_rec->pv_base; WORD64 i8_alloc_mem_size = isvce_get_svc_res_pred_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht); if(u1_num_spatial_layers > 1) { res_pred_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS]; res_pred_mb_state_t *aps_chroma_mb_states[MAX_NUM_SPATIAL_LAYERS]; WORD8 *api1_mb_mode[MAX_NUM_SPATIAL_LAYERS]; WORD32 ai4_mb_mode_stride[MAX_NUM_SPATIAL_LAYERS]; WORD32 i4_size; for(i = 0; i < i4_num_proc_ctxts; i++) { res_pred_state_t *ps_res_pred_state; svc_res_pred_ctxt_t *ps_res_pred_ctxt; yuv_buf_props_t *ps_mb_res_buf; res_pred_mem_store_t *ps_mem_store; isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i; ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt = (svc_res_pred_ctxt_t *) pu1_buf; pu1_buf += sizeof(svc_res_pred_ctxt_t); i8_alloc_mem_size -= sizeof(svc_res_pred_ctxt_t); ps_res_pred_ctxt->s_res_pred_constants.pv_state = pu1_buf; ps_res_pred_state = (res_pred_state_t *) pu1_buf; pu1_buf += sizeof(res_pred_state_t); i8_alloc_mem_size -= sizeof(res_pred_state_t); ps_res_pred_state->ps_layer_state = (res_pred_layer_state_t *) pu1_buf; pu1_buf += u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]); i8_alloc_mem_size -= u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]); i4_size = REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8); ps_res_pred_state->pu1_ref_x_ptr_incr = (UWORD8 *) pu1_buf; pu1_buf += i4_size; ps_res_pred_state->pu1_ref_y_ptr_incr = 
(UWORD8 *) pu1_buf; pu1_buf += i4_size; ASSERT(i8_alloc_mem_size >= 0); if(0 == i) { UWORD32 au4_ref_pos_array_size[NUM_SP_COMPONENTS]; UWORD32 au4_ref_phase_array_size[NUM_SP_COMPONENTS]; if(1.5 == d_spatial_res_ratio) { au4_ref_pos_array_size[Y] = MB_SIZE * MB_SIZE; au4_ref_phase_array_size[Y] = 5; au4_ref_pos_array_size[U] = (MB_SIZE / 2) * (MB_SIZE / 2); au4_ref_phase_array_size[U] = 5; } else { au4_ref_pos_array_size[Y] = au4_ref_pos_array_size[U] = 0; au4_ref_phase_array_size[Y] = 0; au4_ref_phase_array_size[U] = 3; } for(j = u1_num_spatial_layers - 1; j >= 1; j--) { res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; WORD32 i4_layer_luma_wd = ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + 0.99; WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + 0.99; WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE); WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99; WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99; WORD32 i4_layer_u_mbs = (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2)); ps_layer->ps_luma_mb_states = (res_pred_mb_state_t *) pu1_buf; aps_luma_mb_states[j] = ps_layer->ps_luma_mb_states; pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_luma_mb_states[0]); i8_alloc_mem_size -= u1_num_spatial_layers * sizeof(ps_layer->ps_luma_mb_states[0]); ps_layer->ps_chroma_mb_states = (res_pred_mb_state_t *) pu1_buf; aps_chroma_mb_states[j] = ps_layer->ps_chroma_mb_states; pu1_buf += i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]); i8_alloc_mem_size -= i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]); if(1.5 == d_spatial_res_ratio) { coordinates_t *ps_ref_array_pos = (coordinates_t *) pu1_buf; coordinates_t *ps_ref_array_phases = ps_ref_array_pos + i4_layer_luma_mbs * au4_ref_pos_array_size[Y]; for(k = 0; k < i4_layer_luma_mbs; k++) { ps_layer->ps_luma_mb_states[k].ps_ref_array_positions = 
ps_ref_array_pos + k * au4_ref_pos_array_size[Y]; ps_layer->ps_luma_mb_states[k].ps_ref_array_phases = ps_ref_array_phases + k * au4_ref_phase_array_size[Y]; pu1_buf += au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]); i8_alloc_mem_size -= au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]); pu1_buf += au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]); i8_alloc_mem_size -= au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]); } ps_ref_array_pos = (coordinates_t *) pu1_buf; ps_ref_array_phases = ps_ref_array_pos + i4_layer_u_mbs * au4_ref_pos_array_size[U]; for(k = 0; k < i4_layer_u_mbs; k++) { ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions = ps_ref_array_pos + k * au4_ref_pos_array_size[U]; ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases = ps_ref_array_phases + k * au4_ref_phase_array_size[U]; pu1_buf += au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]); i8_alloc_mem_size -= au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]); pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); i8_alloc_mem_size -= au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); } } else { coordinates_t *ps_ref_array_pos = NULL; coordinates_t *ps_ref_array_phases = NULL; for(k = 0; k < i4_layer_luma_mbs; k++) { ps_layer->ps_luma_mb_states[k].ps_ref_array_positions = ps_ref_array_pos; ps_layer->ps_luma_mb_states[k].ps_ref_array_phases = ps_ref_array_phases; } ps_ref_array_pos = NULL; ps_ref_array_phases = (coordinates_t *) pu1_buf; for(k = 0; k < i4_layer_u_mbs; k++) { ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions = ps_ref_array_pos; ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases = ps_ref_array_phases; } pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_pos[0]); i8_alloc_mem_size -= au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]); } ASSERT(i8_alloc_mem_size >= 0); /* Asserts below verify that * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised */ 
ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd == MB_SIZE); ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j].u4_mb_wd == (MB_SIZE / 2)); ps_layer->ps_luma_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j]; ps_layer->ps_chroma_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j]; isvce_res_pred_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd, i4_layer_luma_ht, ps_codec->s_cfg.e_inp_color_fmt); } for(j = u1_num_spatial_layers - 1; j >= 0; j--) { res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; WORD32 i4_layer_luma_wd = ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + 0.99; WORD32 i4_layer_luma_ht = ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) + 0.99; WORD32 i4_layer_luma_mbs = ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2); ps_layer->pi1_mb_mode = (WORD8 *) pu1_buf; pu1_buf += i4_layer_luma_mbs * sizeof(WORD8); memset(ps_layer->pi1_mb_mode, -1, i4_layer_luma_mbs); ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j] = (i4_layer_luma_wd / MB_SIZE) + 2; ps_layer->pi1_mb_mode += 1 + ps_layer->i4_mb_mode_stride; api1_mb_mode[j] = ps_layer->pi1_mb_mode; } } else { for(j = u1_num_spatial_layers - 1; j >= 1; j--) { res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; ps_layer->ps_luma_mb_states = aps_luma_mb_states[j]; ps_layer->ps_chroma_mb_states = aps_chroma_mb_states[j]; ps_layer->ps_luma_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j]; ps_layer->ps_chroma_props = &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j]; } for(j = u1_num_spatial_layers - 1; j >= 0; j--) { res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j]; ps_layer->pi1_mb_mode = api1_mb_mode[j]; ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j]; } } ps_mb_res_buf = &ps_res_pred_ctxt->s_res_pred_outputs.s_res_pred; ps_mem_store = 
&ps_res_pred_state->s_mem_store;

            ps_proc->ps_mb_res_buf = ps_mb_res_buf;

            /* Carve the per-MB residual prediction planes out of pu1_buf; */
            /* the second component is given half the number of rows       */
            for(j = 0; j < NUM_SP_COMPONENTS; j++)
            {
                buffer_container_t *ps_comp_buf = &ps_mb_res_buf->as_component_bufs[j];

                UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j));

                ps_comp_buf->pv_data = pu1_buf;
                ps_comp_buf->i4_data_stride = MB_SIZE;
                pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
                i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
            }

            /* NOTE(review): the scratch area is sized in WORD16 units here, */
            /* while isvce_chroma_residual_sampler_2x views the same buffer  */
            /* as WORD32 - confirm the allocation is large enough for that   */
            ps_mem_store->s_scratch.pv_data = pu1_buf;
            ps_mem_store->s_scratch.i4_data_stride = REF_ARRAY_MAX_WIDTH;
            pu1_buf += REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);
            i8_alloc_mem_size -= REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);

            ASSERT(i8_alloc_mem_size >= 0);

            ps_mb_res_buf->as_component_bufs[V].pv_data = NULL;
            ps_mb_res_buf->e_color_format = IV_YUV_420SP_UV;
            ps_mb_res_buf->u1_bit_depth = 10;
            ps_mb_res_buf->u4_width = MB_SIZE;
            ps_mb_res_buf->u4_height = MB_SIZE;

            isvce_svc_residual_sampling_function_selector(ps_res_pred_state, d_spatial_res_ratio,
                                                          ps_codec->s_cfg.e_arch);
        }
    }
    else
    {
        /* No residual prediction context on this path; each process context */
        /* still receives its own MB-sized residual buffer descriptor        */
        for(i = 0; i < i4_num_proc_ctxts; i++)
        {
            isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;

            ps_proc->ps_res_pred_ctxt = NULL;

            ps_proc->ps_mb_res_buf = (yuv_buf_props_t *) pu1_buf;
            pu1_buf += sizeof(yuv_buf_props_t);
            i8_alloc_mem_size -= sizeof(yuv_buf_props_t);

            for(j = 0; j < NUM_SP_COMPONENTS; j++)
            {
                buffer_container_t *ps_comp_buf = &ps_proc->ps_mb_res_buf->as_component_bufs[j];

                UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j));

                ps_comp_buf->pv_data = pu1_buf;
                ps_comp_buf->i4_data_stride = MB_SIZE;
                pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
                i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
            }

            ASSERT(i8_alloc_mem_size >= 0);
        }
    }
}

/**
*******************************************************************************
*
* @brief
*  Generates the residual prediction of the current MB by running the
*  per-component residual sampler on the reference layer's residual buffer
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context. The MB position and spatial layer id are read
*  from s_res_pred_variables, the output buffers from s_res_pred_outputs and
*  the sampler function pointers from the state behind s_res_pred_constants
*
* @notes
*  The spatial layer id must be greater than 0; the layer with id - 1 is used
*  as the reference layer. The reference layer MB row stride is taken as half
*  the current layer's width in MBs, so this path presumably serves 2x
*  (dyadic) scaling only - confirm against the function selector.
*
*******************************************************************************
*/
void isvce_get_mb_residual_pred(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
{
    buffer_container_t s_inp;
    buffer_container_t s_out;
    coordinates_t s_frame_dims;
    coordinates_t s_frame_dims_in_mbs;
    coordinates_t s_ref_array_offsets;
    svc_layer_data_t *ps_ref_layer_data;
    res_pred_layer_state_t *ps_layer_state;
    yuv_buf_props_t *ps_ref_residual_buf;
    res_pred_mb_state_t *ps_luma_mb_state;
    res_pred_mb_state_t *ps_chroma_mb_state;
    isvce_mb_info_t *ps_ref_mb;

    WORD32 i;

    res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_outputs_t *ps_res_pred_outputs = &ps_res_pred_ctxt->s_res_pred_outputs;
    res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
    res_pred_mem_store_t *ps_mem_store = &ps_res_pred_state->s_mem_store;
    svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data;
    coordinates_t *ps_mb_pos = &ps_res_pred_variables->s_mb_pos;

    UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id;

    /* Layer 0 has no reference layer to predict from */
    ASSERT(u1_spatial_layer_id > 0);

    s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
    s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;

    ps_ref_layer_data = &ps_svc_ilp_data->ps_svc_au_data->ps_svc_layer_data[u1_spatial_layer_id - 1];
    ps_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id];
    ps_ref_residual_buf = &ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1];

    /* Per-MB states are stored in raster order over the current layer's MBs */
    ps_luma_mb_state = ps_layer_state->ps_luma_mb_states + ps_mb_pos->i4_abscissa +
                       ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;
    ps_chroma_mb_state = ps_layer_state->ps_chroma_mb_states + ps_mb_pos->i4_abscissa +
                         ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;

    for(i = 0; i < NUM_COMPONENTS; i++)
    {
        res_pred_mb_state_t *ps_mb_state;
        layer_resampler_props_t *ps_layer_props;

        UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));

        ps_mb_state = u1_is_chroma ? ps_chroma_mb_state : ps_luma_mb_state;
        ps_layer_props = u1_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props;

        /* Presence of appropriate padding is assumed */
        s_ref_array_offsets = ps_mb_state->s_offsets;

        /* Both chroma components are read through the UV buffer: V starts  */
        /* one sample after U and the abscissa is doubled for chroma        */
        /* (presumably interleaved UV samples - confirm with buffer setup)  */
        s_inp = ps_ref_residual_buf->as_component_bufs[u1_is_chroma ? UV : Y];
        s_inp.pv_data = ((WORD16 *) s_inp.pv_data) + (V == ((COMPONENT_TYPE) i)) +
                        (s_ref_array_offsets.i4_abscissa << u1_is_chroma) +
                        s_ref_array_offsets.i4_ordinate * s_inp.i4_data_stride;

        s_out = ps_res_pred_outputs->s_res_pred.as_component_bufs[u1_is_chroma ? UV : Y];
        s_out.pv_data = ((WORD16 *) s_out.pv_data) + (V == ((COMPONENT_TYPE) i));

        /* Reference layer MB that contains the centre of the reference array */
        ps_ref_mb = ps_ref_layer_data->ps_mb_info +
                    ((s_ref_array_offsets.i4_abscissa + (ps_mb_state->s_ref_array_dims.i4_abscissa / 2)) /
                     ps_layer_props->u4_mb_wd) +
                    ((s_ref_array_offsets.i4_ordinate + (ps_mb_state->s_ref_array_dims.i4_ordinate / 2)) /
                     ps_layer_props->u4_mb_ht) *
                        (s_frame_dims_in_mbs.i4_abscissa / 2);

        /* All nnz bits are passed as set; the last argument tells the */
        /* sampler whether the reference MB reports a tx size of 8     */
        ps_res_pred_state->apf_residual_samplers[i](
            ps_mb_state->ps_ref_array_positions, ps_mb_state->ps_ref_array_phases, &s_inp, &s_out,
            &ps_mem_store->s_scratch, UINT32_MAX, ps_ref_mb->u1_tx_size == 8);
    }
}

/**
*******************************************************************************
*
* @brief
*  Looks up the mode entry of the reference layer MB that covers a sample
*  position and reports its MB type and transform size
*
* @param[in] pi1_ref_mb_modes
*  Pointer to the reference layer MB mode array
*
* @param[in] i4_ref_mode_stride
*  Stride of the MB mode array, in elements
*
* @param[in] i4_element_size
*  Size of one MB mode element, used to scale both the row and column index
*
* @param[in] i4_x_ref
*  Horizontal sample position in the reference layer; negative values are
*  treated as 0
*
* @param[in] i4_y_ref
*  Vertical sample position in the reference layer; negative values are
*  treated as 0
*
* @param[out] pi4_mb_type
*  Set to SVC_INTER_MB or SVC_INTRA_MB
*
* @param[out] pi4_tx_size
*  Set to the transform size bit of the mode entry for inter MBs and to 1
*  otherwise
*
* @param[in] i4_chroma_flag
*  1 for chroma, 0 for luma; reduces the MB width and height shifts by one
*
*******************************************************************************
*/
void isvce_get_ref_layer_mbtype_tx_size(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride,
                                        WORD32 i4_element_size, WORD32 i4_x_ref, WORD32 i4_y_ref,
                                        WORD32 *pi4_mb_type, WORD32 *pi4_tx_size,
                                        WORD32 i4_chroma_flag)
{
    WORD32 i4_mb_wd_sft, i4_mb_ht_sft;
    WORD32 i4_mb_x, i4_mb_y;
    WORD8 i1_mb_mode;

    /* Positions left of or above the picture use the first MB column/row */
    if(i4_x_ref < 0)
    {
        i4_x_ref = 0;
    }
    if(i4_y_ref < 0)
    {
        i4_y_ref = 0;
    }

    /* Convert the sample position to an MB position */
    i4_mb_wd_sft = (MB_WIDTH_SHIFT - i4_chroma_flag);
    i4_mb_ht_sft = (MB_HEIGHT_SHIFT - i4_chroma_flag);
    i4_mb_x = (i4_x_ref >> i4_mb_wd_sft);
    i4_mb_y = (i4_y_ref >> i4_mb_ht_sft);

    pi1_ref_mb_modes += (i4_mb_y * i4_ref_mode_stride * i4_element_size);
    pi1_ref_mb_modes += (i4_mb_x * i4_element_size);

    /* Negative entries are kept as they are; others go through the */
    /* mode extraction macro                                        */
    i1_mb_mode = *pi1_ref_mb_modes;
    i1_mb_mode = (i1_mb_mode < 0) ?
i1_mb_mode : SVC_EXTRACT_MB_MODE(*pi1_ref_mb_modes);

    if(i1_mb_mode <= SVC_INTER_MB)
    {
        *pi4_mb_type = SVC_INTER_MB;
        *pi4_tx_size = GET_BIT_TX_SIZE(*pi1_ref_mb_modes, 1);
    }
    else
    {
        *pi4_mb_type = SVC_INTRA_MB;
        *pi4_tx_size = 1;
    }
}

/**
*******************************************************************************
*
* @brief
*  Builds the horizontal and vertical pointer increment maps of a reference
*  array. Every entry starts as 1; entries on the last column (x map) or last
*  row (y map) of each transform block are then set to 0
*
* @param[in] pi1_ref_mb_modes
*  Pointer to the reference layer MB mode array
*
* @param[in] i4_ref_mode_stride
*  Stride of the MB mode array
*
* @param[in] i4_element_size
*  Size of one MB mode element
*
* @param[in] i4_x_offset
*  Horizontal position of the reference array in the reference layer
*
* @param[in] i4_y_offset
*  Vertical position of the reference array in the reference layer
*
* @param[in] i4_refary_wd
*  Width of the reference array; also the stride of both maps
*
* @param[in] i4_refary_ht
*  Height of the reference array
*
* @param[out] pu1_ref_x_ptr_incr
*  Horizontal increment map, i4_refary_wd * i4_refary_ht entries
*
* @param[out] pu1_ref_y_ptr_incr
*  Vertical increment map, i4_refary_wd * i4_refary_ht entries
*
* @param[in] i4_chroma_flag
*  0 for luma (16x16 MBs, transform size taken from the reference MB),
*  non-zero for chroma (8x8 MBs, transform size fixed at 4)
*
* @notes
*  The array is scanned one reference MB at a time, left to right and then
*  top to bottom.
*
*******************************************************************************
*/
void isvce_ref_layer_ptr_incr(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride,
                              WORD32 i4_element_size, WORD32 i4_x_offset, WORD32 i4_y_offset,
                              WORD32 i4_refary_wd, WORD32 i4_refary_ht, UWORD8 *pu1_ref_x_ptr_incr,
                              UWORD8 *pu1_ref_y_ptr_incr, WORD32 i4_chroma_flag)
{
    WORD32 i4_x, i4_y;
    WORD32 i4_x_idx, i4_y_idx;
    WORD32 i4_prev_x, i4_prev_y;
    WORD32 i4_const_val;
    WORD32 i4_pos_x, i4_pos_y;
    WORD32 i4_trans_size;
    WORD32 i4_mb_type, i4_tx_size;
    WORD32 i4_act_ary_wd, i4_act_ary_ht;
    WORD32 i4_and_const;
    UWORD8 *pu1_incr_x, *pu1_incr_y;

    /* Default increment of 1 everywhere */
    memset(pu1_ref_x_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
    memset(pu1_ref_y_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));

    i4_act_ary_wd = i4_refary_wd;
    i4_act_ary_ht = i4_refary_ht;

    i4_x = 0;
    i4_y = 0;
    i4_prev_y = 0;

    if(0 == i4_chroma_flag)
    {
        do
        {
            WORD32 i4_x_ref, i4_y_ref;
            WORD32 i4_idx;
            WORD32 i4_wd, i4_ht;
            WORD32 i4_max_pos_x, i4_max_pos_y;

            i4_prev_x = i4_x;

            i4_x_ref = i4_x_offset + i4_x;
            i4_y_ref = i4_y_offset + i4_y;

            isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mode_stride,
                                               i4_element_size, i4_x_ref, i4_y_ref, &i4_mb_type,
                                               &i4_tx_size, i4_chroma_flag);

            /* tx size 0 gives 4 sample blocks, tx size 1 gives 8 sample blocks */
            i4_trans_size = ((i4_tx_size + 1) << 2);
            i4_const_val = i4_trans_size - 1;
            i4_and_const = i4_const_val;

            /* Fill horizontal tx block edges of current reference mb with 0 */
            pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
            pu1_incr_x += (i4_y * i4_refary_wd);

            /* Rows left in this MB, limited to the rows left in the array */
            i4_ht = (16 - (i4_y_ref & 0xF));
            i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);

            i4_x_idx = i4_x;

            i4_pos_x = i4_x_ref & 0xF;
            i4_max_pos_x = 16;
            /* Advance the scan position to the start of the next MB column */
            i4_x += (16 - i4_pos_x);

            /* Get the transform block edge pos */
            i4_idx = (i4_const_val - (i4_pos_x & i4_and_const));

            i4_x_idx += i4_idx;

            while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
            {
                WORD32 i4_i;
                UWORD8 *pu1_incr;

                pu1_incr = pu1_incr_x + i4_idx;

                for(i4_i = 0; i4_i < i4_ht; i4_i++)
                {
                    /* Fill the block edge with 0s */
                    *pu1_incr = 0;
                    pu1_incr += i4_refary_wd;
                }

                i4_pos_x += i4_trans_size;
                pu1_incr_x += i4_trans_size;
                i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
            }

            /* Fill vertical tx block edges of current reference mb with 0 */
            pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
            pu1_incr_y += (i4_y * i4_refary_wd);

            /* Columns left in this MB, limited to the columns left in the array */
            i4_wd = (16 - (i4_x_ref & 0xF));
            i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);

            i4_y_idx = i4_y;

            i4_pos_y = i4_y_ref & 0xF;
            i4_max_pos_y = 16;
            /* Advance the scan position to the start of the next MB row */
            i4_y += (16 - i4_pos_y);

            /* Get the transform block edge pos */
            i4_idx = (i4_const_val - (i4_pos_y & i4_and_const));

            i4_y_idx += i4_idx;

            while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
            {
                WORD32 i4_i;
                UWORD8 *pu1_incr;

                pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;

                for(i4_i = 0; i4_i < i4_wd; i4_i++)
                {
                    /* Fill the block edge with 0s */
                    *pu1_incr = 0;
                    pu1_incr++;
                }

                i4_pos_y += i4_trans_size;
                pu1_incr_y += i4_trans_size * i4_refary_wd;
                i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
            }

            /* More MBs remain in this MB row: stay on the same row. */
            /* Otherwise start the next MB row from the left edge    */
            if(i4_x < i4_act_ary_wd)
            {
                i4_y = i4_prev_y;
            }
            else if(i4_y < i4_act_ary_ht)
            {
                i4_prev_y = i4_y;
                i4_x = 0;
            }

        } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
    }
    else
    {
        /* Chroma: same scan as above with 8x8 MBs and 4 sample tx blocks; */
        /* the reference MB mode is not consulted                          */
        i4_trans_size = 4;
        i4_const_val = 3;

        do
        {
            WORD32 i4_x_ref, i4_y_ref;
            WORD32 i4_idx;
            WORD32 i4_wd, i4_ht;
            WORD32 i4_max_pos_x, i4_max_pos_y;

            i4_prev_x = i4_x;

            i4_x_ref = i4_x_offset + i4_x;
            i4_y_ref = i4_y_offset + i4_y;

            /* Fill horizontal tx block edges of current reference mb with 0 */
            pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
            pu1_incr_x += (i4_y * i4_refary_wd);

            i4_ht = (8 - (i4_y_ref & 0x7));
            i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);

            i4_x_idx = i4_x;

            i4_pos_x = i4_x_ref & 0x7;
            i4_max_pos_x = 8;
            i4_x += (8 - i4_pos_x);

            /* Get the transform block edge pos */
            i4_idx = (i4_const_val - (i4_pos_x & 0x3));

            i4_x_idx += i4_idx;

            while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
            {
                WORD32 i4_i;
                UWORD8 *pu1_incr;

                pu1_incr = pu1_incr_x + i4_idx;

                for(i4_i = 0; i4_i <
                    i4_ht; i4_i++)
                {
                    /* Fill the block edge with 0s */
                    *pu1_incr = 0;
                    pu1_incr += i4_refary_wd;
                }

                i4_pos_x += i4_trans_size;
                pu1_incr_x += i4_trans_size;
                i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
            }

            /* Fill vertical tx block edges of current reference mb with 0 */
            pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
            pu1_incr_y += (i4_y * i4_refary_wd);

            i4_wd = (8 - (i4_x_ref & 0x7));
            i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);

            i4_y_idx = i4_y;

            i4_pos_y = i4_y_ref & 0x7;
            i4_max_pos_y = 8;
            i4_y += (8 - i4_pos_y);

            /* Get the transform block edge pos */
            i4_idx = (i4_const_val - (i4_pos_y & 0x3));

            i4_y_idx += i4_idx;

            while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
            {
                WORD32 i4_i;
                UWORD8 *pu1_incr;

                pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;

                for(i4_i = 0; i4_i < i4_wd; i4_i++)
                {
                    /* Fill the block edge with 0s */
                    *pu1_incr = 0;
                    pu1_incr++;
                }

                i4_pos_y += i4_trans_size;
                pu1_incr_y += i4_trans_size * i4_refary_wd;
                i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
            }

            /* Same scan control as the luma path */
            if(i4_x < i4_act_ary_wd)
            {
                i4_y = i4_prev_y;
            }
            else if(i4_y < i4_act_ary_ht)
            {
                i4_prev_y = i4_y;
                i4_x = 0;
            }

        } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
    }
}

/**
*******************************************************************************
*
* @brief
*  Constructs the reference sample array used for residual interpolation of
*  the current MB. Samples that fall in inter coded reference MBs are copied
*  from the reference layer residual; all other samples are set to 0
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context
*
* @param[in] pi2_inp_data
*  Reference layer residual data for the component being processed
*
* @param[in] i4_inp_data_stride
*  Stride of pi2_inp_data
*
* @param[in] ps_ref_mb_mode
*  Reference layer MB mode array; must not be NULL
*
* @param[in] i4_ref_mb_mode_stride
*  Stride of the MB mode array
*
* @param[out] pi4_refarr_wd
*  Receives the width of the constructed reference array
*
* @param[in] i4_chroma_flag
*  1 for chroma, 0 for luma
*
* @notes
*  The array is written to the scratch buffer of the residual prediction
*  state, and the pointer increment maps of that state are rebuilt as well.
*
*******************************************************************************
*/
void isvce_residual_reflayer_const(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_inp_data,
                                   WORD32 i4_inp_data_stride, WORD8 *ps_ref_mb_mode,
                                   WORD32 i4_ref_mb_mode_stride, WORD32 *pi4_refarr_wd,
                                   WORD32 i4_chroma_flag)
{
    WORD8 *pi1_ref_mb_modes;
    WORD32 i4_ref_mode_stride;
    WORD32 i4_x, i4_y;
    WORD32 i4_ref_wd;
    WORD32 i4_ref_ht;
    WORD32 i4_x_offset;
    WORD32 i4_y_offset;
    WORD32 i4_refarray_wd;
    WORD32 i4_refarray_ht;

    WORD16 *pi2_ref_array;

    res_pred_mb_state_t *ps_mb_states;
    res_pred_layer_state_t *ps_layer_state;

    res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
    res_pred_mem_store_t *ps_mem_store =
&ps_res_pred_state->s_mem_store;
    svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data;
    coordinates_t *ps_mb_pos = &ps_res_pred_variables->s_mb_pos;

    UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id;

    ps_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id];
    /* The reference array is built in the shared scratch buffer */
    pi2_ref_array = (WORD16 *) ps_mem_store->s_scratch.pv_data;

    pi1_ref_mb_modes = (WORD8 *) ps_ref_mb_mode;
    i4_ref_mode_stride = i4_ref_mb_mode_stride;

    ASSERT(NULL != pi1_ref_mb_modes);

    {
        WORD32 i4_base_width;
        WORD32 i4_base_height;

        coordinates_t s_frame_dims, s_frame_dims_in_mbs;

        s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
        s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
        s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
        s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;

        /* State of the current MB, in raster order over the current layer */
        ps_mb_states = i4_chroma_flag ? ps_layer_state->ps_chroma_mb_states
                                      : ps_layer_state->ps_luma_mb_states;
        ps_mb_states +=
            ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;

        /* Reference layer dimensions for this component */
        i4_base_width = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_width;
        i4_base_height = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1].u4_height;

        i4_ref_wd = i4_base_width >> i4_chroma_flag;
        i4_ref_ht = i4_base_height >> i4_chroma_flag;

        i4_x_offset = ps_mb_states->s_offsets.i4_abscissa;
        i4_y_offset = ps_mb_states->s_offsets.i4_ordinate;
        i4_refarray_wd = ps_mb_states->s_ref_array_dims.i4_abscissa;
        i4_refarray_ht = ps_mb_states->s_ref_array_dims.i4_ordinate;
    }

    {
        /* Rebuild the increment maps for this reference array */
        isvce_ref_layer_ptr_incr(pi1_ref_mb_modes, i4_ref_mode_stride, 1, i4_x_offset, i4_y_offset,
                                 i4_refarray_wd, i4_refarray_ht,
                                 ps_res_pred_state->pu1_ref_x_ptr_incr,
                                 ps_res_pred_state->pu1_ref_y_ptr_incr, i4_chroma_flag);
    }

    for(i4_y = 0; i4_y < i4_refarray_ht; i4_y++)
    {
        for(i4_x = 0; i4_x < i4_refarray_wd; i4_x++)
        {
            WORD32 i4_x_ref;
            WORD32 i4_y_ref;
            WORD32 i4_ref_mb_type,
                i4_ref_tx_size;

            WORD16 *pi2_ref_data_byte;
            WORD16 *pi2_ref_array_temp;

            /* Clamp the reference position to the reference layer picture */
            i4_x_ref = MAX(0, MIN(i4_ref_wd - 1, i4_x + i4_x_offset));
            i4_y_ref = MAX(0, MIN(i4_ref_ht - 1, i4_y + i4_y_offset));

            isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mode_stride, 1, i4_x_ref,
                                               i4_y_ref, &i4_ref_mb_type, &i4_ref_tx_size,
                                               i4_chroma_flag);

            /* Make the position relative to pi2_inp_data again; this is */
            /* done only when the offset is not negative                 */
            if(0 <= i4_x_offset)
            {
                i4_x_ref = i4_x_ref - i4_x_offset;
            }
            if(0 <= i4_y_offset)
            {
                i4_y_ref = i4_y_ref - i4_y_offset;
            }

            pi2_ref_array_temp = pi2_ref_array + i4_x;
            pi2_ref_array_temp += i4_y * i4_refarray_wd;

            if(SVC_INTER_MB == i4_ref_mb_type)
            {
                /* Chroma samples are read with a horizontal step of 2 */
                pi2_ref_data_byte = pi2_inp_data + (i4_x_ref << i4_chroma_flag);
                pi2_ref_data_byte += i4_y_ref * i4_inp_data_stride;

                *pi2_ref_array_temp = (WORD16) (*pi2_ref_data_byte);
            }
            else
            {
                /* Samples of non inter reference MBs contribute no residual */
                *pi2_ref_array_temp = 0;
            }
        }
    }

    *pi4_refarr_wd = i4_refarray_wd;
}

/**
*******************************************************************************
*
* @brief
*  Interpolates the residual prediction of one MB from the reference array
*  held in the scratch buffer, using the per-MB reference positions and
*  phases together with the pointer increment maps
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context
*
* @param[out] pi2_out
*  Output buffer for the predicted residual
*
* @param[in] i4_out_stride
*  Stride of pi2_out
*
* @param[in] i4_refarray_wd
*  Width of the reference array; used as the stride of the array and of
*  both increment maps
*
* @param[in] i4_chroma_flag
*  1 for chroma, 0 for luma. For chroma the MB is 8x8 and output samples are
*  written with a horizontal step of 2
*
* @param[in] ps_mb_pos
*  Position of the current MB, in MB units
*
* @notes
*  Weights are (16 - phase) and phase in both directions, and the result is
*  rounded with +128 and shifted right by 8.
*
*******************************************************************************
*/
void isvce_interpolate_residual(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_out,
                                WORD32 i4_out_stride, WORD32 i4_refarray_wd, WORD32 i4_chroma_flag,
                                coordinates_t *ps_mb_pos)
{
    res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
    res_pred_mem_store_t *ps_mem_store = &ps_res_pred_state->s_mem_store;

    WORD32 i4_x, i4_y;
    WORD32 i4_temp_array_ht;
    WORD32 i4_mb_wd;
    WORD32 i4_mb_ht;
    WORD16 *pi2_ref_array;
    UWORD8 *pu1_ref_x_ptr_incr, *pu1_ref_y_ptr_incr;

    coordinates_t *ps_phase;
    coordinates_t *ps_pos;
    res_pred_mb_state_t *ps_mb_states;
    coordinates_t s_frame_dims;
    coordinates_t s_frame_dims_in_mbs;

    UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id;
    svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data;
    res_pred_mb_state_t *ps_mb_state;

    s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
    s_frame_dims.i4_ordinate =
        ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;

    pu1_ref_x_ptr_incr = ps_res_pred_state->pu1_ref_x_ptr_incr;
    pu1_ref_y_ptr_incr = ps_res_pred_state->pu1_ref_y_ptr_incr;

    ps_mb_states = i4_chroma_flag
                       ? ps_res_pred_state->ps_layer_state[u1_spatial_layer_id].ps_chroma_mb_states
                       : ps_res_pred_state->ps_layer_state[u1_spatial_layer_id].ps_luma_mb_states;

    i4_mb_wd = MB_SIZE >> i4_chroma_flag;
    i4_mb_ht = MB_SIZE >> i4_chroma_flag;

    ps_mb_state = &ps_mb_states[ps_mb_pos->i4_abscissa +
                                (ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa)];

    ps_phase = ps_mb_state->ps_ref_array_phases;
    ps_pos = ps_mb_state->ps_ref_array_positions;

    i4_temp_array_ht = i4_mb_ht;

    pi2_ref_array = (WORD16 *) ps_mem_store->s_scratch.pv_data;

    for(i4_y = 0; i4_y < i4_temp_array_ht; i4_y++)
    {
        for(i4_x = 0; i4_x < i4_mb_wd; i4_x++)
        {
            WORD32 i4_i;
            WORD32 i4_y_ref;
            WORD32 i4_y_phase;
            WORD32 i4_x_ref;
            WORD32 i4_x_phase;
            WORD32 i4_x_ref_round;
            WORD16 *pi2_out_curr;
            WORD32 ai4_temp_pred[2];
            UWORD8 *pu1_ref_y_ptr_incr_temp;
            WORD32 *pi4_temp_pred;
            UWORD8 u1_incr_y;
            WORD16 i2_res;

            pi2_out_curr = pi2_out + (i4_x << i4_chroma_flag) + (i4_y * i4_out_stride);

            /* Positions are stored per output sample. Phases are looked up */
            /* with a period of 3: index x % 3 horizontally and 0, 3, 4 for */
            /* y % 3 = 0, 1, 2 vertically (presumably matching the layout   */
            /* used when the phases were initialised - confirm)             */
            i4_y_ref = ps_pos[(i4_mb_wd * i4_y) + i4_x].i4_ordinate;
            i4_y_phase = ps_phase[((i4_y % 3) > 0) * 2 + (i4_y % 3)].i4_ordinate;

            i4_x_ref = ps_pos[(i4_mb_wd * i4_y) + i4_x].i4_abscissa;
            i4_x_phase = ps_phase[i4_x % 3].i4_abscissa;

            /* horizontal processing*/
            for(i4_i = 0; i4_i < 2; i4_i++)
            {
                UWORD8 *pu1_ref_x_ptr_incr_temp;
                UWORD8 u1_incr;
                WORD16 *pi2_ref_array_1, *pi2_ref_array_2;

                pu1_ref_x_ptr_incr_temp = pu1_ref_x_ptr_incr + i4_x_ref;
                pu1_ref_x_ptr_incr_temp += ((i4_y_ref + i4_i) * i4_refarray_wd);

                u1_incr = *pu1_ref_x_ptr_incr_temp;

                pi2_ref_array_1 = pi2_ref_array + i4_x_ref;
                pi2_ref_array_1 += ((i4_y_ref + i4_i) * i4_refarray_wd);

                /* A zero increment marks a tx block edge: both taps then */
                /* read one sample, the right one when the phase is >= 8  */
                if(!u1_incr)
                {
                    pi2_ref_array_1 += (i4_x_phase >> 3);
                }

                pi2_ref_array_2 = pi2_ref_array_1 + u1_incr;

                ai4_temp_pred[i4_i] =
                    (16 - i4_x_phase) * (*pi2_ref_array_1) + i4_x_phase * (*pi2_ref_array_2);
            }

            /* vertical processing */
            i4_x_ref_round = (i4_x_ref + (i4_x_phase >> 3));

            pu1_ref_y_ptr_incr_temp =
                pu1_ref_y_ptr_incr + i4_x_ref_round + (i4_y_ref * i4_refarray_wd);

            u1_incr_y = *pu1_ref_y_ptr_incr_temp;

            pi4_temp_pred = &ai4_temp_pred[0];

            /* Same edge rule vertically: use a single row, the lower one */
            /* when the phase is >= 8                                     */
            if(!u1_incr_y)
            {
                pi4_temp_pred += (i4_y_phase >> 3);
            }

            i2_res = (((16 - i4_y_phase) * pi4_temp_pred[0] +
                       i4_y_phase * pi4_temp_pred[u1_incr_y] + 128) >>
                      8);

            *pi2_out_curr = i2_res;
        }
    }
}

/**
*******************************************************************************
*
* @brief
*  Generates the residual prediction of the current MB for the general
*  (non dyadic) case: for every component the reference sample array is
*  constructed first and the residual is then interpolated from it
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context. The MB position and spatial layer id are read
*  from s_res_pred_variables and the output buffers from s_res_pred_outputs
*
* @notes
*  The spatial layer id must be greater than 0; the layer with id - 1 is used
*  as the reference layer.
*
*******************************************************************************
*/
void isvce_get_mb_residual_pred_non_dyadic(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
{
    buffer_container_t s_inp;
    buffer_container_t s_out;
    coordinates_t s_frame_dims;
    coordinates_t s_frame_dims_in_mbs;
    coordinates_t s_ref_array_offsets;
    res_pred_layer_state_t *ps_layer_state, *ps_ref_layer_state;
    yuv_buf_props_t *ps_ref_residual_buf;
    res_pred_mb_state_t *ps_luma_mb_state;
    res_pred_mb_state_t *ps_chroma_mb_state;

    WORD16 *pi2_inp, *pi2_out;
    WORD32 i4_inp_stride, i4_out_stride;

    res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_res_pred_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_outputs_t *ps_res_pred_outputs = &ps_res_pred_ctxt->s_res_pred_outputs;
    res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
    svc_ilp_data_t *ps_svc_ilp_data = ps_res_pred_variables->ps_svc_ilp_data;
    coordinates_t *ps_mb_pos = &ps_res_pred_variables->s_mb_pos;

    UWORD8 u1_spatial_layer_id = ps_res_pred_variables->u1_spatial_layer_id;
    WORD32 i4_refarray_wd;
    WORD32 i;

    /* Layer 0 has no reference layer to predict from */
    ASSERT(u1_spatial_layer_id > 0);

    s_frame_dims.i4_abscissa = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_width;
    s_frame_dims.i4_ordinate = ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id].u4_height;
    s_frame_dims_in_mbs.i4_abscissa = s_frame_dims.i4_abscissa / MB_SIZE;
    s_frame_dims_in_mbs.i4_ordinate = s_frame_dims.i4_ordinate / MB_SIZE;

    ps_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id];
    ps_ref_layer_state = &ps_res_pred_state->ps_layer_state[u1_spatial_layer_id - 1];
    ps_ref_residual_buf = &ps_svc_ilp_data->ps_residual_bufs[u1_spatial_layer_id - 1];

    /* Per-MB states are stored in raster order over the current layer's MBs */
    ps_luma_mb_state = ps_layer_state->ps_luma_mb_states + ps_mb_pos->i4_abscissa +
                       ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;
    ps_chroma_mb_state = ps_layer_state->ps_chroma_mb_states + ps_mb_pos->i4_abscissa +
                         ps_mb_pos->i4_ordinate * s_frame_dims_in_mbs.i4_abscissa;

    for(i = 0; i < NUM_COMPONENTS; i++)
    {
        res_pred_mb_state_t *ps_mb_state;

        UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));

        ps_mb_state = u1_is_chroma ? ps_chroma_mb_state : ps_luma_mb_state;

        /* Negative offsets are raised to 0 before addressing the input.   */
        /* NOTE(review): the upper bound uses the current layer's          */
        /* dimensions although the offsets address the reference layer     */
        /* buffer - confirm this is intended                               */
        s_ref_array_offsets.i4_abscissa =
            MAX(0, MIN(ps_mb_state->s_offsets.i4_abscissa,
                       (s_frame_dims.i4_abscissa >> u1_is_chroma) - 1));
        s_ref_array_offsets.i4_ordinate =
            MAX(0, MIN(ps_mb_state->s_offsets.i4_ordinate,
                       (s_frame_dims.i4_ordinate >> u1_is_chroma) - 1));

        /* Both chroma components are read through the UV buffer: V starts */
        /* one sample after U and the abscissa is doubled for chroma       */
        s_inp = ps_ref_residual_buf->as_component_bufs[u1_is_chroma ? UV : Y];
        s_inp.pv_data = ((WORD16 *) s_inp.pv_data) + (V == ((COMPONENT_TYPE) i)) +
                        (s_ref_array_offsets.i4_abscissa << u1_is_chroma) +
                        s_ref_array_offsets.i4_ordinate * s_inp.i4_data_stride;

        s_out = ps_res_pred_outputs->s_res_pred.as_component_bufs[u1_is_chroma ?
UV : Y]; s_out.pv_data = ((WORD16 *) s_out.pv_data) + (V == ((COMPONENT_TYPE) i)); pi2_inp = (WORD16 *) s_inp.pv_data; pi2_out = (WORD16 *) s_out.pv_data; i4_inp_stride = s_inp.i4_data_stride; i4_out_stride = s_out.i4_data_stride; /* ------- Constructing refSampleArray ----------------------- */ isvce_residual_reflayer_const( ps_res_pred_ctxt, pi2_inp, i4_inp_stride, ps_ref_layer_state->pi1_mb_mode, ps_ref_layer_state->i4_mb_mode_stride, &i4_refarray_wd, u1_is_chroma); /* ---- Interpolation process for Residual prediction ------ */ isvce_interpolate_residual(ps_res_pred_ctxt, pi2_out, i4_out_stride, i4_refarray_wd, u1_is_chroma, ps_mb_pos); } } UWORD32 isvce_get_sad_with_residual_pred(buffer_container_t *ps_src, buffer_container_t *ps_pred, buffer_container_t *ps_res, UWORD32 u4_mb_wd, UWORD32 u4_mb_ht) { UWORD32 i, j; UWORD32 u4_sad = 0; for(i = 0; i < u4_mb_ht; i++) { for(j = 0; j < u4_mb_wd; j++) { WORD16 i2_src = ((UWORD8 *) ps_src->pv_data)[j + i * ps_src->i4_data_stride]; WORD16 i2_pred = ((UWORD8 *) ps_pred->pv_data)[j + i * ps_pred->i4_data_stride]; WORD16 i2_res = ((WORD16 *) ps_res->pv_data)[j + i * ps_res->i4_data_stride]; u4_sad += ABS(i2_src - i2_pred - i2_res); } } return u4_sad; } /** ******************************************************************************* * * @brief * Function to evaluate residual_prediction_flag * * @param[in] ps_src * Pointer to MB src buffers * * @param[in] ps_pred * Pointer to MB pred buffers * * @param[in] ps_res * Pointer to MB res buffers * * @param[out] pu4_res_pred_sad * Output variable for SAD * * @param[out] pu1_residual_prediction_flag * Output variable for residual_prediction_flag * * @param[in] u4_winning_sad * Winning mode's SAD * * @notes The algorithm currently uses only luma for evaluating * residual_prediction_flag. 
* ******************************************************************************* */ void isvce_residual_pred_eval(svc_res_pred_ctxt_t *ps_res_pred_ctxt, yuv_buf_props_t *ps_src, yuv_buf_props_t *ps_pred, yuv_buf_props_t *ps_res, UWORD32 *pu4_res_pred_sad, UWORD8 *pu1_residual_prediction_flag, UWORD32 u4_winning_sad) { res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants; res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state; pu4_res_pred_sad[0] = ps_res_pred_state->pf_get_sad_with_residual_pred( &ps_src->as_component_bufs[Y], &ps_pred->as_component_bufs[Y], &ps_res->as_component_bufs[Y], MB_SIZE, MB_SIZE); pu1_residual_prediction_flag[0] = pu4_res_pred_sad[0] < u4_winning_sad; } void isvce_update_res_pred_info(isvce_process_ctxt_t *ps_proc) { if(ps_proc->s_svc_params.u1_num_spatial_layers > 1) { svc_res_pred_ctxt_t *ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt; res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants; res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state; res_pred_layer_state_t *ps_layer_state = &ps_res_pred_state->ps_layer_state[ps_proc->u1_spatial_layer_id]; WORD8 i1_is_intra = ps_proc->ps_mb_info->u1_is_intra; WORD8 *pi1_mb_mode = &ps_layer_state->pi1_mb_mode[ps_proc->i4_mb_x + (ps_proc->i4_mb_y * (ps_layer_state->i4_mb_mode_stride))]; if(ps_proc->ps_mb_info->u1_base_mode_flag == 1 && i1_is_intra) { *pi1_mb_mode = SVC_IBL_MB; } else { if(i1_is_intra) { *pi1_mb_mode = SVC_INTRA_MB; } else { *pi1_mb_mode = SVC_INTER_MB; } } } }