diff options
Diffstat (limited to 'third_party/libaom/source/libaom/av1/common/av1_loopfilter.c')
-rw-r--r-- | third_party/libaom/source/libaom/av1/common/av1_loopfilter.c | 268 |
1 files changed, 247 insertions, 21 deletions
diff --git a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c index caa15c21e2..18ae0f28f4 100644 --- a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c +++ b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c @@ -351,8 +351,14 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm, const uint32_t scale_vert = plane_ptr->subsampling_y; uint8_t *const dst_ptr = plane_ptr->dst.buf; const int dst_stride = plane_ptr->dst.stride; - const int y_range = (MAX_MIB_SIZE >> scale_vert); - const int x_range = (MAX_MIB_SIZE >> scale_horz); + const int plane_mi_rows = + ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert); + const int plane_mi_cols = + ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz); + const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), + (MAX_MIB_SIZE >> scale_vert)); + const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), + (MAX_MIB_SIZE >> scale_horz)); for (int y = 0; y < y_range; y++) { uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride; for (int x = 0; x < x_range;) { @@ -376,8 +382,8 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm, } #if CONFIG_AV1_HIGHBITDEPTH - const int use_highbitdepth = cm->seq_params.use_highbitdepth; - const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth; + const int use_highbitdepth = cm->seq_params->use_highbitdepth; + const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth; switch (params.filter_length) { // apply 4-tap filtering case 4: @@ -456,6 +462,84 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm, } } +void av1_filter_block_plane_vert_rt(const AV1_COMMON *const cm, + const MACROBLOCKD *const xd, + const int plane, + const MACROBLOCKD_PLANE *const plane_ptr, + const uint32_t mi_row, + const uint32_t mi_col) { + const uint32_t scale_horz = plane_ptr->subsampling_x; + const uint32_t scale_vert = plane_ptr->subsampling_y; + uint8_t *const 
dst_ptr = plane_ptr->dst.buf; + const int dst_stride = plane_ptr->dst.stride; + const int plane_mi_rows = + ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert); + const int plane_mi_cols = + ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz); + const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), + (MAX_MIB_SIZE >> scale_vert)); + const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), + (MAX_MIB_SIZE >> scale_horz)); + assert(!plane); + assert(!(y_range % 2)); + for (int y = 0; y < y_range; y += 2) { + uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride; + for (int x = 0; x < x_range;) { + // inner loop always filter vertical edges in a MI block. If MI size + // is 8x8, it will filter the vertical edge aligned with a 8x8 block. + // If 4x4 transform is used, it will then filter the internal edge + // aligned with a 4x4 block + const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE; + const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE; + uint32_t advance_units; + TX_SIZE tx_size; + AV1_DEBLOCKING_PARAMETERS params; + memset(&params, 0, sizeof(params)); + + tx_size = + set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd, + VERT_EDGE, curr_x, curr_y, plane, plane_ptr); + if (tx_size == TX_INVALID) { + params.filter_length = 0; + tx_size = TX_4X4; + } + + switch (params.filter_length) { + // apply 4-tap filtering + case 4: + aom_lpf_vertical_4_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + case 6: // apply 6-tap filter for chroma plane only + assert(plane != 0); + aom_lpf_vertical_6_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // apply 8-tap filtering + case 8: + aom_lpf_vertical_8_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // apply 14-tap filtering + case 14: + 
aom_lpf_vertical_14_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // no filtering + default: break; + } + // advance the destination pointer + advance_units = tx_size_wide_unit[tx_size]; + x += advance_units; + p += advance_units * MI_SIZE; + } + } +} + void av1_filter_block_plane_horz(const AV1_COMMON *const cm, const MACROBLOCKD *const xd, const int plane, const MACROBLOCKD_PLANE *const plane_ptr, @@ -464,8 +548,14 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm, const uint32_t scale_vert = plane_ptr->subsampling_y; uint8_t *const dst_ptr = plane_ptr->dst.buf; const int dst_stride = plane_ptr->dst.stride; - const int y_range = (MAX_MIB_SIZE >> scale_vert); - const int x_range = (MAX_MIB_SIZE >> scale_horz); + const int plane_mi_rows = + ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert); + const int plane_mi_cols = + ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz); + const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), + (MAX_MIB_SIZE >> scale_vert)); + const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), + (MAX_MIB_SIZE >> scale_horz)); for (int x = 0; x < x_range; x++) { uint8_t *p = dst_ptr + x * MI_SIZE; for (int y = 0; y < y_range;) { @@ -489,8 +579,8 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm, } #if CONFIG_AV1_HIGHBITDEPTH - const int use_highbitdepth = cm->seq_params.use_highbitdepth; - const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth; + const int use_highbitdepth = cm->seq_params->use_highbitdepth; + const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth; switch (params.filter_length) { // apply 4-tap filtering case 4: @@ -572,6 +662,84 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm, } } +void av1_filter_block_plane_horz_rt(const AV1_COMMON *const cm, + const MACROBLOCKD *const xd, + const int plane, + const MACROBLOCKD_PLANE *const plane_ptr, + const uint32_t mi_row, 
+ const uint32_t mi_col) { + const uint32_t scale_horz = plane_ptr->subsampling_x; + const uint32_t scale_vert = plane_ptr->subsampling_y; + uint8_t *const dst_ptr = plane_ptr->dst.buf; + const int dst_stride = plane_ptr->dst.stride; + const int plane_mi_rows = + ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert); + const int plane_mi_cols = + ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz); + const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), + (MAX_MIB_SIZE >> scale_vert)); + const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), + (MAX_MIB_SIZE >> scale_horz)); + assert(!plane); + for (int x = 0; x < x_range; x += 2) { + uint8_t *p = dst_ptr + x * MI_SIZE; + for (int y = 0; y < y_range;) { + // inner loop always filter vertical edges in a MI block. If MI size + // is 8x8, it will first filter the vertical edge aligned with a 8x8 + // block. If 4x4 transform is used, it will then filter the internal + // edge aligned with a 4x4 block + const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE; + const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE; + uint32_t advance_units; + TX_SIZE tx_size; + AV1_DEBLOCKING_PARAMETERS params; + memset(&params, 0, sizeof(params)); + + tx_size = set_lpf_parameters( + &params, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE, + curr_x, curr_y, plane, plane_ptr); + if (tx_size == TX_INVALID) { + params.filter_length = 0; + tx_size = TX_4X4; + } + + switch (params.filter_length) { + // apply 4-tap filtering + case 4: + aom_lpf_horizontal_4_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // apply 6-tap filtering + case 6: + assert(plane != 0); + aom_lpf_horizontal_6_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // apply 8-tap filtering + case 8: + aom_lpf_horizontal_8_dual(p, dst_stride, params.mblim, 
params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // apply 14-tap filtering + case 14: + aom_lpf_horizontal_14_dual(p, dst_stride, params.mblim, params.lim, + params.hev_thr, params.mblim, params.lim, + params.hev_thr); + break; + // no filtering + default: break; + } + // advance the destination pointer + advance_units = tx_size_high_unit[tx_size]; + y += advance_units; + p += advance_units * dst_stride * MI_SIZE; + } + } +} + void av1_filter_block_plane_vert_test(const AV1_COMMON *const cm, const MACROBLOCKD *const xd, const int plane, @@ -661,7 +829,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, #if CONFIG_LPF_MASK int is_decoding, #endif - int plane_start, int plane_end) { + int plane_start, int plane_end, int is_realtime) { struct macroblockd_plane *pd = xd->plane; const int col_start = 0; const int col_end = cm->mi_params.mi_cols; @@ -679,7 +847,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, else if (plane == 2 && !(cm->lf.filter_level_v)) continue; - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0, + av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, 0, 0, plane, plane + 1); av1_build_bitmask_vert_info(cm, &pd[plane], plane); @@ -716,49 +884,106 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, continue; else if (plane == 2 && !(cm->lf.filter_level_v)) continue; - if (cm->lf.combine_vert_horz_lf) { // filter all vertical and horizontal edges in every 128x128 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { // filter vertical edges - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, + mi_row, mi_col, plane, plane + 1); +#if CONFIG_AV1_HIGHBITDEPTH + (void)is_realtime; 
av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col); +#else + if (is_realtime && !plane) { + av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row, + mi_col); + + } else { + av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, + mi_col); + } +#endif // filter horizontal edges if (mi_col - MAX_MIB_SIZE >= 0) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, + av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, mi_row, mi_col - MAX_MIB_SIZE, plane, plane + 1); +#if CONFIG_AV1_HIGHBITDEPTH + (void)is_realtime; av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col - MAX_MIB_SIZE); +#else + if (is_realtime && !plane) { + av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row, + mi_col - MAX_MIB_SIZE); + } else { + av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, + mi_col - MAX_MIB_SIZE); + } +#endif } } // filter horizontal edges - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, + av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, mi_row, mi_col - MAX_MIB_SIZE, plane, plane + 1); +#if CONFIG_AV1_HIGHBITDEPTH + (void)is_realtime; av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col - MAX_MIB_SIZE); +#else + if (is_realtime && !plane) { + av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row, + mi_col - MAX_MIB_SIZE); + + } else { + av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, + mi_col - MAX_MIB_SIZE); + } +#endif } } else { // filter all vertical edges in every 128x128 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, + mi_row, mi_col, plane, plane + 1); +#if CONFIG_AV1_HIGHBITDEPTH + (void)is_realtime; 
av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, mi_col); +#else + if (is_realtime && !plane) { + av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row, + mi_col); + } else { + av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row, + mi_col); + } +#endif } } // filter all horizontal edges in every 128x128 super block for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) { - av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row, - mi_col, plane, plane + 1); + av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, + mi_row, mi_col, plane, plane + 1); +#if CONFIG_AV1_HIGHBITDEPTH + (void)is_realtime; av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, mi_col); +#else + if (is_realtime && !plane) { + av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row, + mi_col); + + } else { + av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row, + mi_col); + } +#endif } } } @@ -770,7 +995,8 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, #if CONFIG_LPF_MASK int is_decoding, #endif - int plane_start, int plane_end, int partial_frame) { + int plane_start, int plane_end, int partial_frame, + int is_realtime) { int start_mi_row, end_mi_row, mi_rows_to_filter; start_mi_row = 0; @@ -786,5 +1012,5 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, #if CONFIG_LPF_MASK is_decoding, #endif - plane_start, plane_end); + plane_start, plane_end, is_realtime); } |