diff options
author | Johann <johannkoenig@google.com> | 2016-07-21 12:09:52 -0700 |
---|---|---|
committer | Johann <johannkoenig@google.com> | 2016-07-21 12:09:52 -0700 |
commit | 68e1c830ade592be74773e249bf94e2bbfb50de7 (patch) | |
tree | 08299f7deb6079690f0a3d2118ef3882fa77bdc6 /libvpx/vp9/common | |
parent | 96ebd06cb9832f583f7c181ec886eade209524df (diff) | |
download | libvpx-68e1c830ade592be74773e249bf94e2bbfb50de7.tar.gz |
Update external/libvpx to v1.6.0
Change-Id: I9425a3d3c3524d43823bc89f9f03556420c3dd42
Diffstat (limited to 'libvpx/vp9/common')
32 files changed, 654 insertions, 833 deletions
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c index 24c6c54ed..7dd1005d3 100644 --- a/libvpx/vp9/common/vp9_alloccommon.c +++ b/libvpx/vp9/common/vp9_alloccommon.c @@ -119,6 +119,20 @@ void vp9_free_context_buffers(VP9_COMMON *cm) { cm->lf.lfm = NULL; } + +int vp9_alloc_loop_filter(VP9_COMMON *cm) { + vpx_free(cm->lf.lfm); + // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The + // stride and rows are rounded up / truncated to a multiple of 8. + cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; + cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, + sizeof(*cm->lf.lfm)); + if (!cm->lf.lfm) + return 1; + return 0; +} + int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { int new_mi_size; @@ -151,15 +165,8 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { cm->above_context_alloc_cols = cm->mi_cols; } - vpx_free(cm->lf.lfm); - - // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The - // stride and rows are rounded up / truncated to a multiple of 8. - cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; - cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( - ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, - sizeof(*cm->lf.lfm)); - if (!cm->lf.lfm) goto fail; + if (vp9_alloc_loop_filter(cm)) + goto fail; return 0; diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h index c0e51a6ce..e53955b99 100644 --- a/libvpx/vp9/common/vp9_alloccommon.h +++ b/libvpx/vp9/common/vp9_alloccommon.h @@ -23,6 +23,7 @@ struct BufferPool; void vp9_remove_common(struct VP9Common *cm); +int vp9_alloc_loop_filter(struct VP9Common *cm); int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); void vp9_init_context_buffers(struct VP9Common *cm); void vp9_free_context_buffers(struct VP9Common *cm); diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c index 0e104ee59..7bab27d4f 100644 --- a/libvpx/vp9/common/vp9_blockd.c +++ b/libvpx/vp9/common/vp9_blockd.c @@ -13,7 +13,7 @@ PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *left_mi, int b) { if (b == 0 || b == 2) { - if (!left_mi || is_inter_block(&left_mi->mbmi)) + if (!left_mi || is_inter_block(left_mi)) return DC_PRED; return get_y_mode(left_mi, b + 1); @@ -26,7 +26,7 @@ PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *above_mi, int b) { if (b == 0 || b == 1) { - if (!above_mi || is_inter_block(&above_mi->mbmi)) + if (!above_mi || is_inter_block(above_mi)) return DC_PRED; return get_y_mode(above_mi, b + 2); @@ -40,12 +40,12 @@ void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi; + const MODE_INFO* mi = xd->mi[0]; // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. - const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) - : mbmi->tx_size; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) + : mi->tx_size; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h index 61eb59162..3d26fb2b5 100644 --- a/libvpx/vp9/common/vp9_blockd.h +++ b/libvpx/vp9/common/vp9_blockd.h @@ -64,7 +64,7 @@ typedef struct { typedef int8_t MV_REFERENCE_FRAME; // This structure now relates to 8x8 block regions. -typedef struct { +typedef struct MODE_INFO { // Common for both INTER and INTRA blocks BLOCK_SIZE sb_type; PREDICTION_MODE mode; @@ -82,24 +82,21 @@ typedef struct { // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. int_mv mv[2]; -} MB_MODE_INFO; -typedef struct MODE_INFO { - MB_MODE_INFO mbmi; b_mode_info bmi[4]; } MODE_INFO; static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { - return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode - : mi->mbmi.mode; + return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode + : mi->mode; } -static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[0] > INTRA_FRAME; +static INLINE int is_inter_block(const MODE_INFO *mi) { + return mi->ref_frame[0] > INTRA_FRAME; } -static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) { - return mbmi->ref_frame[1] > INTRA_FRAME; +static INLINE int has_second_ref(const MODE_INFO *mi) { + return mi->ref_frame[1] > INTRA_FRAME; } PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, @@ -160,11 +157,9 @@ typedef struct macroblockd { MODE_INFO **mi; MODE_INFO *left_mi; MODE_INFO *above_mi; - MB_MODE_INFO *left_mbmi; - MB_MODE_INFO *above_mbmi; - int up_available; - int left_available; + unsigned int max_blocks_wide; + unsigned int max_blocks_high; const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; @@ -212,19 +207,19 @@ extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd) { - const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const MODE_INFO *const mi = xd->mi[0]; - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi)) + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) return DCT_DCT; - return intra_mode_to_tx_type_lookup[mbmi->mode]; + return intra_mode_to_tx_type_lookup[mi->mode]; } static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int ib) { const MODE_INFO *const mi = xd->mi[0]; - if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi)) + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) return DCT_DCT; return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; @@ -242,9 +237,9 @@ static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, } } -static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi, +static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi, const struct macroblockd_plane *pd) { - return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x, + return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x, pd->subsampling_y); } diff --git a/libvpx/vp9/common/vp9_common.h b/libvpx/vp9/common/vp9_common.h index 76e7cd440..908fa80a3 100644 --- a/libvpx/vp9/common/vp9_common.h +++ b/libvpx/vp9/common/vp9_common.h @@ -67,7 +67,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) { #define VP9_FRAME_MARKER 0x2 - #ifdef __cplusplus } // extern "C" #endif diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c index a6dae6a1c..3409d0484 100644 --- a/libvpx/vp9/common/vp9_common_data.c +++ b/libvpx/vp9/common/vp9_common_data.c @@ -159,3 +159,18 @@ const struct { {0, 8 }, // 64X32 - {0b0000, 0b1000} {0, 0 }, // 64X64 - {0b0000, 0b0000} }; + +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +const uint8_t need_top_left[INTRA_MODES] = { + 0, // DC_PRED + 0, // V_PRED + 0, // H_PRED + 0, // D45_PRED + 1, // D135_PRED + 1, // D117_PRED + 1, // D153_PRED + 0, // D207_PRED + 0, // D63_PRED + 1, // TM_PRED +}; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h index 95a117961..0ae24dad5 100644 --- a/libvpx/vp9/common/vp9_common_data.h +++ b/libvpx/vp9/common/vp9_common_data.h @@ -33,6 +33,9 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +extern const uint8_t need_top_left[INTRA_MODES]; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/libvpx/vp9/common/vp9_debugmodes.c b/libvpx/vp9/common/vp9_debugmodes.c index 3d80103d2..d9c1fd968 100644 --- a/libvpx/vp9/common/vp9_debugmodes.c +++ b/libvpx/vp9/common/vp9_debugmodes.c @@ -35,7 +35,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(file, "%2d ", - *((int*) ((char *) (&mi[0]->mbmi) + + *((int*) ((char *) (mi[0]) + member_offset))); mi++; } @@ -53,18 +53,18 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { int rows = cm->mi_rows; int cols = cm->mi_cols; - print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type)); - print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode)); - print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0])); - print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size)); - print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode)); + print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); + print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); + print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); + print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); + print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); // output skip infomation. log_frame_info(cm, "Skips:", mvs); for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "S "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%2d ", mi[0]->mbmi.skip); + fprintf(mvs, "%2d ", mi[0]->skip); mi++; } fprintf(mvs, "\n"); @@ -78,8 +78,8 @@ void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row, - mi[0]->mbmi.mv[0].as_mv.col); + fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, + mi[0]->mv[0].as_mv.col); mi++; } fprintf(mvs, "\n"); diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c index 579857bc9..7b490af34 100644 --- a/libvpx/vp9/common/vp9_entropy.c +++ b/libvpx/vp9/common/vp9_entropy.c @@ -36,20 +36,6 @@ const vpx_prob vp9_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; #if CONFIG_VP9_HIGHBITDEPTH -const vpx_prob vp9_cat1_prob_high10[] = { 159 }; -const vpx_prob vp9_cat2_prob_high10[] = { 165, 145 }; -const vpx_prob vp9_cat3_prob_high10[] = { 173, 148, 140 }; -const vpx_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 }; -const vpx_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 }; -const vpx_prob vp9_cat6_prob_high10[] = { - 255, 255, 254, 254, 254, 252, 249, 243, - 230, 196, 177, 153, 140, 133, 130, 129 -}; -const vpx_prob vp9_cat1_prob_high12[] = { 159 }; -const vpx_prob vp9_cat2_prob_high12[] = { 165, 145 }; -const vpx_prob vp9_cat3_prob_high12[] = { 173, 148, 140 }; -const vpx_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 }; -const vpx_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 }; const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 @@ -403,7 +389,6 @@ const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { {255, 241, 243, 255, 236, 255, 252, 254}, {255, 243, 245, 255, 237, 255, 252, 254}, {255, 246, 247, 255, 239, 255, 253, 255}, - {255, 246, 247, 255, 239, 255, 253, 255}, }; static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { @@ -743,8 +728,8 @@ static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { }; static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { - memcpy(probs, vp9_pareto8_full[p = 0 ? 0 : p - 1], - MODEL_NODES * sizeof(vpx_prob)); + assert(p != 0); + memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); } void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h index 21611ed6d..63b3bff5d 100644 --- a/libvpx/vp9/common/vp9_entropy.h +++ b/libvpx/vp9/common/vp9_entropy.h @@ -138,7 +138,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { // 1, 3, 5, 7, ..., 253, 255 // In between probabilities are interpolated linearly -#define COEFF_PROB_MODELS 256 +#define COEFF_PROB_MODELS 255 #define UNCONSTRAINED_NODES 3 diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c index 3acfe1448..566ae91cf 100644 --- a/libvpx/vp9/common/vp9_entropymv.c +++ b/libvpx/vp9/common/vp9_entropymv.c @@ -11,9 +11,6 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" -// Integer pel reference mv threshold for use of high-precision 1/8 mv -#define COMPANDED_MVREF_THRESH 8 - const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, @@ -127,11 +124,6 @@ MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { return c; } -int vp9_use_mv_hp(const MV *ref) { - return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && - (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; -} - static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, int usehp) { int s, z, c, o, d, e, f; diff --git a/libvpx/vp9/common/vp9_entropymv.h b/libvpx/vp9/common/vp9_entropymv.h index 8c817bf7b..2f05ad44b 100644 --- a/libvpx/vp9/common/vp9_entropymv.h +++ b/libvpx/vp9/common/vp9_entropymv.h @@ -27,7 +27,14 @@ struct VP9Common; void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); -int vp9_use_mv_hp(const MV *ref); + +// Integer pel reference mv threshold for use of high-precision 1/8 mv +#define COMPANDED_MVREF_THRESH 8 + +static INLINE int use_mv_hp(const MV *ref) { + return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && + (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; +} #define MV_UPDATE_PROB 252 diff --git a/libvpx/vp9/common/vp9_idct.c b/libvpx/vp9/common/vp9_idct.c index d12cd76db..1b420143b 100644 --- a/libvpx/vp9/common/vp9_idct.c +++ b/libvpx/vp9/common/vp9_idct.c @@ -174,6 +174,9 @@ void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, else if (eob <= 34) // non-zero coeff only in upper-left 8x8 vpx_idct32x32_34_add(input, dest, stride); + else if (eob <= 135) + // non-zero coeff only in upper-left 16x16 + vpx_idct32x32_135_add(input, dest, stride); else vpx_idct32x32_1024_add(input, dest, stride); } diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c index b8a113223..183dec4e7 100644 --- a/libvpx/vp9/common/vp9_loopfilter.c +++ b/libvpx/vp9/common/vp9_loopfilter.c @@ -232,9 +232,9 @@ static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { } static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, - const MB_MODE_INFO *mbmi) { - return lfi_n->lvl[mbmi->segment_id][mbmi->ref_frame[0]] - [mode_lf_lut[mbmi->mode]]; + const MODE_INFO *mi) { + return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]] + [mode_lf_lut[mi->mode]]; } void vp9_loop_filter_init(VP9_COMMON *cm) { @@ -298,200 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s, int pitch, - unsigned int mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { - const int mask_shift = subsampling_factor ? 4 : 8; - const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 4 : 8; - - unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; - unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; - unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; - unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; - unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; - unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; + const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; - mask; mask >>= 1) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - - // TODO(yunqingwang): count in loopfilter functions should be removed. - if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - if ((mask_16x16_0 & mask_16x16_1) & 1) { - vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); - } else if (mask_16x16_0 & 1) { - vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr); + uint8_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr); } else { - vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr); + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); } } - if ((mask_8x8_0 | mask_8x8_1) & 1) { - if ((mask_8x8_0 & mask_8x8_1) & 1) { - vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_8x8_0 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); } else { - vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_4x4_0 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, - 1); + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); } else { - vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } - if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { - if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr); - } else if (mask_4x4_int_0 & 1) { - vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1); + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr); } else { - vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, 1); + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); } } } - s += 8; + ss[0] += 8; lfl += 1; - mask_16x16_0 >>= 1; - mask_8x8_0 >>= 1; - mask_4x4_0 >>= 1; - mask_4x4_int_0 >>= 1; - mask_16x16_1 >>= 1; - mask_8x8_1 >>= 1; - mask_4x4_1 >>= 1; - mask_4x4_int_1 >>= 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_filter_selectively_vert_row2(int subsampling_factor, uint16_t *s, int pitch, - unsigned int mask_16x16_l, - unsigned int mask_8x8_l, - unsigned int mask_4x4_l, - unsigned int mask_4x4_int_l, - const loop_filter_info_n *lfi_n, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { - const int mask_shift = subsampling_factor ? 4 : 8; - const int mask_cutoff = subsampling_factor ? 0xf : 0xff; + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 4 : 8; - - unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff; - unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff; - unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff; - unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff; - unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff; - unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff; - unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff; + const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; - - for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 | - mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1; - mask; mask >>= 1) { - const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl; - const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward); - - // TODO(yunqingwang): count in loopfilter functions should be removed. - if (mask & 1) { - if ((mask_16x16_0 | mask_16x16_1) & 1) { - if ((mask_16x16_0 & mask_16x16_1) & 1) { - vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); - } else if (mask_16x16_0 & 1) { - vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, bd); + uint16_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, bd); + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_8x8_0 | mask_8x8_1) & 1) { - if ((mask_8x8_0 & mask_8x8_1) & 1) { - vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_8x8_0 & 1) { - vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_4x4_0 | mask_4x4_1) & 1) { - if ((mask_4x4_0 & mask_4x4_1) & 1) { - vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_0 & 1) { - vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); } } - if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) { - if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) { - vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, lfi1->mblim, lfi1->lim, - lfi1->hev_thr, bd); - } else if (mask_4x4_int_0 & 1) { - vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, - lfi0->hev_thr, 1, bd); + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); } else { - vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, - lfi1->lim, lfi1->hev_thr, 1, bd); + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, + lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } } - s += 8; + ss[0] += 8; lfl += 1; - mask_16x16_0 >>= 1; - mask_8x8_0 >>= 1; - mask_4x4_0 >>= 1; - mask_4x4_int_0 >>= 1; - mask_16x16_1 >>= 1; - mask_8x8_1 >>= 1; - mask_4x4_1 >>= 1; - mask_4x4_int_1 >>= 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -501,30 +469,30 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - count = 1; if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2); + vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); count = 2; } else { - vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -537,23 +505,23 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -565,22 +533,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch, } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1); + lfin->lim, lfin->hev_thr); } count = 2; } else { - vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } - } else if (mask_4x4_int & 1) { + } else { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1); + lfi->hev_thr); } } s += 8 * count; @@ -598,30 +566,30 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; - count = 1; if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 2, bd); + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); count = 2; } else { - vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -635,26 +603,26 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. - const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1); + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, @@ -667,25 +635,25 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, - lfin->lim, lfin->hev_thr, 1, bd); + lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, - lfi->lim, lfi->hev_thr, 1, bd); + lfi->lim, lfi->hev_thr, bd); } } - } else if (mask_4x4_int & 1) { + } else { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } s += 8 * count; @@ -704,16 +672,14 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, // whether there were any coefficients encoded, and the loop filter strength // block we are currently looking at. Shift is used to position the // 1's we produce. -// TODO(JBB) Need another function for different resolution color.. static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, const int shift_uv, LOOP_FILTER_MASK *lfm) { - const MB_MODE_INFO *mbmi = &mi->mbmi; - const BLOCK_SIZE block_size = mbmi->sb_type; - const TX_SIZE tx_size_y = mbmi->tx_size; + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); - const int filter_level = get_filter_level(lfi_n, mbmi); + const int filter_level = get_filter_level(lfi_n, mi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; @@ -754,7 +720,7 @@ static void build_masks(const loop_filter_info_n *const lfi_n, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (mbmi->skip && is_inter_block(mbmi)) + if (mi->skip && is_inter_block(mi)) return; // Here we are adding a mask for the transform size. The transform @@ -788,10 +754,9 @@ static void build_masks(const loop_filter_info_n *const lfi_n, static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, LOOP_FILTER_MASK *lfm) { - const MB_MODE_INFO *mbmi = &mi->mbmi; - const BLOCK_SIZE block_size = mbmi->sb_type; - const TX_SIZE tx_size_y = mbmi->tx_size; - const int filter_level = get_filter_level(lfi_n, mbmi); + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const int filter_level = get_filter_level(lfi_n, mi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; @@ -812,7 +777,7 @@ static void build_y_mask(const loop_filter_info_n *const lfi_n, *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; - if (mbmi->skip && is_inter_block(mbmi)) + if (mi->skip && is_inter_block(mi)) return; *above_y |= (size_mask[block_size] & @@ -941,7 +906,6 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. -// TODO(JBB): This function only works for yv12. void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, MODE_INFO **mi, const int mode_info_stride, LOOP_FILTER_MASK *lfm) { @@ -977,10 +941,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, vp9_zero(*lfm); assert(mip[0] != NULL); - // TODO(jimbankoski): Try moving most of the following code into decode - // loop and storing lfm in the mbmi structure so that we don't have to go - // through the recursive loop structure multiple times. - switch (mip[0]->mbmi.sb_type) { + switch (mip[0]->sb_type) { case BLOCK_64X64: build_masks(lfi_n, mip[0] , 0, 0, lfm); break; @@ -1006,7 +967,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, const int mi_32_row_offset = ((idx_32 >> 1) << 2); if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) continue; - switch (mip[0]->mbmi.sb_type) { + switch (mip[0]->sb_type) { case BLOCK_32X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; @@ -1036,7 +997,7 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) continue; - switch (mip[0]->mbmi.sb_type) { + switch (mip[0]->sb_type) { case BLOCK_16X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; @@ -1083,8 +1044,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, } break; } - - vp9_adjust_mask(cm, mi_row, mi_col, lfm); } static void filter_selectively_vert(uint8_t *s, int pitch, @@ -1092,25 +1051,25 @@ static void filter_selectively_vert(uint8_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { - vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { - vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) - vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; @@ -1126,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, - const loop_filter_info_n *lfi_n, + const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { - const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl; + const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { @@ -1140,15 +1099,15 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch, lfi->hev_thr, bd); } else if (mask_8x8 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } else if (mask_4x4 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); } } if (mask_4x4_int & 1) vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, - lfi->hev_thr, 1, bd); + lfi->hev_thr, bd); s += 8; lfl += 1; mask_16x16 >>= 1; @@ -1186,8 +1145,8 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const MODE_INFO *mi = mi_8x8[c]; - const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type; - const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi); + const BLOCK_SIZE sb_type = mi[0].sb_type; + const int skip_this = mi[0].skip && is_inter_block(mi); // left edge of current unit is block/partition edge -> no skip const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; @@ -1196,13 +1155,13 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; const int skip_this_r = skip_this && !block_edge_above; - const TX_SIZE tx_size = get_uv_tx_size(&mi[0].mbmi, plane); + const TX_SIZE tx_size = get_uv_tx_size(mi, plane); const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = - get_filter_level(&cm->lf_info, &mi[0].mbmi))) + get_filter_level(&cm->lf_info, mi))) continue; // Build masks based on the transform size of each block @@ -1263,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3], + cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_vert(dst->buf, dst->stride, - mask_16x16_c & border_mask, - mask_8x8_c & border_mask, - mask_4x4_c & border_mask, - mask_4x4_int[r], - &cm->lf_info, &lfl[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mi_8x8 += row_step_stride; @@ -1312,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3], + cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, - mask_16x16_r, - mask_8x8_r, - mask_4x4_r, - mask_4x4_int_r, - &cm->lf_info, &lfl[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; } @@ -1350,27 +1299,29 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, // Vertical pass: do 2 rows at one time for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { - unsigned int mask_16x16_l = mask_16x16 & 0xffff; - unsigned int mask_8x8_l = mask_8x8 & 0xffff; - unsigned int mask_4x4_l = mask_4x4 & 0xffff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff; - -// Disable filtering on the leftmost column. + // Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2( - plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfm->lfl_y[r << 3], (int)cm->bit_depth); + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, + &lfm->lfl_y[r << 3], + (int)cm->bit_depth); } else { - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l, - mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; mask_16x16 >>= 16; @@ -1403,19 +1354,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - highbd_filter_selectively_horiz( - CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3], - (int)cm->bit_depth); + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, - &lfm->lfl_y[r << 3]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1449,38 +1399,35 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; } - { - unsigned int mask_16x16_l = mask_16x16 & 0xff; - unsigned int mask_8x8_l = mask_8x8 & 0xff; - unsigned int mask_4x4_l = mask_4x4 & 0xff; - unsigned int mask_4x4_int_l = mask_4x4_int & 0xff; - -// Disable filtering on the leftmost column. + // Disable filtering on the leftmost column. #if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) { - highbd_filter_selectively_vert_row2( - plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1], (int)cm->bit_depth); - } else { - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1]); - } -#else - filter_selectively_vert_row2( - plane->subsampling_x, dst->buf, dst->stride, - mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info, - &lfl_uv[r << 1]); + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { #endif // CONFIG_VP9_HIGHBITDEPTH - - dst->buf += 16 * dst->stride; - mask_16x16 >>= 8; - mask_8x8 >>= 8; - mask_4x4 >>= 8; - mask_4x4_int >>= 8; + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; } // Horizontal pass @@ -1512,17 +1459,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, if (cm->use_highbitdepth) { highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfl_uv[r << 1], (int)cm->bit_depth); + mask_4x4_r, mask_4x4_int_r, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); } else { +#endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH } -#else - filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, - mask_4x4_r, mask_4x4_int_r, &cm->lf_info, - &lfl_uv[r << 1]); #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; @@ -1558,7 +1504,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); - // TODO(JBB): Make setup_mask work for non 420. + // TODO(jimbankoski): For 444 only need to do y mask. vp9_adjust_mask(cm, mi_row, mi_col, lfm); vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); @@ -1598,6 +1544,8 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, } // Used by the encoder to build the loopfilter masks. +// TODO(slavarnway): Do the encoder the same way the decoder does it and +// build the masks in line as part of the encode process. void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; @@ -1640,12 +1588,12 @@ static const uint8_t first_block_in_16x16[8][8] = { // This function sets up the bit masks for a block represented // by mi_row, mi_col in a 64x64 region. // TODO(SJL): This function only works for yv12. -void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row, +void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, int mi_col, int bw, int bh) { - const BLOCK_SIZE block_size = mbmi->sb_type; - const TX_SIZE tx_size_y = mbmi->tx_size; + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; const loop_filter_info_n *const lfi_n = &cm->lf_info; - const int filter_level = get_filter_level(lfi_n, mbmi); + const int filter_level = get_filter_level(lfi_n, mi); const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); uint64_t *const left_y = &lfm->left_y[tx_size_y]; @@ -1693,7 +1641,7 @@ void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row, // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. - if (mbmi->skip && is_inter_block(mbmi)) + if (mi->skip && is_inter_block(mi)) return; // Add a mask for the transform size. The transform size mask is set to diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h index 7f943ea09..fca8830fa 100644 --- a/libvpx/vp9/common/vp9_loopfilter.h +++ b/libvpx/vp9/common/vp9_loopfilter.h @@ -69,6 +69,7 @@ typedef struct { struct loopfilter { int filter_level; + int last_filt_level; int sharpness_level; int last_sharpness_level; @@ -134,7 +135,7 @@ static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; } -void vp9_build_mask(struct VP9Common *cm, const MB_MODE_INFO *mbmi, int mi_row, +void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, int mi_col, int bw, int bh); void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, const int mi_col, LOOP_FILTER_MASK *lfm); diff --git a/libvpx/vp9/common/vp9_mfqe.c b/libvpx/vp9/common/vp9_mfqe.c index 6d560f438..f5264665b 100644 --- a/libvpx/vp9/common/vp9_mfqe.c +++ b/libvpx/vp9/common/vp9_mfqe.c @@ -203,12 +203,12 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { // Check the motion in current block(for inter frame), // or check the motion in the correlated block in last frame (for keyframe). - const int mv_len_square = mi->mbmi.mv[0].as_mv.row * - mi->mbmi.mv[0].as_mv.row + - mi->mbmi.mv[0].as_mv.col * - mi->mbmi.mv[0].as_mv.col; + const int mv_len_square = mi->mv[0].as_mv.row * + mi->mv[0].as_mv.row + + mi->mv[0].as_mv.col * + mi->mv[0].as_mv.col; const int mv_threshold = 100; - return mi->mbmi.mode >= NEARESTMV && // Not an intra block + return mi->mode >= NEARESTMV && // Not an intra block cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold; } @@ -220,7 +220,7 @@ static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, int uvd_stride) { int mi_offset, y_offset, uv_offset; - const BLOCK_SIZE cur_bs = mi->mbmi.sb_type; + const BLOCK_SIZE cur_bs = mi->sb_type; const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; const int bsl = b_width_log2_lookup[bs]; PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; diff --git a/libvpx/vp9/common/vp9_mvref_common.c b/libvpx/vp9/common/vp9_mvref_common.c index 77d1ff459..0eb01a51b 100644 --- a/libvpx/vp9/common/vp9_mvref_common.c +++ b/libvpx/vp9/common/vp9_mvref_common.c @@ -11,20 +11,19 @@ #include "vp9/common/vp9_mvref_common.h" -// This function searches the neighbourhood of a given MB/SB +// This function searches the neighborhood of a given MB/SB // to try and find candidate reference vectors. static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int block, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, uint8_t *mode_context) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type]; + const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; int different_ref_found = 0; int context_counter = 0; - const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? + const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; const TileInfo *const tile = &xd->tile; @@ -39,15 +38,14 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - const MB_MODE_INFO *const candidate = &candidate_mi->mbmi; // Keep counts for entropy encoding. - context_counter += mode_2_counter[candidate->mode]; + context_counter += mode_2_counter[candidate_mi->mode]; different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) + if (candidate_mi->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), refmv_count, mv_ref_list, Done); - else if (candidate->ref_frame[1] == ref_frame) + else if (candidate_mi->ref_frame[1] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), refmv_count, mv_ref_list, Done); } @@ -59,34 +57,19 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row * - xd->mi_stride]->mbmi; + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; different_ref_found = 1; - if (candidate->ref_frame[0] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, Done); - else if (candidate->ref_frame[1] == ref_frame) - ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, Done); + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); } } - // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast - // on windows platform. The sync here is unncessary if use_perv_frame_mvs - // is 0. But after removing it, there will be hang in the unit test on windows - // due to several threads waiting for a thread's signal. -#if defined(_WIN32) && !HAVE_PTHREAD_H - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } -#endif - // Check the last frame's mode and mv info. if (cm->use_prev_frame_mvs) { - // Synchronize here for frame parallel decode if sync function is provided. - if (cm->frame_parallel_decode && sync != NULL) { - sync(data, mi_row); - } - if (prev_frame_mvs->ref_frame[0] == ref_frame) { ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { @@ -101,11 +84,11 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row - * xd->mi_stride]->mbmi; + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // If the candidate is INTRA we don't want to consider its mv. - IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias, + IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, refmv_count, mv_ref_list, Done); } } @@ -150,20 +133,9 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, uint8_t *mode_context) { find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, - mi_row, mi_col, sync, data, mode_context); -} - -static void lower_mv_precision(MV *mv, int allow_hp) { - const int use_hp = allow_hp && vp9_use_mv_hp(mv); - if (!use_hp) { - if (mv->row & 1) - mv->row += (mv->row > 0 ? -1 : 1); - if (mv->col & 1) - mv->col += (mv->col > 0 ? -1 : 1); - } + mi_row, mi_col, mode_context); } void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, @@ -190,8 +162,8 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, assert(MAX_MV_REF_CANDIDATES == 2); - find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, - mi_row, mi_col, NULL, NULL, mode_context); + find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block, + mi_row, mi_col, mode_context); near_mv->as_int = 0; switch (block) { diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h index bd216d433..4380843e2 100644 --- a/libvpx/vp9/common/vp9_mvref_common.h +++ b/libvpx/vp9/common/vp9_mvref_common.h @@ -136,19 +136,19 @@ static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { // on whether the block_size < 8x8 and we have check_sub_blocks set. static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, int search_col, int block_idx) { - return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8 + return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] .as_mv[which_mv] - : candidate->mbmi.mv[which_mv]; + : candidate->mv[which_mv]; } // Performs mv sign inversion if indicated by the reference frame combination. -static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, +static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, const MV_REFERENCE_FRAME this_ref_frame, const int *ref_sign_bias) { - int_mv mv = mbmi->mv[ref]; - if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { + int_mv mv = mi->mv[ref]; + if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } @@ -157,7 +157,7 @@ static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref, // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector it will also -// skip all additional processing and jump to done! +// skip all additional processing and jump to Done! #define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ do { \ if (refmv_count) { \ @@ -207,11 +207,20 @@ static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } +static INLINE void lower_mv_precision(MV *mv, int allow_hp) { + const int use_hp = allow_hp && use_mv_hp(mv); + if (!use_hp) { + if (mv->row & 1) + mv->row += (mv->row > 0 ? -1 : 1); + if (mv->col & 1) + mv->col += (mv->col > 0 ? -1 : 1); + } +} + typedef void (*find_mv_refs_sync)(void *const data, int mi_row); void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, - find_mv_refs_sync sync, void *const data, uint8_t *mode_context); // check a list of motion vectors by sad score using a number rows of pixels diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h index ceffdedf9..3fd935e62 100644 --- a/libvpx/vp9/common/vp9_onyxc_int.h +++ b/libvpx/vp9/common/vp9_onyxc_int.h @@ -404,25 +404,8 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; // Are edges available for intra prediction? - xd->up_available = (mi_row != 0); - xd->left_available = (mi_col > tile->mi_col_start); - if (xd->up_available) { - xd->above_mi = xd->mi[-xd->mi_stride]; - // above_mi may be NULL in VP9 encoder's first pass. - xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL; - } else { - xd->above_mi = NULL; - xd->above_mbmi = NULL; - } - - if (xd->left_available) { - xd->left_mi = xd->mi[-1]; - // left_mi may be NULL in VP9 encoder's first pass. - xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL; - } else { - xd->left_mi = NULL; - xd->left_mbmi = NULL; - } + xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL; + xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL; } static INLINE void update_partition_context(MACROBLOCKD *xd, diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c index b685d813b..c04cc8f05 100644 --- a/libvpx/vp9/common/vp9_postproc.c +++ b/libvpx/vp9/common/vp9_postproc.c @@ -12,6 +12,7 @@ #include <stdlib.h> #include <stdio.h> +#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" @@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) { state->last_noise = a; } -void vp9_plane_add_noise_c(uint8_t *start, char *noise, - char blackclamp[16], - char whiteclamp[16], - char bothclamp[16], - unsigned int width, unsigned int height, int pitch) { - unsigned int i, j; - - // TODO(jbb): why does simd code use both but c doesn't, normalize and - // fix.. - (void) bothclamp; - for (i = 0; i < height; i++) { - uint8_t *pos = start + i * pitch; - char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT - - for (j = 0; j < width; j++) { - if (pos[j] < blackclamp[0]) - pos[j] = blackclamp[0]; - - if (pos[j] > 255 + whiteclamp[0]) - pos[j] = 255 + whiteclamp[0]; - - pos[j] += ref[j]; - } - } -} - static void swap_mi_and_prev_mi(VP9_COMMON *cm) { // Current mip will be the prev_mip for the next frame. MODE_INFO *temp = cm->postproc_state.prev_mip; @@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm, ppstate->last_noise != noise_level) { fillrd(ppstate, 63 - q, noise_level); } - - vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp, + vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp, ppstate->whiteclamp, ppstate->bothclamp, ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride); } diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c index 1f1632573..8f90e70e7 100644 --- a/libvpx/vp9/common/vp9_pred_common.c +++ b/libvpx/vp9/common/vp9_pred_common.c @@ -17,82 +17,57 @@ int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int left_type = xd->left_available && is_inter_block(left_mbmi) ? - left_mbmi->interp_filter : SWITCHABLE_FILTERS; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const int above_type = xd->up_available && is_inter_block(above_mbmi) ? - above_mbmi->interp_filter : SWITCHABLE_FILTERS; + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi && is_inter_block(left_mi) ? + left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi && is_inter_block(above_mi) ? + above_mi->interp_filter : SWITCHABLE_FILTERS; if (left_type == above_type) return left_type; - else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS) + else if (left_type == SWITCHABLE_FILTERS) return above_type; - else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS) + else if (above_type == SWITCHABLE_FILTERS) return left_type; else return SWITCHABLE_FILTERS; } -// The mode info data structure has a one element border above and to the -// left of the entries corresponding to real macroblocks. -// The prediction flags in these dummy entries are initialized to 0. -// 0 - inter/inter, inter/--, --/inter, --/-- -// 1 - intra/inter, inter/intra -// 2 - intra/--, --/intra -// 3 - intra/intra -int vp9_get_intra_inter_context(const MACROBLOCKD *xd) { - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - - if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); - return left_intra && above_intra ? 3 - : left_intra || above_intra; - } else if (has_above || has_left) { // one edge available - return 2 * !is_inter_block(has_above ? above_mbmi : left_mbmi); - } else { - return 0; - } -} - int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int ctx; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available - if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi)) + if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) // neither edge uses comp pred (0/1) - ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^ - (left_mbmi->ref_frame[0] == cm->comp_fixed_ref); - else if (!has_second_ref(above_mbmi)) + ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ + (left_mi->ref_frame[0] == cm->comp_fixed_ref); + else if (!has_second_ref(above_mi)) // one of two edges uses comp pred (2/3) - ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(above_mbmi)); - else if (!has_second_ref(left_mbmi)) + ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(above_mi)); + else if (!has_second_ref(left_mi)) // one of two edges uses comp pred (2/3) - ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref || - !is_inter_block(left_mbmi)); + ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(left_mi)); else // both edges use comp pred (4) ctx = 4; } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; - if (!has_second_ref(edge_mbmi)) + if (!has_second_ref(edge_mi)) // edge does not use comp pred (0/1) - ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref; + ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; else // edge uses comp pred (3) ctx = 3; @@ -107,39 +82,39 @@ int vp9_get_reference_mode_context(const VP9_COMMON *cm, int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int above_in_image = xd->up_available; - const int left_in_image = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_in_image = !!above_mi; + const int left_in_image = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int var_ref_idx = !fix_ref_idx; if (above_in_image && left_in_image) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra (2) pred_context = 2; } else if (above_intra || left_intra) { // intra/inter - const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; - if (!has_second_ref(edge_mbmi)) // single pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]); + if (!has_second_ref(edge_mi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); else // comp pred (1/3) - pred_context = 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] + pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); } else { // inter/inter - const int l_sg = !has_second_ref(left_mbmi); - const int a_sg = !has_second_ref(above_mbmi); - const MV_REFERENCE_FRAME vrfa = a_sg ? above_mbmi->ref_frame[0] - : above_mbmi->ref_frame[var_ref_idx]; - const MV_REFERENCE_FRAME vrfl = l_sg ? left_mbmi->ref_frame[0] - : left_mbmi->ref_frame[var_ref_idx]; + const int l_sg = !has_second_ref(left_mi); + const int a_sg = !has_second_ref(above_mi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] + : above_mi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0] + : left_mi->ref_frame[var_ref_idx]; if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { pred_context = 0; @@ -167,16 +142,16 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, } } } else if (above_in_image || left_in_image) { // one edge available - const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi; + const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi; - if (!is_inter_block(edge_mbmi)) { + if (!is_inter_block(edge_mi)) { pred_context = 2; } else { - if (has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[var_ref_idx] + if (has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); else - pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]); + pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); } } else { // no edges available (2) pred_context = 2; @@ -188,34 +163,34 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra pred_context = 2; } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; - if (!has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); else - pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME || - edge_mbmi->ref_frame[1] == LAST_FRAME); + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); } else { // inter/inter - const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); - const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; if (above_has_second && left_has_second) { pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || @@ -234,15 +209,15 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { } } } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; - if (!is_inter_block(edge_mbmi)) { // intra + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + if (!is_inter_block(edge_mi)) { // intra pred_context = 2; } else { // inter - if (!has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME); + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); else - pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME || - edge_mbmi->ref_frame[1] == LAST_FRAME); + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); } } else { // no edges available pred_context = 2; @@ -254,39 +229,39 @@ int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { int pred_context; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the - // left of the entries correpsonding to real macroblocks. - // The prediction flags in these dummy entries are initialised to 0. + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available - const int above_intra = !is_inter_block(above_mbmi); - const int left_intra = !is_inter_block(left_mbmi); + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra pred_context = 2; } else if (above_intra || left_intra) { // intra/inter or inter/intra - const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi; - if (!has_second_ref(edge_mbmi)) { - if (edge_mbmi->ref_frame[0] == LAST_FRAME) + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + if (!has_second_ref(edge_mi)) { + if (edge_mi->ref_frame[0] == LAST_FRAME) pred_context = 3; else - pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); } else { - pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || - edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); } } else { // inter/inter - const int above_has_second = has_second_ref(above_mbmi); - const int left_has_second = has_second_ref(left_mbmi); - const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1]; - const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1]; + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; if (above_has_second && left_has_second) { if (above0 == left0 && above1 == left1) @@ -321,16 +296,16 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { } } } else if (has_above || has_left) { // one edge available - const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi; + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; - if (!is_inter_block(edge_mbmi) || - (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi))) + if (!is_inter_block(edge_mi) || + (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) pred_context = 2; - else if (!has_second_ref(edge_mbmi)) - pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME); + else if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); else - pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME || - edge_mbmi->ref_frame[1] == GOLDEN_FRAME); + pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); } else { // no edges available (2) pred_context = 2; } diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h index 6f7af4a50..f3c676e95 100644 --- a/libvpx/vp9/common/vp9_pred_common.h +++ b/libvpx/vp9/common/vp9_pred_common.h @@ -42,8 +42,8 @@ static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int above_sip = (above_mi != NULL) ? - above_mi->mbmi.seg_id_predicted : 0; - const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0; + above_mi->seg_id_predicted : 0; + const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; return above_sip + left_sip; } @@ -56,8 +56,8 @@ static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; - const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0; - const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0; + const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; + const int left_skip = (left_mi != NULL) ? left_mi->skip : 0; return above_skip + left_skip; } @@ -68,11 +68,32 @@ static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); -int vp9_get_intra_inter_context(const MACROBLOCKD *xd); +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real macroblocks. +// The prediction flags in these dummy entries are initialized to 0. +// 0 - inter/inter, inter/--, --/inter, --/-- +// 1 - intra/inter, inter/intra +// 2 - intra/--, --/intra +// 3 - intra/intra +static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + return left_intra && above_intra ? 3 : left_intra || above_intra; + } else if (has_above || has_left) { // one edge available + return 2 * !is_inter_block(has_above ? above_mi : left_mi); + } + return 0; +} static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, const MACROBLOCKD *xd) { - return cm->fc->intra_inter_prob[vp9_get_intra_inter_context(xd)]; + return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; } int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); @@ -110,15 +131,15 @@ static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { - const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type]; - const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; - const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; - const int has_above = xd->up_available; - const int has_left = xd->left_available; - int above_ctx = (has_above && !above_mbmi->skip) ? (int)above_mbmi->tx_size - : max_tx_size; - int left_ctx = (has_left && !left_mbmi->skip) ? (int)left_mbmi->tx_size - : max_tx_size; + const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size + : max_tx_size; + int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size + : max_tx_size; if (!has_left) left_ctx = above_ctx; diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c index d8c14ecc8..84718e970 100644 --- a/libvpx/vp9/common/vp9_reconinter.c +++ b/libvpx/vp9/common/vp9_reconinter.c @@ -20,19 +20,6 @@ #include "vp9/common/vp9_reconintra.h" #if CONFIG_VP9_HIGHBITDEPTH -void high_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd) { - sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( - src, src_stride, dst, dst_stride, - kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); -} - void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, @@ -50,8 +37,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); - high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, - sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd); + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, + bd); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -159,8 +147,8 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; - const int is_compound = has_second_ref(&mi->mbmi); - const InterpKernel *kernel = vp9_filter_kernels[mi->mbmi.interp_filter]; + const int is_compound = has_second_ref(mi); + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { @@ -168,9 +156,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, struct buf_2d *const pre_buf = &pd->pre[ref]; struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - const MV mv = mi->mbmi.sb_type < BLOCK_8X8 + const MV mv = mi->sb_type < BLOCK_8X8 ? average_split_mvs(pd, mi, ref, block) - : mi->mbmi.mv[ref].as_mv; + : mi->mv[ref].as_mv; // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling @@ -190,6 +178,12 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, // Co-ordinate of containing block to pixel precision. const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); +#endif if (plane == 0) pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; else if (plane == 1) @@ -216,9 +210,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, - subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, - xd->bd); + highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); } else { inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); @@ -244,7 +238,7 @@ static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; - if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { + if (xd->mi[0]->sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h index 7d907748e..07745e3aa 100644 --- a/libvpx/vp9/common/vp9_reconinter.h +++ b/libvpx/vp9/common/vp9_reconinter.h @@ -34,14 +34,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, } #if CONFIG_VP9_HIGHBITDEPTH -void high_inter_predictor(const uint8_t *src, int src_stride, - uint8_t *dst, int dst_stride, - const int subpel_x, - const int subpel_y, - const struct scale_factors *sf, - int w, int h, int ref, - const InterpKernel *kernel, - int xs, int ys, int bd); +static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys, int bd) { + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); +} #endif // CONFIG_VP9_HIGHBITDEPTH MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c index 3d84a2883..445785835 100644 --- a/libvpx/vp9/common/vp9_reconintra.c +++ b/libvpx/vp9/common/vp9_reconintra.c @@ -133,12 +133,16 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, int frame_width, frame_height; int x0, y0; const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int need_left = extend_modes[mode] & NEED_LEFT; + const int need_above = extend_modes[mode] & NEED_ABOVE; + const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; int base = 128 << (bd - 8); // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z // 129 C D .. W X // 129 E F .. U V // 129 G H .. S T T T T T + // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. // Get current frame pointer, width and height. if (plane == 0) { @@ -153,79 +157,106 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd, x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; - // left - if (left_available) { - if (xd->mb_to_bottom_edge < 0) { - /* slower path if the block needs border extension */ - if (y0 + bs <= frame_height) { - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + // NEED_LEFT + if (need_left) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } } else { - const int extend_bottom = frame_height - y0; - for (i = 0; i < extend_bottom; ++i) + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; - for (; i < bs; ++i) - left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; } } else { - /* faster path if the block does not need extension */ - for (i = 0; i < bs; ++i) - left_col[i] = ref[i * ref_stride - 1]; + vpx_memset16(left_col, base + 1, bs); } - } else { - // TODO(Peter): this value should probably change for high bitdepth - vpx_memset16(left_col, base + 1, bs); } - // TODO(hkuang) do not extend 2*bs pixels for all modes. - // above - if (up_available) { - const uint16_t *above_ref = ref - ref_stride; - if (xd->mb_to_right_edge < 0) { - /* slower path if the block needs border extension */ - if (x0 + 2 * bs <= frame_width) { - if (right_available && bs == 4) { - memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + // NEED_ABOVE + if (need_above) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); } - } else if (x0 + bs <= frame_width) { - const int r = frame_width - x0; - if (right_available && bs == 4) { + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + vpx_memset16(above_row, base - 1, bs); + above_row[-1] = base - 1; + } + } + + // NEED_ABOVERIGHT + if (need_aboveright) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; memcpy(above_row, above_ref, r * sizeof(above_row[0])); vpx_memset16(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - vpx_memset16(above_row + bs, above_row[bs - 1], bs); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); + else + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + above_row[-1] = left_available ? above_ref[-1] : (base + 1); } - } else if (x0 <= frame_width) { - const int r = frame_width - x0; - memcpy(above_row, above_ref, r * sizeof(above_row[0])); - vpx_memset16(above_row + r, above_row[r - 1], - x0 + 2 * bs - frame_width); } - // TODO(Peter) this value should probably change for high bitdepth - above_row[-1] = left_available ? above_ref[-1] : (base+1); } else { - /* faster path if the block does not need extension */ - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; - } else { - memcpy(above_row, above_ref, bs * sizeof(above_row[0])); - if (bs == 4 && right_available) - memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); - else - vpx_memset16(above_row + bs, above_row[bs - 1], bs); - // TODO(Peter): this value should probably change for high bitdepth - above_row[-1] = left_available ? above_ref[-1] : (base+1); - } + vpx_memset16(above_row, base - 1, bs * 2); + above_row[-1] = base - 1; } - } else { - vpx_memset16(above_row, base - 1, bs * 2); - // TODO(Peter): this value should probably change for high bitdepth - above_row[-1] = base - 1; } // predict @@ -391,8 +422,8 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int aoff, int loff, int plane) { const int bw = (1 << bwl_in); const int txw = (1 << tx_size); - const int have_top = loff || xd->up_available; - const int have_left = aoff || xd->left_available; + const int have_top = loff || (xd->above_mi != NULL); + const int have_left = aoff || (xd->left_mi != NULL); const int have_right = (aoff + txw) < bw; const int x = aoff * 4; const int y = loff * 4; diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl index 5bf71ef9f..846133674 100644 --- a/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8 specialize qw/vp9_post_proc_down_and_across sse2/; $vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm; -add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; -specialize qw/vp9_plane_add_noise sse2/; -$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt; - add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight"; specialize qw/vp9_filter_by_weight16x16 sse2 msa/; @@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"; specialize qw/vp9_highbd_post_proc_down_and_across/; - - add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"; - specialize qw/vp9_highbd_plane_add_noise/; } # @@ -194,42 +187,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { -add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p"; -specialize qw/vp9_avg_8x8 sse2 neon msa/; - -add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p"; -specialize qw/vp9_avg_4x4 sse2 msa/; - -add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; -specialize qw/vp9_minmax_8x8 sse2/; - -add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff"; -specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc"; - -add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff"; -specialize qw/vp9_hadamard_16x16 sse2/; - -add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length"; -specialize qw/vp9_satd sse2/; - -add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height"; -specialize qw/vp9_int_pro_row sse2 neon/; - -add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width"; -specialize qw/vp9_int_pro_col sse2 neon/; - -add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl"; -specialize qw/vp9_vector_var neon sse2/; - -if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p"; - specialize qw/vp9_highbd_avg_8x8/; - add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p"; - specialize qw/vp9_highbd_avg_4x4/; - add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; - specialize qw/vp9_highbd_minmax_8x8/; -} - # ENCODEMB INVOKE # @@ -288,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fht16x16 sse2/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4/, "$mmx_x86inc"; + specialize qw/vp9_fwht4x4/, "$sse2_x86inc"; } else { add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; specialize qw/vp9_fht4x4 sse2 msa/; @@ -300,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vp9_fht16x16 sse2 msa/; add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc"; + specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc"; } # @@ -312,10 +269,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3; $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; -specialize qw/vp9_diamond_search_sad/; - -add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; -specialize qw/vp9_full_range_search/; +specialize qw/vp9_diamond_search_sad avx/; add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; specialize qw/vp9_temporal_filter_apply sse2 msa/; @@ -349,6 +303,15 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { } # End vp9_high encoder functions +# +# frame based scale +# +if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { +} else { + add_proto qw/void vp9_scale_and_extend_frame/, "const struct yv12_buffer_config *src, struct yv12_buffer_config *dst"; + specialize qw/vp9_scale_and_extend_frame ssse3/; +} + } # end encoder functions 1; diff --git a/libvpx/vp9/common/vp9_scan.c b/libvpx/vp9/common/vp9_scan.c index d6fb8b2d7..8b8b09f4a 100644 --- a/libvpx/vp9/common/vp9_scan.c +++ b/libvpx/vp9/common/vp9_scan.c @@ -229,10 +229,8 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { 990, 959, 1022, 991, 1023, }; -// Neighborhood 5-tuples for various scans and blocksizes, -// in {top, left, topleft, topright, bottomleft} order -// for each position in raster scan order. -// -1 indicates the neighbor does not exist. +// Neighborhood 2-tuples for various scans and blocksizes, +// in {top, left} order for each position in corresponding scan order. DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, diff --git a/libvpx/vp9/common/vp9_scan.h b/libvpx/vp9/common/vp9_scan.h index 1d86b5cfe..4c1ee8107 100644 --- a/libvpx/vp9/common/vp9_scan.h +++ b/libvpx/vp9/common/vp9_scan.h @@ -42,7 +42,7 @@ static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx) { const MODE_INFO *const mi = xd->mi[0]; - if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) { + if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { return &vp9_default_scan_orders[tx_size]; } else { const PREDICTION_MODE mode = get_y_mode(mi, block_idx); diff --git a/libvpx/vp9/common/vp9_seg_common.c b/libvpx/vp9/common/vp9_seg_common.c index c8ef618b7..7af61629a 100644 --- a/libvpx/vp9/common/vp9_seg_common.c +++ b/libvpx/vp9/common/vp9_seg_common.c @@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = { void vp9_clearall_segfeatures(struct segmentation *seg) { vp9_zero(seg->feature_data); vp9_zero(seg->feature_mask); + seg->aq_av_offset = 0; } void vp9_enable_segfeature(struct segmentation *seg, int segment_id, diff --git a/libvpx/vp9/common/vp9_seg_common.h b/libvpx/vp9/common/vp9_seg_common.h index 5b75d8d4e..99a9440c1 100644 --- a/libvpx/vp9/common/vp9_seg_common.h +++ b/libvpx/vp9/common/vp9_seg_common.h @@ -46,7 +46,8 @@ struct segmentation { vpx_prob pred_probs[PREDICTION_PROBS]; int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; - unsigned int feature_mask[MAX_SEGMENTS]; + uint32_t feature_mask[MAX_SEGMENTS]; + int aq_av_offset; }; static INLINE int segfeature_active(const struct segmentation *seg, diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c index 8d312d03f..1c77b57ff 100644 --- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "./vp9_rtcd.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" diff --git a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm index ec8bfdb18..430762815 100644 --- a/libvpx/vp9/common/x86/vp9_postproc_sse2.asm +++ b/libvpx/vp9/common/x86/vp9_postproc_sse2.asm @@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm): %undef flimit4 -;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise, -; unsigned char blackclamp[16], -; unsigned char whiteclamp[16], -; unsigned char bothclamp[16], -; unsigned int width, unsigned int height, int pitch) -global sym(vp9_plane_add_noise_wmt) PRIVATE -sym(vp9_plane_add_noise_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - GET_GOT rbx - push rsi - push rdi - ; end prolog - -.addnoise_loop: - call sym(LIBVPX_RAND) WRT_PLT - mov rcx, arg(1) ;noise - and rax, 0xff - add rcx, rax - - ; we rely on the fact that the clamping vectors are stored contiguously - ; in black/white/both order. Note that we have to reload this here because - ; rdx could be trashed by rand() - mov rdx, arg(2) ; blackclamp - - - mov rdi, rcx - movsxd rcx, dword arg(5) ;[Width] - mov rsi, arg(0) ;Pos - xor rax,rax - -.addnoise_nextset: - movdqu xmm1,[rsi+rax] ; get the source - - psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise - paddusb xmm1, [rdx+32] ;bothclamp - psubusb xmm1, [rdx+16] ;whiteclamp - - movdqu xmm2,[rdi+rax] ; get the noise for this line - paddb xmm1,xmm2 ; add it in - movdqu [rsi+rax],xmm1 ; store the result - - add rax,16 ; move to the next line - - cmp rax, rcx - jl .addnoise_nextset - - movsxd rax, dword arg(7) ; Pitch - add arg(0), rax ; Start += Pitch - sub dword arg(6), 1 ; Height -= 1 - jg .addnoise_loop - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - SECTION_RODATA align 16 rd42: |