diff options
Diffstat (limited to 'libvpx/vp9/encoder/vp9_rdopt.c')
-rw-r--r-- | libvpx/vp9/encoder/vp9_rdopt.c | 2185 |
1 files changed, 760 insertions, 1425 deletions
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c index dcd28525a..e368037a6 100644 --- a/libvpx/vp9/encoder/vp9_rdopt.c +++ b/libvpx/vp9/encoder/vp9_rdopt.c @@ -9,9 +9,7 @@ */ #include <assert.h> -#include <limits.h> #include <math.h> -#include <stdio.h> #include "./vp9_rtcd.h" @@ -22,7 +20,6 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_pragmas.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" @@ -33,21 +30,16 @@ #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" +#include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_mcomp.h" -#include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_rdopt.h" -#include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_variance.h" #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 -#define RD_THRESH_POW 1.25 -#define RD_MULT_EPB_RATIO 64 - -/* Factor to weigh the rate for switchable interp filters */ -#define SWITCHABLE_INTERP_RATE_FACTOR 1 #define LAST_FRAME_MODE_MASK 0xFFEDCD60 #define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0 @@ -56,7 +48,7 @@ #define MIN_EARLY_TERM_INDEX 3 typedef struct { - MB_PREDICTION_MODE mode; + PREDICTION_MODE mode; MV_REFERENCE_FRAME ref_frame[2]; } MODE_DEFINITION; @@ -81,7 +73,7 @@ struct rdcost_block_args { const scan_order *so; }; -const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { +static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARESTMV, {LAST_FRAME, NONE}}, {NEARESTMV, {ALTREF_FRAME, NONE}}, {NEARESTMV, {GOLDEN_FRAME, NONE}}, @@ -121,7 +113,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {D45_PRED, {INTRA_FRAME, NONE}}, }; -const REF_DEFINITION vp9_ref_order[MAX_REFS] = { +static const REF_DEFINITION vp9_ref_order[MAX_REFS] = { {{LAST_FRAME, NONE}}, {{GOLDEN_FRAME, NONE}}, {{ALTREF_FRAME, NONE}}, @@ -130,13 +122,6 @@ const REF_DEFINITION vp9_ref_order[MAX_REFS] = { {{INTRA_FRAME, NONE}}, }; -// The baseline rd thresholds for breaking out of the rd loop for -// certain modes are assumed to be based on 8x8 blocks. -// This table is used to correct for blocks size. -// The factors here are << 2 (2 = x0.5, 32 = x8 etc). -static int rd_thresh_block_size_factor[BLOCK_SIZES] = - {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; - static int raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block, int stride) { const int bw = b_width_log2(plane_bsize); @@ -150,276 +135,28 @@ static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize, return base + raster_block_offset(plane_bsize, raster_block, stride); } -static void fill_mode_costs(VP9_COMP *cpi) { - MACROBLOCK *const x = &cpi->mb; - const FRAME_CONTEXT *const fc = &cpi->common.fc; - int i, j; - - for (i = 0; i < INTRA_MODES; i++) - for (j = 0; j < INTRA_MODES; j++) - vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], - vp9_intra_mode_tree); - - // TODO(rbultje) separate tables for superblock costing? - vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME], - vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME], - fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - vp9_cost_tokens((int *)x->switchable_interp_costs[i], - fc->switchable_interp_prob[i], vp9_switchable_interp_tree); -} - -static void fill_token_costs(vp9_coeff_cost *c, - vp9_coeff_probs_model (*p)[PLANE_TYPES]) { - int i, j, k, l; - TX_SIZE t; - for (t = TX_4X4; t <= TX_32X32; ++t) - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - vp9_prob probs[ENTROPY_NODES]; - vp9_model_to_full_probs(p[t][i][j][k][l], probs); - vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, - vp9_coef_tree); - vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs, - vp9_coef_tree); - assert(c[t][i][j][k][0][l][EOB_TOKEN] == - c[t][i][j][k][1][l][EOB_TOKEN]); - } -} - -static const int rd_iifactor[32] = { - 4, 4, 3, 2, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// 3* dc_qlookup[Q]*dc_qlookup[Q]; - -/* values are now correlated to quantizer */ -static int sad_per_bit16lut[QINDEX_RANGE]; -static int sad_per_bit4lut[QINDEX_RANGE]; - -void vp9_init_me_luts() { - int i; - - // Initialize the sad lut tables using a formulaic calculation for now - // This is to make it easier to resolve the impact of experimental changes - // to the quantizer tables. - for (i = 0; i < QINDEX_RANGE; i++) { - const double q = vp9_convert_qindex_to_q(i); - sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107); - sad_per_bit4lut[i] = (int)(0.063 * q + 2.742); - } -} - -int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { - const int q = vp9_dc_quant(qindex, 0); - // TODO(debargha): Adjust the function below - int rdmult = 88 * q * q / 25; - if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { - if (cpi->twopass.next_iiratio > 31) - rdmult += (rdmult * rd_iifactor[31]) >> 4; - else - rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; - } - return rdmult; -} - -static int compute_rd_thresh_factor(int qindex) { - // TODO(debargha): Adjust the function below - const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12); - return MAX(q, 8); -} - -void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { - cpi->mb.sadperbit16 = sad_per_bit16lut[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut[qindex]; -} - -static void set_block_thresholds(VP9_COMP *cpi) { - const VP9_COMMON *const cm = &cpi->common; - int i, bsize, segment_id; - - for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { - const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id, - cm->base_qindex) + cm->y_dc_delta_q, - 0, MAXQ); - const int q = compute_rd_thresh_factor(qindex); - - for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { - // Threshold here seems unnecessarily harsh but fine given actual - // range of values used for cpi->sf.thresh_mult[]. - const int t = q * rd_thresh_block_size_factor[bsize]; - const int thresh_max = INT_MAX / t; - - for (i = 0; i < MAX_MODES; ++i) - cpi->rd_threshes[segment_id][bsize][i] = - cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 - : INT_MAX; - - for (i = 0; i < MAX_REFS; ++i) { - cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - cpi->rd_thresh_mult_sub8x8[i] < thresh_max - ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 - : INT_MAX; - } - } - } -} - -void vp9_initialize_rd_consts(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; +static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, + int m, int n, int min_plane, int max_plane) { int i; - vp9_clear_system_state(); - - cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) - cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - - x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; - x->errorperbit += (x->errorperbit == 0); - - x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && - cm->frame_type != KEY_FRAME) ? 0 : 1; - - set_block_thresholds(cpi); - - if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { - fill_token_costs(x->token_costs, cm->fc.coef_probs); - - for (i = 0; i < PARTITION_CONTEXTS; i++) - vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i), - vp9_partition_tree); - } - - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || - cm->frame_type == KEY_FRAME) { - fill_mode_costs(cpi); - - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table(x->nmvjointcost, - cm->allow_high_precision_mv ? x->nmvcost_hp - : x->nmvcost, - &cm->fc.nmvc, cm->allow_high_precision_mv); + for (i = min_plane; i < max_plane; ++i) { + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &x->e_mbd.plane[i]; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)x->inter_mode_cost[i], - cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); - } - } -} + p->coeff = ctx->coeff_pbuf[i][m]; + p->qcoeff = ctx->qcoeff_pbuf[i][m]; + pd->dqcoeff = ctx->dqcoeff_pbuf[i][m]; + p->eobs = ctx->eobs_pbuf[i][m]; -static const int MAX_XSQ_Q10 = 245727; - -static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { - // NOTE: The tables below must be of the same size - - // The functions described below are sampled at the four most significant - // bits of x^2 + 8 / 256 - - // Normalized rate - // This table models the rate for a Laplacian source - // source with given variance when quantized with a uniform quantizer - // with given stepsize. The closed form expression is: - // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], - // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), - // and H(x) is the binary entropy function. - static const int rate_tab_q10[] = { - 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, - 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811, - 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, - 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, - 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130, - 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, - 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, - 1159, 1086, 1021, 963, 911, 864, 821, 781, - 745, 680, 623, 574, 530, 490, 455, 424, - 395, 345, 304, 269, 239, 213, 190, 171, - 154, 126, 104, 87, 73, 61, 52, 44, - 38, 28, 21, 16, 12, 10, 8, 6, - 5, 3, 2, 1, 1, 1, 0, 0, - }; - // Normalized distortion - // This table models the normalized distortion for a Laplacian source - // source with given variance when quantized with a uniform quantizer - // with given stepsize. The closed form expression is: - // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) - // where x = qpstep / sqrt(variance) - // Note the actual distortion is Dn * variance. - static const int dist_tab_q10[] = { - 0, 0, 1, 1, 1, 2, 2, 2, - 3, 3, 4, 5, 5, 6, 7, 7, - 8, 9, 11, 12, 13, 15, 16, 17, - 18, 21, 24, 26, 29, 31, 34, 36, - 39, 44, 49, 54, 59, 64, 69, 73, - 78, 88, 97, 106, 115, 124, 133, 142, - 151, 167, 184, 200, 215, 231, 245, 260, - 274, 301, 327, 351, 375, 397, 418, 439, - 458, 495, 528, 559, 587, 613, 637, 659, - 680, 717, 749, 777, 801, 823, 842, 859, - 874, 899, 919, 936, 949, 960, 969, 977, - 983, 994, 1001, 1006, 1010, 1013, 1015, 1017, - 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, - }; - static const int xsq_iq_q10[] = { - 0, 4, 8, 12, 16, 20, 24, 28, - 32, 40, 48, 56, 64, 72, 80, 88, - 96, 112, 128, 144, 160, 176, 192, 208, - 224, 256, 288, 320, 352, 384, 416, 448, - 480, 544, 608, 672, 736, 800, 864, 928, - 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888, - 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808, - 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648, - 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, - 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, - 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408, - 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848, - 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728, - }; - /* - static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]); - assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size); - assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size); - assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]); - */ - int tmp = (xsq_q10 >> 2) + 8; - int k = get_msb(tmp) - 3; - int xq = (k << 3) + ((tmp >> k) & 0x7); - const int one_q10 = 1 << 10; - const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); - const int b_q10 = one_q10 - a_q10; - *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; - *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; -} + ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n]; + ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n]; + ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n]; + ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n]; -void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, - unsigned int qstep, int *rate, - int64_t *dist) { - // This function models the rate and distortion for a Laplacian - // source with given variance when quantized with a uniform quantizer - // with given stepsize. The closed form expressions are in: - // Hang and Chen, "Source Model for transform video coder and its - // application - Part I: Fundamental Theory", IEEE Trans. Circ. - // Sys. for Video Tech., April 1997. - if (var == 0) { - *rate = 0; - *dist = 0; - } else { - int d_q10, r_q10; - const uint64_t xsq_q10_64 = - ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var; - const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ? - MAX_XSQ_Q10 : (int)xsq_q10_64; - model_rd_norm(xsq_q10, &r_q10, &d_q10); - *rate = (n * r_q10 + 2) >> 2; - *dist = (var * (int64_t)d_q10 + 512) >> 10; + ctx->coeff_pbuf[i][n] = p->coeff; + ctx->qcoeff_pbuf[i][n] = p->qcoeff; + ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff; + ctx->eobs_pbuf[i][n] = p->eobs; } } @@ -434,20 +171,32 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int64_t dist_sum = 0; const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; + const int shift = 8; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); + const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, + &sse); + + if (!x->select_tx_size) { + if (sse < p->quant_thred[0] >> shift) + x->skip_txfm[i] = 1; + else if (var < p->quant_thred[1] >> shift) + x->skip_txfm[i] = 2; + else + x->skip_txfm[i] = 0; + } + x->bsse[i] = sse; if (i == 0) x->pred_sse[ref] = sse; // Fast approximate the modelling function. - if (cpi->speed > 4) { + if (cpi->oxcf.speed > 4) { int64_t rate; int64_t dist; int64_t square_error = sse; @@ -474,55 +223,6 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_dist_sum = dist_sum << 4; } -static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, - TX_SIZE tx_size, - MACROBLOCK *x, MACROBLOCKD *xd, - int *out_rate_sum, int64_t *out_dist_sum, - int *out_skip) { - int j, k; - BLOCK_SIZE bs; - const struct macroblock_plane *const p = &x->plane[0]; - const struct macroblockd_plane *const pd = &xd->plane[0]; - const int width = 4 * num_4x4_blocks_wide_lookup[bsize]; - const int height = 4 * num_4x4_blocks_high_lookup[bsize]; - int rate_sum = 0; - int64_t dist_sum = 0; - const int t = 4 << tx_size; - - if (tx_size == TX_4X4) { - bs = BLOCK_4X4; - } else if (tx_size == TX_8X8) { - bs = BLOCK_8X8; - } else if (tx_size == TX_16X16) { - bs = BLOCK_16X16; - } else if (tx_size == TX_32X32) { - bs = BLOCK_32X32; - } else { - assert(0); - } - - *out_skip = 1; - for (j = 0; j < height; j += t) { - for (k = 0; k < width; k += t) { - int rate; - int64_t dist; - unsigned int sse; - cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride, - &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride, - &sse); - // sse works better than var, since there is no dc prediction used - vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, - &rate, &dist); - rate_sum += rate; - dist_sum += dist; - *out_skip &= (rate < 1024); - } - } - - *out_rate_sum = rate_sum; - *out_dist_sum = dist_sum << 4; -} - int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz) { int i; @@ -570,7 +270,7 @@ static INLINE int cost_coeffs(MACROBLOCK *x, int c, cost; // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size - : get_uv_tx_size(mbmi) == tx_size); + : get_uv_tx_size(mbmi, pd) == tx_size); if (eob == 0) { // single eob token @@ -669,12 +369,32 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (args->skip) return; - if (!is_inter_block(mbmi)) + if (!is_inter_block(mbmi)) { vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip); - else - vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); + } else { + if (x->skip_txfm[plane] == 0) { + // full forward transform and quantization + vp9_xform_quant(x, plane, block, plane_bsize, tx_size); + dist_block(plane, block, tx_size, args); + } else if (x->skip_txfm[plane] == 2) { + // compute DC coefficient + int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); + int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); + vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + args->sse = x->bsse[plane] << 4; + args->dist = args->sse; + if (!x->plane[plane].eobs[block]) + args->dist = args->sse - ((coeff[0] * coeff[0] - + (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2); + } else { + // skip forward transform + x->plane[plane].eobs[block] = 0; + args->sse = x->bsse[plane] << 4; + args->dist = args->sse; + } + } - dist_block(plane, block, tx_size, args); rate_block(plane, block, plane_bsize, tx_size, args); rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); @@ -696,45 +416,6 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } } -void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, - const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[16], - ENTROPY_CONTEXT t_left[16]) { - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; - const ENTROPY_CONTEXT *const above = pd->above_context; - const ENTROPY_CONTEXT *const left = pd->left_context; - - int i; - switch (tx_size) { - case TX_4X4: - vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); - vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); - break; - case TX_8X8: - for (i = 0; i < num_4x4_w; i += 2) - t_above[i] = !!*(const uint16_t *)&above[i]; - for (i = 0; i < num_4x4_h; i += 2) - t_left[i] = !!*(const uint16_t *)&left[i]; - break; - case TX_16X16: - for (i = 0; i < num_4x4_w; i += 4) - t_above[i] = !!*(const uint32_t *)&above[i]; - for (i = 0; i < num_4x4_h; i += 4) - t_left[i] = !!*(const uint32_t *)&left[i]; - break; - case TX_32X32: - for (i = 0; i < num_4x4_w; i += 8) - t_above[i] = !!*(const uint64_t *)&above[i]; - for (i = 0; i < num_4x4_h; i += 8) - t_left[i] = !!*(const uint64_t *)&left[i]; - break; - default: - assert(0 && "Invalid transform size."); - } -} - static void txfm_rd_in_plane(MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, @@ -743,7 +424,8 @@ static void txfm_rd_in_plane(MACROBLOCK *x, int use_fast_coef_casting) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblockd_plane *const pd = &xd->plane[plane]; - struct rdcost_block_args args = { 0 }; + struct rdcost_block_args args; + vp9_zero(args); args.x = x; args.best_rd = ref_best_rd; args.use_fast_coef_costing = use_fast_coef_casting; @@ -770,11 +452,11 @@ static void txfm_rd_in_plane(MACROBLOCK *x, } } -static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, - int *rate, int64_t *distortion, - int *skip, int64_t *sse, - int64_t ref_best_rd, - BLOCK_SIZE bs) { +static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, + int *skip, int64_t *sse, + int64_t ref_best_rd, + BLOCK_SIZE bs) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; @@ -784,27 +466,31 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = MIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(x, rate, distortion, skip, - &sse[mbmi->tx_size], ref_best_rd, 0, bs, + sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); cpi->tx_stepdown_count[0]++; } -static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, - int (*r)[2], int *rate, - int64_t *d, int64_t *distortion, - int *s, int *skip, - int64_t tx_cache[TX_MODES], - BLOCK_SIZE bs) { +static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, + int *rate, + int64_t *distortion, + int *skip, + int64_t *psse, + int64_t tx_cache[TX_MODES], + int64_t ref_best_rd, + BLOCK_SIZE bs) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); + int r[TX_SIZES][2], s[TX_SIZES]; + int64_t d[TX_SIZES], sse[TX_SIZES]; int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}}; - int n, m; + TX_SIZE n, m; int s0, s1; const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; int64_t best_rd = INT64_MAX; @@ -816,6 +502,9 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_tx_size; n++) { + txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n], + &sse[n], ref_best_rd, 0, bs, n, + cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { for (m = 0; m <= n - (n == max_tx_size); m++) { @@ -846,6 +535,7 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, *distortion = d[mbmi->tx_size]; *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT]; *skip = s[mbmi->tx_size]; + *psse = sse[mbmi->tx_size]; tx_cache[ONLY_4X4] = rd[TX_4X4][0]; tx_cache[ALLOW_8X8] = rd[TX_8X8][0]; @@ -867,159 +557,49 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } } -static int64_t scaled_rd_cost(int rdmult, int rddiv, - int rate, int64_t dist, double scale) { - return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale); -} - -static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, - int (*r)[2], int *rate, - int64_t *d, int64_t *distortion, - int *s, int *skip, int64_t *sse, - int64_t ref_best_rd, - BLOCK_SIZE bs) { - const TX_SIZE max_tx_size = max_txsize_lookup[bs]; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); - int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX}, - {INT64_MAX, INT64_MAX}, - {INT64_MAX, INT64_MAX}, - {INT64_MAX, INT64_MAX}}; - int n, m; - int s0, s1; - double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00}; - const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; - int64_t best_rd = INT64_MAX; - TX_SIZE best_tx = TX_4X4; - - const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); - assert(skip_prob > 0); - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - - for (n = TX_4X4; n <= max_tx_size; n++) { - double scale = scale_rd[n]; - r[n][1] = r[n][0]; - for (m = 0; m <= n - (n == max_tx_size); m++) { - if (m == n) - r[n][1] += vp9_cost_zero(tx_probs[m]); - else - r[n][1] += vp9_cost_one(tx_probs[m]); - } - if (s[n]) { - rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n], - scale); - } else { - rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n], - scale); - rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n], - scale); - } - if (rd[n][1] < best_rd) { - best_rd = rd[n][1]; - best_tx = n; - } - } - - mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ? - best_tx : MIN(max_tx_size, max_mode_tx_size); - - // Actually encode using the chosen mode if a model was used, but do not - // update the r, d costs - txfm_rd_in_plane(x, rate, distortion, skip, - &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size, - cpi->sf.use_fast_coef_costing); - - if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { - cpi->tx_stepdown_count[0]++; - } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) { - cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; - } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) { - cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; - } else { - cpi->tx_stepdown_count[max_tx_size - TX_4X4]++; - } -} - static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *psse, BLOCK_SIZE bs, int64_t txfm_cache[TX_MODES], int64_t ref_best_rd) { - int r[TX_SIZES][2], s[TX_SIZES]; - int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const TX_SIZE max_tx_size = max_txsize_lookup[bs]; - TX_SIZE tx_size; - assert(bs == mbmi->sb_type); + assert(bs == xd->mi[0]->mbmi.sb_type); vp9_subtract_plane(x, bs, 0); - if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { + if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); - choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, - ref_best_rd, bs); - if (psse) - *psse = sse[mbmi->tx_size]; - return; - } - - if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) { - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd, - &r[tx_size][0], &d[tx_size], &s[tx_size]); - choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, - skip, sse, ref_best_rd, bs); + choose_largest_tx_size(cpi, x, rate, distortion, skip, psse, ref_best_rd, + bs); } else { - for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], - &s[tx_size], &sse[tx_size], - ref_best_rd, 0, bs, tx_size, - cpi->sf.use_fast_coef_costing); - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, - skip, txfm_cache, bs); + choose_tx_size_from_rd(cpi, x, rate, distortion, skip, psse, + txfm_cache, ref_best_rd, bs); } - if (psse) - *psse = sse[mbmi->tx_size]; } static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, - int64_t *psse, BLOCK_SIZE bs, + BLOCK_SIZE bs, int64_t txfm_cache[TX_MODES], int64_t ref_best_rd) { - int64_t sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int64_t sse; - assert(bs == mbmi->sb_type); - if (cpi->sf.tx_size_search_method != USE_FULL_RD) { + assert(bs == xd->mi[0]->mbmi.sb_type); + if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) { vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t)); - choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, - ref_best_rd, bs); + choose_largest_tx_size(cpi, x, rate, distortion, skip, &sse, ref_best_rd, + bs); } else { - int r[TX_SIZES][2], s[TX_SIZES]; - int64_t d[TX_SIZES]; - TX_SIZE tx_size; - for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size) - txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], - &s[tx_size], &sse[tx_size], - ref_best_rd, 0, bs, tx_size, - cpi->sf.use_fast_coef_costing); - choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, - skip, txfm_cache, bs); + choose_tx_size_from_rd(cpi, x, rate, distortion, skip, &sse, + txfm_cache, ref_best_rd, bs); } - if (psse) - *psse = sse[mbmi->tx_size]; } -static int conditional_skipintra(MB_PREDICTION_MODE mode, - MB_PREDICTION_MODE best_intra_mode) { +static int conditional_skipintra(PREDICTION_MODE mode, + PREDICTION_MODE best_intra_mode) { if (mode == D117_PRED && best_intra_mode != V_PRED && best_intra_mode != D135_PRED) @@ -1040,13 +620,13 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode, } static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, - MB_PREDICTION_MODE *best_mode, + PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, BLOCK_SIZE bsize, int64_t rd_thresh) { - MB_PREDICTION_MODE mode; + PREDICTION_MODE mode; MACROBLOCKD *const xd = &x->e_mbd; int64_t best_rd = rd_thresh; @@ -1184,7 +764,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; - const int *bmode_costs = mb->mbmode_cost; + const int *bmode_costs = cpi->mbmode_cost; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); @@ -1192,15 +772,15 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - MB_PREDICTION_MODE best_mode = DC_PRED; + PREDICTION_MODE best_mode = DC_PRED; int r = INT_MAX, ry = INT_MAX; int64_t d = INT64_MAX, this_rd = INT64_MAX; i = idy * 2 + idx; if (cpi->common.frame_type == KEY_FRAME) { - const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i); - const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i); + const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i); + const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i); - bmode_costs = mb->y_mode_costs[A][L]; + bmode_costs = cpi->y_mode_costs[A][L]; } this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, @@ -1239,15 +819,15 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int64_t tx_cache[TX_MODES], int64_t best_rd) { - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE mode_selected = DC_PRED; + PREDICTION_MODE mode; + PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; int i; - int *bmode_costs = x->mbmode_cost; + int *bmode_costs = cpi->mbmode_cost; if (cpi->sf.tx_size_search_method == USE_FULL_RD) for (i = 0; i < TX_MODES; i++) @@ -1259,19 +839,16 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, MODE_INFO *above_mi = xd->mi[-xd->mi_stride]; MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL; - if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode))) - continue; - if (cpi->common.frame_type == KEY_FRAME) { - const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); - const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); + const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); + const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); - bmode_costs = x->y_mode_costs[A][L]; + bmode_costs = cpi->y_mode_costs[A][L]; } mic->mbmi.mode = mode; intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, - &s, NULL, bsize, local_tx_cache, best_rd); + &s, bsize, local_tx_cache, best_rd); if (this_rate_tokenonly == INT_MAX) continue; @@ -1312,7 +889,7 @@ static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi); + const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); int plane; int pnrate = 0, pnskip = 1; int64_t pndist = 0, pnsse = 0; @@ -1333,7 +910,7 @@ static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, - ref_best_rd, plane, bsize, uv_txfm_size, + ref_best_rd, plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing); if (pnrate == INT_MAX) goto term; @@ -1358,8 +935,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, TX_SIZE max_tx_size) { MACROBLOCKD *xd = &x->e_mbd; - MB_PREDICTION_MODE mode; - MB_PREDICTION_MODE mode_selected = DC_PRED; + PREDICTION_MODE mode; + PREDICTION_MODE mode_selected = DC_PRED; int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; @@ -1375,7 +952,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rate_tokenonly == INT_MAX) continue; this_rate = this_rate_tokenonly + - x->intra_uv_mode_cost[cpi->common.frame_type][mode]; + cpi->intra_uv_mode_cost[cpi->common.frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { @@ -1385,27 +962,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; - if (!x->select_txfm_size) { - int i; - struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = xd->plane; - for (i = 1; i < MAX_MB_PLANE; ++i) { - p[i].coeff = ctx->coeff_pbuf[i][2]; - p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; - pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; - p[i].eobs = ctx->eobs_pbuf[i][2]; - - ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0]; - ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0]; - ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0]; - ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0]; - - ctx->coeff_pbuf[i][0] = p[i].coeff; - ctx->qcoeff_pbuf[i][0] = p[i].qcoeff; - ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; - ctx->eobs_pbuf[i][0] = p[i].eobs; - } - } + if (!x->select_tx_size) + swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE); } } @@ -1423,7 +981,7 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); - *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; + *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED]; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } @@ -1431,7 +989,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, - MB_PREDICTION_MODE *mode_uv) { + PREDICTION_MODE *mode_uv) { MACROBLOCK *const x = &cpi->mb; // Use an estimated rd for uv_intra based on DC_PRED if the @@ -1449,18 +1007,10 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; } -static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode, +static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, int mode_context) { - const MACROBLOCK *const x = &cpi->mb; - const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id; - - // Don't account for mode here if segment skip is enabled. - if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { - assert(is_inter_mode(mode)); - return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; - } else { - return 0; - } + assert(is_inter_mode(mode)); + return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; } static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -1470,13 +1020,12 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int_mv single_newmv[MAX_REF_FRAMES], int *rate_mv); -static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, - MB_PREDICTION_MODE mode, - int_mv this_mv[2], - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int_mv seg_mvs[MAX_REF_FRAMES], - int_mv *best_ref_mv[2], - const int *mvjcost, int *mvcost[2]) { +static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, + PREDICTION_MODE mode, int_mv this_mv[2], + int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], + int_mv seg_mvs[MAX_REF_FRAMES], + int_mv *best_ref_mv[2], const int *mvjcost, + int *mvcost[2]) { MODE_INFO *const mic = xd->mi[0]; const MB_MODE_INFO *const mbmi = &mic->mbmi; int thismvcost = 0; @@ -1485,8 +1034,6 @@ static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; const int is_compound = has_second_ref(mbmi); - // the only time we should do costing for new motion vector or mode - // is when we are on a new label (jbb May 08, 2007) switch (mode) { case NEWMV: this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; @@ -1498,15 +1045,11 @@ static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i, mvjcost, mvcost, MV_COST_WEIGHT_SUB); } break; - case NEARESTMV: - this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int; - if (is_compound) - this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int; - break; case NEARMV: - this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int; + case NEARESTMV: + this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int; if (is_compound) - this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int; + this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int; break; case ZEROMV: this_mv[0].as_int = 0; @@ -1631,7 +1174,7 @@ typedef struct { int64_t d; int64_t sse; int segment_yrate; - MB_PREDICTION_MODE modes[4]; + PREDICTION_MODE modes[4]; SEG_RDSTAT rdstat[4][INTER_MODES]; int mvthresh; } BEST_SEG_INFO; @@ -1675,14 +1218,14 @@ static INLINE int mv_has_subpel(const MV *mv) { static int check_best_zero_mv( const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int disable_inter_mode_mask, int this_mode, int ref_frame, - int second_ref_frame) { - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + int inter_mode_mask, int this_mode, + const MV_REFERENCE_FRAME ref_frames[2]) { + if ((inter_mode_mask & (1 << ZEROMV)) && (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - (second_ref_frame == NONE || - frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mode_context[ref_frame]; + frame_mv[this_mode][ref_frames[0]].as_int == 0 && + (ref_frames[1] == NONE || + frame_mv[this_mode][ref_frames[1]].as_int == 0)) { + int rfc = mode_context[ref_frames[0]]; int c1 = cost_mv_ref(cpi, NEARMV, rfc); int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); int c3 = cost_mv_ref(cpi, ZEROMV, rfc); @@ -1693,15 +1236,15 @@ static int check_best_zero_mv( if (c2 > c3) return 0; } else { assert(this_mode == ZEROMV); - if (second_ref_frame == NONE) { - if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + if (ref_frames[1] == NONE) { + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0)) return 0; } else { - if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 && + frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 && + frame_mv[NEARMV][ref_frames[1]].as_int == 0)) return 0; } } @@ -1709,18 +1252,28 @@ static int check_best_zero_mv( return 1; } -static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - BEST_SEG_INFO *bsi_buf, int filter_idx, - int_mv seg_mvs[4][MAX_REF_FRAMES], - int mi_row, int mi_col) { +static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo * const tile, + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + int64_t best_rd, int *returntotrate, + int *returnyrate, + int64_t *returndistortion, + int *skippable, int64_t *psse, + int mvthresh, + int_mv seg_mvs[4][MAX_REF_FRAMES], + BEST_SEG_INFO *bsi_buf, int filter_idx, + int mi_row, int mi_col) { + int i; + BEST_SEG_INFO *bsi = bsi_buf + filter_idx; + MACROBLOCKD *xd = &x->e_mbd; + MODE_INFO *mi = xd->mi[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + int mode_idx; int k, br = 0, idx, idy; int64_t bd = 0, block_sse = 0; - MB_PREDICTION_MODE this_mode; - MACROBLOCKD *xd = &x->e_mbd; + PREDICTION_MODE this_mode; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = xd->mi[0]; - MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const int label_count = 4; @@ -1730,13 +1283,21 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize = mbmi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; - BEST_SEG_INFO *bsi = bsi_buf + filter_idx; - int mode_idx; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); - const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; + + vp9_zero(*bsi); + + bsi->segment_rd = best_rd; + bsi->ref_mv[0] = best_ref_mv; + bsi->ref_mv[1] = second_best_ref_mv; + bsi->mvp.as_int = best_ref_mv->as_int; + bsi->mvthresh = mvthresh; + + for (i = 0; i < 4; i++) + bsi->modes[i] = ZEROMV; vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); @@ -1754,7 +1315,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop int_mv mode_mv[MB_MODE_COUNT][2]; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - MB_PREDICTION_MODE mode_selected = ZEROMV; + PREDICTION_MODE mode_selected = ZEROMV; int64_t best_rd = INT64_MAX; const int i = idy * 2 + idx; int ref; @@ -1774,13 +1335,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; - if (disable_inter_mode_mask & (1 << mode_idx)) + if (!(inter_mode_mask & (1 << this_mode))) continue; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, - this_mode, mbmi->ref_frame[0], - mbmi->ref_frame[1])) + inter_mode_mask, + this_mode, mbmi->ref_frame)) continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); @@ -1792,9 +1352,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { - int_mv *const new_mv = &mode_mv[NEWMV][0]; + MV *const new_mv = &mode_mv[NEWMV][0].as_mv; int step_param = 0; - int further_steps; int thissme, bestsme = INT_MAX; int sadpb = x->sadperbit4; MV mvp_full; @@ -1805,8 +1364,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (best_rd < label_mv_thresh) break; - if (cpi->oxcf.mode != MODE_SECONDPASS_BEST && - cpi->oxcf.mode != MODE_BESTQUALITY) { + if (!is_best_mode(cpi->oxcf.mode)) { // use previous block's result as next block's MV predictor. if (i > 0) { bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; @@ -1819,12 +1377,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, else max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; - if (cpi->sf.auto_mv_step_size && cm->show_frame) { + if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and the best ref mvs of the current block for // the given reference. - step_param = (vp9_init_search_range(cpi, max_mv) + - cpi->mv_step_param) >> 1; + step_param = (vp9_init_search_range(max_mv) + + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } @@ -1833,95 +1391,60 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.col = bsi->mvp.as_mv.col >> 3; if (cpi->sf.adaptive_motion_search && cm->show_frame) { - mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3; - mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3; + mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3; + mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3; step_param = MAX(step_param, 8); } - further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; // adjust src pointer for this block mi_buf_shift(x, i); vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv); - if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, - step_param, - sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, - &bsi->ref_mv[0]->as_mv, - v_fn_ptr, 1); - } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, - step_param, - sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, - &bsi->ref_mv[0]->as_mv, - v_fn_ptr, 1); - } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, - step_param, - sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv, - &bsi->ref_mv[0]->as_mv, - v_fn_ptr, 1); - } else { - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 0, v_fn_ptr, - &bsi->ref_mv[0]->as_mv, - &new_mv->as_mv); - } + bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, + sadpb, &bsi->ref_mv[0]->as_mv, new_mv, + INT_MAX, 1); // Should we do a full search (best quality only) - if (cpi->oxcf.mode == MODE_BESTQUALITY || - cpi->oxcf.mode == MODE_SECONDPASS_BEST) { + if (is_best_mode(cpi->oxcf.mode)) { int_mv *const best_mv = &mi->bmi[i].as_mv[0]; /* Check if mvp_full is within the range. */ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, - sadpb, 16, v_fn_ptr, - x->nmvjointcost, x->mvcost, + sadpb, 16, &cpi->fn_ptr[bsize], &bsi->ref_mv[0]->as_mv, &best_mv->as_mv); if (thissme < bestsme) { bestsme = thissme; - new_mv->as_int = best_mv->as_int; + *new_mv = best_mv->as_mv; } else { // The full search result is actually worse so re-instate the // previous best vector - best_mv->as_int = new_mv->as_int; + best_mv->as_mv = *new_mv; } } if (bestsme < INT_MAX) { int distortion; cpi->find_fractional_mv_step(x, - &new_mv->as_mv, + new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, - x->errorperbit, v_fn_ptr, - cpi->sf.subpel_force_stop, - cpi->sf.subpel_iters_per_step, + x->errorperbit, &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &distortion, - &x->pred_sse[mbmi->ref_frame[0]]); + &x->pred_sse[mbmi->ref_frame[0]], + NULL, 0, 0); // save motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int; + seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv; } if (cpi->sf.adaptive_motion_search) - x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int; + x->pred_mv[mbmi->ref_frame[0]] = *new_mv; // restore src pointers mi_buf_restore(x, orig_src, orig_pre); @@ -1952,8 +1475,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, } bsi->rdstat[i][mode_idx].brate = - labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv, - seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost); + set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode], + frame_mv, seg_mvs[i], bsi->ref_mv, + x->nmvjointcost, x->mvcost); for (ref = 0; ref < 1 + has_second_rf; ++ref) { bsi->rdstat[i][mode_idx].mvs[ref].as_int = @@ -2042,16 +1566,16 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; - return; + return INT64_MAX;; } mode_idx = INTER_OFFSET(mode_selected); vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); - labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected], - frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, - x->mvcost); + set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected], + frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, + x->mvcost); br += bsi->rdstat[i][mode_idx].brate; bd += bsi->rdstat[i][mode_idx].bdist; @@ -2065,7 +1589,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; - return; + return INT64_MAX;; } } } /* for each label */ @@ -2079,42 +1603,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // update the coding decisions for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode; -} - -static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int64_t best_rd, - int *returntotrate, - int *returnyrate, - int64_t *returndistortion, - int *skippable, int64_t *psse, - int mvthresh, - int_mv seg_mvs[4][MAX_REF_FRAMES], - BEST_SEG_INFO *bsi_buf, - int filter_idx, - int mi_row, int mi_col) { - int i; - BEST_SEG_INFO *bsi = bsi_buf + filter_idx; - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0]; - MB_MODE_INFO *mbmi = &mi->mbmi; - int mode_idx; - - vp9_zero(*bsi); - - bsi->segment_rd = best_rd; - bsi->ref_mv[0] = best_ref_mv; - bsi->ref_mv[1] = second_best_ref_mv; - bsi->mvp.as_int = best_ref_mv->as_int; - bsi->mvthresh = mvthresh; - - for (i = 0; i < 4; i++) - bsi->modes[i] = ZEROMV; - - rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs, - mi_row, mi_col); if (bsi->segment_rd > best_rd) return INT64_MAX; @@ -2141,72 +1629,12 @@ static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, return bsi->segment_rd; } -static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, - uint8_t *ref_y_buffer, int ref_y_stride, - int ref_frame, BLOCK_SIZE block_size ) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - int_mv this_mv; - int i; - int zero_seen = 0; - int best_index = 0; - int best_sad = INT_MAX; - int this_sad = INT_MAX; - int max_mv = 0; - - uint8_t *src_y_ptr = x->plane[0].src.buf; - uint8_t *ref_y_ptr; - int row_offset, col_offset; - int num_mv_refs = MAX_MV_REF_CANDIDATES + - (cpi->sf.adaptive_motion_search && - cpi->common.show_frame && - block_size < cpi->sf.max_partition_size); - - int_mv pred_mv[3]; - pred_mv[0] = mbmi->ref_mvs[ref_frame][0]; - pred_mv[1] = mbmi->ref_mvs[ref_frame][1]; - pred_mv[2] = x->pred_mv[ref_frame]; - - // Get the sad for each candidate reference mv - for (i = 0; i < num_mv_refs; i++) { - this_mv.as_int = pred_mv[i].as_int; - - max_mv = MAX(max_mv, - MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); - // only need to check zero mv once - if (!this_mv.as_int && zero_seen) - continue; - - zero_seen = zero_seen || !this_mv.as_int; - - row_offset = this_mv.as_mv.row >> 3; - col_offset = this_mv.as_mv.col >> 3; - ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; - - // Find sad for current vector. - this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_ptr, ref_y_stride, - 0x7fffffff); - - // Note if it is the best so far. - if (this_sad < best_sad) { - best_sad = this_sad; - best_index = i; - } - } - - // Note the index of the mv that worked best in the reference list. - x->mv_best_ref_index[ref_frame] = best_index; - x->max_mv_context[ref_frame] = max_mv; - x->pred_mv_sad[ref_frame] = best_sad; -} - -static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, +static void estimate_ref_frame_costs(const VP9_COMMON *cm, + const MACROBLOCKD *xd, + int segment_id, unsigned int *ref_costs_single, unsigned int *ref_costs_comp, vp9_prob *comp_mode_p) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); if (seg_ref_active) { @@ -2264,10 +1692,8 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, - int_mv *ref_mv, - int_mv *second_ref_mv, int64_t comp_pred_diff[REFERENCE_MODES], - int64_t tx_size_diff[TX_MODES], + const int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { MACROBLOCKD *const xd = &x->e_mbd; @@ -2276,10 +1702,6 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->skip = x->skip; ctx->best_mode_index = mode_index; ctx->mic = *xd->mi[0]; - - ctx->best_ref_mv[0].as_int = ref_mv->as_int; - ctx->best_ref_mv[1].as_int = second_ref_mv->as_int; - ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; @@ -2289,40 +1711,14 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); } -static void setup_pred_block(const MACROBLOCKD *xd, - struct buf_2d dst[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, - const struct scale_factors *scale, - const struct scale_factors *scale_uv) { - int i; - - dst[0].buf = src->y_buffer; - dst[0].stride = src->y_stride; - dst[1].buf = src->u_buffer; - dst[2].buf = src->v_buffer; - dst[1].stride = dst[2].stride = src->uv_stride; -#if CONFIG_ALPHA - dst[3].buf = src->alpha_buffer; - dst[3].stride = src->alpha_stride; -#endif - - // TODO(jkoleszar): Make scale factors per-plane data - for (i = 0; i < MAX_MB_PLANE; i++) { - setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, - i ? scale_uv : scale, - xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); - } -} - -void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - MV_REFERENCE_FRAME ref_frame, - BLOCK_SIZE block_size, - int mi_row, int mi_col, - int_mv frame_nearest_mv[MAX_REF_FRAMES], - int_mv frame_near_mv[MAX_REF_FRAMES], - struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { +static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + MV_REFERENCE_FRAME ref_frame, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; @@ -2332,7 +1728,7 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this // use the UV scaling factors. - setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); + vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col); @@ -2346,37 +1742,20 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, // in full and choose the best as the centre point for subsequent searches. // The current implementation doesn't support scaling. if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8) - mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, - ref_frame, block_size); -} - -const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, - int ref_frame) { - const VP9_COMMON *const cm = &cpi->common; - const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; - return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; -} - -int vp9_get_switchable_rate(const MACROBLOCK *x) { - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const int ctx = vp9_get_pred_context_switchable_interp(xd); - return SWITCHABLE_INTERP_RATE_FACTOR * - x->switchable_interp_costs[ctx][mbmi->interp_filter]; + vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, + ref_frame, block_size); } static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; - VP9_COMMON *cm = &cpi->common; + const VP9_COMMON *cm = &cpi->common; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}}; int bestsme = INT_MAX; - int further_steps, step_param; + int step_param; int sadpb = x->sadperbit16; MV mvp_full; int ref = mbmi->ref_frame[0]; @@ -2393,7 +1772,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, MV pred_mv[3]; pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv; pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv; - pred_mv[2] = x->pred_mv[ref].as_mv; + pred_mv[2] = x->pred_mv[ref]; if (scaled_ref_frame) { int i; @@ -2410,35 +1789,36 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Work out the size of the first step in the mv step search. // 0 here is maximum length first step. 1 is MAX >> 1 etc. - if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { + if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and that based on the best ref mvs of the current // block for the given reference. - step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) + - cpi->mv_step_param) >> 1; + step_param = (vp9_init_search_range(x->max_mv_context[ref]) + + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && - cpi->common.show_frame) { + cm->show_frame) { int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize), b_width_log2(bsize))); step_param = MAX(step_param, boffset); } if (cpi->sf.adaptive_motion_search) { - int bwl = b_width_log2_lookup[bsize]; - int bhl = b_height_log2_lookup[bsize]; + int bwl = b_width_log2(bsize); + int bhl = b_height_log2(bsize); int i; int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); if (tlevel < 5) step_param += 2; - for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) { + for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { - x->pred_mv[ref].as_int = 0; + x->pred_mv[ref].row = 0; + x->pred_mv[ref].col = 0; tmp_mv->as_int = INVALID_MV; if (scaled_ref_frame) { @@ -2456,50 +1836,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.col >>= 3; mvp_full.row >>= 3; - // Further step/diamond searches as necessary - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - if (cpi->sf.search_method == FAST_DIAMOND) { - bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == FAST_HEX) { - bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else { - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[bsize], - &ref_mv, &tmp_mv->as_mv); - } + bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, + &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -2512,16 +1850,16 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.subpel_force_stop, - cpi->sf.subpel_iters_per_step, + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref]); + &dis, &x->pred_sse[ref], NULL, 0, 0); } *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) - x->pred_mv[ref].as_int = tmp_mv->as_int; + if (cpi->sf.adaptive_motion_search && cm->show_frame) + x->pred_mv[ref] = tmp_mv->as_mv; if (scaled_ref_frame) { int i; @@ -2580,7 +1918,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d ref_yv12[2]; int bestsme = INT_MAX; int sadpb = x->sadperbit16; - int_mv tmp_mv; + MV tmp_mv; int search_range = 3; int tmp_col_min = x->mv_col_min; @@ -2609,20 +1947,18 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, vp9_set_mv_search_range(x, &ref_mv[id].as_mv); // Use mv result from single mode as mvp. - tmp_mv.as_int = frame_mv[refs[id]].as_int; + tmp_mv = frame_mv[refs[id]].as_mv; - tmp_mv.as_mv.col >>= 3; - tmp_mv.as_mv.row >>= 3; + tmp_mv.col >>= 3; + tmp_mv.row >>= 3; // Small-range full-pixel motion search - bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb, + bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range, &cpi->fn_ptr[bsize], - x->nmvjointcost, x->mvcost, - &ref_mv[id].as_mv, second_pred, - pw, ph); + &ref_mv[id].as_mv, second_pred); if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv, + bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, second_pred, &cpi->fn_ptr[bsize], 1); x->mv_col_min = tmp_col_min; @@ -2633,13 +1969,13 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - bestsme = cpi->find_fractional_mv_step_comp( - x, &tmp_mv.as_mv, + bestsme = cpi->find_fractional_mv_step( + x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - 0, cpi->sf.subpel_iters_per_step, + 0, cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, pw, ph); @@ -2649,7 +1985,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[0].pre[0] = scaled_first_yv12; if (bestsme < last_besterr[id]) { - frame_mv[refs[id]].as_int = tmp_mv.as_int; + frame_mv[refs[id]].as_mv = tmp_mv; last_besterr[id] = bestsme; } else { break; @@ -2684,26 +2020,104 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, } } +static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, int *rate2, + int64_t *distortion, int64_t *distortion_uv, + int *disable_skip) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); + const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); + unsigned int var, sse; + // Skipping threshold for ac. + unsigned int thresh_ac; + // Skipping threshold for dc + unsigned int thresh_dc; + + var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride, &sse); + + if (x->encode_breakout > 0) { + // Set a maximum for threshold to avoid big PSNR loss in low bitrate + // case. Use extreme low threshold for static frames to limit skipping. + const unsigned int max_thresh = (cpi->allow_encode_breakout == + ENCODE_BREAKOUT_LIMITED) ? 128 : 36000; + // The encode_breakout input + const unsigned int min_thresh = + MIN(((unsigned int)x->encode_breakout << 4), max_thresh); + + // Calculate threshold according to dequant value. + thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; + thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); + + // Adjust threshold according to partition size. + thresh_ac >>= 8 - (b_width_log2(bsize) + + b_height_log2(bsize)); + thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); + } else { + thresh_ac = 0; + thresh_dc = 0; + } + + // Y skipping condition checking + if (sse < thresh_ac || sse == 0) { + // dc skipping checking + if ((sse - var) < thresh_dc || sse == var) { + unsigned int sse_u, sse_v; + unsigned int var_u, var_v; + + var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, + x->plane[1].src.stride, + xd->plane[1].dst.buf, + xd->plane[1].dst.stride, &sse_u); + + // U skipping condition checking + if ((sse_u * 4 < thresh_ac || sse_u == 0) && + (sse_u - var_u < thresh_dc || sse_u == var_u)) { + var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, + x->plane[2].src.stride, + xd->plane[2].dst.buf, + xd->plane[2].dst.stride, &sse_v); + + // V skipping condition checking + if ((sse_v * 4 < thresh_ac || sse_v == 0) && + (sse_v - var_v < thresh_dc || sse_v == var_v)) { + x->skip = 1; + + // The cost of skip bit needs to be added. + *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); + + // Scaling factor for SSE from spatial domain to frequency domain + // is 16. Adjust distortion accordingly. + *distortion_uv = (sse_u + sse_v) << 4; + *distortion = (sse << 4) + *distortion_uv; + + *disable_skip = 1; + } + } + } + } +} + static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, BLOCK_SIZE bsize, int64_t txfm_cache[], int *rate2, int64_t *distortion, int *skippable, int *rate_y, int64_t *distortion_y, int *rate_uv, int64_t *distortion_uv, - int *mode_excluded, int *disable_skip, - INTERP_FILTER *best_filter, + int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], int64_t *psse, const int64_t ref_best_rd) { VP9_COMMON *cm = &cpi->common; + RD_OPT *rd_opt = &cpi->rd; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); - const int num_refs = is_comp_pred ? 2 : 1; const int this_mode = mbmi->mode; int_mv *frame_mv = mode_mv[this_mode]; int i; @@ -2719,6 +2133,25 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *orig_dst[MAX_MB_PLANE]; int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; + INTERP_FILTER best_filter = SWITCHABLE; + int skip_txfm[MAX_MB_PLANE] = {0}; + int64_t bsse[MAX_MB_PLANE] = {0}; + + int bsl = mi_width_log2_lookup[bsize]; + int pred_filter_search = cpi->sf.cb_pred_filter_search ? + (((mi_row + mi_col) >> bsl) + + get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; + + if (pred_filter_search) { + INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; + if (xd->up_available) + af = xd->mi[-xd->mi_stride]->mbmi.interp_filter; + if (xd->left_available) + lf = xd->mi[-1]->mbmi.interp_filter; + + if ((this_mode != NEWMV) || (af == lf)) + best_filter = af; + } if (is_comp_pred) { if (frame_mv[refs[0]].as_int == INVALID_MV || @@ -2747,7 +2180,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, *rate2 += rate_mv; } else { int_mv tmp_mv; - single_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; @@ -2758,7 +2191,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - for (i = 0; i < num_refs; ++i) { + for (i = 0; i < is_comp_pred + 1; ++i) { cur_mv[i] = frame_mv[refs[i]]; // Clip "next_nearest" so that it does not extend to far out of image if (this_mode != NEWMV) @@ -2785,10 +2218,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, * if the first is known */ *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]); - if (!(*mode_excluded)) - *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE - : cm->reference_mode == COMPOUND_REFERENCE; - pred_exists = 0; // Are all MVs integer pel for Y and UV intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv); @@ -2797,16 +2226,14 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - cpi->mask_filter_rd = 0; + rd_opt->mask_filter = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - cpi->rd_filter_cache[i] = INT64_MAX; + rd_opt->filter_cache[i] = INT64_MAX; if (cm->interp_filter != BILINEAR) { - *best_filter = EIGHTTAP; - if (x->source_variance < - cpi->sf.disable_filter_search_var_thresh) { - *best_filter = EIGHTTAP; - } else { + if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { + best_filter = EIGHTTAP; + } else if (best_filter == SWITCHABLE) { int newbest; int tmp_rate_sum = 0; int64_t tmp_dist_sum = 0; @@ -2815,17 +2242,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - rs = vp9_get_switchable_rate(x); + rs = vp9_get_switchable_rate(cpi); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); - cpi->rd_filter_cache[i] = rd; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + rd_opt->filter_cache[i] = rd; + rd_opt->filter_cache[SWITCHABLE_FILTERS] = + MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; - cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); + rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; @@ -2845,12 +2272,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); - cpi->rd_filter_cache[i] = rd; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + rd_opt->filter_cache[i] = rd; + rd_opt->filter_cache[SWITCHABLE_FILTERS] = + MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; - cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); + rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd); if (i == 0 && intpel_mv) { tmp_rate_sum = rate_sum; @@ -2868,9 +2295,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (newbest) { best_rd = rd; - *best_filter = mbmi->interp_filter; + best_filter = mbmi->interp_filter; if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; + vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); + vpx_memcpy(bsse, x->bsse, sizeof(bsse)); } if ((cm->interp_filter == SWITCHABLE && newbest) || @@ -2884,8 +2313,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? - cm->interp_filter : *best_filter; - rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0; + cm->interp_filter : best_filter; + rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2915,87 +2344,17 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += vp9_get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(cpi); if (!is_comp_pred) { - if (!x->in_active_map) { - if (psse) - *psse = 0; - *distortion = 0; - x->skip = 1; - } else if (cpi->allow_encode_breakout && x->encode_breakout) { - const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]); - const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); - unsigned int var, sse; - // Skipping threshold for ac. - unsigned int thresh_ac; - // Set a maximum for threshold to avoid big PSNR loss in low bitrate case. - // Use extreme low threshold for static frames to limit skipping. - const unsigned int max_thresh = (cpi->allow_encode_breakout == - ENCODE_BREAKOUT_LIMITED) ? 128 : 36000; - // The encode_breakout input - const unsigned int min_thresh = - MIN(((unsigned int)x->encode_breakout << 4), max_thresh); - - // Calculate threshold according to dequant value. - thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9; - thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); - - var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride, &sse); - - // Adjust threshold according to partition size. - thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); - - // Y skipping condition checking - if (sse < thresh_ac || sse == 0) { - // Skipping threshold for dc - unsigned int thresh_dc; - - thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); - - // dc skipping checking - if ((sse - var) < thresh_dc || sse == var) { - unsigned int sse_u, sse_v; - unsigned int var_u, var_v; - - var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, - x->plane[1].src.stride, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride, &sse_u); - - // U skipping condition checking - if ((sse_u * 4 < thresh_ac || sse_u == 0) && - (sse_u - var_u < thresh_dc || sse_u == var_u)) { - var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf, - x->plane[2].src.stride, - xd->plane[2].dst.buf, - xd->plane[2].dst.stride, &sse_v); - - // V skipping condition checking - if ((sse_v * 4 < thresh_ac || sse_v == 0) && - (sse_v - var_v < thresh_dc || sse_v == var_v)) { - x->skip = 1; - - // The cost of skip bit needs to be added. - *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); - - // Scaling factor for SSE from spatial domain to frequency domain - // is 16. Adjust distortion accordingly. - *distortion_uv = (sse_u + sse_v) << 4; - *distortion = (sse << 4) + *distortion_uv; - - *disable_skip = 1; - this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); - } - } - } - } - } + if (cpi->allow_encode_breakout) + rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv, + disable_skip); } + vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); + vpx_memcpy(x->bsse, bsse, sizeof(bsse)); + if (!x->skip) { int skippable_y, skippable_uv; int64_t sseuv = INT64_MAX; @@ -3037,36 +2396,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; // if 0, this will be re-calculated by caller } -static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, - int max_plane) { - struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = x->e_mbd.plane; - int i; - - for (i = 0; i < max_plane; ++i) { - p[i].coeff = ctx->coeff_pbuf[i][1]; - p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; - pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; - p[i].eobs = ctx->eobs_pbuf[i][1]; - - ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0]; - ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0]; - ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0]; - ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0]; - - ctx->coeff_pbuf[i][0] = p[i].coeff; - ctx->qcoeff_pbuf[i][0] = p[i].qcoeff; - ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff; - ctx->eobs_pbuf[i][0] = p[i].eobs; - } -} - void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int64_t *returndist, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; + struct macroblockd_plane *const pd = xd->plane; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip = 0; int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 }; @@ -3082,7 +2418,9 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize, + pd[1].subsampling_x, + pd[1].subsampling_y); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize, max_uv_tx_size); } else { @@ -3092,7 +2430,9 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; return; } - max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize); + max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize, + pd[1].subsampling_x, + pd[1].subsampling_y); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size); } @@ -3118,6 +2458,29 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ctx->mic = *xd->mi[0]; } +// Updating rd_thresh_freq_fact[] here means that the different +// partition/block sizes are handled independently based on the best +// choice for the current partition. It may well be better to keep a scaled +// best rd so far value and update rd_thresh_freq_fact based on the mode/size +// combination that wins out. +static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize, + int best_mode_index) { + if (cpi->sf.adaptive_rd_thresh > 0) { + const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; + int mode; + for (mode = 0; mode < top_mode; ++mode) { + int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode]; + + if (mode == best_mode_index) { + *fact -= (*fact >> 3); + } else { + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); + } + } + } +} + int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, int mi_row, int mi_col, @@ -3127,10 +2490,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; - MB_PREDICTION_MODE this_mode; + struct macroblockd_plane *const pd = xd->plane; + PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3146,19 +2511,18 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - MB_MODE_INFO best_mbmode = { 0 }; - int mode_index, best_mode_index = 0; + MB_MODE_INFO best_mbmode; + int mode_index, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; int64_t best_inter_rd = INT64_MAX; - MB_PREDICTION_MODE best_intra_mode = DC_PRED; + PREDICTION_MODE best_intra_mode = DC_PRED; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; - MB_PREDICTION_MODE mode_uv[TX_SIZES]; + PREDICTION_MODE mode_uv[TX_SIZES]; int64_t mode_distortions[MB_MODE_COUNT] = {-1}; int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); const int bws = num_8x8_blocks_wide_lookup[bsize] / 2; @@ -3166,16 +2530,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int best_skip2 = 0; int mode_skip_mask = 0; int mode_skip_start = cpi->sf.mode_skip_start + 1; - const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; - const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize]; const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; - int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; - + int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; + vp9_zero(best_mbmode); x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; - estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, + estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < REFERENCE_MODES; ++i) @@ -3194,9 +2558,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { - vp9_setup_buffer_inter(cpi, x, tile, - ref_frame, bsize, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); + setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; @@ -3232,13 +2595,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { - const int inter_non_zero_mode_mask = 0x1F7F7; - mode_skip_mask |= inter_non_zero_mode_mask; - } - // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. @@ -3248,9 +2604,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // an unfiltered alternative. We allow near/nearest as well // because they may result in zero-zero MVs but be cheaper. if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - const int altref_zero_mask = + mode_skip_mask = ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA)); - mode_skip_mask |= altref_zero_mask; if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) mode_skip_mask |= (1 << THR_NEARA); if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) @@ -3271,21 +2626,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (bsize > cpi->sf.max_intra_bsize) { - mode_skip_mask |= 0xFF30808; - } - - if (!x->in_active_map) { - int mode_index; - assert(cpi->ref_frame_flags & VP9_LAST_FLAG); - if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0) - mode_index = THR_NEARESTMV; - else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0) - mode_index = THR_NEARMV; - else - mode_index = THR_ZEROMV; - mode_skip_mask = ~(1 << mode_index); - mode_skip_start = MAX_MODES; - disable_inter_mode_mask = 0; + const int all_intra_modes = (1 << THR_DC) | (1 << THR_TM) | + (1 << THR_H_PRED) | (1 << THR_V_PRED) | (1 << THR_D135_PRED) | + (1 << THR_D207_PRED) | (1 << THR_D153_PRED) | (1 << THR_D63_PRED) | + (1 << THR_D117_PRED) | (1 << THR_D45_PRED); + mode_skip_mask |= all_intra_modes; } for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { @@ -3304,7 +2649,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. - if (mode_index == mode_skip_start) { + if (mode_index == mode_skip_start && best_mode_index >= 0) { switch (vp9_mode_order[best_mode_index].ref_frame[0]) { case INTRA_FRAME: break; @@ -3320,27 +2665,76 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, case NONE: case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); + break; } } if (mode_skip_mask & (1 << mode_index)) continue; // Test best rd so far against threshold for trying this mode. - if (best_rd < ((int64_t)rd_threshes[mode_index] * - rd_thresh_freq_fact[mode_index] >> 5) || - rd_threshes[mode_index] == INT_MAX) - continue; + if (rd_less_than_thresh(best_rd, rd_threshes[mode_index], + rd_thresh_freq_fact[mode_index])) + continue; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - if (ref_frame != INTRA_FRAME && - disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) + if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) continue; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; + if (cpi->sf.motion_field_mode_search) { + const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize], + tile->mi_col_end - mi_col); + const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize], + tile->mi_row_end - mi_row); + const int bsl = mi_width_log2(bsize); + int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl) + + get_chessboard_index(cm->current_video_frame)) & 0x1; + MB_MODE_INFO *ref_mbmi; + int const_motion = 1; + int skip_ref_frame = !cb_partition_search_ctrl; + MV_REFERENCE_FRAME rf = NONE; + int_mv ref_mv; + ref_mv.as_int = INVALID_MV; + + if ((mi_row - 1) >= tile->mi_row_start) { + ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0]; + rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0]; + for (i = 0; i < mi_width; ++i) { + ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi; + const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && + (ref_frame == ref_mbmi->ref_frame[0]); + skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); + } + } + + if ((mi_col - 1) >= tile->mi_col_start) { + if (ref_mv.as_int == INVALID_MV) + ref_mv = xd->mi[-1]->mbmi.mv[0]; + if (rf == NONE) + rf = xd->mi[-1]->mbmi.ref_frame[0]; + for (i = 0; i < mi_height; ++i) { + ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi; + const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) && + (ref_frame == ref_mbmi->ref_frame[0]); + skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]); + } + } + + if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV) + if (rf > INTRA_FRAME) + if (ref_frame != rf) + continue; + + if (const_motion) + if (this_mode == NEARMV || this_mode == ZEROMV) + continue; + } + comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + best_mode_index >=0 && vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) continue; if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && @@ -3368,7 +2762,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // one of the neighboring directional modes if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && (this_mode >= D45_PRED && this_mode <= TM_PRED)) { - if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) + if (best_mode_index >= 0 && + vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) continue; } if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { @@ -3377,16 +2772,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } } else { - if (x->in_active_map && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) - if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, this_mode, ref_frame, - second_ref_frame)) - continue; + const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; + if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, + inter_mode_mask, this_mode, ref_frames)) + continue; } mbmi->mode = this_mode; - mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode; + mbmi->uv_mode = DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use @@ -3406,21 +2799,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < TX_MODES; ++i) tx_cache[i] = INT64_MAX; -#ifdef MODE_TEST_HIT_STATS - // TEST/DEBUG CODE - // Keep a rcord of the number of test hits at each size - cpi->mode_test_hits[bsize]++; -#endif - if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; - intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, + intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, bsize, tx_cache, best_rd); if (rate_y == INT_MAX) continue; - uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize); + uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x, + pd[1].subsampling_y); if (rate_uv_intra[uv_tx] == INT_MAX) { choose_intra_uv_mode(cpi, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], @@ -3432,18 +2820,17 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, skippable = skippable && skip_uv[uv_tx]; mbmi->uv_mode = mode_uv[uv_tx]; - rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; + rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; if (this_mode != DC_PRED && this_mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else { - this_rd = handle_inter_mode(cpi, x, tile, bsize, + this_rd = handle_inter_mode(cpi, x, bsize, tx_cache, &rate2, &distortion2, &skippable, &rate_y, &distortion_y, &rate_uv, &distortion_uv, - &mode_excluded, &disable_skip, - &tmp_best_filter, frame_mv, + &disable_skip, frame_mv, mi_row, mi_col, single_newmv, &total_sse, best_rd); if (this_rd == INT64_MAX) @@ -3464,31 +2851,20 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, - SEG_LVL_SKIP); - if (skippable) { + vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); + // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // for best yrd calculation rate_uv = 0; - if (mb_skip_allowed) { - int prob_skip_cost; - - // Cost the skip mb case - vp9_prob skip_prob = vp9_get_skip_prob(cm, xd); - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - } + // Cost the skip mb case + if (skip_prob) { + int prob_skip_cost = vp9_cost_bit(skip_prob, 1); + rate2 += prob_skip_cost; } - } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { + } else if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -3503,7 +2879,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rate_uv = 0; this_skip2 = 1; } - } else if (mb_skip_allowed) { + } else { // Add in the cost of the no skip flag. rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } @@ -3557,8 +2933,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_rd = this_rd; best_mbmode = *mbmi; best_skip2 = this_skip2; - if (!x->select_txfm_size) - swap_block_ptr(x, ctx, max_plane); + if (!x->select_tx_size) + swap_block_ptr(x, ctx, 1, 0, 0, max_plane); vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(uint8_t) * ctx->num_4x4_blk); @@ -3610,21 +2986,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && cm->interp_filter != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS : cm->interp_filter]; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { int64_t adj_rd; if (ref == INT64_MAX) adj_rd = 0; - else if (cpi->rd_filter_cache[i] == INT64_MAX) + else if (rd_opt->filter_cache[i] == INT64_MAX) // when early termination is triggered, the encoder does not have // access to the rate-distortion cost. it only knows that the cost // should be above the maximum valid value. hence it takes the known // maximum plus an arbitrary constant as the rate-distortion cost. - adj_rd = cpi->mask_filter_rd - ref + 10; + adj_rd = rd_opt->mask_filter - ref + 10; else - adj_rd = cpi->rd_filter_cache[i] - ref; + adj_rd = rd_opt->filter_cache[i] - ref; adj_rd += this_rd; best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); @@ -3656,7 +3032,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } - if (best_rd >= best_rd_so_far) + if (best_mode_index < 0 || best_rd >= best_rd_so_far) return INT64_MAX; // If we used an estimate for the uv intra rd in the loop above... @@ -3665,7 +3041,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) { TX_SIZE uv_tx_size; *mbmi = best_mbmode; - uv_tx_size = get_uv_tx_size(mbmi); + uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], @@ -3679,23 +3055,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); - // Updating rd_thresh_freq_fact[] here means that the different - // partition/block sizes are handled independently based on the best - // choice for the current partition. It may well be better to keep a scaled - // best rd so far value and update rd_thresh_freq_fact based on the mode/size - // combination that wins out. - if (cpi->sf.adaptive_rd_thresh) { - for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { - int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index]; - - if (mode_index == best_mode_index) { - *fact -= (*fact >> 3); - } else { - *fact = MIN(*fact + RD_THRESH_INC, - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); - } - } - } + update_rd_thresh_fact(cpi, bsize, best_mode_index); // macroblock modes *mbmi = best_mbmode; @@ -3728,26 +3088,117 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - if (!x->in_active_map) { - assert(mbmi->ref_frame[0] == LAST_FRAME); - assert(mbmi->ref_frame[1] == NONE); - assert(mbmi->mode == NEARESTMV || - mbmi->mode == NEARMV || - mbmi->mode == ZEROMV); - assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0); - assert(mbmi->mode == mbmi->uv_mode); - } - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; } +int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x, + int *returnrate, + int64_t *returndistortion, + BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx, + int64_t best_rd_so_far) { + VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + unsigned char segment_id = mbmi->segment_id; + const int comp_pred = 0; + int i; + int64_t best_tx_diff[TX_MODES]; + int64_t best_pred_diff[REFERENCE_MODES]; + int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; + unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; + vp9_prob comp_mode_p; + INTERP_FILTER best_filter = SWITCHABLE; + int64_t this_rd = INT64_MAX; + int rate2 = 0; + const int64_t distortion2 = 0; + + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; + + estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, + &comp_mode_p); + + for (i = 0; i < MAX_REF_FRAMES; ++i) + x->pred_sse[i] = INT_MAX; + for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) + x->pred_mv_sad[i] = INT_MAX; + + *returnrate = INT_MAX; + + assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); + + mbmi->mode = ZEROMV; + mbmi->uv_mode = DC_PRED; + mbmi->ref_frame[0] = LAST_FRAME; + mbmi->ref_frame[1] = NONE; + mbmi->mv[0].as_int = 0; + x->skip = 1; + + // Search for best switchable filter by checking the variance of + // pred error irrespective of whether the filter will be used + rd_opt->mask_filter = 0; + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + rd_opt->filter_cache[i] = INT64_MAX; + + if (cm->interp_filter != BILINEAR) { + best_filter = EIGHTTAP; + if (cm->interp_filter == SWITCHABLE && + x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { + int rs; + int best_rs = INT_MAX; + for (i = 0; i < SWITCHABLE_FILTERS; ++i) { + mbmi->interp_filter = i; + rs = vp9_get_switchable_rate(cpi); + if (rs < best_rs) { + best_rs = rs; + best_filter = mbmi->interp_filter; + } + } + } + } + // Set the appropriate filter + if (cm->interp_filter == SWITCHABLE) { + mbmi->interp_filter = best_filter; + rate2 += vp9_get_switchable_rate(cpi); + } else { + mbmi->interp_filter = cm->interp_filter; + } + + if (cm->reference_mode == REFERENCE_MODE_SELECT) + rate2 += vp9_cost_bit(comp_mode_p, comp_pred); + + // Estimate the reference frame signaling cost and add it + // to the rolling cost variable. + rate2 += ref_costs_single[LAST_FRAME]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); + + *returnrate = rate2; + *returndistortion = distortion2; + + if (this_rd >= best_rd_so_far) + return INT64_MAX; + + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == mbmi->interp_filter)); + + update_rd_thresh_fact(cpi, bsize, THR_ZEROMV); + + vp9_zero(best_pred_diff); + vp9_zero(best_filter_diff); + vp9_zero(best_tx_diff); + + if (!x->select_tx_size) + swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); + store_coding_context(x, ctx, THR_ZEROMV, + best_pred_diff, best_tx_diff, best_filter_diff); + + return this_rd; +} int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, @@ -3757,10 +3208,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - const struct segmentation *seg = &cm->seg; + VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mbmi->segment_id; int comp_pred, i; @@ -3770,32 +3222,31 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, VP9_ALT_FLAG }; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise - int64_t best_tx_rd[TX_MODES]; - int64_t best_tx_diff[TX_MODES]; + static const int64_t best_tx_diff[TX_MODES] = { 0 }; int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - MB_MODE_INFO best_mbmode = { 0 }; - int mode_index, best_mode_index = 0; + MB_MODE_INFO best_mbmode; + int ref_index, best_ref_index = 0; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; int64_t best_inter_rd = INT64_MAX; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; INTERP_FILTER tmp_best_filter = SWITCHABLE; - int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; - int64_t dist_uv[TX_SIZES]; - int skip_uv[TX_SIZES]; - MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 }; + int rate_uv_intra, rate_uv_tokenonly; + int64_t dist_uv; + int skip_uv; + PREDICTION_MODE mode_uv = DC_PRED; int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; int best_skip2 = 0; - int ref_frame_mask = 0; int mode_skip_mask = 0; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4); + vp9_zero(best_mbmode); for (i = 0; i < 4; i++) { int j; @@ -3803,23 +3254,20 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, seg_mvs[i][j].as_int = INVALID_MV; } - estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, + estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; - for (i = 0; i < TX_MODES; i++) - best_tx_rd[i] = INT64_MAX; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = INT64_MAX; - for (i = 0; i < TX_SIZES; i++) - rate_uv_intra[i] = INT_MAX; + rate_uv_intra = INT_MAX; *returnrate = INT_MAX; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { - vp9_setup_buffer_inter(cpi, x, tile, + setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); @@ -3828,18 +3276,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - for (ref_frame = LAST_FRAME; - ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { - int i; - for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { - if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { - ref_frame_mask |= (1 << ref_frame); - break; - } - } - } - - for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { + for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; int disable_skip = 0; @@ -3847,24 +3284,19 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable = 0; - int64_t tx_cache[TX_MODES]; int i; int this_skip2 = 0; int64_t total_sse = INT_MAX; int early_term = 0; - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = INT64_MAX; - - x->skip = 0; - ref_frame = vp9_ref_order[mode_index].ref_frame[0]; - second_ref_frame = vp9_ref_order[mode_index].ref_frame[1]; + ref_frame = vp9_ref_order[ref_index].ref_frame[0]; + second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. - if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) { - if (mode_index == 3) { - switch (vp9_ref_order[best_mode_index].ref_frame[0]) { + if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) { + if (ref_index == 3) { + switch (vp9_ref_order[best_ref_index].ref_frame[0]) { case INTRA_FRAME: mode_skip_mask = 0; break; @@ -3880,84 +3312,55 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, case NONE: case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); + break; } } - if (mode_skip_mask & (1 << mode_index)) + if (mode_skip_mask & (1 << ref_index)) continue; } // Test best rd so far against threshold for trying this mode. - if ((best_rd < - ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] * - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) || - cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX) - continue; - - // Do not allow compound prediction if the segment level reference - // frame feature is in use as in this case there can only be one reference. - if ((second_ref_frame > INTRA_FRAME) && - vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + if (rd_less_than_thresh(best_rd, + rd_opt->threshes[segment_id][bsize][ref_index], + rd_opt->thresh_freq_fact[bsize][ref_index])) continue; - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; - - if (!(ref_frame == INTRA_FRAME - || (cpi->ref_frame_flags & flag_list[ref_frame]))) { - continue; - } - if (!(second_ref_frame == NONE - || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) { + if (ref_frame > INTRA_FRAME && + !(cpi->ref_frame_flags & flag_list[ref_frame])) { continue; } comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) - if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) - continue; - if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) - if (ref_frame != best_inter_ref_frame && - second_ref_frame != best_inter_ref_frame) - continue; + if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) + continue; + // Do not allow compound prediction if the segment level reference frame + // feature is in use as in this case there can only be one reference. + if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) + continue; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME) + continue; + if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && + ref_frame != best_inter_ref_frame && + second_ref_frame != best_inter_ref_frame) + continue; } // TODO(jingning, jkoleszar): scaling reference frame not supported for // sub8x8 blocks. - if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) + if (ref_frame > INTRA_FRAME && + vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) continue; - if (second_ref_frame > 0 && + if (second_ref_frame > INTRA_FRAME && vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - mbmi->uv_mode = DC_PRED; - - // Evaluate all sub-pel filters irrespective of whether we can use - // them for this frame. - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP - : cm->interp_filter; - - if (comp_pred) { - if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) - continue; - - mode_excluded = mode_excluded ? mode_excluded - : cm->reference_mode == SINGLE_REFERENCE; - } else { - if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) { - mode_excluded = mode_excluded ? - mode_excluded : cm->reference_mode == COMPOUND_REFERENCE; - } - } - - // Select prediction reference frames. - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - if (comp_pred) - xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; - } + if (comp_pred) + mode_excluded = cm->reference_mode == SINGLE_REFERENCE; + else if (ref_frame != INTRA_FRAME) + mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. @@ -3965,11 +3368,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && - ref_frame != INTRA_FRAME) { - continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. @@ -3983,15 +3381,26 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; } -#ifdef MODE_TEST_HIT_STATS - // TEST/DEBUG CODE - // Keep a rcord of the number of test hits at each size - cpi->mode_test_hits[bsize]++; -#endif + mbmi->tx_size = TX_4X4; + mbmi->uv_mode = DC_PRED; + mbmi->ref_frame[0] = ref_frame; + mbmi->ref_frame[1] = second_ref_frame; + // Evaluate all sub-pel filters irrespective of whether we can use + // them for this frame. + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + x->skip = 0; + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); + + // Select prediction reference frames. + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; + if (comp_pred) + xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; + } if (ref_frame == INTRA_FRAME) { int rate; - mbmi->tx_size = TX_4X4; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; @@ -3999,21 +3408,18 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate2 += intra_cost_penalty; distortion2 += distortion_y; - if (rate_uv_intra[TX_4X4] == INT_MAX) { + if (rate_uv_intra == INT_MAX) { choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4, - &rate_uv_intra[TX_4X4], - &rate_uv_tokenonly[TX_4X4], - &dist_uv[TX_4X4], &skip_uv[TX_4X4], - &mode_uv[TX_4X4]); + &rate_uv_intra, + &rate_uv_tokenonly, + &dist_uv, &skip_uv, + &mode_uv); } - rate2 += rate_uv_intra[TX_4X4]; - rate_uv = rate_uv_tokenonly[TX_4X4]; - distortion2 += dist_uv[TX_4X4]; - distortion_uv = dist_uv[TX_4X4]; - mbmi->uv_mode = mode_uv[TX_4X4]; - tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = tx_cache[ONLY_4X4]; + rate2 += rate_uv_intra; + rate_uv = rate_uv_tokenonly; + distortion2 += dist_uv; + distortion_uv = dist_uv; + mbmi->uv_mode = mode_uv; } else { int rate; int64_t distortion; @@ -4032,20 +3438,17 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int uv_skippable; this_rd_thresh = (ref_frame == LAST_FRAME) ? - cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] : - cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; + rd_opt->threshes[segment_id][bsize][THR_LAST] : + rd_opt->threshes[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? - cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; - xd->mi[0]->mbmi.tx_size = TX_4X4; - - cpi->mask_filter_rd = 0; + rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; + rd_opt->mask_filter = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - cpi->rd_filter_cache[i] = INT64_MAX; + rd_opt->filter_cache[i] = INT64_MAX; if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; - if (x->source_variance < - cpi->sf.disable_filter_search_var_thresh) { + if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; } else if (cpi->sf.adaptive_pred_interp_filter == 1 && ctx->pred_interp_filter < SWITCHABLE) { @@ -4060,28 +3463,27 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, - &mbmi->ref_mvs[ref_frame][0], - second_ref, - best_yrd, - &rate, &rate_y, &distortion, - &skippable, &total_sse, - (int)this_rd_thresh, seg_mvs, - bsi, switchable_filter_index, - mi_row, mi_col); + tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile, + &mbmi->ref_mvs[ref_frame][0], + second_ref, best_yrd, &rate, + &rate_y, &distortion, + &skippable, &total_sse, + (int) this_rd_thresh, seg_mvs, + bsi, switchable_filter_index, + mi_row, mi_col); if (tmp_rd == INT64_MAX) continue; - rs = vp9_get_switchable_rate(x); + rs = vp9_get_switchable_rate(cpi); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], + rd_opt->filter_cache[switchable_filter_index] = tmp_rd; + rd_opt->filter_cache[SWITCHABLE_FILTERS] = + MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; - cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd); + rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd); newbest = (tmp_rd < tmp_best_rd); if (newbest) { @@ -4127,15 +3529,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, - &mbmi->ref_mvs[ref_frame][0], - second_ref, - best_yrd, - &rate, &rate_y, &distortion, - &skippable, &total_sse, - (int)this_rd_thresh, seg_mvs, - bsi, 0, - mi_row, mi_col); + tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile, + &mbmi->ref_mvs[ref_frame][0], + second_ref, best_yrd, &rate, &rate_y, + &distortion, &skippable, &total_sse, + (int) this_rd_thresh, seg_mvs, bsi, 0, + mi_row, mi_col); if (tmp_rd == INT64_MAX) continue; } else { @@ -4153,7 +3552,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += vp9_get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(cpi); if (!mode_excluded) mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE @@ -4178,10 +3577,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_uv; skippable = skippable && uv_skippable; total_sse += uv_sse; - - tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = tx_cache[ONLY_4X4]; } } @@ -4197,15 +3592,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, - SEG_LVL_SKIP); - - if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { + // Skip is never coded at the segment level for sub8x8 blocks and instead + // always coded in the bitstream at the mode info level. + + if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -4220,7 +3610,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate_uv = 0; this_skip2 = 1; } - } else if (mb_skip_allowed) { + } else { // Add in the cost of the no skip flag. rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } @@ -4230,8 +3620,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } // Keep record of best inter rd with single reference - if (is_inter_block(&xd->mi[0]->mbmi) && - !has_second_ref(&xd->mi[0]->mbmi) && + if (is_inter_block(mbmi) && + !has_second_ref(mbmi) && !mode_excluded && this_rd < best_inter_rd) { best_inter_rd = this_rd; @@ -4250,7 +3640,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (!mode_excluded) { int max_plane = MAX_MB_PLANE; // Note index of best mode so far - best_mode_index = mode_index; + best_ref_index = ref_index; if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ @@ -4265,9 +3655,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); best_mbmode = *mbmi; best_skip2 = this_skip2; - if (!x->select_txfm_size) - swap_block_ptr(x, ctx, max_plane); - vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], + if (!x->select_tx_size) + swap_block_ptr(x, ctx, 1, 0, 0, max_plane); + vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(uint8_t) * ctx->num_4x4_blk); for (i = 0; i < 4; i++) @@ -4276,7 +3666,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) && - (mode_index > MIN_EARLY_TERM_INDEX)) { + (ref_index > MIN_EARLY_TERM_INDEX)) { const int qstep = xd->plane[0].dequant[1]; // TODO(debargha): Enhance this by specializing for each mode_index int scale = 4; @@ -4307,11 +3697,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - if (second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_REFERENCE]) { + if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) { best_pred_rd[SINGLE_REFERENCE] = single_rd; - } else if (second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMPOUND_REFERENCE]) { + } else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) { best_pred_rd[COMPOUND_REFERENCE] = single_rd; } if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) @@ -4321,47 +3709,26 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && cm->interp_filter != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS : cm->interp_filter]; int64_t adj_rd; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (ref == INT64_MAX) adj_rd = 0; - else if (cpi->rd_filter_cache[i] == INT64_MAX) + else if (rd_opt->filter_cache[i] == INT64_MAX) // when early termination is triggered, the encoder does not have // access to the rate-distortion cost. it only knows that the cost // should be above the maximum valid value. hence it takes the known // maximum plus an arbitrary constant as the rate-distortion cost. - adj_rd = cpi->mask_filter_rd - ref + 10; + adj_rd = rd_opt->mask_filter - ref + 10; else - adj_rd = cpi->rd_filter_cache[i] - ref; + adj_rd = rd_opt->filter_cache[i] - ref; adj_rd += this_rd; best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); } } - /* keep record of best txfm size */ - if (bsize < BLOCK_32X32) { - if (bsize < BLOCK_16X16) { - tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; - tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; - } - tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; - } - if (!mode_excluded && this_rd != INT64_MAX) { - for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) { - int64_t adj_rd = INT64_MAX; - if (ref_frame > INTRA_FRAME) - adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode]; - else - adj_rd = this_rd; - - if (adj_rd < best_tx_rd[i]) - best_tx_rd[i] = adj_rd; - } - } - if (early_term) break; @@ -4375,19 +3742,17 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // If we used an estimate for the uv intra rd in the loop above... if (cpi->sf.use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. - if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) { - TX_SIZE uv_tx_size; + if (vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME) { *mbmi = best_mbmode; - uv_tx_size = get_uv_tx_size(mbmi); - rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], - &rate_uv_tokenonly[uv_tx_size], - &dist_uv[uv_tx_size], - &skip_uv[uv_tx_size], - BLOCK_8X8, uv_tx_size); + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, + &rate_uv_tokenonly, + &dist_uv, + &skip_uv, + BLOCK_8X8, TX_4X4); } } - if (best_rd == INT64_MAX && bsize < BLOCK_8X8) { + if (best_rd == INT64_MAX) { *returnrate = INT_MAX; *returndistortion = INT64_MAX; return best_rd; @@ -4397,23 +3762,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); - // Updating rd_thresh_freq_fact[] here means that the different - // partition/block sizes are handled independently based on the best - // choice for the current partition. It may well be better to keep a scaled - // best rd so far value and update rd_thresh_freq_fact based on the mode/size - // combination that wins out. - if (cpi->sf.adaptive_rd_thresh) { - for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { - int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index]; - - if (mode_index == best_mode_index) { - *fact -= (*fact >> 3); - } else { - *fact = MIN(*fact + RD_THRESH_INC, - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); - } - } - } + update_rd_thresh_fact(cpi, bsize, best_ref_index); // macroblock modes *mbmi = best_mbmode; @@ -4449,22 +3798,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_filter_diff); } - if (!x->skip) { - for (i = 0; i < TX_MODES; i++) { - if (best_tx_rd[i] == INT64_MAX) - best_tx_diff[i] = 0; - else - best_tx_diff[i] = best_rd - best_tx_rd[i]; - } - } else { - vp9_zero(best_tx_diff); - } - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - store_coding_context(x, ctx, best_mode_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], + store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; |