diff options
Diffstat (limited to 'vp9/encoder/vp9_encoder.h')
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 307 |
1 files changed, 285 insertions, 22 deletions
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index cca8b53f8..7136f7faa 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -14,9 +14,11 @@ #include <stdio.h> #include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vpx_ext_ratectrl.h" #include "vpx/vp8cx.h" +#include "vpx/vpx_tpl.h" #if CONFIG_INTERNAL_STATS #include "vpx_dsp/ssim.h" #endif @@ -91,13 +93,6 @@ typedef enum { } ENCODE_BREAKOUT_TYPE; typedef enum { - NORMAL = 0, - FOURFIVE = 1, - THREEFIVE = 2, - ONETWO = 3 -} VPX_SCALING; - -typedef enum { // Good Quality Fast Encoding. The encoder balances quality with the amount of // time it takes to encode the output. Speed setting controls how fast. GOOD, @@ -336,15 +331,14 @@ typedef struct TplDepFrame { typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; -#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES]; -#endif // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL int8_t mode_map[BLOCK_SIZES][MAX_MODES]; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; // Used for adaptive_rd_thresh with row multithreading int *row_base_thresh_freq_fact; + MV firstpass_top_mv; } TileDataEnc; typedef struct RowMTInfo { @@ -513,6 +507,7 @@ typedef struct EncFrameBuf { } EncFrameBuf; // Maximum operating frame buffer size needed for a GOP using ARF reference. +// This is used to allocate the memory for TPL stats for a GOP. #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) #define MAX_KMEANS_GROUPS 8 @@ -659,6 +654,72 @@ static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; } static INLINE int get_num_unit_16x16(int size) { return (size + 15) >> 4; } #endif // CONFIG_RATE_CTRL +#if CONFIG_COLLECT_COMPONENT_TIMING +#include "vpx_ports/vpx_timer.h" +// Adjust the following to add new components. +typedef enum { + vp9_get_compressed_data_time, + vp9_temporal_filter_time, + vp9_rc_get_second_pass_params_time, + setup_tpl_stats_time, + Pass2Encode_time, + + encode_with_recode_loop_time, + loopfilter_frame_time, + vp9_pack_bitstream_time, + + encode_frame_internal_time, + rd_pick_partition_time, + rd_pick_sb_modes_time, + encode_sb_time, + + vp9_rd_pick_inter_mode_sb_time, + vp9_rd_pick_inter_mode_sub8x8_time, + + intra_mode_search_time, + handle_inter_mode_time, + single_motion_search_time, + joint_motion_search_time, + interp_filter_time, + + kTimingComponents, +} TIMING_COMPONENT; + +static INLINE char const *get_component_name(int index) { + switch (index) { + case vp9_get_compressed_data_time: return "vp9_get_compressed_data_time"; + case vp9_temporal_filter_time: return "vp9_temporal_filter_time"; + case vp9_rc_get_second_pass_params_time: + return "vp9_rc_get_second_pass_params_time"; + case setup_tpl_stats_time: return "setup_tpl_stats_time"; + case Pass2Encode_time: return "Pass2Encode_time"; + + case encode_with_recode_loop_time: return "encode_with_recode_loop_time"; + case loopfilter_frame_time: return "loopfilter_frame_time"; + case vp9_pack_bitstream_time: return "vp9_pack_bitstream_time"; + + case encode_frame_internal_time: return "encode_frame_internal_time"; + case rd_pick_partition_time: return "rd_pick_partition_time"; + case rd_pick_sb_modes_time: return "rd_pick_sb_modes_time"; + case encode_sb_time: return "encode_sb_time"; + + case vp9_rd_pick_inter_mode_sb_time: + return "vp9_rd_pick_inter_mode_sb_time"; + case vp9_rd_pick_inter_mode_sub8x8_time: + return "vp9_rd_pick_inter_mode_sub8x8_time"; + + case intra_mode_search_time: return "intra_mode_search_time"; + case handle_inter_mode_time: return "handle_inter_mode_time"; + case single_motion_search_time: return "single_motion_search_time"; + case joint_motion_search_time: return "joint_motion_search_time"; + case interp_filter_time: return "interp_filter_time"; + + default: assert(0); + } + return "error"; +} +#endif + typedef struct VP9_COMP { FRAME_INFO frame_info; QUANTS quants; @@ -685,6 +746,8 @@ typedef struct VP9_COMP { BLOCK_SIZE tpl_bsize; TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE]; + // Used to store TPL stats before propagation + VpxTplGopStats tpl_gop_stats; YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; EncFrameBuf enc_frame_buf[REF_FRAMES]; #if CONFIG_MULTITHREAD @@ -784,7 +847,7 @@ typedef struct VP9_COMP { uint8_t *skin_map; - // segment threashold for encode breakout + // segment threshold for encode breakout int segment_encode_breakout[MAX_SEGMENTS]; CYCLIC_REFRESH *cyclic_refresh; @@ -858,12 +921,15 @@ typedef struct VP9_COMP { // number of MBs in the current frame when the frame is // scaled. + int last_coded_width; + int last_coded_height; + int use_svc; SVC svc; // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type. - diff *source_diff_var; + Diff *source_diff_var; // The threshold used in SOURCE_VAR_BASED_PARTITION search type. unsigned int source_var_thresh; int frames_till_next_var_check; @@ -973,6 +1039,29 @@ typedef struct VP9_COMP { EXT_RATECTRL ext_ratectrl; int fixed_qp_onepass; + + // Flag to keep track of dynamic change in deadline mode + // (good/best/realtime). + MODE deadline_mode_previous_frame; + + // Flag to disable scene detection when rtc rate control library is used. + int disable_scene_detection_rtc_ratectrl; + +#if CONFIG_COLLECT_COMPONENT_TIMING + /*! + * component_time[] are initialized to zero while encoder starts. + */ + uint64_t component_time[kTimingComponents]; + /*! + * Stores timing for individual components between calls of start_timing() + * and end_timing(). + */ + struct vpx_usec_timer component_timer[kTimingComponents]; + /*! + * frame_component_time[] are initialized to zero at beginning of each frame. + */ + uint64_t frame_component_time[kTimingComponents]; +#endif } VP9_COMP; #if CONFIG_RATE_CTRL @@ -983,7 +1072,7 @@ static INLINE void partition_info_init(struct VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int unit_width = get_num_unit_4x4(cpi->frame_info.frame_width); const int unit_height = get_num_unit_4x4(cpi->frame_info.frame_height); - CHECK_MEM_ERROR(cm, cpi->partition_info, + CHECK_MEM_ERROR(&cm->error, cpi->partition_info, (PARTITION_INFO *)vpx_calloc(unit_width * unit_height, sizeof(PARTITION_INFO))); memset(cpi->partition_info, 0, @@ -998,8 +1087,8 @@ static INLINE void free_partition_info(struct VP9_COMP *cpi) { } static INLINE void reset_mv_info(MOTION_VECTOR_INFO *mv_info) { - mv_info->ref_frame[0] = NONE; - mv_info->ref_frame[1] = NONE; + mv_info->ref_frame[0] = NO_REF_FRAME; + mv_info->ref_frame[1] = NO_REF_FRAME; mv_info->mv[0].as_int = INVALID_MV; mv_info->mv[1].as_int = INVALID_MV; } @@ -1011,7 +1100,7 @@ static INLINE void motion_vector_info_init(struct VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int unit_width = get_num_unit_4x4(cpi->frame_info.frame_width); const int unit_height = get_num_unit_4x4(cpi->frame_info.frame_height); - CHECK_MEM_ERROR(cm, cpi->motion_vector_info, + CHECK_MEM_ERROR(&cm->error, cpi->motion_vector_info, (MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height, sizeof(MOTION_VECTOR_INFO))); memset(cpi->motion_vector_info, 0, @@ -1030,7 +1119,7 @@ static INLINE void free_motion_vector_info(struct VP9_COMP *cpi) { static INLINE void tpl_stats_info_init(struct VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; CHECK_MEM_ERROR( - cm, cpi->tpl_stats_info, + &cm->error, cpi->tpl_stats_info, (TplDepStats *)vpx_calloc(MAX_LAG_BUFFERS, sizeof(TplDepStats))); memset(cpi->tpl_stats_info, 0, MAX_LAG_BUFFERS * sizeof(TplDepStats)); } @@ -1049,7 +1138,7 @@ static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int unit_width = get_num_unit_16x16(cpi->frame_info.frame_width); const int unit_height = get_num_unit_16x16(cpi->frame_info.frame_height); - CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info, + CHECK_MEM_ERROR(&cm->error, cpi->fp_motion_vector_info, (MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height, sizeof(MOTION_VECTOR_INFO))); } @@ -1154,8 +1243,8 @@ int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int cols); -int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, - VPX_SCALING vert_mode); +int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING_MODE horiz_mode, + VPX_SCALING_MODE vert_mode); int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height); @@ -1296,6 +1385,14 @@ void vp9_get_ref_frame_info(FRAME_UPDATE_TYPE update_type, int ref_frame_flags, void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, int bd); +#else +void vp9_scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst); +#endif // CONFIG_VP9_HIGHBITDEPTH + YV12_BUFFER_CONFIG *vp9_svc_twostage_scale( VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type, @@ -1380,9 +1477,10 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width, VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec); -int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows, - unsigned int cols, int delta_q[8], int delta_lf[8], - int skip[8], int ref_frame[8]); +vpx_codec_err_t vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, + unsigned int rows, unsigned int cols, + int delta_q[8], int delta_lf[8], int skip[8], + int ref_frame[8]); void vp9_new_framerate(VP9_COMP *cpi, double framerate); @@ -1392,6 +1490,171 @@ int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr); #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) +static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) { + RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx]; + if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows || + new_fb_ptr->mi_cols < cm->mi_cols) { + vpx_free(new_fb_ptr->mvs); + CHECK_MEM_ERROR(&cm->error, new_fb_ptr->mvs, + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*new_fb_ptr->mvs))); + new_fb_ptr->mi_rows = cm->mi_rows; + new_fb_ptr->mi_cols = cm->mi_cols; + } +} + +static INLINE int mv_cost(const MV *mv, const int *joint_cost, + int *const comp_cost[2]) { + assert(mv->row >= -MV_MAX && mv->row < MV_MAX); + assert(mv->col >= -MV_MAX && mv->col < MV_MAX); + return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + + comp_cost[1][mv->col]; +} + +static INLINE int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, + const MV *ref, int sad_per_bit) { + MV diff; + diff.row = mv->row - ref->row; + diff.col = mv->col - ref->col; + return ROUND_POWER_OF_TWO( + (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit, + VP9_PROB_COST_SHIFT); +} + +static INLINE uint32_t get_start_mv_sad(const MACROBLOCK *x, const MV *mvp_full, + const MV *ref_mv_full, + vpx_sad_fn_t sad_fn_ptr, int sadpb) { + const int src_buf_stride = x->plane[0].src.stride; + const uint8_t *const src_buf = x->plane[0].src.buf; + const MACROBLOCKD *const xd = &x->e_mbd; + const int pred_buf_stride = xd->plane[0].pre[0].stride; + const uint8_t *const pred_buf = + xd->plane[0].pre[0].buf + mvp_full->row * pred_buf_stride + mvp_full->col; + uint32_t start_mv_sad = + sad_fn_ptr(src_buf, src_buf_stride, pred_buf, pred_buf_stride); + start_mv_sad += mvsad_err_cost(x, mvp_full, ref_mv_full, sadpb); + + return start_mv_sad; +} + +static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim, + int subsampling_dim, int blk_dim) { + return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim; +} + +// Compute the sum of squares on all visible 4x4s in the transform block. +static int64_t sum_squares_visible(const MACROBLOCKD *xd, + const struct macroblockd_plane *const pd, + const int16_t *diff, const int diff_stride, + int blk_row, int blk_col, + const BLOCK_SIZE plane_bsize, + const BLOCK_SIZE tx_bsize, + int *visible_width, int *visible_height) { + int64_t sse; + const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize]; + const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize]; + const int b4x4s_to_right_edge = num_4x4_to_edge( + plane_4x4_w, xd->mb_to_right_edge, pd->subsampling_x, blk_col); + const int b4x4s_to_bottom_edge = num_4x4_to_edge( + plane_4x4_h, xd->mb_to_bottom_edge, pd->subsampling_y, blk_row); + if (tx_bsize == BLOCK_4X4 || + (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { + assert(tx_4x4_w == tx_4x4_h); + sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_4x4_w << 2); + *visible_width = tx_4x4_w << 2; + *visible_height = tx_4x4_h << 2; + } else { + int r, c; + const int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); + const int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w); + sse = 0; + // if we are in the unrestricted motion border. + for (r = 0; r < max_r; ++r) { + // Skip visiting the sub blocks that are wholly within the UMV. + for (c = 0; c < max_c; ++c) { + sse += (int64_t)vpx_sum_squares_2d_i16( + diff + r * diff_stride * 4 + c * 4, diff_stride, 4); + } + } + *visible_width = max_c << 2; + *visible_height = max_r << 2; + } + return sse; +} + +// Check if trellis coefficient optimization of the transform block is enabled. +static INLINE int do_trellis_opt(const struct macroblockd_plane *pd, + const int16_t *src_diff, int diff_stride, + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + void *arg) { + const struct encode_b_args *const args = (struct encode_b_args *)arg; + const MACROBLOCK *const x = args->x; + + switch (args->enable_trellis_opt) { + case DISABLE_TRELLIS_OPT: return 0; + case ENABLE_TRELLIS_OPT: return 1; + case ENABLE_TRELLIS_OPT_TX_RD_SRC_VAR: { + vpx_clear_system_state(); + + return (args->trellis_opt_thresh > 0.0) + ? (x->log_block_src_var <= args->trellis_opt_thresh) + : 1; + } + case ENABLE_TRELLIS_OPT_TX_RD_RESIDUAL_MSE: { + const MACROBLOCKD *const xd = &x->e_mbd; + const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; +#if CONFIG_VP9_HIGHBITDEPTH + const int dequant_shift = + (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3; +#else + const int dequant_shift = 3; +#endif // CONFIG_VP9_HIGHBITDEPTH + const int qstep = pd->dequant[1] >> dequant_shift; + int *sse_calc_done = args->sse_calc_done; + int64_t *sse = args->sse; + int visible_width = 0, visible_height = 0; + + // TODO: Enable the sf for high bit-depth case + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) || !sse || + !sse_calc_done) + return 1; + + *sse = sum_squares_visible(xd, pd, src_diff, diff_stride, blk_row, + blk_col, plane_bsize, tx_bsize, &visible_width, + &visible_height); + *sse_calc_done = 1; + + vpx_clear_system_state(); + + return (*(sse) <= (int64_t)visible_width * visible_height * qstep * + qstep * args->trellis_opt_thresh); + } + default: assert(0 && "Invalid trellis optimization method."); return 1; + } +} + +#if CONFIG_COLLECT_COMPONENT_TIMING +static INLINE void start_timing(VP9_COMP *cpi, int component) { + vpx_usec_timer_start(&cpi->component_timer[component]); +} +static INLINE void end_timing(VP9_COMP *cpi, int component) { + vpx_usec_timer_mark(&cpi->component_timer[component]); + cpi->frame_component_time[component] += + vpx_usec_timer_elapsed(&cpi->component_timer[component]); +} +static INLINE char const *get_frame_type_enum(int type) { + switch (type) { + case 0: return "KEY_FRAME"; + case 1: return "INTER_FRAME"; + default: assert(0); + } + return "error"; +} +#endif + #ifdef __cplusplus } // extern "C" #endif |