about summary refs log tree commit diff
path: root/vp9/encoder/vp9_encoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder/vp9_encoder.h')
-rw-r--r-- vp9/encoder/vp9_encoder.h 307
1 files changed, 285 insertions, 22 deletions
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index cca8b53f8..7136f7faa 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -14,9 +14,11 @@
#include <stdio.h>
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vpx_ext_ratectrl.h"
#include "vpx/vp8cx.h"
+#include "vpx/vpx_tpl.h"
#if CONFIG_INTERNAL_STATS
#include "vpx_dsp/ssim.h"
#endif
@@ -91,13 +93,6 @@ typedef enum {
} ENCODE_BREAKOUT_TYPE;
typedef enum {
- NORMAL = 0,
- FOURFIVE = 1,
- THREEFIVE = 2,
- ONETWO = 3
-} VPX_SCALING;
-
-typedef enum {
// Good Quality Fast Encoding. The encoder balances quality with the amount of
// time it takes to encode the output. Speed setting controls how fast.
GOOD,
@@ -336,15 +331,14 @@ typedef struct TplDepFrame {
typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES];
-#endif // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
int8_t mode_map[BLOCK_SIZES][MAX_MODES];
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
// Used for adaptive_rd_thresh with row multithreading
int *row_base_thresh_freq_fact;
+ MV firstpass_top_mv;
} TileDataEnc;
typedef struct RowMTInfo {
@@ -513,6 +507,7 @@ typedef struct EncFrameBuf {
} EncFrameBuf;
// Maximum operating frame buffer size needed for a GOP using ARF reference.
+// This is used to allocate the memory for TPL stats for a GOP.
#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
#define MAX_KMEANS_GROUPS 8
@@ -659,6 +654,72 @@ static INLINE int get_num_unit_4x4(int size) { return (size + 3) >> 2; }
static INLINE int get_num_unit_16x16(int size) { return (size + 15) >> 4; }
#endif // CONFIG_RATE_CTRL
+#if CONFIG_COLLECT_COMPONENT_TIMING
+#include "vpx_ports/vpx_timer.h"
+// To add a component, extend this enum and get_component_name() together.
+typedef enum {
+ vp9_get_compressed_data_time,
+ vp9_temporal_filter_time,
+ vp9_rc_get_second_pass_params_time,
+ setup_tpl_stats_time,
+ Pass2Encode_time,
+
+ encode_with_recode_loop_time,
+ loopfilter_frame_time,
+ vp9_pack_bitstream_time,
+
+ encode_frame_internal_time,
+ rd_pick_partition_time,
+ rd_pick_sb_modes_time,
+ encode_sb_time,
+
+ vp9_rd_pick_inter_mode_sb_time,
+ vp9_rd_pick_inter_mode_sub8x8_time,
+
+ intra_mode_search_time,
+ handle_inter_mode_time,
+ single_motion_search_time,
+ joint_motion_search_time,
+ interp_filter_time,
+
+ kTimingComponents,  // Entry count; keep last. Used as an array length.
+} TIMING_COMPONENT;
+// Returns the identifier of TIMING_COMPONENT value |index| as a string.
+static INLINE char const *get_component_name(int index) {
+ switch (index) {
+ case vp9_get_compressed_data_time: return "vp9_get_compressed_data_time";
+ case vp9_temporal_filter_time: return "vp9_temporal_filter_time";
+ case vp9_rc_get_second_pass_params_time:
+ return "vp9_rc_get_second_pass_params_time";
+ case setup_tpl_stats_time: return "setup_tpl_stats_time";
+ case Pass2Encode_time: return "Pass2Encode_time";
+
+ case encode_with_recode_loop_time: return "encode_with_recode_loop_time";
+ case loopfilter_frame_time: return "loopfilter_frame_time";
+ case vp9_pack_bitstream_time: return "vp9_pack_bitstream_time";
+
+ case encode_frame_internal_time: return "encode_frame_internal_time";
+ case rd_pick_partition_time: return "rd_pick_partition_time";
+ case rd_pick_sb_modes_time: return "rd_pick_sb_modes_time";
+ case encode_sb_time: return "encode_sb_time";
+
+ case vp9_rd_pick_inter_mode_sb_time:
+ return "vp9_rd_pick_inter_mode_sb_time";
+ case vp9_rd_pick_inter_mode_sub8x8_time:
+ return "vp9_rd_pick_inter_mode_sub8x8_time";
+
+ case intra_mode_search_time: return "intra_mode_search_time";
+ case handle_inter_mode_time: return "handle_inter_mode_time";
+ case single_motion_search_time: return "single_motion_search_time";
+ case joint_motion_search_time: return "joint_motion_search_time";
+ case interp_filter_time: return "interp_filter_time";
+
+ default: assert(0);  // No name exists for this index.
+ }
+ return "error";  // Reached only if the assert above does not abort.
+}
+#endif
+
typedef struct VP9_COMP {
FRAME_INFO frame_info;
QUANTS quants;
@@ -685,6 +746,8 @@ typedef struct VP9_COMP {
BLOCK_SIZE tpl_bsize;
TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
+ // Used to store TPL stats before propagation
+ VpxTplGopStats tpl_gop_stats;
YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
EncFrameBuf enc_frame_buf[REF_FRAMES];
#if CONFIG_MULTITHREAD
@@ -784,7 +847,7 @@ typedef struct VP9_COMP {
uint8_t *skin_map;
- // segment threashold for encode breakout
+ // segment threshold for encode breakout
int segment_encode_breakout[MAX_SEGMENTS];
CYCLIC_REFRESH *cyclic_refresh;
@@ -858,12 +921,15 @@ typedef struct VP9_COMP {
// number of MBs in the current frame when the frame is
// scaled.
+ int last_coded_width;
+ int last_coded_height;
+
int use_svc;
SVC svc;
// Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
- diff *source_diff_var;
+ Diff *source_diff_var;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh;
int frames_till_next_var_check;
@@ -973,6 +1039,29 @@ typedef struct VP9_COMP {
EXT_RATECTRL ext_ratectrl;
int fixed_qp_onepass;
+
+ // Flag to keep track of dynamic change in deadline mode
+ // (good/best/realtime).
+ MODE deadline_mode_previous_frame;
+
+ // Flag to disable scene detection when rtc rate control library is used.
+ int disable_scene_detection_rtc_ratectrl;
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ /*!
+ * component_time[] is initialized to zero when the encoder starts.
+ */
+ uint64_t component_time[kTimingComponents];
+ /*!
+ * Stores timing for individual components between calls of start_timing()
+ * and end_timing().
+ */
+ struct vpx_usec_timer component_timer[kTimingComponents];
+ /*!
+ * frame_component_time[] is initialized to zero at the beginning of each frame.
+ */
+ uint64_t frame_component_time[kTimingComponents];
+#endif
} VP9_COMP;
#if CONFIG_RATE_CTRL
@@ -983,7 +1072,7 @@ static INLINE void partition_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int unit_width = get_num_unit_4x4(cpi->frame_info.frame_width);
const int unit_height = get_num_unit_4x4(cpi->frame_info.frame_height);
- CHECK_MEM_ERROR(cm, cpi->partition_info,
+ CHECK_MEM_ERROR(&cm->error, cpi->partition_info,
(PARTITION_INFO *)vpx_calloc(unit_width * unit_height,
sizeof(PARTITION_INFO)));
memset(cpi->partition_info, 0,
@@ -998,8 +1087,8 @@ static INLINE void free_partition_info(struct VP9_COMP *cpi) {
}
static INLINE void reset_mv_info(MOTION_VECTOR_INFO *mv_info) {
- mv_info->ref_frame[0] = NONE;
- mv_info->ref_frame[1] = NONE;
+ mv_info->ref_frame[0] = NO_REF_FRAME;
+ mv_info->ref_frame[1] = NO_REF_FRAME;
mv_info->mv[0].as_int = INVALID_MV;
mv_info->mv[1].as_int = INVALID_MV;
}
@@ -1011,7 +1100,7 @@ static INLINE void motion_vector_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int unit_width = get_num_unit_4x4(cpi->frame_info.frame_width);
const int unit_height = get_num_unit_4x4(cpi->frame_info.frame_height);
- CHECK_MEM_ERROR(cm, cpi->motion_vector_info,
+ CHECK_MEM_ERROR(&cm->error, cpi->motion_vector_info,
(MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,
sizeof(MOTION_VECTOR_INFO)));
memset(cpi->motion_vector_info, 0,
@@ -1030,7 +1119,7 @@ static INLINE void free_motion_vector_info(struct VP9_COMP *cpi) {
static INLINE void tpl_stats_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
CHECK_MEM_ERROR(
- cm, cpi->tpl_stats_info,
+ &cm->error, cpi->tpl_stats_info,
(TplDepStats *)vpx_calloc(MAX_LAG_BUFFERS, sizeof(TplDepStats)));
memset(cpi->tpl_stats_info, 0, MAX_LAG_BUFFERS * sizeof(TplDepStats));
}
@@ -1049,7 +1138,7 @@ static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int unit_width = get_num_unit_16x16(cpi->frame_info.frame_width);
const int unit_height = get_num_unit_16x16(cpi->frame_info.frame_height);
- CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info,
+ CHECK_MEM_ERROR(&cm->error, cpi->fp_motion_vector_info,
(MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,
sizeof(MOTION_VECTOR_INFO)));
}
@@ -1154,8 +1243,8 @@ int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
int cols);
-int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
- VPX_SCALING vert_mode);
+int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING_MODE horiz_mode,
+ VPX_SCALING_MODE vert_mode);
int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
unsigned int height);
@@ -1296,6 +1385,14 @@ void vp9_get_ref_frame_info(FRAME_UPDATE_TYPE update_type, int ref_frame_flags,
void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp9_scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int bd);
+#else
+void vp9_scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
YV12_BUFFER_CONFIG *vp9_svc_twostage_scale(
VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type,
@@ -1380,9 +1477,10 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
-int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
- unsigned int cols, int delta_q[8], int delta_lf[8],
- int skip[8], int ref_frame[8]);
+vpx_codec_err_t vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map,
+ unsigned int rows, unsigned int cols,
+ int delta_q[8], int delta_lf[8], int skip[8],
+ int ref_frame[8]);
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
@@ -1392,6 +1490,171 @@ int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr);
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
+static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {  // Ensure mvs fits cm's mi grid.
+ RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
+ if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
+ new_fb_ptr->mi_cols < cm->mi_cols) {  // No MV array yet, or too small.
+ vpx_free(new_fb_ptr->mvs);  // Old contents are discarded, not copied.
+ CHECK_MEM_ERROR(&cm->error, new_fb_ptr->mvs,
+ (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+ sizeof(*new_fb_ptr->mvs)));  // One MV_REF per mi unit.
+ new_fb_ptr->mi_rows = cm->mi_rows;  // Record the size now allocated.
+ new_fb_ptr->mi_cols = cm->mi_cols;
+ }
+}
+// Sums joint_cost[vp9_get_mv_joint(mv)], comp_cost[0][row], comp_cost[1][col].
+static INLINE int mv_cost(const MV *mv, const int *joint_cost,
+ int *const comp_cost[2]) {
+ assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
+ assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
+ return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
+ comp_cost[1][mv->col];  // Signed index: tables presumably centered; TODO confirm.
+}
+// Cost of (mv - ref) from x's nmv*sadcost tables, scaled by sad_per_bit.
+static INLINE int mvsad_err_cost(const MACROBLOCK *x, const MV *mv,
+ const MV *ref, int sad_per_bit) {
+ MV diff;  // mv - ref, component-wise.
+ diff.row = mv->row - ref->row;
+ diff.col = mv->col - ref->col;
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
+ VP9_PROB_COST_SHIFT);  // The cast makes the multiply unsigned.
+}
+// sad_fn_ptr() between source and prediction at mvp_full, plus its MV cost.
+static INLINE uint32_t get_start_mv_sad(const MACROBLOCK *x, const MV *mvp_full,
+ const MV *ref_mv_full,
+ vpx_sad_fn_t sad_fn_ptr, int sadpb) {
+ const int src_buf_stride = x->plane[0].src.stride;
+ const uint8_t *const src_buf = x->plane[0].src.buf;
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const int pred_buf_stride = xd->plane[0].pre[0].stride;
+ const uint8_t *const pred_buf =  // pre[0] buffer of plane 0, offset by mvp_full.
+ xd->plane[0].pre[0].buf + mvp_full->row * pred_buf_stride + mvp_full->col;
+ uint32_t start_mv_sad =
+ sad_fn_ptr(src_buf, src_buf_stride, pred_buf, pred_buf_stride);
+ start_mv_sad += mvsad_err_cost(x, mvp_full, ref_mv_full, sadpb);  // Cost vs. ref_mv_full.
+
+ return start_mv_sad;
+}
+// 4x4 count from blk_dim to the edge; assumes 1/8-pel mb_to_edge_dim (confirm).
+static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
+ int subsampling_dim, int blk_dim) {
+ return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
+}
+
+// Sum of squares of |diff| over the visible 4x4s; sets *visible_{width,height}.
+static int64_t sum_squares_visible(const MACROBLOCKD *xd,  // NOTE(review): not INLINE, unlike siblings.
+ const struct macroblockd_plane *const pd,
+ const int16_t *diff, const int diff_stride,
+ int blk_row, int blk_col,
+ const BLOCK_SIZE plane_bsize,
+ const BLOCK_SIZE tx_bsize,
+ int *visible_width, int *visible_height) {  // Outputs, in samples.
+ int64_t sse;
+ const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
+ const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
+ const int b4x4s_to_right_edge = num_4x4_to_edge(
+ plane_4x4_w, xd->mb_to_right_edge, pd->subsampling_x, blk_col);
+ const int b4x4s_to_bottom_edge = num_4x4_to_edge(
+ plane_4x4_h, xd->mb_to_bottom_edge, pd->subsampling_y, blk_row);
+ if (tx_bsize == BLOCK_4X4 ||
+ (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
+ assert(tx_4x4_w == tx_4x4_h);  // The call below takes a single size.
+ sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_4x4_w << 2);
+ *visible_width = tx_4x4_w << 2;  // Whole transform block counted.
+ *visible_height = tx_4x4_h << 2;
+ } else {
+ int r, c;
+ const int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
+ const int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
+ sse = 0;
+ // Part of the block is in the unrestricted motion border (UMV).
+ for (r = 0; r < max_r; ++r) {
+ // Skip visiting the sub blocks that are wholly within the UMV.
+ for (c = 0; c < max_c; ++c) {
+ sse += (int64_t)vpx_sum_squares_2d_i16(
+ diff + r * diff_stride * 4 + c * 4, diff_stride, 4);
+ }
+ }
+ *visible_width = max_c << 2;  // 4x4 units to samples.
+ *visible_height = max_r << 2;  // Assumes max_r, max_c >= 0; TODO confirm.
+ }
+ return sse;
+}
+
+// Returns nonzero if trellis coefficient optimization should run on the block.
+static INLINE int do_trellis_opt(const struct macroblockd_plane *pd,
+ const int16_t *src_diff, int diff_stride,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {  // arg is read as struct encode_b_args *.
+ const struct encode_b_args *const args = (struct encode_b_args *)arg;
+ const MACROBLOCK *const x = args->x;
+
+ switch (args->enable_trellis_opt) {
+ case DISABLE_TRELLIS_OPT: return 0;
+ case ENABLE_TRELLIS_OPT: return 1;
+ case ENABLE_TRELLIS_OPT_TX_RD_SRC_VAR: {
+ vpx_clear_system_state();  // Presumably needed before FP math; confirm.
+
+ return (args->trellis_opt_thresh > 0.0)
+ ? (x->log_block_src_var <= args->trellis_opt_thresh)
+ : 1;  // Non-positive threshold: always enable.
+ }
+ case ENABLE_TRELLIS_OPT_TX_RD_RESIDUAL_MSE: {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int dequant_shift =
+ (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
+#else
+ const int dequant_shift = 3;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const int qstep = pd->dequant[1] >> dequant_shift;
+ int *sse_calc_done = args->sse_calc_done;
+ int64_t *sse = args->sse;
+ int visible_width = 0, visible_height = 0;
+
+ // TODO: Enable the sf (speed feature) for the high bit-depth case.
+ if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) || !sse ||
+ !sse_calc_done)
+ return 1;  // High bit depth or no SSE output slots: enable.
+
+ *sse = sum_squares_visible(xd, pd, src_diff, diff_stride, blk_row,
+ blk_col, plane_bsize, tx_bsize, &visible_width,
+ &visible_height);
+ *sse_calc_done = 1;  // Flags that *sse now holds a value.
+
+ vpx_clear_system_state();
+
+ return (*(sse) <= (int64_t)visible_width * visible_height * qstep *
+ qstep * args->trellis_opt_thresh);  // SSE <= area * qstep^2 * thresh.
+ }
+ default: assert(0 && "Invalid trellis optimization method."); return 1;
+ }
+}
+
+#if CONFIG_COLLECT_COMPONENT_TIMING
+static INLINE void start_timing(VP9_COMP *cpi, int component) {
+ vpx_usec_timer_start(&cpi->component_timer[component]);  // Start of interval; index unchecked.
+}
+static INLINE void end_timing(VP9_COMP *cpi, int component) {
+ vpx_usec_timer_mark(&cpi->component_timer[component]);  // End of interval.
+ cpi->frame_component_time[component] +=  // Accumulates over repeated intervals.
+ vpx_usec_timer_elapsed(&cpi->component_timer[component]);
+}
+static INLINE char const *get_frame_type_enum(int type) {  // Frame type as text.
+ switch (type) {
+ case 0: return "KEY_FRAME";
+ case 1: return "INTER_FRAME";
+ default: assert(0);  // Only 0 and 1 have names.
+ }
+ return "error";  // Reached only if the assert above does not abort.
+}
+#endif
+
#ifdef __cplusplus
} // extern "C"
#endif